diff --git a/sys/amd64/amd64/bpf_jit_machdep.c b/sys/amd64/amd64/bpf_jit_machdep.c index 6a5793eb81a..fe861d29227 100644 --- a/sys/amd64/amd64/bpf_jit_machdep.c +++ b/sys/amd64/amd64/bpf_jit_machdep.c @@ -419,62 +419,77 @@ bpf_jit_compile(struct bpf_insn *prog, u_int nins, size_t *size) break; case BPF_JMP|BPF_JA: - JMP(stream.refs[stream.bpf_pc + ins->k] - - stream.refs[stream.bpf_pc]); + JUMP(ins->k); break; case BPF_JMP|BPF_JGT|BPF_K: - if (ins->jt == 0 && ins->jf == 0) + if (ins->jt == ins->jf) { + JUMP(ins->jt); break; + } CMPid(ins->k, EAX); JCC(JA, JBE); break; case BPF_JMP|BPF_JGE|BPF_K: - if (ins->jt == 0 && ins->jf == 0) + if (ins->jt == ins->jf) { + JUMP(ins->jt); break; + } CMPid(ins->k, EAX); JCC(JAE, JB); break; case BPF_JMP|BPF_JEQ|BPF_K: - if (ins->jt == 0 && ins->jf == 0) + if (ins->jt == ins->jf) { + JUMP(ins->jt); break; + } CMPid(ins->k, EAX); JCC(JE, JNE); break; case BPF_JMP|BPF_JSET|BPF_K: - if (ins->jt == 0 && ins->jf == 0) + if (ins->jt == ins->jf) { + JUMP(ins->jt); break; + } TESTid(ins->k, EAX); JCC(JNE, JE); break; case BPF_JMP|BPF_JGT|BPF_X: - if (ins->jt == 0 && ins->jf == 0) + if (ins->jt == ins->jf) { + JUMP(ins->jt); break; + } CMPrd(EDX, EAX); JCC(JA, JBE); break; case BPF_JMP|BPF_JGE|BPF_X: - if (ins->jt == 0 && ins->jf == 0) + if (ins->jt == ins->jf) { + JUMP(ins->jt); break; + } CMPrd(EDX, EAX); JCC(JAE, JB); break; case BPF_JMP|BPF_JEQ|BPF_X: - if (ins->jt == 0 && ins->jf == 0) + if (ins->jt == ins->jf) { + JUMP(ins->jt); break; + } CMPrd(EDX, EAX); JCC(JE, JNE); break; case BPF_JMP|BPF_JSET|BPF_X: - if (ins->jt == 0 && ins->jf == 0) + if (ins->jt == ins->jf) { + JUMP(ins->jt); break; + } TESTrd(EDX, EAX); JCC(JNE, JE); break; diff --git a/sys/amd64/amd64/bpf_jit_machdep.h b/sys/amd64/amd64/bpf_jit_machdep.h index aa7f342fb63..01c251f6db9 100644 --- a/sys/amd64/amd64/bpf_jit_machdep.h +++ b/sys/amd64/amd64/bpf_jit_machdep.h @@ -473,4 +473,10 @@ typedef void (*emit_func)(bpf_bin_stream *stream, u_int value, u_int n); } \ } 
while (0) +#define JUMP(off) do { \ + if ((off) != 0) \ + JMP(stream.refs[stream.bpf_pc + (off)] - \ + stream.refs[stream.bpf_pc]); \ +} while (0) + #endif /* _BPF_JIT_MACHDEP_H_ */ diff --git a/sys/amd64/amd64/exception.S b/sys/amd64/amd64/exception.S index bbc304a48d6..7b1fed87a76 100644 --- a/sys/amd64/amd64/exception.S +++ b/sys/amd64/amd64/exception.S @@ -50,14 +50,14 @@ .bss .globl dtrace_invop_jump_addr .align 8 - .type dtrace_invop_jump_addr, @object - .size dtrace_invop_jump_addr, 8 + .type dtrace_invop_jump_addr,@object + .size dtrace_invop_jump_addr,8 dtrace_invop_jump_addr: .zero 8 .globl dtrace_invop_calltrap_addr .align 8 - .type dtrace_invop_calltrap_addr, @object - .size dtrace_invop_calltrap_addr, 8 + .type dtrace_invop_calltrap_addr,@object + .size dtrace_invop_calltrap_addr,8 dtrace_invop_calltrap_addr: .zero 8 #endif @@ -157,7 +157,6 @@ IDTVEC(align) * kernel from userland. Reenable interrupts if they were enabled * before the trap. This approximates SDT_SYS386TGT on the i386 port. */ - SUPERALIGN_TEXT .globl alltraps .type alltraps,@function @@ -211,16 +210,16 @@ alltraps_pushregs_no_rdi: * Set our jump address for the jump back in the event that * the breakpoint wasn't caused by DTrace at all. */ - movq $calltrap, dtrace_invop_calltrap_addr(%rip) + movq $calltrap,dtrace_invop_calltrap_addr(%rip) /* Jump to the code hooked in by DTrace. */ - movq dtrace_invop_jump_addr, %rax + movq dtrace_invop_jump_addr,%rax jmpq *dtrace_invop_jump_addr #endif .globl calltrap .type calltrap,@function calltrap: - movq %rsp, %rdi + movq %rsp,%rdi call trap MEXITCOUNT jmp doreti /* Handle any pending ASTs */ @@ -274,9 +273,11 @@ IDTVEC(dblfault) testb $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? 
*/ jz 1f /* already running with kernel GS.base */ swapgs -1: movq %rsp, %rdi +1: + movq %rsp,%rdi call dblfault_handler -2: hlt +2: + hlt jmp 2b IDTVEC(page) @@ -369,7 +370,7 @@ IDTVEC(fast_syscall) movq %r15,TF_R15(%rsp) /* C preserved */ movl $TF_HASSEGS,TF_FLAGS(%rsp) FAKE_MCOUNT(TF_RIP(%rsp)) - movq %rsp, %rdi + movq %rsp,%rdi call syscall movq PCPU(CURPCB),%rax andq $~PCB_FULLCTX,PCB_FLAGS(%rax) @@ -456,7 +457,7 @@ nmi_fromuserspace: /* Note: this label is also used by ddb and gdb: */ nmi_calltrap: FAKE_MCOUNT(TF_RIP(%rsp)) - movq %rsp, %rdi + movq %rsp,%rdi call trap MEXITCOUNT #ifdef HWPMC_HOOKS @@ -555,9 +556,9 @@ nmi_restoreregs: iretq ENTRY(fork_trampoline) - movq %r12, %rdi /* function */ - movq %rbx, %rsi /* arg1 */ - movq %rsp, %rdx /* trapframe pointer */ + movq %r12,%rdi /* function */ + movq %rbx,%rsi /* arg1 */ + movq %rsp,%rdx /* trapframe pointer */ call fork_exit MEXITCOUNT jmp doreti /* Handle any ASTs */ @@ -628,7 +629,7 @@ doreti_ast: testl $TDF_ASTPENDING | TDF_NEEDRESCHED,TD_FLAGS(%rax) je doreti_exit sti - movq %rsp, %rdi /* pass a pointer to the trapframe */ + movq %rsp,%rdi /* pass a pointer to the trapframe */ call ast jmp doreti_ast @@ -648,8 +649,8 @@ doreti_exit: * Do not reload segment registers for kernel. * Since we do not reload segments registers with sane * values on kernel entry, descriptors referenced by - * segments registers may be not valid. This is fatal - * for the usermode, but is innocent for the kernel. + * segments registers might be not valid. This is fatal + * for user mode, but is not a problem for the kernel. 
*/ testb $SEL_RPL_MASK,TF_CS(%rsp) jz ld_regs @@ -662,14 +663,16 @@ do_segs: /* Restore %fs and fsbase */ movw TF_FS(%rsp),%ax .globl ld_fs -ld_fs: movw %ax,%fs +ld_fs: + movw %ax,%fs cmpw $KUF32SEL,%ax jne 1f movl $MSR_FSBASE,%ecx movl PCB_FSBASE(%r8),%eax movl PCB_FSBASE+4(%r8),%edx .globl ld_fsbase -ld_fsbase: wrmsr +ld_fsbase: + wrmsr 1: /* Restore %gs and gsbase */ movw TF_GS(%rsp),%si @@ -678,7 +681,8 @@ ld_fsbase: wrmsr movl $MSR_GSBASE,%ecx rdmsr .globl ld_gs -ld_gs: movw %si,%gs +ld_gs: + movw %si,%gs wrmsr popfq cmpw $KUG32SEL,%si @@ -687,12 +691,17 @@ ld_gs: movw %si,%gs movl PCB_GSBASE(%r8),%eax movl PCB_GSBASE+4(%r8),%edx .globl ld_gsbase -ld_gsbase: wrmsr -1: .globl ld_es -ld_es: movw TF_ES(%rsp),%es +ld_gsbase: + wrmsr +1: + .globl ld_es +ld_es: + movw TF_ES(%rsp),%es .globl ld_ds -ld_ds: movw TF_DS(%rsp),%ds -ld_regs:movq TF_RDI(%rsp),%rdi +ld_ds: + movw TF_DS(%rsp),%ds +ld_regs: + movq TF_RDI(%rsp),%rdi movq TF_RSI(%rsp),%rsi movq TF_RDX(%rsp),%rdx movq TF_RCX(%rsp),%rcx @@ -711,7 +720,8 @@ ld_regs:movq TF_RDI(%rsp),%rdi jz 1f /* keep running with kernel GS.base */ cli swapgs -1: addq $TF_RIP,%rsp /* skip over tf_err, tf_trapno */ +1: + addq $TF_RIP,%rsp /* skip over tf_err, tf_trapno */ .globl doreti_iret doreti_iret: iretq @@ -738,7 +748,8 @@ doreti_iret_fault: testl $PSL_I,TF_RFLAGS(%rsp) jz 1f sti -1: movw %fs,TF_FS(%rsp) +1: + movw %fs,TF_FS(%rsp) movw %gs,TF_GS(%rsp) movw %es,TF_ES(%rsp) movw %ds,TF_DS(%rsp) @@ -768,7 +779,7 @@ doreti_iret_fault: .globl ds_load_fault ds_load_fault: movl $T_PROTFLT,TF_TRAPNO(%rsp) - movq %rsp, %rdi + movq %rsp,%rdi call trap movw $KUDSEL,TF_DS(%rsp) jmp doreti @@ -777,7 +788,7 @@ ds_load_fault: .globl es_load_fault es_load_fault: movl $T_PROTFLT,TF_TRAPNO(%rsp) - movq %rsp, %rdi + movq %rsp,%rdi call trap movw $KUDSEL,TF_ES(%rsp) jmp doreti @@ -786,7 +797,7 @@ es_load_fault: .globl fs_load_fault fs_load_fault: movl $T_PROTFLT,TF_TRAPNO(%rsp) - movq %rsp, %rdi + movq %rsp,%rdi call trap movw 
$KUF32SEL,TF_FS(%rsp) jmp doreti @@ -796,7 +807,7 @@ fs_load_fault: gs_load_fault: popfq movl $T_PROTFLT,TF_TRAPNO(%rsp) - movq %rsp, %rdi + movq %rsp,%rdi call trap movw $KUG32SEL,TF_GS(%rsp) jmp doreti @@ -805,7 +816,7 @@ gs_load_fault: .globl fsbase_load_fault fsbase_load_fault: movl $T_PROTFLT,TF_TRAPNO(%rsp) - movq %rsp, %rdi + movq %rsp,%rdi call trap movq PCPU(CURTHREAD),%r8 movq TD_PCB(%r8),%r8 @@ -815,9 +826,8 @@ fsbase_load_fault: ALIGN_TEXT .globl gsbase_load_fault gsbase_load_fault: - popfq movl $T_PROTFLT,TF_TRAPNO(%rsp) - movq %rsp, %rdi + movq %rsp,%rdi call trap movq PCPU(CURTHREAD),%r8 movq TD_PCB(%r8),%r8 diff --git a/sys/amd64/amd64/identcpu.c b/sys/amd64/amd64/identcpu.c index c23102a5a2f..52e7568f7c0 100644 --- a/sys/amd64/amd64/identcpu.c +++ b/sys/amd64/amd64/identcpu.c @@ -240,7 +240,7 @@ printcpuinfo(void) printf("\n Features2=0x%b", cpu_feature2, "\020" "\001SSE3" /* SSE3 */ - "\002" + "\002PCLMULQDQ" /* Carry-Less Mul Quadword */ "\003DTES64" /* 64-bit Debug Trace */ "\004MON" /* MONITOR/MWAIT Instructions */ "\005DS_CPL" /* CPL Qualified Debug Store */ @@ -264,7 +264,7 @@ printcpuinfo(void) "\027MOVBE" "\030POPCNT" "\031" - "\032" + "\032AESNI" /* AES Crypto*/ "\033XSAVE" "\034OSXSAVE" "\035" diff --git a/sys/amd64/amd64/io.c b/sys/amd64/amd64/io.c index 09d6e896582..c2d0d515555 100644 --- a/sys/amd64/amd64/io.c +++ b/sys/amd64/amd64/io.c @@ -28,60 +28,32 @@ __FBSDID("$FreeBSD$"); #include -#include -#include -#include -#include -#include -#include #include -#include -#include -#include #include -#include -#include - -#include -#include - #include +#include -/* ARGSUSED */ int -ioopen(struct cdev *dev __unused, int flags __unused, int fmt __unused, - struct thread *td) +iodev_open(struct thread *td) { - int error; - - error = priv_check(td, PRIV_IO); - if (error != 0) - return (error); - error = securelevel_gt(td->td_ucred, 0); - if (error != 0) - return (error); td->td_frame->tf_rflags |= PSL_IOPL; - return (0); } -/* ARGSUSED */ int 
-ioclose(struct cdev *dev __unused, int flags __unused, int fmt __unused, - struct thread *td) +iodev_close(struct thread *td) { + td->td_frame->tf_rflags &= ~PSL_IOPL; - return (0); } /* ARGSUSED */ int -ioioctl(struct cdev *dev __unused, u_long cmd __unused, caddr_t data __unused, - int fflag __unused, struct thread *td __unused) +iodev_ioctl(u_long cmd __unused, caddr_t data __unused) { - return (ENXIO); + return (ENOIOCTL); } diff --git a/sys/amd64/amd64/local_apic.c b/sys/amd64/amd64/local_apic.c index 8edc971c95c..c8f60f06675 100644 --- a/sys/amd64/amd64/local_apic.c +++ b/sys/amd64/amd64/local_apic.c @@ -70,7 +70,7 @@ __FBSDID("$FreeBSD$"); #ifdef KDTRACE_HOOKS #include -cyclic_clock_func_t lapic_cyclic_clock_func[MAXCPU]; +cyclic_clock_func_t cyclic_clock_func[MAXCPU]; #endif /* Sanity checks on IDT vectors. */ @@ -778,8 +778,8 @@ lapic_handle_timer(struct trapframe *frame) * timers. */ int cpu = PCPU_GET(cpuid); - if (lapic_cyclic_clock_func[cpu] != NULL) - (*lapic_cyclic_clock_func[cpu])(frame); + if (cyclic_clock_func[cpu] != NULL) + (*cyclic_clock_func[cpu])(frame); #endif /* Fire hardclock at hz. 
*/ diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c index 4b3f0413191..3cf15d1d4fe 100644 --- a/sys/amd64/amd64/pmap.c +++ b/sys/amd64/amd64/pmap.c @@ -236,6 +236,7 @@ static void pmap_fill_ptp(pt_entry_t *firstpte, pt_entry_t newpte); static void pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte); static void pmap_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva); static boolean_t pmap_is_modified_pvh(struct md_page *pvh); +static boolean_t pmap_is_referenced_pvh(struct md_page *pvh); static void pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode); static vm_page_t pmap_lookup_pt_page(pmap_t pmap, vm_offset_t va); static void pmap_pde_attr(pd_entry_t *pde, int cache_bits); @@ -283,7 +284,7 @@ pmap_kmem_choose(vm_offset_t addr) vm_offset_t newaddr = addr; newaddr = (addr + (NBPDR - 1)) & ~(NBPDR - 1); - return newaddr; + return (newaddr); } /********************/ @@ -294,7 +295,7 @@ pmap_kmem_choose(vm_offset_t addr) static __inline vm_pindex_t pmap_pde_pindex(vm_offset_t va) { - return va >> PDRSHIFT; + return (va >> PDRSHIFT); } @@ -353,7 +354,7 @@ pmap_pdpe(pmap_t pmap, vm_offset_t va) pml4e = pmap_pml4e(pmap, va); if ((*pml4e & PG_V) == 0) - return NULL; + return (NULL); return (pmap_pml4e_to_pdpe(pml4e, va)); } @@ -375,7 +376,7 @@ pmap_pde(pmap_t pmap, vm_offset_t va) pdpe = pmap_pdpe(pmap, va); if (pdpe == NULL || (*pdpe & PG_V) == 0) - return NULL; + return (NULL); return (pmap_pdpe_to_pde(pdpe, va)); } @@ -397,12 +398,27 @@ pmap_pte(pmap_t pmap, vm_offset_t va) pde = pmap_pde(pmap, va); if (pde == NULL || (*pde & PG_V) == 0) - return NULL; + return (NULL); if ((*pde & PG_PS) != 0) /* compat with i386 pmap_pte() */ return ((pt_entry_t *)pde); return (pmap_pde_to_pte(pde, va)); } +static __inline void +pmap_resident_count_inc(pmap_t pmap, int count) +{ + + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + pmap->pm_stats.resident_count += count; +} + +static __inline void +pmap_resident_count_dec(pmap_t pmap, int count) +{ + + PMAP_LOCK_ASSERT(pmap, 
MA_OWNED); + pmap->pm_stats.resident_count -= count; +} PMAP_INLINE pt_entry_t * vtopte(vm_offset_t va) @@ -777,7 +793,6 @@ static u_long pmap_pdpe_demotions; SYSCTL_ULONG(_vm_pmap_pdpe, OID_AUTO, demotions, CTLFLAG_RD, &pmap_pdpe_demotions, 0, "1GB page demotions"); - /*************************************************** * Low level helper routines..... ***************************************************/ @@ -1184,15 +1199,20 @@ pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot) { pd_entry_t pde, *pdep; pt_entry_t pte; + vm_paddr_t pa; vm_page_t m; + pa = 0; m = NULL; - vm_page_lock_queues(); PMAP_LOCK(pmap); +retry: pdep = pmap_pde(pmap, va); if (pdep != NULL && (pde = *pdep)) { if (pde & PG_PS) { if ((pde & PG_RW) || (prot & VM_PROT_WRITE) == 0) { + if (vm_page_pa_tryrelock(pmap, (pde & PG_PS_FRAME) | + (va & PDRMASK), &pa)) + goto retry; m = PHYS_TO_VM_PAGE((pde & PG_PS_FRAME) | (va & PDRMASK)); vm_page_hold(m); @@ -1201,12 +1221,14 @@ pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot) pte = *pmap_pde_to_pte(pdep, va); if ((pte & PG_V) && ((pte & PG_RW) || (prot & VM_PROT_WRITE) == 0)) { + if (vm_page_pa_tryrelock(pmap, pte & PG_FRAME, &pa)) + goto retry; m = PHYS_TO_VM_PAGE(pte & PG_FRAME); vm_page_hold(m); } } } - vm_page_unlock_queues(); + PA_UNLOCK_COND(pa); PMAP_UNLOCK(pmap); return (m); } @@ -1236,7 +1258,7 @@ pmap_kextract(vm_offset_t va) pa = (pa & PG_FRAME) | (va & PAGE_MASK); } } - return pa; + return (pa); } /*************************************************** @@ -1466,9 +1488,9 @@ pmap_unwire_pte_hold(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_page_t *free) --m->wire_count; if (m->wire_count == 0) - return _pmap_unwire_pte_hold(pmap, va, m, free); + return (_pmap_unwire_pte_hold(pmap, va, m, free)); else - return 0; + return (0); } static int @@ -1476,6 +1498,7 @@ _pmap_unwire_pte_hold(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_page_t *free) { + PMAP_LOCK_ASSERT(pmap, MA_OWNED); /* * unmap the page table page */ @@ 
-1495,7 +1518,7 @@ _pmap_unwire_pte_hold(pmap_t pmap, vm_offset_t va, vm_page_t m, pd = pmap_pde(pmap, va); *pd = 0; } - --pmap->pm_stats.resident_count; + pmap_resident_count_dec(pmap, 1); if (m->pindex < NUPDE) { /* We just released a PT, unhold the matching PD */ vm_page_t pdpg; @@ -1524,7 +1547,7 @@ _pmap_unwire_pte_hold(pmap_t pmap, vm_offset_t va, vm_page_t m, */ pmap_add_delayed_free_list(m, free, TRUE); - return 1; + return (1); } /* @@ -1537,10 +1560,10 @@ pmap_unuse_pt(pmap_t pmap, vm_offset_t va, pd_entry_t ptepde, vm_page_t *free) vm_page_t mpte; if (va >= VM_MAXUSER_ADDRESS) - return 0; + return (0); KASSERT(ptepde != 0, ("pmap_unuse_pt: ptepde != 0")); mpte = PHYS_TO_VM_PAGE(ptepde & PG_FRAME); - return pmap_unwire_pte_hold(pmap, va, mpte, free); + return (pmap_unwire_pte_hold(pmap, va, mpte, free)); } void @@ -1612,6 +1635,7 @@ _pmap_allocpte(pmap_t pmap, vm_pindex_t ptepindex, int flags) (flags & (M_NOWAIT | M_WAITOK)) == M_WAITOK, ("_pmap_allocpte: flags is neither M_NOWAIT nor M_WAITOK")); + PMAP_LOCK_ASSERT(pmap, MA_OWNED); /* * Allocate a page table page. 
*/ @@ -1729,9 +1753,9 @@ _pmap_allocpte(pmap_t pmap, vm_pindex_t ptepindex, int flags) *pd = VM_PAGE_TO_PHYS(m) | PG_U | PG_RW | PG_V | PG_A | PG_M; } - pmap->pm_stats.resident_count++; + pmap_resident_count_inc(pmap, 1); - return m; + return (m); } static vm_page_t @@ -2036,7 +2060,7 @@ pmap_collect(pmap_t locked_pmap, struct vpgqueues *vpq) PMAP_LOCK(pmap); else if (pmap != locked_pmap && !PMAP_TRYLOCK(pmap)) continue; - pmap->pm_stats.resident_count--; + pmap_resident_count_dec(pmap, 1); pde = pmap_pde(pmap, va); KASSERT((*pde & PG_PS) == 0, ("pmap_collect: found" " a 2mpage in page %p's pv list", m)); @@ -2437,7 +2461,7 @@ pmap_demote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va) return (FALSE); } if (va < VM_MAXUSER_ADDRESS) - pmap->pm_stats.resident_count++; + pmap_resident_count_inc(pmap, 1); } mptepa = VM_PAGE_TO_PHYS(mpte); firstpte = (pt_entry_t *)PHYS_TO_DMAP(mptepa); @@ -2529,7 +2553,7 @@ pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva, */ if (oldpde & PG_G) pmap_invalidate_page(kernel_pmap, sva); - pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE; + pmap_resident_count_dec(pmap, NBPDR / PAGE_SIZE); if (oldpde & PG_MANAGED) { pvh = pa_to_pvh(oldpde & PG_PS_FRAME); pmap_pvh_free(pvh, pmap, sva); @@ -2552,7 +2576,7 @@ pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva, mpte = pmap_lookup_pt_page(pmap, sva); if (mpte != NULL) { pmap_remove_pt_page(pmap, mpte); - pmap->pm_stats.resident_count--; + pmap_resident_count_dec(pmap, 1); KASSERT(mpte->wire_count == NPTEPG, ("pmap_remove_pde: pte page wire count error")); mpte->wire_count = 0; @@ -2583,7 +2607,7 @@ pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t va, */ if (oldpte & PG_G) pmap_invalidate_page(kernel_pmap, va); - pmap->pm_stats.resident_count -= 1; + pmap_resident_count_dec(pmap, 1); if (oldpte & PG_MANAGED) { m = PHYS_TO_VM_PAGE(oldpte & PG_FRAME); if ((oldpte & (PG_M | PG_RW)) == (PG_M | PG_RW)) @@ -2775,9 +2799,9 @@ pmap_remove_all(vm_page_t m) 
mtx_assert(&vm_page_queue_mtx, MA_OWNED); pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); while ((pv = TAILQ_FIRST(&pvh->pv_list)) != NULL) { - va = pv->pv_va; pmap = PV_PMAP(pv); PMAP_LOCK(pmap); + va = pv->pv_va; pde = pmap_pde(pmap, va); (void)pmap_demote_pde(pmap, pde, va); PMAP_UNLOCK(pmap); @@ -2785,7 +2809,7 @@ pmap_remove_all(vm_page_t m) while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); - pmap->pm_stats.resident_count--; + pmap_resident_count_dec(pmap, 1); pde = pmap_pde(pmap, pv->pv_va); KASSERT((*pde & PG_PS) == 0, ("pmap_remove_all: found" " a 2mpage in page %p's pv list", m)); @@ -2832,18 +2856,9 @@ retry: if (oldpde & PG_MANAGED) { eva = sva + NBPDR; for (va = sva, m = PHYS_TO_VM_PAGE(oldpde & PG_PS_FRAME); - va < eva; va += PAGE_SIZE, m++) { - /* - * In contrast to the analogous operation on a 4KB page - * mapping, the mapping's PG_A flag is not cleared and - * the page's PG_REFERENCED flag is not set. The - * reason is that pmap_demote_pde() expects that a 2MB - * page mapping with a stored page table page has PG_A - * set. 
- */ + va < eva; va += PAGE_SIZE, m++) if ((oldpde & (PG_M | PG_RW)) == (PG_M | PG_RW)) vm_page_dirty(m); - } } if ((prot & VM_PROT_WRITE) == 0) newpde &= ~(PG_RW | PG_M); @@ -2952,23 +2967,15 @@ retry: obits = pbits = *pte; if ((pbits & PG_V) == 0) continue; - if (pbits & PG_MANAGED) { - m = NULL; - if (pbits & PG_A) { + + if ((prot & VM_PROT_WRITE) == 0) { + if ((pbits & (PG_MANAGED | PG_M | PG_RW)) == + (PG_MANAGED | PG_M | PG_RW)) { m = PHYS_TO_VM_PAGE(pbits & PG_FRAME); - vm_page_flag_set(m, PG_REFERENCED); - pbits &= ~PG_A; - } - if ((pbits & (PG_M | PG_RW)) == (PG_M | PG_RW)) { - if (m == NULL) - m = PHYS_TO_VM_PAGE(pbits & - PG_FRAME); vm_page_dirty(m); } - } - - if ((prot & VM_PROT_WRITE) == 0) pbits &= ~(PG_RW | PG_M); + } if ((prot & VM_PROT_EXECUTE) == 0) pbits |= pg_nx; @@ -3142,9 +3149,8 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_prot_t access, vm_page_t m, * In the case that a page table page is not * resident, we are creating it here. */ - if (va < VM_MAXUSER_ADDRESS) { + if (va < VM_MAXUSER_ADDRESS) mpte = pmap_allocpte(pmap, va, M_WAITOK); - } pde = pmap_pde(pmap, va); if (pde != NULL && (*pde & PG_V) != 0) { @@ -3208,7 +3214,7 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_prot_t access, vm_page_t m, " va: 0x%lx", va)); } } else - pmap->pm_stats.resident_count++; + pmap_resident_count_inc(pmap, 1); /* * Enter on the PV list if part of our managed memory. @@ -3342,7 +3348,7 @@ pmap_enter_pde(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot) /* * Increment counters. */ - pmap->pm_stats.resident_count += NBPDR / PAGE_SIZE; + pmap_resident_count_inc(pmap, NBPDR / PAGE_SIZE); /* * Map the superpage. 
@@ -3392,7 +3398,7 @@ pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end, mpte); m = TAILQ_NEXT(m, listq); } - PMAP_UNLOCK(pmap); + PMAP_UNLOCK(pmap); } /* @@ -3496,7 +3502,7 @@ pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, /* * Increment counters */ - pmap->pm_stats.resident_count++; + pmap_resident_count_inc(pmap, 1); pa = VM_PAGE_TO_PHYS(m) | pmap_cache_bits(m->md.pat_mode, 0); if ((prot & VM_PROT_EXECUTE) == 0) @@ -3509,7 +3515,7 @@ pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, pte_store(pte, pa | PG_V | PG_U); else pte_store(pte, pa | PG_V | PG_U | PG_MANAGED); - return mpte; + return (mpte); } /* @@ -3601,8 +3607,7 @@ pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object, if ((*pde & PG_V) == 0) { pde_store(pde, pa | PG_PS | PG_M | PG_A | PG_U | PG_RW | PG_V); - pmap->pm_stats.resident_count += NBPDR / - PAGE_SIZE; + pmap_resident_count_inc(pmap, NBPDR / PAGE_SIZE); pmap_pde_mappings++; } else { /* Continue on if the PDE is already valid. 
*/ @@ -3669,8 +3674,6 @@ out: PMAP_UNLOCK(pmap); } - - /* * Copy the range specified by src_addr/len * from the source map to the range dst_addr/len @@ -3745,8 +3748,7 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len, pmap_pv_insert_pde(dst_pmap, addr, srcptepaddr & PG_PS_FRAME))) { *pde = srcptepaddr & ~PG_W; - dst_pmap->pm_stats.resident_count += - NBPDR / PAGE_SIZE; + pmap_resident_count_inc(dst_pmap, NBPDR / PAGE_SIZE); } else dstmpde->wire_count--; continue; @@ -3789,7 +3791,7 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len, */ *dst_pte = ptetemp & ~(PG_W | PG_M | PG_A); - dst_pmap->pm_stats.resident_count++; + pmap_resident_count_inc(dst_pmap, 1); } else { free = NULL; if (pmap_unwire_pte_hold(dst_pmap, @@ -3886,12 +3888,12 @@ pmap_page_exists_quick(pmap_t pmap, vm_page_t m) int loops = 0; if (m->flags & PG_FICTITIOUS) - return FALSE; + return (FALSE); mtx_assert(&vm_page_queue_mtx, MA_OWNED); TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { if (PV_PMAP(pv) == pmap) { - return TRUE; + return (TRUE); } loops++; if (loops >= 16) @@ -3959,16 +3961,15 @@ pmap_pvh_wired_mappings(struct md_page *pvh, int count) boolean_t pmap_page_is_mapped(vm_page_t m) { - struct md_page *pvh; + boolean_t rv; if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0) return (FALSE); - mtx_assert(&vm_page_queue_mtx, MA_OWNED); - if (TAILQ_EMPTY(&m->md.pv_list)) { - pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); - return (!TAILQ_EMPTY(&pvh->pv_list)); - } else - return (TRUE); + vm_page_lock_queues(); + rv = !TAILQ_EMPTY(&m->md.pv_list) || + !TAILQ_EMPTY(&pa_to_pvh(VM_PAGE_TO_PHYS(m))->pv_list); + vm_page_unlock_queues(); + return (rv); } /* @@ -4062,7 +4063,7 @@ pmap_remove_pages(pmap_t pmap) pv_entry_count--; pc->pc_map[field] |= bitmask; if ((tpte & PG_PS) != 0) { - pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE; + pmap_resident_count_dec(pmap, NBPDR / PAGE_SIZE); pvh = pa_to_pvh(tpte & PG_PS_FRAME); 
TAILQ_REMOVE(&pvh->pv_list, pv, pv_list); if (TAILQ_EMPTY(&pvh->pv_list)) { @@ -4073,7 +4074,7 @@ pmap_remove_pages(pmap_t pmap) mpte = pmap_lookup_pt_page(pmap, pv->pv_va); if (mpte != NULL) { pmap_remove_pt_page(pmap, mpte); - pmap->pm_stats.resident_count--; + pmap_resident_count_dec(pmap, 1); KASSERT(mpte->wire_count == NPTEPG, ("pmap_remove_pages: pte page wire count error")); mpte->wire_count = 0; @@ -4081,7 +4082,7 @@ pmap_remove_pages(pmap_t pmap) atomic_subtract_int(&cnt.v_wire_count, 1); } } else { - pmap->pm_stats.resident_count--; + pmap_resident_count_dec(pmap, 1); TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); if (TAILQ_EMPTY(&m->md.pv_list)) { pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); @@ -4177,6 +4178,49 @@ pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr) return (rv); } +/* + * pmap_is_referenced: + * + * Return whether or not the specified physical page was referenced + * in any physical maps. + */ +boolean_t +pmap_is_referenced(vm_page_t m) +{ + + if (m->flags & PG_FICTITIOUS) + return (FALSE); + if (pmap_is_referenced_pvh(&m->md)) + return (TRUE); + return (pmap_is_referenced_pvh(pa_to_pvh(VM_PAGE_TO_PHYS(m)))); +} + +/* + * Returns TRUE if any of the given mappings were referenced and FALSE + * otherwise. Both page and 2mpage mappings are supported. + */ +static boolean_t +pmap_is_referenced_pvh(struct md_page *pvh) +{ + pv_entry_t pv; + pt_entry_t *pte; + pmap_t pmap; + boolean_t rv; + + mtx_assert(&vm_page_queue_mtx, MA_OWNED); + rv = FALSE; + TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) { + pmap = PV_PMAP(pv); + PMAP_LOCK(pmap); + pte = pmap_pte(pmap, pv->pv_va); + rv = (*pte & (PG_A | PG_V)) == (PG_A | PG_V); + PMAP_UNLOCK(pmap); + if (rv) + break; + } + return (rv); +} + /* * Clear the write and modified bits in each of the given page's mappings. 
*/ @@ -4196,9 +4240,9 @@ pmap_remove_write(vm_page_t m) mtx_assert(&vm_page_queue_mtx, MA_OWNED); pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_list, next_pv) { - va = pv->pv_va; pmap = PV_PMAP(pv); PMAP_LOCK(pmap); + va = pv->pv_va; pde = pmap_pde(pmap, va); if ((*pde & PG_RW) != 0) (void)pmap_demote_pde(pmap, pde, va); @@ -4254,9 +4298,9 @@ pmap_ts_referenced(vm_page_t m) mtx_assert(&vm_page_queue_mtx, MA_OWNED); pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_list, pvn) { - va = pv->pv_va; pmap = PV_PMAP(pv); PMAP_LOCK(pmap); + va = pv->pv_va; pde = pmap_pde(pmap, va); oldpde = *pde; if ((oldpde & PG_A) != 0) { @@ -4326,9 +4370,9 @@ pmap_clear_modify(vm_page_t m) mtx_assert(&vm_page_queue_mtx, MA_OWNED); pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_list, next_pv) { - va = pv->pv_va; pmap = PV_PMAP(pv); PMAP_LOCK(pmap); + va = pv->pv_va; pde = pmap_pde(pmap, va); oldpde = *pde; if ((oldpde & PG_RW) != 0) { @@ -4391,9 +4435,9 @@ pmap_clear_reference(vm_page_t m) mtx_assert(&vm_page_queue_mtx, MA_OWNED); pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_list, next_pv) { - va = pv->pv_va; pmap = PV_PMAP(pv); PMAP_LOCK(pmap); + va = pv->pv_va; pde = pmap_pde(pmap, va); oldpde = *pde; if ((oldpde & PG_A) != 0) { @@ -4864,7 +4908,7 @@ pmap_mincore(pmap_t pmap, vm_offset_t addr) if (pte != 0) { val |= MINCORE_INCORE; if ((pte & PG_MANAGED) == 0) - return val; + return (val); m = PHYS_TO_VM_PAGE(pa); @@ -4893,14 +4937,12 @@ pmap_mincore(pmap_t pmap, vm_offset_t addr) */ vm_page_lock_queues(); if ((m->flags & PG_REFERENCED) || - pmap_ts_referenced(m)) { + pmap_is_referenced(m)) val |= MINCORE_REFERENCED_OTHER; - vm_page_flag_set(m, PG_REFERENCED); - } vm_page_unlock_queues(); } } - return val; + return (val); } void diff --git a/sys/amd64/amd64/trap.c b/sys/amd64/amd64/trap.c index 8acde03560c..f3dba9459ed 100644 --- a/sys/amd64/amd64/trap.c +++ 
b/sys/amd64/amd64/trap.c @@ -172,52 +172,6 @@ SYSCTL_INT(_machdep, OID_AUTO, prot_fault_translation, CTLFLAG_RW, extern char *syscallnames[]; -/* #define DEBUG 1 */ -#ifdef DEBUG -static void -report_seg_fault(const char *segn, struct trapframe *frame) -{ - struct proc_ldt *pldt; - struct trapframe *pf; - - pldt = curproc->p_md.md_ldt; - printf("%d: %s load fault %lx %p %d\n", - curproc->p_pid, segn, frame->tf_err, - pldt != NULL ? pldt->ldt_base : NULL, - pldt != NULL ? pldt->ldt_refcnt : 0); - kdb_backtrace(); - pf = (struct trapframe *)frame->tf_rsp; - printf("rdi %lx\n", pf->tf_rdi); - printf("rsi %lx\n", pf->tf_rsi); - printf("rdx %lx\n", pf->tf_rdx); - printf("rcx %lx\n", pf->tf_rcx); - printf("r8 %lx\n", pf->tf_r8); - printf("r9 %lx\n", pf->tf_r9); - printf("rax %lx\n", pf->tf_rax); - printf("rbx %lx\n", pf->tf_rbx); - printf("rbp %lx\n", pf->tf_rbp); - printf("r10 %lx\n", pf->tf_r10); - printf("r11 %lx\n", pf->tf_r11); - printf("r12 %lx\n", pf->tf_r12); - printf("r13 %lx\n", pf->tf_r13); - printf("r14 %lx\n", pf->tf_r14); - printf("r15 %lx\n", pf->tf_r15); - printf("fs %x\n", pf->tf_fs); - printf("gs %x\n", pf->tf_gs); - printf("es %x\n", pf->tf_es); - printf("ds %x\n", pf->tf_ds); - printf("tno %x\n", pf->tf_trapno); - printf("adr %lx\n", pf->tf_addr); - printf("flg %x\n", pf->tf_flags); - printf("err %lx\n", pf->tf_err); - printf("rip %lx\n", pf->tf_rip); - printf("cs %lx\n", pf->tf_cs); - printf("rfl %lx\n", pf->tf_rflags); - printf("rsp %lx\n", pf->tf_rsp); - printf("ss %lx\n", pf->tf_ss); -} -#endif - /* * Exception, fault, and trap interface to the FreeBSD kernel. * This common code is called from assembly language IDT gate entry @@ -314,9 +268,7 @@ trap(struct trapframe *frame) */ printf("kernel trap %d with interrupts disabled\n", type); -#ifdef DEBUG - report_seg_fault("hlt", frame); -#endif + /* * We shouldn't enable interrupts while holding a * spin lock or servicing an NMI. 
@@ -535,33 +487,21 @@ trap(struct trapframe *frame) goto out; } if (frame->tf_rip == (long)ld_ds) { -#ifdef DEBUG - report_seg_fault("ds", frame); -#endif frame->tf_rip = (long)ds_load_fault; frame->tf_ds = _udatasel; goto out; } if (frame->tf_rip == (long)ld_es) { -#ifdef DEBUG - report_seg_fault("es", frame); -#endif frame->tf_rip = (long)es_load_fault; frame->tf_es = _udatasel; goto out; } if (frame->tf_rip == (long)ld_fs) { -#ifdef DEBUG - report_seg_fault("fs", frame); -#endif frame->tf_rip = (long)fs_load_fault; frame->tf_fs = _ufssel; goto out; } if (frame->tf_rip == (long)ld_gs) { -#ifdef DEBUG - report_seg_fault("gs", frame); -#endif frame->tf_rip = (long)gs_load_fault; frame->tf_gs = _ugssel; goto out; @@ -667,30 +607,6 @@ trap(struct trapframe *frame) ksi.ksi_addr = (void *)addr; trapsignal(td, &ksi); -#ifdef DEBUG -{ - register_t rg,rgk, rf; - - if (type <= MAX_TRAP_MSG) { - uprintf("fatal process exception: %s", - trap_msg[type]); - if ((type == T_PAGEFLT) || (type == T_PROTFLT)) - uprintf(", fault VA = 0x%lx", frame->tf_addr); - uprintf("\n"); - } - rf = rdmsr(0xc0000100); - rg = rdmsr(0xc0000101); - rgk = rdmsr(0xc0000102); - uprintf("pid %d TRAP %d rip %lx err %lx addr %lx cs %lx ss %lx ds %x " - "es %x fs %x fsbase %lx %lx gs %x gsbase %lx %lx %lx\n", - curproc->p_pid, type, frame->tf_rip, frame->tf_err, - frame->tf_addr, - frame->tf_cs, frame->tf_ss, frame->tf_ds, frame->tf_es, - frame->tf_fs, td->td_pcb->pcb_fsbase, rf, - frame->tf_gs, td->td_pcb->pcb_gsbase, rg, rgk); -} -#endif - user: userret(td, frame); mtx_assert(&Giant, MA_NOTOWNED); diff --git a/sys/amd64/conf/GENERIC b/sys/amd64/conf/GENERIC index 0d5cad781e4..b5a1930dbba 100644 --- a/sys/amd64/conf/GENERIC +++ b/sys/amd64/conf/GENERIC @@ -224,6 +224,7 @@ device pcn # AMD Am79C97x PCI 10/100 (precedence over 'le') device re # RealTek 8139C+/8169/8169S/8110S device rl # RealTek 8129/8139 device sf # Adaptec AIC-6915 (``Starfire'') +device sge # Silicon Integrated Systems SiS190/191 device 
sis # Silicon Integrated Systems SiS 900/SiS 7016 device sk # SysKonnect SK-984x & SK-982x gigabit Ethernet device ste # Sundance ST201 (D-Link DFE-550TX) @@ -280,6 +281,7 @@ device firmware # firmware assist module device bpf # Berkeley packet filter # USB support +options USB_DEBUG # enable debug msgs device uhci # UHCI PCI->USB interface device ohci # OHCI PCI->USB interface device ehci # EHCI PCI->USB interface (USB 2.0) diff --git a/sys/amd64/ia32/ia32_signal.c b/sys/amd64/ia32/ia32_signal.c index 8033443ffad..859b9923f68 100644 --- a/sys/amd64/ia32/ia32_signal.c +++ b/sys/amd64/ia32/ia32_signal.c @@ -141,9 +141,11 @@ ia32_get_mcontext(struct thread *td, struct ia32_mcontext *mcp, int flags) mcp->mc_esi = tp->tf_rsi; mcp->mc_ebp = tp->tf_rbp; mcp->mc_isp = tp->tf_rsp; + mcp->mc_eflags = tp->tf_rflags; if (flags & GET_MC_CLEAR_RET) { mcp->mc_eax = 0; mcp->mc_edx = 0; + mcp->mc_eflags &= ~PSL_C; } else { mcp->mc_eax = tp->tf_rax; mcp->mc_edx = tp->tf_rdx; @@ -152,7 +154,6 @@ ia32_get_mcontext(struct thread *td, struct ia32_mcontext *mcp, int flags) mcp->mc_ecx = tp->tf_rcx; mcp->mc_eip = tp->tf_rip; mcp->mc_cs = tp->tf_cs; - mcp->mc_eflags = tp->tf_rflags; mcp->mc_esp = tp->tf_rsp; mcp->mc_ss = tp->tf_ss; mcp->mc_len = sizeof(*mcp); diff --git a/sys/amd64/include/iodev.h b/sys/amd64/include/iodev.h index 1a0a17a9561..9f53cacff00 100644 --- a/sys/amd64/include/iodev.h +++ b/sys/amd64/include/iodev.h @@ -25,7 +25,22 @@ * * $FreeBSD$ */ +#ifndef _MACHINE_IODEV_H_ +#define _MACHINE_IODEV_H_ -d_open_t ioopen; -d_close_t ioclose; -d_ioctl_t ioioctl; +#ifdef _KERNEL +#include + +#define iodev_read_1 inb +#define iodev_read_2 inw +#define iodev_read_4 inl +#define iodev_write_1 outb +#define iodev_write_2 outw +#define iodev_write_4 outl + +int iodev_open(struct thread *td); +int iodev_close(struct thread *td); +int iodev_ioctl(u_long cmd, caddr_t data); + +#endif /* _KERNEL */ +#endif /* _MACHINE_IODEV_H_ */ diff --git a/sys/amd64/include/pmap.h 
b/sys/amd64/include/pmap.h index 11a5628082f..6f0b188e9b6 100644 --- a/sys/amd64/include/pmap.h +++ b/sys/amd64/include/pmap.h @@ -245,6 +245,8 @@ struct pmap { pml4_entry_t *pm_pml4; /* KVA of level 4 page table */ TAILQ_HEAD(,pv_chunk) pm_pvchunk; /* list of mappings in pmap */ u_int pm_active; /* active on cpus */ + uint32_t pm_gen_count; /* generation count (pmap lock dropped) */ + u_int pm_retries; /* spare u_int here due to padding */ struct pmap_statistics pm_stats; /* pmap statistics */ vm_page_t pm_root; /* spare page table pages */ diff --git a/sys/amd64/include/proc.h b/sys/amd64/include/proc.h index 33d5181d2d5..acea4c0a77f 100644 --- a/sys/amd64/include/proc.h +++ b/sys/amd64/include/proc.h @@ -53,6 +53,9 @@ struct mdproc { struct system_segment_descriptor md_ldt_sd; }; +#define KINFO_PROC_SIZE 1088 +#define KINFO_PROC32_SIZE 768 + #ifdef _KERNEL /* Get the current kernel thread stack usage. */ diff --git a/sys/amd64/include/specialreg.h b/sys/amd64/include/specialreg.h index 86a08cec931..895619cf6f7 100644 --- a/sys/amd64/include/specialreg.h +++ b/sys/amd64/include/specialreg.h @@ -113,6 +113,7 @@ #define CPUID_PBE 0x80000000 #define CPUID2_SSE3 0x00000001 +#define CPUID2_PCLMULQDQ 0x00000002 #define CPUID2_DTES64 0x00000004 #define CPUID2_MON 0x00000008 #define CPUID2_DS_CPL 0x00000010 @@ -131,6 +132,7 @@ #define CPUID2_X2APIC 0x00200000 #define CPUID2_MOVBE 0x00400000 #define CPUID2_POPCNT 0x00800000 +#define CPUID2_AESNI 0x02000000 /* * Important bits in the AMD extended cpuid flags diff --git a/sys/amd64/include/vmparam.h b/sys/amd64/include/vmparam.h index 0b5004c6818..6dbe3713767 100644 --- a/sys/amd64/include/vmparam.h +++ b/sys/amd64/include/vmparam.h @@ -145,6 +145,10 @@ #define VM_LEVEL_0_ORDER 9 #endif +#ifdef SMP +#define PA_LOCK_COUNT 256 +#endif + /* * Virtual addresses of things. Derived from the page directory and * page table indexes from pmap.h for precision. 
diff --git a/sys/amd64/include/xen/xenfunc.h b/sys/amd64/include/xen/xenfunc.h index b3a6672576c..d03d4f685e2 100644 --- a/sys/amd64/include/xen/xenfunc.h +++ b/sys/amd64/include/xen/xenfunc.h @@ -1,6 +1,5 @@ -/* - * - * Copyright (c) 2004,2005 Kip Macy +/*- + * Copyright (c) 2004, 2005 Kip Macy * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -11,22 +10,22 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. The name of the author may not be used to endorse or promote products - * derived from this software without specific prior written permission. * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ */ - #ifndef _XEN_XENFUNC_H_ #define _XEN_XENFUNC_H_ diff --git a/sys/amd64/include/xen/xenvar.h b/sys/amd64/include/xen/xenvar.h index 1433b76871e..d9dbc5d9186 100644 --- a/sys/amd64/include/xen/xenvar.h +++ b/sys/amd64/include/xen/xenvar.h @@ -1,29 +1,27 @@ -/* +/*- * Copyright (c) 2008 Kip Macy * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: - * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. - * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
* * $FreeBSD$ */ diff --git a/sys/arm/arm/cpufunc.c b/sys/arm/arm/cpufunc.c index 2f4c3a1f58a..09c20264819 100644 --- a/sys/arm/arm/cpufunc.c +++ b/sys/arm/arm/cpufunc.c @@ -783,69 +783,66 @@ struct cpu_functions xscalec3_cpufuncs = { #endif /* CPU_XSCALE_81342 */ -#if defined(CPU_FA526) +#if defined(CPU_FA526) || defined(CPU_FA626TE) struct cpu_functions fa526_cpufuncs = { /* CPU functions */ - .cf_id = cpufunc_id, - .cf_cpwait = cpufunc_nullop, + cpufunc_id, /* id */ + cpufunc_nullop, /* cpwait */ /* MMU functions */ - .cf_control = cpufunc_control, - .cf_domains = cpufunc_domains, - .cf_setttb = fa526_setttb, - .cf_faultstatus = cpufunc_faultstatus, - .cf_faultaddress = cpufunc_faultaddress, + cpufunc_control, /* control */ + cpufunc_domains, /* domain */ + fa526_setttb, /* setttb */ + cpufunc_faultstatus, /* faultstatus */ + cpufunc_faultaddress, /* faultaddress */ /* TLB functions */ - .cf_tlb_flushID = armv4_tlb_flushID, - .cf_tlb_flushID_SE = fa526_tlb_flushID_SE, - .cf_tlb_flushI = armv4_tlb_flushI, - .cf_tlb_flushI_SE = fa526_tlb_flushI_SE, - .cf_tlb_flushD = armv4_tlb_flushD, - .cf_tlb_flushD_SE = armv4_tlb_flushD_SE, + armv4_tlb_flushID, /* tlb_flushID */ + fa526_tlb_flushID_SE, /* tlb_flushID_SE */ + armv4_tlb_flushI, /* tlb_flushI */ + fa526_tlb_flushI_SE, /* tlb_flushI_SE */ + armv4_tlb_flushD, /* tlb_flushD */ + armv4_tlb_flushD_SE, /* tlb_flushD_SE */ /* Cache operations */ - .cf_icache_sync_all = fa526_icache_sync_all, - .cf_icache_sync_range = fa526_icache_sync_range, + fa526_icache_sync_all, /* icache_sync_all */ + fa526_icache_sync_range, /* icache_sync_range */ - .cf_dcache_wbinv_all = fa526_dcache_wbinv_all, - .cf_dcache_wbinv_range = fa526_dcache_wbinv_range, - .cf_dcache_inv_range = fa526_dcache_inv_range, - .cf_dcache_wb_range = fa526_dcache_wb_range, - - .cf_idcache_wbinv_all = fa526_idcache_wbinv_all, - .cf_idcache_wbinv_range = fa526_idcache_wbinv_range, - - - .cf_l2cache_wbinv_all = cpufunc_nullop, - .cf_l2cache_wbinv_range = (void 
*)cpufunc_nullop, - .cf_l2cache_inv_range = (void *)cpufunc_nullop, - .cf_l2cache_wb_range = (void *)cpufunc_nullop, + fa526_dcache_wbinv_all, /* dcache_wbinv_all */ + fa526_dcache_wbinv_range, /* dcache_wbinv_range */ + fa526_dcache_inv_range, /* dcache_inv_range */ + fa526_dcache_wb_range, /* dcache_wb_range */ + fa526_idcache_wbinv_all, /* idcache_wbinv_all */ + fa526_idcache_wbinv_range, /* idcache_wbinv_range */ + cpufunc_nullop, /* l2cache_wbinv_all */ + (void *)cpufunc_nullop, /* l2cache_wbinv_range */ + (void *)cpufunc_nullop, /* l2cache_inv_range */ + (void *)cpufunc_nullop, /* l2cache_wb_range */ /* Other functions */ - .cf_flush_prefetchbuf = fa526_flush_prefetchbuf, - .cf_drain_writebuf = armv4_drain_writebuf, - .cf_flush_brnchtgt_C = cpufunc_nullop, - .cf_flush_brnchtgt_E = fa526_flush_brnchtgt_E, + fa526_flush_prefetchbuf, /* flush_prefetchbuf */ + armv4_drain_writebuf, /* drain_writebuf */ + cpufunc_nullop, /* flush_brnchtgt_C */ + fa526_flush_brnchtgt_E, /* flush_brnchtgt_E */ - .cf_sleep = fa526_cpu_sleep, + fa526_cpu_sleep, /* sleep */ /* Soft functions */ - .cf_dataabt_fixup = cpufunc_null_fixup, - .cf_prefetchabt_fixup = cpufunc_null_fixup, + cpufunc_null_fixup, /* dataabt_fixup */ + cpufunc_null_fixup, /* prefetchabt_fixup */ - .cf_context_switch = fa526_context_switch, + fa526_context_switch, /* context_switch */ - .cf_setup = fa526_setup -}; -#endif /* CPU_FA526 */ + fa526_setup /* cpu setup */ +}; +#endif /* CPU_FA526 || CPU_FA626TE */ /* @@ -856,11 +853,11 @@ struct cpu_functions cpufuncs; u_int cputype; u_int cpu_reset_needs_v4_MMU_disable; /* flag used in locore.s */ -#if defined(CPU_ARM7TDMI) || defined(CPU_ARM8) || defined(CPU_ARM9) || \ - defined (CPU_ARM9E) || defined (CPU_ARM10) || \ - defined(CPU_XSCALE_80200) || defined(CPU_XSCALE_80321) || \ - defined(CPU_XSCALE_PXA2X0) || defined(CPU_XSCALE_IXP425) || \ - defined(CPU_FA526) || \ +#if defined(CPU_ARM7TDMI) || defined(CPU_ARM8) || defined(CPU_ARM9) || \ + defined (CPU_ARM9E) || 
defined (CPU_ARM10) || \ + defined(CPU_XSCALE_80200) || defined(CPU_XSCALE_80321) || \ + defined(CPU_XSCALE_PXA2X0) || defined(CPU_XSCALE_IXP425) || \ + defined(CPU_FA526) || defined(CPU_FA626TE) || \ defined(CPU_XSCALE_80219) || defined(CPU_XSCALE_81342) static void get_cachetype_cp15(void); @@ -1141,8 +1138,8 @@ set_cpufuncs() goto out; } #endif /* CPU_SA1110 */ -#ifdef CPU_FA526 - if (cputype == CPU_ID_FA526) { +#if defined(CPU_FA526) || defined(CPU_FA626TE) + if (cputype == CPU_ID_FA526 || cputype == CPU_ID_FA626TE) { cpufuncs = fa526_cpufuncs; cpu_reset_needs_v4_MMU_disable = 1; /* SA needs it */ get_cachetype_cp15(); @@ -1153,7 +1150,7 @@ set_cpufuncs() goto out; } -#endif /* CPU_FA526 */ +#endif /* CPU_FA526 || CPU_FA626TE */ #ifdef CPU_IXP12X0 if (cputype == CPU_ID_IXP1200) { cpufuncs = ixp12x0_cpufuncs; @@ -1629,7 +1626,7 @@ late_abort_fixup(arg) defined(CPU_XSCALE_PXA2X0) || defined(CPU_XSCALE_IXP425) || \ defined(CPU_XSCALE_80219) || defined(CPU_XSCALE_81342) || \ defined(CPU_ARM10) || defined(CPU_ARM11) || \ - defined(CPU_FA526) + defined(CPU_FA526) || defined(CPU_FA626TE) #define IGN 0 #define OR 1 @@ -2095,7 +2092,7 @@ sa11x0_setup(args) } #endif /* CPU_SA1100 || CPU_SA1110 */ -#if defined(CPU_FA526) +#if defined(CPU_FA526) || defined(CPU_FA626TE) struct cpu_option fa526_options[] = { #ifdef COMPAT_12 { "nocache", IGN, BIC, (CPU_CONTROL_IC_ENABLE | @@ -2149,7 +2146,7 @@ fa526_setup(char *args) ctrl = cpuctrl; cpu_control(0xffffffff, cpuctrl); } -#endif /* CPU_FA526 */ +#endif /* CPU_FA526 || CPU_FA626TE */ #if defined(CPU_IXP12X0) diff --git a/sys/arm/arm/cpufunc_asm_fa526.S b/sys/arm/arm/cpufunc_asm_fa526.S index d68d7a6f26b..d53d29a1cfb 100644 --- a/sys/arm/arm/cpufunc_asm_fa526.S +++ b/sys/arm/arm/cpufunc_asm_fa526.S @@ -32,7 +32,11 @@ #include __FBSDID("$FreeBSD$"); +#ifdef CPU_FA526 #define CACHELINE_SIZE 16 +#else +#define CACHELINE_SIZE 32 +#endif ENTRY(fa526_setttb) mov r1, #0 diff --git a/sys/arm/arm/elf_trampoline.c 
b/sys/arm/arm/elf_trampoline.c index 6addbc387d5..0f725c8d8b6 100644 --- a/sys/arm/arm/elf_trampoline.c +++ b/sys/arm/arm/elf_trampoline.c @@ -57,7 +57,7 @@ void __startC(void); #define cpu_idcache_wbinv_all arm8_cache_purgeID #elif defined(CPU_ARM9) #define cpu_idcache_wbinv_all arm9_idcache_wbinv_all -#elif defined(CPU_FA526) +#elif defined(CPU_FA526) || defined(CPU_FA626TE) #define cpu_idcache_wbinv_all fa526_idcache_wbinv_all #elif defined(CPU_ARM9E) #define cpu_idcache_wbinv_all armv5_ec_idcache_wbinv_all diff --git a/sys/arm/arm/identcpu.c b/sys/arm/arm/identcpu.c index 6d2a0f01c44..1a9c996bab0 100644 --- a/sys/arm/arm/identcpu.c +++ b/sys/arm/arm/identcpu.c @@ -220,7 +220,9 @@ const struct cpuidtab cpuids[] = { generic_steppings }, { CPU_ID_ARM966ESR1, CPU_CLASS_ARM9ES, "ARM966E-S", generic_steppings }, - { CPU_ID_FA526, CPU_CLASS_ARM9, "FA526", + { CPU_ID_FA526, CPU_CLASS_ARM9, "FA526", + generic_steppings }, + { CPU_ID_FA626TE, CPU_CLASS_ARM9ES, "FA626TE", generic_steppings }, { CPU_ID_TI925T, CPU_CLASS_ARM9TDMI, "TI ARM925T", diff --git a/sys/arm/arm/pmap.c b/sys/arm/arm/pmap.c index 19b85d88a37..8fe6424a89a 100644 --- a/sys/arm/arm/pmap.c +++ b/sys/arm/arm/pmap.c @@ -3740,13 +3740,14 @@ pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot) struct l2_dtable *l2; pd_entry_t l1pd; pt_entry_t *ptep, pte; - vm_paddr_t pa; + vm_paddr_t pa, paddr; vm_page_t m = NULL; u_int l1idx; l1idx = L1_IDX(va); + paddr = 0; - vm_page_lock_queues(); PMAP_LOCK(pmap); +retry: l1pd = pmap->pm_l1->l1_kva[l1idx]; if (l1pte_section_p(l1pd)) { /* @@ -3758,6 +3759,8 @@ pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot) pa = (l1pd & L1_SUP_FRAME) | (va & L1_SUP_OFFSET); else pa = (l1pd & L1_S_FRAME) | (va & L1_S_OFFSET); + if (vm_page_pa_tryrelock(pmap, pa & PG_FRAME, &paddr)) + goto retry; if (l1pd & L1_S_PROT_W || (prot & VM_PROT_WRITE) == 0) { m = PHYS_TO_VM_PAGE(pa); vm_page_hold(m); @@ -3774,7 +3777,6 @@ pmap_extract_and_hold(pmap_t pmap, 
vm_offset_t va, vm_prot_t prot) if (l2 == NULL || (ptep = l2->l2_bucket[L2_BUCKET(l1idx)].l2b_kva) == NULL) { PMAP_UNLOCK(pmap); - vm_page_unlock_queues(); return (NULL); } @@ -3783,7 +3785,6 @@ pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot) if (pte == 0) { PMAP_UNLOCK(pmap); - vm_page_unlock_queues(); return (NULL); } if (pte & L2_S_PROT_W || (prot & VM_PROT_WRITE) == 0) { @@ -3796,13 +3797,15 @@ pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot) pa = (pte & L2_S_FRAME) | (va & L2_S_OFFSET); break; } + if (vm_page_pa_tryrelock(pmap, pa & PG_FRAME, &paddr)) + goto retry; m = PHYS_TO_VM_PAGE(pa); vm_page_hold(m); } } PMAP_UNLOCK(pmap); - vm_page_unlock_queues(); + PA_UNLOCK_COND(paddr); return (m); } @@ -4492,6 +4495,20 @@ pmap_clear_modify(vm_page_t m) } +/* + * pmap_is_referenced: + * + * Return whether or not the specified physical page was referenced + * in any physical maps. + */ +boolean_t +pmap_is_referenced(vm_page_t m) +{ + + return ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0 && + (m->md.pvh_attrs & PVF_REF) != 0); +} + /* * pmap_clear_reference: * diff --git a/sys/arm/at91/if_ate.c b/sys/arm/at91/if_ate.c index 2f6b741cc10..b0654823470 100644 --- a/sys/arm/at91/if_ate.c +++ b/sys/arm/at91/if_ate.c @@ -272,8 +272,8 @@ ate_attach(device_t dev) ifp->if_ioctl = ateioctl; ifp->if_init = ateinit; ifp->if_baudrate = 10000000; - IFQ_SET_MAXLEN(&ifp->if_snd, IFQ_MAXLEN); - ifp->if_snd.ifq_drv_maxlen = IFQ_MAXLEN; + IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen); + ifp->if_snd.ifq_drv_maxlen = ifqmaxlen; IFQ_SET_READY(&ifp->if_snd); ifp->if_linkmib = &sc->mibdata; ifp->if_linkmiblen = sizeof(sc->mibdata); diff --git a/sys/arm/conf/DB-78XXX b/sys/arm/conf/DB-78XXX index 939ae2b96c5..b82f1185087 100644 --- a/sys/arm/conf/DB-78XXX +++ b/sys/arm/conf/DB-78XXX @@ -67,6 +67,7 @@ device e1000phy device bpf # USB +options USB_DEBUG # enable debug msgs device usb device ehci device umass diff --git a/sys/arm/conf/DB-88F5XXX 
b/sys/arm/conf/DB-88F5XXX index 41f32b1586a..edb5fe8c252 100644 --- a/sys/arm/conf/DB-88F5XXX +++ b/sys/arm/conf/DB-88F5XXX @@ -74,6 +74,7 @@ device iicbus device ds133x # USB +options USB_DEBUG # enable debug msgs device usb device ehci device umass diff --git a/sys/arm/conf/DB-88F6XXX b/sys/arm/conf/DB-88F6XXX index c0857fda624..1b4b7fd5ca5 100644 --- a/sys/arm/conf/DB-88F6XXX +++ b/sys/arm/conf/DB-88F6XXX @@ -67,6 +67,7 @@ device e1000phy device bpf # USB +options USB_DEBUG # enable debug msgs device usb device ehci device umass diff --git a/sys/arm/conf/HL200 b/sys/arm/conf/HL200 index f50f64cd622..dd46a61c2fd 100644 --- a/sys/arm/conf/HL200 +++ b/sys/arm/conf/HL200 @@ -95,6 +95,7 @@ device icee device bpf # USB support +options USB_DEBUG # enable debug msgs device ohci # OHCI localbus->USB interface device usb # USB Bus (required) #device udbp # USB Double Bulk Pipe devices diff --git a/sys/arm/conf/KB920X b/sys/arm/conf/KB920X index 336228df419..f47e9acf09b 100644 --- a/sys/arm/conf/KB920X +++ b/sys/arm/conf/KB920X @@ -96,6 +96,7 @@ device icee device bpf # USB support +options USB_DEBUG # enable debug msgs device ohci # OHCI localbus->USB interface device usb # USB Bus (required) #device udbp # USB Double Bulk Pipe devices diff --git a/sys/arm/conf/LN2410SBC b/sys/arm/conf/LN2410SBC index 5a14f591c8c..e4f3a5445c6 100644 --- a/sys/arm/conf/LN2410SBC +++ b/sys/arm/conf/LN2410SBC @@ -79,6 +79,7 @@ options WITNESS_SKIPSPIN #Don't run witness on spinlocks for speed device md +options USB_DEBUG # enable debug msgs device usb device ohci device umass diff --git a/sys/arm/conf/SHEEVAPLUG b/sys/arm/conf/SHEEVAPLUG index d11b34d57b3..dcd82e5549d 100644 --- a/sys/arm/conf/SHEEVAPLUG +++ b/sys/arm/conf/SHEEVAPLUG @@ -61,6 +61,7 @@ options DEVICE_POLLING device vlan # USB +options USB_DEBUG # enable debug msgs device usb device ehci device umass diff --git a/sys/arm/include/cpuconf.h b/sys/arm/include/cpuconf.h index 942b5916933..c8a9b6e191e 100644 --- 
a/sys/arm/include/cpuconf.h +++ b/sys/arm/include/cpuconf.h @@ -62,6 +62,7 @@ defined(CPU_XSCALE_80321) + \ defined(CPU_XSCALE_PXA2X0) + \ defined(CPU_FA526) + \ + defined(CPU_FA626TE) + \ defined(CPU_XSCALE_IXP425)) /* @@ -78,7 +79,7 @@ #if (defined(CPU_ARM9E) || defined(CPU_ARM10) || \ defined(CPU_XSCALE_80200) || defined(CPU_XSCALE_80321) || \ defined(CPU_XSCALE_80219) || defined(CPU_XSCALE_81342) || \ - defined(CPU_XSCALE_PXA2X0)) + defined(CPU_XSCALE_PXA2X0) || defined(CPU_FA626TE)) #define ARM_ARCH_5 1 #else #define ARM_ARCH_5 0 @@ -126,7 +127,8 @@ #if (defined(CPU_ARM6) || defined(CPU_ARM7) || defined(CPU_ARM7TDMI) || \ defined(CPU_ARM8) || defined(CPU_ARM9) || defined(CPU_ARM9E) || \ - defined(CPU_ARM10) || defined(CPU_ARM11) || defined(CPU_FA526)) + defined(CPU_ARM10) || defined(CPU_ARM11) || defined(CPU_FA526) || \ + defined(CPU_FA626TE)) #define ARM_MMU_GENERIC 1 #else #define ARM_MMU_GENERIC 0 diff --git a/sys/arm/include/cpufunc.h b/sys/arm/include/cpufunc.h index 84949668535..4ab63fbe3fa 100644 --- a/sys/arm/include/cpufunc.h +++ b/sys/arm/include/cpufunc.h @@ -284,7 +284,7 @@ u_int arm8_clock_config (u_int, u_int); #endif -#ifdef CPU_FA526 +#if defined(CPU_FA526) || defined(CPU_FA626TE) void fa526_setup (char *arg); void fa526_setttb (u_int ttb); void fa526_context_switch (void); @@ -464,11 +464,11 @@ extern unsigned armv5_dcache_index_max; extern unsigned armv5_dcache_index_inc; #endif -#if defined(CPU_ARM9) || defined(CPU_ARM9E) || defined(CPU_ARM10) || \ - defined(CPU_SA110) || defined(CPU_SA1100) || defined(CPU_SA1110) || \ - defined(CPU_XSCALE_80200) || defined(CPU_XSCALE_80321) || \ - defined(CPU_FA526) || \ - defined(CPU_XSCALE_PXA2X0) || defined(CPU_XSCALE_IXP425) || \ +#if defined(CPU_ARM9) || defined(CPU_ARM9E) || defined(CPU_ARM10) || \ + defined(CPU_SA110) || defined(CPU_SA1100) || defined(CPU_SA1110) || \ + defined(CPU_XSCALE_80200) || defined(CPU_XSCALE_80321) || \ + defined(CPU_FA526) || defined(CPU_FA626TE) || \ + 
defined(CPU_XSCALE_PXA2X0) || defined(CPU_XSCALE_IXP425) || \ defined(CPU_XSCALE_80219) || defined(CPU_XSCALE_81342) void armv4_tlb_flushID (void); diff --git a/sys/arm/include/pmap.h b/sys/arm/include/pmap.h index 8ee7bac27e2..287c4c1206a 100644 --- a/sys/arm/include/pmap.h +++ b/sys/arm/include/pmap.h @@ -134,6 +134,8 @@ struct pmap { struct l1_ttable *pm_l1; struct l2_dtable *pm_l2[L2_SIZE]; pd_entry_t *pm_pdir; /* KVA of page directory */ + uint32_t pm_gen_count; /* generation count (pmap lock dropped) */ + u_int pm_retries; int pm_active; /* active on cpus */ struct pmap_statistics pm_stats; /* pmap statictics */ TAILQ_HEAD(,pv_entry) pm_pvlist; /* list of mappings in pmap */ diff --git a/sys/arm/include/proc.h b/sys/arm/include/proc.h index c7b2a4ef5c9..08857446430 100644 --- a/sys/arm/include/proc.h +++ b/sys/arm/include/proc.h @@ -60,4 +60,6 @@ struct mdproc { void *md_sigtramp; }; +#define KINFO_PROC_SIZE 792 + #endif /* !_MACHINE_PROC_H_ */ diff --git a/sys/arm/mv/files.mv b/sys/arm/mv/files.mv index 36be745f147..d7352b79b14 100644 --- a/sys/arm/mv/files.mv +++ b/sys/arm/mv/files.mv @@ -30,6 +30,7 @@ arm/mv/timer.c standard arm/mv/twsi.c optional iicbus dev/mge/if_mge.c optional mge +dev/mvs/mvs_soc.c optional mvs dev/uart/uart_bus_mbus.c optional uart dev/uart/uart_cpu_mv.c optional uart dev/uart/uart_dev_ns8250.c optional uart diff --git a/sys/arm/xscale/ixp425/if_npe.c b/sys/arm/xscale/ixp425/if_npe.c index 5b605aeb6d2..3d8d670eec9 100644 --- a/sys/arm/xscale/ixp425/if_npe.c +++ b/sys/arm/xscale/ixp425/if_npe.c @@ -360,7 +360,7 @@ npe_attach(device_t dev) ifp->if_ioctl = npeioctl; ifp->if_init = npeinit; IFQ_SET_MAXLEN(&ifp->if_snd, sc->txdma.nbuf - 1); - ifp->if_snd.ifq_drv_maxlen = IFQ_MAXLEN; + ifp->if_snd.ifq_drv_maxlen = ifqmaxlen; IFQ_SET_READY(&ifp->if_snd); ifp->if_linkmib = &sc->mibdata; ifp->if_linkmiblen = sizeof(sc->mibdata); diff --git a/sys/boot/forth/loader.conf b/sys/boot/forth/loader.conf index d7f1323be9b..d48d8c44529 100644 --- 
a/sys/boot/forth/loader.conf +++ b/sys/boot/forth/loader.conf @@ -197,8 +197,6 @@ if_epair_load="NO" # Virtual b-t-b Ethernet-like interface pair if_faith_load="NO" # IPv6-to-IPv4 TCP relay capturing interface if_gif_load="NO" # generic tunnel interface if_gre_load="NO" # encapsulating network device -if_ppp_load="NO" # Kernel ppp -if_sl_load="NO" # SLIP if_stf_load="NO" # 6to4 tunnel interface if_tap_load="NO" # Ethernet tunnel software network interface if_tun_load="NO" # Tunnel driver (user process ppp) @@ -217,7 +215,6 @@ if_age_load="NO" # Attansic/Atheros L1 Gigabit Ethernet if_alc_load="NO" # Atheros AR8131/AR8132 Ethernet if_ale_load="NO" # Atheros AR8121/AR8113/AR8114 Ethernet if_an_load="NO" # Aironet 4500/4800 802.11 wireless NICs -if_ar_load="NO" # Digi SYNC/570i if_ath_load="NO" # Atheros IEEE 802.11 wireless NICs if_aue_load="NO" # ADMtek AN986 Pegasus USB Ethernet if_awi_load="NO" # AMD PCnetMobile IEEE 802.11 wireless NICs @@ -237,6 +234,7 @@ if_ed_load="NO" # National Semiconductor DS8390/WD83C690 if_em_load="NO" # Intel(R) PRO/1000 Gigabit Ethernet if_en_load="NO" # Midway-based ATM interfaces if_ep_load="NO" # 3Com Etherlink III (3c5x9) +if_et_load="NO" # Agere ET1310 10/100/Gigabit Ethernet if_ex_load="NO" # Intel EtherExpress Pro/10 Ethernet if_fe_load="NO" # Fujitsu MB86960A/MB86965A based Ethernet # adapters @@ -265,16 +263,15 @@ if_nve_load="NO" # NVIDIA nForce MCP Networking Adapter if_nxge_load="NO" # Neterion Xframe 10Gb Ethernet if_pcn_load="NO" # AMD PCnet PCI if_ral_load="NO" # Ralink Technology wireless -if_ray_load="NO" # Raytheon Raylink/Webgear Aviator PCCard if_re_load="NO" # RealTek 8139C+/8169/8169S/8110S if_rl_load="NO" # RealTek 8129/8139 if_rue_load="NO" # RealTek RTL8150 USB to Fast Ethernet if_sbni_load="NO" # Granch SBNI12 leased line adapters if_sf_load="NO" # Adaptec Duralink PCI (AIC-6915 "starfire") +if_sge_load="NO" # Silicon Integrated Systems SiS 190/191 if_sis_load="NO" # Silicon Integrated Systems SiS 900/7016 
if_sk_load="NO" # SysKonnect SK-984x series PCI Gigabit Ethernet if_sn_load="NO" # SMC 91Cxx -if_sr_load="NO" # synchronous RISCom/N2 / WANic 400/405 if_ste_load="NO" # Sundance Technologies ST201 Fast Ethernet if_stge_load="NO" # Sundance/Tamarack TC9021 Gigabit Ethernet if_ti_load="NO" # Alteon Networks Tigon 1 and Tigon 2 diff --git a/sys/boot/sparc64/loader/main.c b/sys/boot/sparc64/loader/main.c index 2afbfeeb75e..b3e79084ad3 100644 --- a/sys/boot/sparc64/loader/main.c +++ b/sys/boot/sparc64/loader/main.c @@ -451,7 +451,8 @@ dtlb_va_to_pa_sun4u(vm_offset_t va) reg = dtlb_get_data_sun4u(i); wrpr(pstate, pstate, 0); reg >>= TD_PA_SHIFT; - if (cpu_impl >= CPU_IMPL_ULTRASPARCIII) + if (cpu_impl == CPU_IMPL_SPARC64V || + cpu_impl >= CPU_IMPL_ULTRASPARCIII) return (reg & TD_PA_CH_MASK); return (reg & TD_PA_SF_MASK); } @@ -474,7 +475,8 @@ itlb_va_to_pa_sun4u(vm_offset_t va) reg = itlb_get_data_sun4u(i); wrpr(pstate, pstate, 0); reg >>= TD_PA_SHIFT; - if (cpu_impl >= CPU_IMPL_ULTRASPARCIII) + if (cpu_impl == CPU_IMPL_SPARC64V || + cpu_impl >= CPU_IMPL_ULTRASPARCIII) return (reg & TD_PA_CH_MASK); return (reg & TD_PA_SF_MASK); } @@ -696,6 +698,7 @@ cpu_cpuid_prop_sun4u(void) switch (cpu_impl) { case CPU_IMPL_SPARC64: + case CPU_IMPL_SPARC64V: case CPU_IMPL_ULTRASPARCI: case CPU_IMPL_ULTRASPARCII: case CPU_IMPL_ULTRASPARCIIi: @@ -720,6 +723,7 @@ cpu_get_mid_sun4u(void) switch (cpu_impl) { case CPU_IMPL_SPARC64: + case CPU_IMPL_SPARC64V: case CPU_IMPL_ULTRASPARCI: case CPU_IMPL_ULTRASPARCII: case CPU_IMPL_ULTRASPARCIIi: diff --git a/sys/cam/ata/ata_da.c b/sys/cam/ata/ata_da.c index 6cb4f2e5509..6eff691de90 100644 --- a/sys/cam/ata/ata_da.c +++ b/sys/cam/ata/ata_da.c @@ -686,14 +686,10 @@ adaregister(struct cam_periph *periph, void *arg) else softc->quirks = ADA_Q_NONE; - /* Check if the SIM does not want queued commands */ bzero(&cpi, sizeof(cpi)); xpt_setup_ccb(&cpi.ccb_h, periph->path, CAM_PRIORITY_NONE); cpi.ccb_h.func_code = XPT_PATH_INQ; xpt_action((union ccb 
*)&cpi); - if (cpi.ccb_h.status != CAM_REQ_CMP || - (cpi.hba_inquiry & PI_TAG_ABLE) == 0) - softc->flags &= ~ADA_FLAG_CAN_NCQ; TASK_INIT(&softc->sysctl_task, 0, adasysctlinit, periph); diff --git a/sys/cam/ata/ata_pmp.c b/sys/cam/ata/ata_pmp.c index b60a4a7596b..0835a2bf228 100644 --- a/sys/cam/ata/ata_pmp.c +++ b/sys/cam/ata/ata_pmp.c @@ -101,6 +101,7 @@ struct pmp_softc { int events; #define PMP_EV_RESET 1 #define PMP_EV_RESCAN 2 + u_int caps; struct task sysctl_task; struct sysctl_ctx_list sysctl_ctx; struct sysctl_oid *sysctl_tree; @@ -457,6 +458,14 @@ pmpstart(struct cam_periph *periph, union ccb *start_ccb) ata_pm_read_cmd(ataio, 2, 15); break; case PMP_STATE_PRECONFIG: + /* Get/update host SATA capabilities. */ + bzero(&cts, sizeof(cts)); + xpt_setup_ccb(&cts.ccb_h, periph->path, CAM_PRIORITY_NONE); + cts.ccb_h.func_code = XPT_GET_TRAN_SETTINGS; + cts.type = CTS_TYPE_CURRENT_SETTINGS; + xpt_action((union ccb *)&cts); + if (cts.xport_specific.sata.valid & CTS_SATA_VALID_CAPS) + softc->caps = cts.xport_specific.sata.caps; cam_fill_ataio(ataio, pmp_retry_count, pmpdone, @@ -644,14 +653,16 @@ pmpdone(struct cam_periph *periph, union ccb *done_ccb) (done_ccb->ataio.res.lba_mid << 16) + (done_ccb->ataio.res.lba_low << 8) + done_ccb->ataio.res.sector_count; - if ((res & 0xf0f) == 0x103 && (res & 0x0f0) != 0) { + if (((res & 0xf0f) == 0x103 && (res & 0x0f0) != 0) || + (res & 0x600) != 0) { if (bootverbose) { printf("%s%d: port %d status: %08x\n", periph->periph_name, periph->unit_number, softc->pm_step, res); } - /* Report device speed. */ - if (xpt_create_path(&dpath, periph, + /* Report device speed if it is online. 
*/ + if ((res & 0xf0f) == 0x103 && + xpt_create_path(&dpath, periph, xpt_path_path_id(periph->path), softc->pm_step, 0) == CAM_REQ_CMP) { bzero(&cts, sizeof(cts)); @@ -660,6 +671,9 @@ pmpdone(struct cam_periph *periph, union ccb *done_ccb) cts.type = CTS_TYPE_CURRENT_SETTINGS; cts.xport_specific.sata.revision = (res & 0x0f0) >> 4; cts.xport_specific.sata.valid = CTS_SATA_VALID_REVISION; + cts.xport_specific.sata.caps = softc->caps & + (CTS_SATA_CAPS_H_PMREQ | CTS_SATA_CAPS_H_DMAAA); + cts.xport_specific.sata.valid |= CTS_SATA_VALID_CAPS; xpt_action((union ccb *)&cts); xpt_free_path(dpath); } diff --git a/sys/cam/ata/ata_xpt.c b/sys/cam/ata/ata_xpt.c index d5e0b32a49b..478b0060ce4 100644 --- a/sys/cam/ata/ata_xpt.c +++ b/sys/cam/ata/ata_xpt.c @@ -88,6 +88,9 @@ typedef enum { PROBE_IDENTIFY, PROBE_SPINUP, PROBE_SETMODE, + PROBE_SETPM, + PROBE_SETAPST, + PROBE_SETDMAAA, PROBE_SET_MULTI, PROBE_INQUIRY, PROBE_FULL_INQUIRY, @@ -101,6 +104,9 @@ static char *probe_action_text[] = { "PROBE_IDENTIFY", "PROBE_SPINUP", "PROBE_SETMODE", + "PROBE_SETPM", + "PROBE_SETAPST", + "PROBE_SETDMAAA", "PROBE_SET_MULTI", "PROBE_INQUIRY", "PROBE_FULL_INQUIRY", @@ -132,6 +138,7 @@ typedef struct { uint32_t pm_prv; int restart; int spinup; + u_int caps; struct cam_periph *periph; } probe_softc; @@ -393,6 +400,45 @@ negotiate: ata_28bit_cmd(ataio, ATA_SETFEATURES, ATA_SF_SETXFER, 0, mode); break; } + case PROBE_SETPM: + cam_fill_ataio(ataio, + 1, + probedone, + CAM_DIR_NONE, + 0, + NULL, + 0, + 30*1000); + ata_28bit_cmd(ataio, ATA_SETFEATURES, + (softc->caps & CTS_SATA_CAPS_H_PMREQ) ? 0x10 : 0x90, + 0, 0x03); + break; + case PROBE_SETAPST: + cam_fill_ataio(ataio, + 1, + probedone, + CAM_DIR_NONE, + 0, + NULL, + 0, + 30*1000); + ata_28bit_cmd(ataio, ATA_SETFEATURES, + (softc->caps & CTS_SATA_CAPS_H_APST) ? 
0x10 : 0x90, + 0, 0x07); + break; + case PROBE_SETDMAAA: + cam_fill_ataio(ataio, + 1, + probedone, + CAM_DIR_NONE, + 0, + NULL, + 0, + 30*1000); + ata_28bit_cmd(ataio, ATA_SETFEATURES, + (softc->caps & CTS_SATA_CAPS_H_DMAAA) ? 0x10 : 0x90, + 0, 0x02); + break; case PROBE_SET_MULTI: { u_int sectors, bytecount; @@ -685,6 +731,7 @@ probedone(struct cam_periph *periph, union ccb *done_ccb) probe_softc *softc; struct cam_path *path; u_int32_t priority; + u_int caps; int found = 1; CAM_DEBUG(done_ccb->ccb_h.path, CAM_DEBUG_TRACE, ("probedone\n")); @@ -766,7 +813,9 @@ noerror: } case PROBE_IDENTIFY: { + struct ccb_pathinq cpi; int16_t *ptr; + int changed = 1; ident_buf = &softc->ident_data; for (ptr = (int16_t *)ident_buf; @@ -808,9 +857,12 @@ noerror: sizeof(ident_buf->serial))) { /* Device changed. */ xpt_async(AC_LOST_DEVICE, path, NULL); - } else + } else { bcopy(&softc->ident_data, ident_buf, sizeof(struct ata_params)); - } else { + changed = 0; + } + } + if (changed) { bcopy(&softc->ident_data, ident_buf, sizeof(struct ata_params)); /* Clean up from previous instance of this device */ if (path->device->serial_num != NULL) { @@ -840,16 +892,24 @@ noerror: ata_find_quirk(path->device); if (path->device->mintags != 0 && path->bus->sim->max_tagged_dev_openings != 0) { - /* Report SIM which tags are allowed. */ - bzero(&cts, sizeof(cts)); - xpt_setup_ccb(&cts.ccb_h, path, CAM_PRIORITY_NONE); - cts.ccb_h.func_code = XPT_SET_TRAN_SETTINGS; - cts.type = CTS_TYPE_CURRENT_SETTINGS; - cts.xport_specific.sata.tags = path->device->maxtags; - cts.xport_specific.sata.valid = CTS_SATA_VALID_TAGS; - xpt_action((union ccb *)&cts); - /* Reconfigure queues for tagged queueing. */ - xpt_start_tags(path); + /* Check if the SIM does not want queued commands. 
*/ + bzero(&cpi, sizeof(cpi)); + xpt_setup_ccb(&cpi.ccb_h, path, CAM_PRIORITY_NONE); + cpi.ccb_h.func_code = XPT_PATH_INQ; + xpt_action((union ccb *)&cpi); + if (cpi.ccb_h.status == CAM_REQ_CMP && + (cpi.hba_inquiry & PI_TAG_ABLE)) { + /* Report SIM which tags are allowed. */ + bzero(&cts, sizeof(cts)); + xpt_setup_ccb(&cts.ccb_h, path, CAM_PRIORITY_NONE); + cts.ccb_h.func_code = XPT_SET_TRAN_SETTINGS; + cts.type = CTS_TYPE_CURRENT_SETTINGS; + cts.xport_specific.sata.tags = path->device->maxtags; + cts.xport_specific.sata.valid = CTS_SATA_VALID_TAGS; + xpt_action((union ccb *)&cts); + /* Reconfigure queues for tagged queueing. */ + xpt_start_tags(path); + } } ata_device_transport(path); PROBE_SET_ACTION(softc, PROBE_SETMODE); @@ -866,6 +926,67 @@ noerror: xpt_schedule(periph, priority); return; case PROBE_SETMODE: + if (path->device->transport != XPORT_SATA) + goto notsata; + /* Set supported bits. */ + bzero(&cts, sizeof(cts)); + xpt_setup_ccb(&cts.ccb_h, path, CAM_PRIORITY_NONE); + cts.ccb_h.func_code = XPT_GET_TRAN_SETTINGS; + cts.type = CTS_TYPE_CURRENT_SETTINGS; + xpt_action((union ccb *)&cts); + if (cts.xport_specific.sata.valid & CTS_SATA_VALID_CAPS) + caps = cts.xport_specific.sata.caps & CTS_SATA_CAPS_H; + else + caps = 0; + if (ident_buf->satacapabilities != 0xffff) { + if (ident_buf->satacapabilities & ATA_SUPPORT_IFPWRMNGTRCV) + caps |= CTS_SATA_CAPS_D_PMREQ; + if (ident_buf->satacapabilities & ATA_SUPPORT_HAPST) + caps |= CTS_SATA_CAPS_D_APST; + } + /* Mask unwanted bits. */ + bzero(&cts, sizeof(cts)); + xpt_setup_ccb(&cts.ccb_h, path, CAM_PRIORITY_NONE); + cts.ccb_h.func_code = XPT_GET_TRAN_SETTINGS; + cts.type = CTS_TYPE_USER_SETTINGS; + xpt_action((union ccb *)&cts); + if (cts.xport_specific.sata.valid & CTS_SATA_VALID_CAPS) + caps &= cts.xport_specific.sata.caps; + /* Store result to SIM. 
*/ + bzero(&cts, sizeof(cts)); + xpt_setup_ccb(&cts.ccb_h, path, CAM_PRIORITY_NONE); + cts.ccb_h.func_code = XPT_SET_TRAN_SETTINGS; + cts.type = CTS_TYPE_CURRENT_SETTINGS; + cts.xport_specific.sata.caps = caps; + cts.xport_specific.sata.valid = CTS_SATA_VALID_CAPS; + xpt_action((union ccb *)&cts); + softc->caps = caps; + if (ident_buf->satasupport & ATA_SUPPORT_IFPWRMNGT) { + PROBE_SET_ACTION(softc, PROBE_SETPM); + xpt_release_ccb(done_ccb); + xpt_schedule(periph, priority); + return; + } + /* FALLTHROUGH */ + case PROBE_SETPM: + if (ident_buf->satacapabilities != 0xffff && + ident_buf->satacapabilities & ATA_SUPPORT_DAPST) { + PROBE_SET_ACTION(softc, PROBE_SETAPST); + xpt_release_ccb(done_ccb); + xpt_schedule(periph, priority); + return; + } + /* FALLTHROUGH */ + case PROBE_SETAPST: + if (ident_buf->satasupport & ATA_SUPPORT_AUTOACTIVATE) { + PROBE_SET_ACTION(softc, PROBE_SETDMAAA); + xpt_release_ccb(done_ccb); + xpt_schedule(periph, priority); + return; + } + /* FALLTHROUGH */ + case PROBE_SETDMAAA: +notsata: if (path->device->protocol == PROTO_ATA) { PROBE_SET_ACTION(softc, PROBE_SET_MULTI); } else { @@ -951,6 +1072,35 @@ noerror: snprintf(ident_buf->revision, sizeof(ident_buf->revision), "%04x", softc->pm_prv); path->device->flags |= CAM_DEV_IDENTIFY_DATA_VALID; + /* Set supported bits. */ + bzero(&cts, sizeof(cts)); + xpt_setup_ccb(&cts.ccb_h, path, CAM_PRIORITY_NONE); + cts.ccb_h.func_code = XPT_GET_TRAN_SETTINGS; + cts.type = CTS_TYPE_CURRENT_SETTINGS; + xpt_action((union ccb *)&cts); + if (cts.xport_specific.sata.valid & CTS_SATA_VALID_CAPS) + caps = cts.xport_specific.sata.caps & CTS_SATA_CAPS_H; + else + caps = 0; + /* All PMPs must support PM requests. */ + caps |= CTS_SATA_CAPS_D_PMREQ; + /* Mask unwanted bits. 
*/ + bzero(&cts, sizeof(cts)); + xpt_setup_ccb(&cts.ccb_h, path, CAM_PRIORITY_NONE); + cts.ccb_h.func_code = XPT_GET_TRAN_SETTINGS; + cts.type = CTS_TYPE_USER_SETTINGS; + xpt_action((union ccb *)&cts); + if (cts.xport_specific.sata.valid & CTS_SATA_VALID_CAPS) + caps &= cts.xport_specific.sata.caps; + /* Store result to SIM. */ + bzero(&cts, sizeof(cts)); + xpt_setup_ccb(&cts.ccb_h, path, CAM_PRIORITY_NONE); + cts.ccb_h.func_code = XPT_SET_TRAN_SETTINGS; + cts.type = CTS_TYPE_CURRENT_SETTINGS; + cts.xport_specific.sata.caps = caps; + cts.xport_specific.sata.valid = CTS_SATA_VALID_CAPS; + xpt_action((union ccb *)&cts); + softc->caps = caps; if (periph->path->device->flags & CAM_DEV_UNCONFIGURED) { path->device->flags &= ~CAM_DEV_UNCONFIGURED; xpt_acquire_device(path->device); @@ -1105,13 +1255,13 @@ ata_scan_bus(struct cam_periph *periph, union ccb *request_ccb) work_ccb = request_ccb; /* Reuse the same CCB to query if a device was really found */ scan_info = (ata_scan_bus_info *)work_ccb->ccb_h.ppriv_ptr0; - /* Free the current request path- we're done with it. */ - xpt_free_path(work_ccb->ccb_h.path); /* If there is PMP... */ if ((scan_info->cpi->hba_inquiry & PI_SATAPM) && (scan_info->counter == scan_info->cpi->max_target)) { if (work_ccb->ccb_h.status == CAM_REQ_CMP) { - /* everything else willbe probed by it */ + /* everything else will be probed by it */ + /* Free the current request path- we're done with it. */ + xpt_free_path(work_ccb->ccb_h.path); goto done; } else { struct ccb_trans_settings cts; @@ -1119,7 +1269,7 @@ ata_scan_bus(struct cam_periph *periph, union ccb *request_ccb) /* Report SIM that PM is absent. 
*/ bzero(&cts, sizeof(cts)); xpt_setup_ccb(&cts.ccb_h, - scan_info->request_ccb->ccb_h.path, 1); + work_ccb->ccb_h.path, CAM_PRIORITY_NONE); cts.ccb_h.func_code = XPT_SET_TRAN_SETTINGS; cts.type = CTS_TYPE_CURRENT_SETTINGS; cts.xport_specific.sata.pm_present = 0; @@ -1127,6 +1277,8 @@ ata_scan_bus(struct cam_periph *periph, union ccb *request_ccb) xpt_action((union ccb *)&cts); } } + /* Free the current request path- we're done with it. */ + xpt_free_path(work_ccb->ccb_h.path); if (scan_info->counter == ((scan_info->cpi->hba_inquiry & PI_SATAPM) ? 0 : scan_info->cpi->max_target)) { diff --git a/sys/cam/cam_ccb.h b/sys/cam/cam_ccb.h index 2810c545f71..c5ac35e9846 100644 --- a/sys/cam/cam_ccb.h +++ b/sys/cam/cam_ccb.h @@ -837,12 +837,21 @@ struct ccb_trans_settings_sata { #define CTS_SATA_VALID_PM 0x08 #define CTS_SATA_VALID_TAGS 0x10 #define CTS_SATA_VALID_ATAPI 0x20 +#define CTS_SATA_VALID_CAPS 0x40 int mode; /* Legacy PATA mode */ u_int bytecount; /* Length of PIO transaction */ int revision; /* SATA revision */ u_int pm_present; /* PM is present (XPT->SIM) */ u_int tags; /* Number of allowed tags */ u_int atapi; /* Length of ATAPI CDB */ + u_int caps; /* Device and host SATA caps. */ +#define CTS_SATA_CAPS_H 0x0000ffff +#define CTS_SATA_CAPS_H_PMREQ 0x00000001 +#define CTS_SATA_CAPS_H_APST 0x00000002 +#define CTS_SATA_CAPS_H_DMAAA 0x00000010 /* Auto-activation */ +#define CTS_SATA_CAPS_D 0xffff0000 +#define CTS_SATA_CAPS_D_PMREQ 0x00010000 +#define CTS_SATA_CAPS_D_APST 0x00020000 }; /* Get/Set transfer rate/width/disconnection/tag queueing settings */ diff --git a/sys/cam/cam_xpt.c b/sys/cam/cam_xpt.c index 1812edbf70e..5d83539c899 100644 --- a/sys/cam/cam_xpt.c +++ b/sys/cam/cam_xpt.c @@ -467,34 +467,7 @@ xptioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flag, struct thread *td ccb = xpt_alloc_ccb(); CAM_SIM_LOCK(bus->sim); - /* Ensure passed in target/lun supported on this bus. 
*/ - if ((inccb->ccb_h.target_id != CAM_TARGET_WILDCARD) || - (inccb->ccb_h.target_lun != CAM_LUN_WILDCARD)) { - if (xpt_create_path(&ccb->ccb_h.path, - xpt_periph, - inccb->ccb_h.path_id, - CAM_TARGET_WILDCARD, - CAM_LUN_WILDCARD) != CAM_REQ_CMP) { - error = EINVAL; - CAM_SIM_UNLOCK(bus->sim); - xpt_free_ccb(ccb); - break; - } - xpt_setup_ccb(&ccb->ccb_h, ccb->ccb_h.path, - inccb->ccb_h.pinfo.priority); - ccb->ccb_h.func_code = XPT_PATH_INQ; - xpt_action(ccb); - xpt_free_path(ccb->ccb_h.path); - if ((inccb->ccb_h.target_id != CAM_TARGET_WILDCARD && - inccb->ccb_h.target_id > ccb->cpi.max_target) || - (inccb->ccb_h.target_lun != CAM_LUN_WILDCARD && - inccb->ccb_h.target_lun > ccb->cpi.max_lun)) { - error = EINVAL; - CAM_SIM_UNLOCK(bus->sim); - xpt_free_ccb(ccb); - break; - } - } + /* * Create a path using the bus, target, and lun the * user passed in. @@ -4901,6 +4874,8 @@ camisr_runqueue(void *V_queue) if ((dev->flags & CAM_DEV_TAG_AFTER_COUNT) != 0 && (--dev->tag_delay_count == 0)) xpt_start_tags(ccb_h->path); + if (!device_is_send_queued(dev)) + xpt_schedule_dev_sendq(ccb_h->path->bus, dev); } if (ccb_h->status & CAM_RELEASE_SIMQ) { diff --git a/sys/cam/scsi/scsi_cd.c b/sys/cam/scsi/scsi_cd.c index 1e5be3c5886..d09d129a947 100644 --- a/sys/cam/scsi/scsi_cd.c +++ b/sys/cam/scsi/scsi_cd.c @@ -2773,8 +2773,12 @@ cdcheckmedia(struct cam_periph *periph) softc->flags &= ~(CD_FLAG_VALID_MEDIA|CD_FLAG_VALID_TOC); cdprevent(periph, PR_ALLOW); return (error); - } else + } else { softc->flags |= CD_FLAG_VALID_MEDIA; + softc->disk->d_sectorsize = softc->params.blksize; + softc->disk->d_mediasize = + (off_t)softc->params.blksize * softc->params.disksize; + } /* * Now we check the table of contents. 
This (currently) is only @@ -2863,9 +2867,6 @@ cdcheckmedia(struct cam_periph *periph) } softc->flags |= CD_FLAG_VALID_TOC; - softc->disk->d_sectorsize = softc->params.blksize; - softc->disk->d_mediasize = - (off_t)softc->params.blksize * softc->params.disksize; bailout: diff --git a/sys/cddl/compat/opensolaris/kern/opensolaris_taskq.c b/sys/cddl/compat/opensolaris/kern/opensolaris_taskq.c index 584be24100e..1e3b1eff391 100644 --- a/sys/cddl/compat/opensolaris/kern/opensolaris_taskq.c +++ b/sys/cddl/compat/opensolaris/kern/opensolaris_taskq.c @@ -52,9 +52,9 @@ static void system_taskq_init(void *arg) { - system_taskq = (taskq_t *)taskqueue_thread; taskq_zone = uma_zcreate("taskq_zone", sizeof(struct ostask), NULL, NULL, NULL, NULL, 0, 0); + system_taskq = taskq_create("system_taskq", mp_ncpus, 0, 0, 0, 0); } SYSINIT(system_taskq_init, SI_SUB_CONFIGURE, SI_ORDER_ANY, system_taskq_init, NULL); @@ -62,6 +62,7 @@ static void system_taskq_fini(void *arg) { + taskq_destroy(system_taskq); uma_zdestroy(taskq_zone); } SYSUNINIT(system_taskq_fini, SI_SUB_CONFIGURE, SI_ORDER_ANY, system_taskq_fini, NULL); @@ -72,10 +73,8 @@ taskq_create(const char *name, int nthreads, pri_t pri, int minalloc __unused, { taskq_t *tq; - if ((flags & TASKQ_THREADS_CPU_PCT) != 0) { - /* TODO: Calculate number od threads. 
*/ - printf("%s: TASKQ_THREADS_CPU_PCT\n", __func__); - } + if ((flags & TASKQ_THREADS_CPU_PCT) != 0) + nthreads = MAX((mp_ncpus * nthreads) / 100, 1); tq = kmem_alloc(sizeof(*tq), KM_SLEEP); tq->tq_queue = taskqueue_create(name, M_WAITOK, taskqueue_thread_enqueue, @@ -85,6 +84,14 @@ taskq_create(const char *name, int nthreads, pri_t pri, int minalloc __unused, return ((taskq_t *)tq); } +taskq_t * +taskq_create_proc(const char *name, int nthreads, pri_t pri, int minalloc, + int maxalloc, proc_t *proc __unused, uint_t flags) +{ + + return (taskq_create(name, nthreads, pri, minalloc, maxalloc, flags)); +} + void taskq_destroy(taskq_t *tq) { diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c index ca8ffb1ea13..ceb4c87f2f6 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c @@ -131,7 +131,6 @@ #include #include -#include #include static kmutex_t arc_reclaim_thr_lock; @@ -238,7 +237,7 @@ struct arcs_lock { */ #define ARC_BUFC_NUMDATALISTS 16 #define ARC_BUFC_NUMMETADATALISTS 16 -#define ARC_BUFC_NUMLISTS (ARC_BUFC_NUMMETADATALISTS+ARC_BUFC_NUMDATALISTS) +#define ARC_BUFC_NUMLISTS (ARC_BUFC_NUMMETADATALISTS + ARC_BUFC_NUMDATALISTS) typedef struct arc_state { uint64_t arcs_lsize[ARC_BUFC_NUMTYPES]; /* amount of evictable data */ @@ -247,7 +246,7 @@ typedef struct arc_state { struct arcs_lock arcs_locks[ARC_BUFC_NUMLISTS] __aligned(CACHE_LINE_SIZE); } arc_state_t; -#define ARCS_LOCK(s, i) &((s)->arcs_locks[(i)].arcs_lock) +#define ARCS_LOCK(s, i) (&((s)->arcs_locks[(i)].arcs_lock)) /* The 6 states: */ static arc_state_t ARC_anon; @@ -307,9 +306,9 @@ typedef struct arc_stats { kstat_named_t arcstat_l2_hdr_size; kstat_named_t arcstat_memory_throttle_count; kstat_named_t arcstat_l2_write_trylock_fail; - kstat_named_t arcstat_l2_write_in_l2; kstat_named_t arcstat_l2_write_passed_headroom; kstat_named_t arcstat_l2_write_spa_mismatch; + 
kstat_named_t arcstat_l2_write_in_l2; kstat_named_t arcstat_l2_write_hdr_io_in_progress; kstat_named_t arcstat_l2_write_not_cacheable; kstat_named_t arcstat_l2_write_full; @@ -370,19 +369,19 @@ static arc_stats_t arc_stats = { { "l2_size", KSTAT_DATA_UINT64 }, { "l2_hdr_size", KSTAT_DATA_UINT64 }, { "memory_throttle_count", KSTAT_DATA_UINT64 }, - { "l2_write_trylock_fail", KSTAT_DATA_UINT64 }, - { "l2_write_in_l2", KSTAT_DATA_UINT64 }, - { "l2_write_passed_headroom", KSTAT_DATA_UINT64 }, - { "l2_write_spa_mismatch", KSTAT_DATA_UINT64 }, - { "l2_write_io_in_progress", KSTAT_DATA_UINT64 }, - { "l2_write_not_cacheable", KSTAT_DATA_UINT64 }, - { "l2_write_full", KSTAT_DATA_UINT64 }, - { "l2_write_buffer_iter", KSTAT_DATA_UINT64 }, - { "l2_write_pios", KSTAT_DATA_UINT64 }, - { "l2_write_bytes_written", KSTAT_DATA_UINT64 }, - { "l2_write_buffer_bytes_scanned", KSTAT_DATA_UINT64 }, - { "l2_write_buffer_list_iter", KSTAT_DATA_UINT64 }, - { "l2_write_buffer_list_null_iter", KSTAT_DATA_UINT64 } + { "l2_write_trylock_fail", KSTAT_DATA_UINT64 }, + { "l2_write_passed_headroom", KSTAT_DATA_UINT64 }, + { "l2_write_spa_mismatch", KSTAT_DATA_UINT64 }, + { "l2_write_in_l2", KSTAT_DATA_UINT64 }, + { "l2_write_io_in_progress", KSTAT_DATA_UINT64 }, + { "l2_write_not_cacheable", KSTAT_DATA_UINT64 }, + { "l2_write_full", KSTAT_DATA_UINT64 }, + { "l2_write_buffer_iter", KSTAT_DATA_UINT64 }, + { "l2_write_pios", KSTAT_DATA_UINT64 }, + { "l2_write_bytes_written", KSTAT_DATA_UINT64 }, + { "l2_write_buffer_bytes_scanned", KSTAT_DATA_UINT64 }, + { "l2_write_buffer_list_iter", KSTAT_DATA_UINT64 }, + { "l2_write_buffer_list_null_iter", KSTAT_DATA_UINT64 } }; #define ARCSTAT(stat) (arc_stats.stat.value.ui64) @@ -390,7 +389,7 @@ static arc_stats_t arc_stats = { #define ARCSTAT_INCR(stat, val) \ atomic_add_64(&arc_stats.stat.value.ui64, (val)); -#define ARCSTAT_BUMP(stat) ARCSTAT_INCR(stat, 1) +#define ARCSTAT_BUMP(stat) ARCSTAT_INCR(stat, 1) #define ARCSTAT_BUMPDOWN(stat) ARCSTAT_INCR(stat, -1) 
#define ARCSTAT_MAX(stat, val) { \ @@ -424,7 +423,7 @@ static arc_stats_t arc_stats = { } kstat_t *arc_ksp; -static arc_state_t *arc_anon; +static arc_state_t *arc_anon; static arc_state_t *arc_mru; static arc_state_t *arc_mru_ghost; static arc_state_t *arc_mfu; @@ -1076,10 +1075,10 @@ get_buf_info(arc_buf_hdr_t *ab, arc_state_t *state, list_t **list, kmutex_t **lo { uint64_t buf_hashid = buf_hash(ab->b_spa, &ab->b_dva, ab->b_birth); - if (ab->b_type == ARC_BUFC_METADATA) - buf_hashid &= (ARC_BUFC_NUMMETADATALISTS-1); + if (ab->b_type == ARC_BUFC_METADATA) + buf_hashid &= (ARC_BUFC_NUMMETADATALISTS - 1); else { - buf_hashid &= (ARC_BUFC_NUMDATALISTS-1); + buf_hashid &= (ARC_BUFC_NUMDATALISTS - 1); buf_hashid += ARC_BUFC_NUMMETADATALISTS; } @@ -1096,18 +1095,16 @@ add_reference(arc_buf_hdr_t *ab, kmutex_t *hash_lock, void *tag) if ((refcount_add(&ab->b_refcnt, tag) == 1) && (ab->b_state != arc_anon)) { - list_t *list; - kmutex_t *lock; uint64_t delta = ab->b_size * ab->b_datacnt; uint64_t *size = &ab->b_state->arcs_lsize[ab->b_type]; + list_t *list; + kmutex_t *lock; get_buf_info(ab, ab->b_state, &list, &lock); ASSERT(!MUTEX_HELD(lock)); mutex_enter(lock); ASSERT(list_link_active(&ab->b_arc_node)); list_remove(list, ab); - mutex_exit(lock); - if (GHOST_STATE(ab->b_state)) { ASSERT3U(ab->b_datacnt, ==, 0); ASSERT3P(ab->b_buf, ==, NULL); @@ -1116,6 +1113,7 @@ add_reference(arc_buf_hdr_t *ab, kmutex_t *hash_lock, void *tag) ASSERT(delta > 0); ASSERT3U(*size, >=, delta); atomic_add_64(size, -delta); + mutex_exit(lock); /* remove the prefetch flag if we get a reference */ if (ab->b_flags & ARC_PREFETCH) ab->b_flags &= ~ARC_PREFETCH; @@ -1138,15 +1136,13 @@ remove_reference(arc_buf_hdr_t *ab, kmutex_t *hash_lock, void *tag) kmutex_t *lock; get_buf_info(ab, state, &list, &lock); - ASSERT(!MUTEX_HELD(lock)); mutex_enter(lock); ASSERT(!list_link_active(&ab->b_arc_node)); list_insert_head(list, ab); - mutex_exit(lock); - ASSERT(ab->b_datacnt > 0); atomic_add_64(size, 
ab->b_size * ab->b_datacnt); + mutex_exit(lock); } return (cnt); } @@ -1182,7 +1178,6 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *ab, kmutex_t *hash_lock) get_buf_info(ab, old_state, &list, &lock); use_mutex = !MUTEX_HELD(lock); - if (use_mutex) mutex_enter(lock); @@ -1205,13 +1200,11 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *ab, kmutex_t *hash_lock) mutex_exit(lock); } if (new_state != arc_anon) { - int use_mutex; + int use_mutex; uint64_t *size = &new_state->arcs_lsize[ab->b_type]; get_buf_info(ab, new_state, &list, &lock); use_mutex = !MUTEX_HELD(lock); - - if (use_mutex) mutex_enter(lock); @@ -1629,7 +1622,7 @@ arc_evict(arc_state_t *state, spa_t *spa, int64_t bytes, boolean_t recycle, ASSERT(state == arc_mru || state == arc_mfu); evicted_state = (state == arc_mru) ? arc_mru_ghost : arc_mfu_ghost; - + if (type == ARC_BUFC_METADATA) { offset = 0; list_count = ARC_BUFC_NUMMETADATALISTS; @@ -1638,7 +1631,6 @@ arc_evict(arc_state_t *state, spa_t *spa, int64_t bytes, boolean_t recycle, idx = evict_metadata_offset; } else { offset = ARC_BUFC_NUMMETADATALISTS; - list_start = &state->arcs_lists[offset]; evicted_list_start = &evicted_state->arcs_lists[offset]; list_count = ARC_BUFC_NUMDATALISTS; @@ -1646,12 +1638,12 @@ arc_evict(arc_state_t *state, spa_t *spa, int64_t bytes, boolean_t recycle, } bytes_remaining = evicted_state->arcs_lsize[type]; count = 0; - + evict_start: list = &list_start[idx]; evicted_list = &evicted_list_start[idx]; lock = ARCS_LOCK(state, (offset + idx)); - evicted_lock = ARCS_LOCK(evicted_state, (offset + idx)); + evicted_lock = ARCS_LOCK(evicted_state, (offset + idx)); mutex_enter(lock); mutex_enter(evicted_lock); @@ -1721,7 +1713,7 @@ evict_start: if (bytes_remaining > 0) { mutex_exit(evicted_lock); mutex_exit(lock); - idx = ((idx + 1)&(list_count-1)); + idx = ((idx + 1) & (list_count - 1)); count++; goto evict_start; } @@ -1732,8 +1724,8 @@ evict_start: mutex_exit(evicted_lock); mutex_exit(lock); - - idx = ((idx 
+ 1)&(list_count-1)); + + idx = ((idx + 1) & (list_count - 1)); count++; if (bytes_evicted < bytes) { @@ -1743,11 +1735,11 @@ evict_start: dprintf("only evicted %lld bytes from %x", (longlong_t)bytes_evicted, state); } - if (type == ARC_BUFC_METADATA) + if (type == ARC_BUFC_METADATA) evict_metadata_offset = idx; else evict_data_offset = idx; - + if (skipped) ARCSTAT_INCR(arcstat_evict_skip, skipped); @@ -1804,7 +1796,7 @@ arc_evict_ghost(arc_state_t *state, spa_t *spa, int64_t bytes) list_start = &state->arcs_lists[ARC_BUFC_NUMMETADATALISTS]; list_count = ARC_BUFC_NUMDATALISTS; offset = ARC_BUFC_NUMMETADATALISTS; - + evict_start: list = &list_start[idx]; lock = ARCS_LOCK(state, idx + offset); @@ -1851,12 +1843,12 @@ evict_start: } } mutex_exit(lock); - idx = ((idx + 1)&(ARC_BUFC_NUMDATALISTS-1)); + idx = ((idx + 1) & (ARC_BUFC_NUMDATALISTS - 1)); count++; - + if (count < list_count) goto evict_start; - + evict_offset = idx; if ((uintptr_t)list > (uintptr_t)&state->arcs_lists[ARC_BUFC_NUMMETADATALISTS] && (bytes < 0 || bytes_deleted < bytes)) { @@ -1945,7 +1937,7 @@ arc_do_user_evicts(void) /* * Move list over to avoid LOR */ -restart: +restart: mutex_enter(&arc_eviction_mtx); tmp_arc_eviction_list = arc_eviction_list; arc_eviction_list = NULL; @@ -2056,7 +2048,7 @@ arc_reclaim_needed(void) return (0); /* - * If pages are needed or we're within 2048 pages + * If pages are needed or we're within 2048 pages * of needing to page need to reclaim */ if (vm_pages_needed || (vm_paging_target() > -2048)) @@ -2614,10 +2606,7 @@ arc_read_done(zio_t *zio) hdr->b_flags &= ~ARC_L2_EVICTED; if (l2arc_noprefetch && (hdr->b_flags & ARC_PREFETCH)) hdr->b_flags &= ~ARC_L2CACHE; -#if 0 - else if ((hdr->b_flags & ARC_PREFETCH) == 0) - hdr->b_flags |= ARC_L2CACHE; -#endif + /* byteswap if necessary */ callback_list = hdr->b_acb; ASSERT(callback_list != NULL); @@ -2731,7 +2720,6 @@ arc_read(zio_t *pio, spa_t *spa, blkptr_t *bp, arc_buf_t *pbuf, uint32_t *arc_flags, const zbookmark_t *zb) 
{ int err; - arc_buf_hdr_t *hdr = pbuf->b_hdr; ASSERT(!refcount_is_zero(&pbuf->b_hdr->b_refcnt)); ASSERT3U((char *)bp - (char *)pbuf->b_data, <, pbuf->b_hdr->b_size); @@ -2739,8 +2727,6 @@ arc_read(zio_t *pio, spa_t *spa, blkptr_t *bp, arc_buf_t *pbuf, err = arc_read_nolock(pio, spa, bp, done, private, priority, zio_flags, arc_flags, zb); - - ASSERT3P(hdr, ==, pbuf->b_hdr); rw_exit(&pbuf->b_lock); return (err); } @@ -2957,7 +2943,7 @@ top: * released by l2arc_read_done(). */ rzio = zio_read_phys(pio, vd, addr, size, - buf->b_data, ZIO_CHECKSUM_OFF, + buf->b_data, ZIO_CHECKSUM_OFF, l2arc_read_done, cb, priority, zio_flags | ZIO_FLAG_DONT_CACHE | ZIO_FLAG_CANFAIL | ZIO_FLAG_DONT_PROPAGATE | @@ -3054,7 +3040,7 @@ arc_buf_evict(arc_buf_t *buf) arc_buf_t **bufp; list_t *list, *evicted_list; kmutex_t *lock, *evicted_lock; - + rw_enter(&buf->b_lock, RW_WRITER); hdr = buf->b_hdr; if (hdr == NULL) { @@ -3729,7 +3715,6 @@ arc_init(void) arc_size = 0; for (i = 0; i < ARC_BUFC_NUMLISTS; i++) { - mutex_init(&arc_anon->arcs_locks[i].arcs_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&arc_mru->arcs_locks[i].arcs_lock, @@ -3742,7 +3727,7 @@ arc_init(void) NULL, MUTEX_DEFAULT, NULL); mutex_init(&arc_l2c_only->arcs_locks[i].arcs_lock, NULL, MUTEX_DEFAULT, NULL); - + list_create(&arc_mru->arcs_lists[i], sizeof (arc_buf_hdr_t), offsetof(arc_buf_hdr_t, b_arc_node)); list_create(&arc_mru_ghost->arcs_lists[i], @@ -3792,7 +3777,7 @@ arc_init(void) #ifdef _KERNEL if (TUNABLE_INT_FETCH("vfs.zfs.prefetch_disable", &zfs_prefetch_disable)) prefetch_tunable_set = 1; - + #ifdef __i386__ if (prefetch_tunable_set == 0) { printf("ZFS NOTICE: Prefetch is disabled by default on i386 " @@ -3801,7 +3786,7 @@ arc_init(void) "to /boot/loader.conf.\n"); zfs_prefetch_disable=1; } -#else +#else if ((((uint64_t)physmem * PAGESIZE) < (1ULL << 32)) && prefetch_tunable_set == 0) { printf("ZFS NOTICE: Prefetch is disabled by default if less " @@ -3810,7 +3795,7 @@ arc_init(void) "to /boot/loader.conf.\n"); 
zfs_prefetch_disable=1; } -#endif +#endif /* Warn about ZFS memory and address space requirements. */ if (((uint64_t)physmem * PAGESIZE) < (256 + 128 + 64) * (1 << 20)) { printf("ZFS WARNING: Recommended minimum RAM size is 512MB; " @@ -3830,7 +3815,7 @@ void arc_fini(void) { int i; - + mutex_enter(&arc_reclaim_thr_lock); arc_thread_exit = 1; cv_signal(&arc_reclaim_thr_cv); @@ -3856,14 +3841,16 @@ arc_fini(void) list_destroy(&arc_mru_ghost->arcs_lists[i]); list_destroy(&arc_mfu->arcs_lists[i]); list_destroy(&arc_mfu_ghost->arcs_lists[i]); + list_destroy(&arc_l2c_only->arcs_lists[i]); mutex_destroy(&arc_anon->arcs_locks[i].arcs_lock); mutex_destroy(&arc_mru->arcs_locks[i].arcs_lock); mutex_destroy(&arc_mru_ghost->arcs_locks[i].arcs_lock); mutex_destroy(&arc_mfu->arcs_locks[i].arcs_lock); mutex_destroy(&arc_mfu_ghost->arcs_locks[i].arcs_lock); + mutex_destroy(&arc_l2c_only->arcs_locks[i].arcs_lock); } - + mutex_destroy(&zfs_write_limit_lock); buf_fini(); @@ -4259,18 +4246,18 @@ l2arc_list_locked(int list_num, kmutex_t **lock) { list_t *list; int idx; - - ASSERT(list_num >= 0 && list_num < 2*ARC_BUFC_NUMLISTS); + + ASSERT(list_num >= 0 && list_num < 2 * ARC_BUFC_NUMLISTS); if (list_num < ARC_BUFC_NUMMETADATALISTS) { idx = list_num; list = &arc_mfu->arcs_lists[idx]; *lock = ARCS_LOCK(arc_mfu, idx); - } else if (list_num < ARC_BUFC_NUMMETADATALISTS*2) { + } else if (list_num < ARC_BUFC_NUMMETADATALISTS * 2) { idx = list_num - ARC_BUFC_NUMMETADATALISTS; list = &arc_mru->arcs_lists[idx]; *lock = ARCS_LOCK(arc_mru, idx); - } else if (list_num < (ARC_BUFC_NUMMETADATALISTS*2 + + } else if (list_num < (ARC_BUFC_NUMMETADATALISTS * 2 + ARC_BUFC_NUMDATALISTS)) { idx = list_num - ARC_BUFC_NUMMETADATALISTS; list = &arc_mfu->arcs_lists[idx]; @@ -4281,8 +4268,6 @@ l2arc_list_locked(int list_num, kmutex_t **lock) *lock = ARCS_LOCK(arc_mru, idx); } - CTR3(KTR_SPARE2, "list=%p list_num=%d idx=%d", - list, list_num, idx); ASSERT(!(MUTEX_HELD(*lock))); mutex_enter(*lock); return (list); 
@@ -4452,7 +4437,7 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz) * Copy buffers for L2ARC writing. */ mutex_enter(&l2arc_buflist_mtx); - for (try = 0; try < 2*ARC_BUFC_NUMLISTS; try++) { + for (try = 0; try < 2 * ARC_BUFC_NUMLISTS; try++) { list = l2arc_list_locked(try, &list_lock); passed_sz = 0; ARCSTAT_BUMP(arcstat_l2_write_buffer_list_iter); @@ -4468,9 +4453,8 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz) ab = list_head(list); else ab = list_tail(list); - if (ab == NULL) { + if (ab == NULL) ARCSTAT_BUMP(arcstat_l2_write_buffer_list_null_iter); - } for (; ab; ab = ab_prev) { if (arc_warm == B_FALSE) @@ -4478,7 +4462,7 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz) else ab_prev = list_prev(list, ab); ARCSTAT_INCR(arcstat_l2_write_buffer_bytes_scanned, ab->b_size); - + hash_lock = HDR_LOCK(ab); have_lock = MUTEX_HELD(hash_lock); if (!have_lock && !mutex_tryenter(hash_lock)) { @@ -4562,6 +4546,7 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz) hdrl2->b_dev = dev; hdrl2->b_daddr = dev->l2ad_hand; + ab->b_flags |= ARC_L2_WRITING; ab->b_l2hdr = hdrl2; list_insert_head(dev->l2ad_buflist, ab); buf_data = ab->b_buf->b_data; diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c index 3bf0939a521..053c1e1baa2 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c @@ -464,15 +464,15 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t *flags) ASSERT(db->db_buf == NULL); if (db->db_blkid == DB_BONUS_BLKID) { - int bonuslen = dn->dn_bonuslen; + int bonuslen = MIN(dn->dn_bonuslen, dn->dn_phys->dn_bonuslen); ASSERT3U(bonuslen, <=, db->db.db_size); db->db.db_data = zio_buf_alloc(DN_MAX_BONUSLEN); arc_space_consume(DN_MAX_BONUSLEN); if (bonuslen < DN_MAX_BONUSLEN) bzero(db->db.db_data, DN_MAX_BONUSLEN); - bcopy(DN_BONUS(dn->dn_phys), 
db->db.db_data, - bonuslen); + if (bonuslen) + bcopy(DN_BONUS(dn->dn_phys), db->db.db_data, bonuslen); dbuf_update_data(db); db->db_state = DB_CACHED; mutex_exit(&db->db_mtx); diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_object.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_object.c index 25dfafd4f2c..1f91fc1ad36 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_object.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_object.c @@ -128,15 +128,6 @@ dmu_object_reclaim(objset_t *os, uint64_t object, dmu_object_type_t ot, return (0); } - tx = dmu_tx_create(os); - dmu_tx_hold_bonus(tx, object); - err = dmu_tx_assign(tx, TXG_WAIT); - if (err) { - dmu_tx_abort(tx); - dnode_rele(dn, FTAG); - return (err); - } - nblkptr = 1 + ((DN_MAX_BONUSLEN - bonuslen) >> SPA_BLKPTRSHIFT); /* @@ -144,16 +135,27 @@ dmu_object_reclaim(objset_t *os, uint64_t object, dmu_object_type_t ot, * be a new file instance. We must clear out the previous file * contents before we can change this type of metadata in the dnode. 
*/ - if (dn->dn_nblkptr > nblkptr || dn->dn_datablksz != blocksize) - dmu_free_long_range(os, object, 0, DMU_OBJECT_END); + if (dn->dn_nblkptr > nblkptr || dn->dn_datablksz != blocksize) { + err = dmu_free_long_range(os, object, 0, DMU_OBJECT_END); + if (err) + goto out; + } + + tx = dmu_tx_create(os); + dmu_tx_hold_bonus(tx, object); + err = dmu_tx_assign(tx, TXG_WAIT); + if (err) { + dmu_tx_abort(tx); + goto out; + } dnode_reallocate(dn, ot, blocksize, bonustype, bonuslen, tx); dmu_tx_commit(tx); - +out: dnode_rele(dn, FTAG); - return (0); + return (err); } int diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_objset.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_objset.c index 7981e06825c..c9e00d51151 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_objset.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_objset.c @@ -1213,6 +1213,39 @@ dmu_objset_find_spa(spa_t *spa, const char *name, return (err); } +/* ARGSUSED */ +int +dmu_objset_prefetch(char *name, void *arg) +{ + dsl_dataset_t *ds; + + if (dsl_dataset_hold(name, FTAG, &ds)) + return (0); + + if (!BP_IS_HOLE(&ds->ds_phys->ds_bp)) { + mutex_enter(&ds->ds_opening_lock); + if (!dsl_dataset_get_user_ptr(ds)) { + uint32_t aflags = ARC_NOWAIT | ARC_PREFETCH; + zbookmark_t zb; + + zb.zb_objset = ds->ds_object; + zb.zb_object = 0; + zb.zb_level = -1; + zb.zb_blkid = 0; + + (void) arc_read_nolock(NULL, dsl_dataset_get_spa(ds), + &ds->ds_phys->ds_bp, NULL, NULL, + ZIO_PRIORITY_ASYNC_READ, + ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE, + &aflags, &zb); + } + mutex_exit(&ds->ds_opening_lock); + } + + dsl_dataset_rele(ds, FTAG); + return (0); +} + void dmu_objset_set_user(objset_t *os, void *user_ptr) { diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_pool.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_pool.c index 03af3d1583c..58a79cac24b 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_pool.c +++ 
b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_pool.c @@ -300,6 +300,7 @@ dsl_pool_sync(dsl_pool_t *dp, uint64_t txg) tx = dmu_tx_create_assigned(dp, txg); dp->dp_read_overhead = 0; + start = gethrtime(); zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED); while (ds = txg_list_remove(&dp->dp_dirty_datasets, txg)) { if (!list_link_active(&ds->ds_synced_link)) @@ -310,7 +311,6 @@ dsl_pool_sync(dsl_pool_t *dp, uint64_t txg) } DTRACE_PROBE(pool_sync__1setup); - start = gethrtime(); err = zio_wait(zio); write_time = gethrtime() - start; ASSERT(err == 0); diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c index 90861bab4c7..888b8829dd0 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c @@ -2564,11 +2564,12 @@ spa_tryimport(nvlist_t *tryconfig) * The act of destroying or exporting a pool is very simple. We make sure there * is no more pending I/O and any references to the pool are gone. Then, we * update the pool state and sync all the labels to disk, removing the - * configuration from the cache afterwards. + * configuration from the cache afterwards. If the 'hardforce' flag is set, then + * we don't sync the labels or remove the configuration cache. */ static int spa_export_common(char *pool, int new_state, nvlist_t **oldconfig, - boolean_t force) + boolean_t force, boolean_t hardforce) { spa_t *spa; @@ -2636,7 +2637,7 @@ spa_export_common(char *pool, int new_state, nvlist_t **oldconfig, * so mark them all dirty. spa_unload() will do the * final sync that pushes these changes out. 
*/ - if (new_state != POOL_STATE_UNINITIALIZED) { + if (new_state != POOL_STATE_UNINITIALIZED && !hardforce) { spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); spa->spa_state = new_state; spa->spa_final_txg = spa_last_synced_txg(spa) + 1; @@ -2656,7 +2657,8 @@ spa_export_common(char *pool, int new_state, nvlist_t **oldconfig, VERIFY(nvlist_dup(spa->spa_config, oldconfig, 0) == 0); if (new_state != POOL_STATE_UNINITIALIZED) { - spa_config_sync(spa, B_TRUE, B_TRUE); + if (!hardforce) + spa_config_sync(spa, B_TRUE, B_TRUE); spa_remove(spa); } mutex_exit(&spa_namespace_lock); @@ -2670,16 +2672,19 @@ spa_export_common(char *pool, int new_state, nvlist_t **oldconfig, int spa_destroy(char *pool) { - return (spa_export_common(pool, POOL_STATE_DESTROYED, NULL, B_FALSE)); + return (spa_export_common(pool, POOL_STATE_DESTROYED, NULL, + B_FALSE, B_FALSE)); } /* * Export a storage pool. */ int -spa_export(char *pool, nvlist_t **oldconfig, boolean_t force) +spa_export(char *pool, nvlist_t **oldconfig, boolean_t force, + boolean_t hardforce) { - return (spa_export_common(pool, POOL_STATE_EXPORTED, oldconfig, force)); + return (spa_export_common(pool, POOL_STATE_EXPORTED, oldconfig, + force, hardforce)); } /* @@ -2690,7 +2695,7 @@ int spa_reset(char *pool) { return (spa_export_common(pool, POOL_STATE_UNINITIALIZED, NULL, - B_FALSE)); + B_FALSE, B_FALSE)); } /* diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/arc.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/arc.h index 3ca7249305a..f3e00877a8e 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/arc.h +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/arc.h @@ -55,8 +55,8 @@ struct arc_buf { }; typedef enum arc_buf_contents { - ARC_BUFC_METADATA, /* buffer contains metadata */ ARC_BUFC_DATA, /* buffer contains data */ + ARC_BUFC_METADATA, /* buffer contains metadata */ ARC_BUFC_NUMTYPES } arc_buf_contents_t; /* diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_objset.h 
b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_objset.h index 15df29a1779..1d65727808c 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_objset.h +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_objset.h @@ -26,8 +26,6 @@ #ifndef _SYS_DMU_OBJSET_H #define _SYS_DMU_OBJSET_H -#pragma ident "%Z%%M% %I% %E% SMI" - #include #include #include @@ -118,6 +116,7 @@ int dmu_objset_find(char *name, int func(char *, void *), void *arg, int flags); int dmu_objset_find_spa(spa_t *spa, const char *name, int func(spa_t *, uint64_t, const char *, void *), void *arg, int flags); +int dmu_objset_prefetch(char *name, void *arg); void dmu_objset_byteswap(void *buf, size_t size); int dmu_objset_evict_dbufs(objset_t *os); diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa.h index 99bcb915911..b0b758b3716 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa.h +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa.h @@ -333,7 +333,8 @@ extern int spa_import(const char *pool, nvlist_t *config, nvlist_t *props); extern int spa_import_faulted(const char *, nvlist_t *, nvlist_t *); extern nvlist_t *spa_tryimport(nvlist_t *tryconfig); extern int spa_destroy(char *pool); -extern int spa_export(char *pool, nvlist_t **oldconfig, boolean_t force); +extern int spa_export(char *pool, nvlist_t **oldconfig, boolean_t force, + boolean_t hardforce); extern int spa_reset(char *pool); extern void spa_async_request(spa_t *spa, int flag); extern void spa_async_unrequest(spa_t *spa, int flag); diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/txg.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/txg.c index 040e4d70fc0..8650fa117e1 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/txg.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/txg.c @@ -38,15 +38,22 @@ static void txg_quiesce_thread(void *arg); int zfs_txg_timeout = 30; /* max seconds worth 
of delta per txg */ extern int zfs_txg_synctime; +extern uint64_t zfs_write_limit_override; SYSCTL_DECL(_vfs_zfs); -SYSCTL_NODE(_vfs_zfs, OID_AUTO, txg, CTLFLAG_RW, 0, "ZFS TXG"); +SYSCTL_NODE(_vfs_zfs, OID_AUTO, txg, CTLFLAG_RW, 0, + "ZFS transaction groups (TXG)"); TUNABLE_INT("vfs.zfs.txg.timeout", &zfs_txg_timeout); SYSCTL_INT(_vfs_zfs_txg, OID_AUTO, timeout, CTLFLAG_RDTUN, &zfs_txg_timeout, 0, "Maximum seconds worth of delta per txg"); TUNABLE_INT("vfs.zfs.txg.synctime", &zfs_txg_synctime); SYSCTL_INT(_vfs_zfs_txg, OID_AUTO, synctime, CTLFLAG_RDTUN, &zfs_txg_synctime, 0, "Target seconds to sync a txg"); +TUNABLE_QUAD("vfs.zfs.txg.write_limit_override", &zfs_write_limit_override); +SYSCTL_QUAD(_vfs_zfs_txg, OID_AUTO, write_limit_override, CTLFLAG_RW, + &zfs_write_limit_override, 0, + "Override maximum size of a txg to this size in bytes, " + "value of 0 means don't override"); /* * Prepare the txg subsystem. diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_acl.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_acl.c index eb93721e167..9825d83fb67 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_acl.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_acl.c @@ -2235,11 +2235,24 @@ zfs_zaccess_common(znode_t *zp, uint32_t v4_mode, uint32_t *working_mode, return (EPERM); } +#ifdef sun if ((v4_mode & (ACE_DELETE | ACE_DELETE_CHILD)) && (zp->z_phys->zp_flags & ZFS_NOUNLINK)) { *check_privs = B_FALSE; return (EPERM); } +#else + /* + * In FreeBSD we allow to modify directory's content if ZFS_NOUNLINK + * (sunlnk) is set. We just don't allow directory removal, which is + * handled in zfs_zaccess_delete(). 
+ */ + if ((v4_mode & ACE_DELETE) && + (zp->z_phys->zp_flags & ZFS_NOUNLINK)) { + *check_privs = B_FALSE; + return (EPERM); + } +#endif if (((v4_mode & (ACE_READ_DATA|ACE_EXECUTE)) && (zp->z_phys->zp_flags & ZFS_AV_QUARANTINED))) { diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_byteswap.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_byteswap.c index b6c43f4245f..cd36696f950 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_byteswap.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_byteswap.c @@ -19,12 +19,10 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ -#pragma ident "%Z%%M% %I% %E% SMI" - #include #include #include @@ -50,7 +48,6 @@ zfs_oldace_byteswap(ace_t *ace, int ace_cnt) void zfs_ace_byteswap(void *buf, size_t size, boolean_t zfs_layout) { -#ifdef TODO caddr_t end; caddr_t ptr; zfs_ace_t *zacep; @@ -64,6 +61,20 @@ zfs_ace_byteswap(void *buf, size_t size, boolean_t zfs_layout) while (ptr < end) { if (zfs_layout) { + /* + * Avoid overrun. Embedded aces can have one + * of several sizes. We don't know exactly + * how many are present, only the size of the + * buffer containing them. That size may be + * larger than needed to hold the aces + * present. As long as we do not do any + * swapping beyond the end of our block we are + * okay. It is safe to swap any non-ace data + * within the block since it is just zeros. 
+ */ + if (ptr + sizeof (zfs_ace_hdr_t) > end) { + break; + } zacep = (zfs_ace_t *)ptr; zacep->z_hdr.z_access_mask = BSWAP_32(zacep->z_hdr.z_access_mask); @@ -72,6 +83,10 @@ zfs_ace_byteswap(void *buf, size_t size, boolean_t zfs_layout) BSWAP_16(zacep->z_hdr.z_type); entry_type = zacep->z_hdr.z_flags & ACE_TYPE_FLAGS; } else { + /* Overrun avoidance */ + if (ptr + sizeof (ace_t) > end) { + break; + } acep = (ace_t *)ptr; acep->a_access_mask = BSWAP_32(acep->a_access_mask); acep->a_flags = BSWAP_16(acep->a_flags); @@ -88,8 +103,14 @@ zfs_ace_byteswap(void *buf, size_t size, boolean_t zfs_layout) break; case ACE_IDENTIFIER_GROUP: default: + /* Overrun avoidance */ if (zfs_layout) { - zacep->z_fuid = BSWAP_64(zacep->z_fuid); + if (ptr + sizeof (zfs_ace_t) <= end) { + zacep->z_fuid = BSWAP_64(zacep->z_fuid); + } else { + entry_size = sizeof (zfs_ace_t); + break; + } } switch (ace_type) { case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE: @@ -108,9 +129,6 @@ zfs_ace_byteswap(void *buf, size_t size, boolean_t zfs_layout) } ptr = ptr + entry_size; } -#else /* TODO */ - panic("%s:%u: TODO", __func__, __LINE__); -#endif /* TODO */ } /* ARGSUSED */ @@ -173,7 +191,8 @@ zfs_znode_byteswap(void *buf, size_t size) if (zp->zp_acl.z_acl_version == ZFS_ACL_VERSION) { zfs_acl_byteswap((void *)&zp->zp_acl.z_ace_data[0], ZFS_ACE_SPACE); - } else + } else { zfs_oldace_byteswap((ace_t *)&zp->zp_acl.z_ace_data[0], ACE_SLOT_CNT); + } } diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c index 00e446b82e9..c7bd8ad56c1 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c @@ -882,9 +882,10 @@ zfs_ioc_pool_export(zfs_cmd_t *zc) { int error; boolean_t force = (boolean_t)zc->zc_cookie; + boolean_t hardforce = (boolean_t)zc->zc_guid; zfs_log_history(zc); - error = spa_export(zc->zc_name, NULL, force); + error = spa_export(zc->zc_name, NULL, 
force, hardforce); return (error); } @@ -1349,6 +1350,14 @@ zfs_ioc_dataset_list_next(zfs_cmd_t *zc) (void) strlcat(zc->zc_name, "/", sizeof (zc->zc_name)); p = zc->zc_name + strlen(zc->zc_name); + if (zc->zc_cookie == 0) { + uint64_t cookie = 0; + int len = sizeof (zc->zc_name) - (p - zc->zc_name); + + while (dmu_dir_list_next(os, len, p, NULL, &cookie) == 0) + dmu_objset_prefetch(p, NULL); + } + do { error = dmu_dir_list_next(os, sizeof (zc->zc_name) - (p - zc->zc_name), p, @@ -1387,6 +1396,9 @@ zfs_ioc_snapshot_list_next(zfs_cmd_t *zc) objset_t *os; int error; + if (zc->zc_cookie == 0) + dmu_objset_find(zc->zc_name, dmu_objset_prefetch, + NULL, DS_FIND_SNAPSHOTS); error = dmu_objset_open(zc->zc_name, DMU_OST_ANY, DS_MODE_USER | DS_MODE_READONLY, &os); if (error) diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c index ad8165b3157..07c9b61bd32 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c @@ -868,13 +868,15 @@ zfs_root(vfs_t *vfsp, int flags, vnode_t **vpp) ZFS_ENTER_NOERROR(zfsvfs); error = zfs_zget(zfsvfs, zfsvfs->z_root, &rootzp); + + ZFS_EXIT(zfsvfs); + if (error == 0) { *vpp = ZTOV(rootzp); error = vn_lock(*vpp, flags); (*vpp)->v_vflag |= VV_ROOT; } - ZFS_EXIT(zfsvfs); return (error); } @@ -1143,13 +1145,13 @@ zfs_vget(vfs_t *vfsp, ino_t ino, int flags, vnode_t **vpp) VN_RELE(ZTOV(zp)); err = EINVAL; } + ZFS_EXIT(zfsvfs); if (err != 0) *vpp = NULL; else { *vpp = ZTOV(zp); vn_lock(*vpp, flags); } - ZFS_EXIT(zfsvfs); return (err); } @@ -1237,8 +1239,8 @@ zfs_fhtovp(vfs_t *vfsp, fid_t *fidp, vnode_t **vpp) } else { VN_HOLD(*vpp); } - vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY); ZFS_EXIT(zfsvfs); + vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY); return (0); } @@ -1259,10 +1261,11 @@ zfs_fhtovp(vfs_t *vfsp, fid_t *fidp, vnode_t **vpp) return (EINVAL); } + ZFS_EXIT(zfsvfs); + *vpp = ZTOV(zp); 
vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY); vnode_create_vobject(*vpp, zp->z_phys->zp_size, curthread); - ZFS_EXIT(zfsvfs); return (0); } diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c index 4f61f5f3f5e..225beca116d 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c @@ -1209,15 +1209,17 @@ zfs_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, struct componentname *cnp, ltype = VOP_ISLOCKED(dvp); VOP_UNLOCK(dvp, 0); } + ZFS_EXIT(zfsvfs); error = vn_lock(*vpp, cnp->cn_lkflags); if (cnp->cn_flags & ISDOTDOT) vn_lock(dvp, ltype | LK_RETRY); if (error != 0) { VN_RELE(*vpp); *vpp = NULL; - ZFS_EXIT(zfsvfs); return (error); } + } else { + ZFS_EXIT(zfsvfs); } #ifdef FREEBSD_NAMECACHE @@ -1237,8 +1239,6 @@ zfs_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, struct componentname *cnp, } #endif - ZFS_EXIT(zfsvfs); - return (error); } @@ -3977,6 +3977,9 @@ zfs_freebsd_write(ap) } */ *ap; { + if (vn_rlimit_fsize(ap->a_vp, ap->a_uio, ap->a_uio->uio_td)) + return (EFBIG); + return (zfs_write(ap->a_vp, ap->a_uio, ap->a_ioflag, ap->a_cred, NULL)); } diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c index 900087b8b32..947f9dd39f4 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c @@ -704,6 +704,8 @@ zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr, DMU_OT_ZNODE, sizeof (znode_phys_t) + bonuslen, tx); } } + + ZFS_OBJ_HOLD_ENTER(zfsvfs, obj); VERIFY(0 == dmu_bonus_hold(zfsvfs->z_os, obj, NULL, &db)); dmu_buf_will_dirty(db, tx); @@ -765,9 +767,7 @@ zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr, pzp->zp_mode = MAKEIMODE(vap->va_type, vap->va_mode); if (!(flag & IS_ROOT_NODE)) { - ZFS_OBJ_HOLD_ENTER(zfsvfs, obj); *zpp = 
zfs_znode_alloc(zfsvfs, db, 0); - ZFS_OBJ_HOLD_EXIT(zfsvfs, obj); } else { /* * If we are creating the root node, the "parent" we @@ -776,6 +776,7 @@ zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr, *zpp = dzp; } zfs_perm_init(*zpp, dzp, flag, vap, tx, cr, setaclp, fuidp); + ZFS_OBJ_HOLD_EXIT(zfsvfs, obj); if (!(flag & IS_ROOT_NODE)) { vnode_t *vp; @@ -939,19 +940,31 @@ again: /* * Not found create new znode/vnode + * but only if file exists. + * + * There is a small window where zfs_vget() could + * find this object while a file create is still in + * progress. Since a gen number can never be zero + * we will check that to determine if it's an allocated + * file. */ - zp = zfs_znode_alloc(zfsvfs, db, doi.doi_data_block_size); - - vp = ZTOV(zp); - vp->v_vflag |= VV_FORCEINSMQ; - err = insmntque(vp, zfsvfs->z_vfs); - vp->v_vflag &= ~VV_FORCEINSMQ; - KASSERT(err == 0, ("insmntque() failed: error %d", err)); - VOP_UNLOCK(vp, 0); + if (((znode_phys_t *)db->db_data)->zp_gen != 0) { + zp = zfs_znode_alloc(zfsvfs, db, doi.doi_data_block_size); + *zpp = zp; + vp = ZTOV(zp); + vp->v_vflag |= VV_FORCEINSMQ; + err = insmntque(vp, zfsvfs->z_vfs); + vp->v_vflag &= ~VV_FORCEINSMQ; + KASSERT(err == 0, ("insmntque() failed: error %d", err)); + VOP_UNLOCK(vp, 0); + err = 0; + } else { + dmu_buf_rele(db, NULL); + err = ENOENT; + } ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); - *zpp = zp; - return (0); + return (err); } int @@ -1440,6 +1453,7 @@ zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx) uint64_t norm = 0; nvpair_t *elem; int error; + int i; znode_t *rootzp = NULL; vnode_t vnode; vattr_t vattr; @@ -1537,6 +1551,9 @@ zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx) list_create(&zfsvfs.z_all_znodes, sizeof (znode_t), offsetof(znode_t, z_link_node)); + for (i = 0; i != ZFS_OBJ_MTX_SZ; i++) + mutex_init(&zfsvfs.z_hold_mtx[i], NULL, MUTEX_DEFAULT, NULL); + ASSERT(!POINTER_IS_VALID(rootzp->z_zfsvfs)); rootzp->z_zfsvfs = &zfsvfs; 
zfs_mknode(rootzp, &vattr, tx, cr, IS_ROOT_NODE, &zp, 0, NULL, NULL); @@ -1547,6 +1564,8 @@ zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx) dmu_buf_rele(rootzp->z_dbuf, NULL); rootzp->z_dbuf = NULL; + for (i = 0; i != ZFS_OBJ_MTX_SZ; i++) + mutex_destroy(&zfsvfs.z_hold_mtx[i]); mutex_destroy(&zfsvfs.z_znodes_lock); rootzp->z_vnode = NULL; kmem_cache_free(znode_cache, rootzp); diff --git a/sys/cddl/dev/cyclic/amd64/cyclic_machdep.c b/sys/cddl/dev/cyclic/amd64/cyclic_machdep.c deleted file mode 100644 index 9a15b2cdad0..00000000000 --- a/sys/cddl/dev/cyclic/amd64/cyclic_machdep.c +++ /dev/null @@ -1,133 +0,0 @@ -/*- - * Copyright 2007 John Birrell - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- * - * $FreeBSD$ - * - */ - -static void enable(cyb_arg_t); -static void disable(cyb_arg_t); -static void reprogram(cyb_arg_t, hrtime_t); -static void xcall(cyb_arg_t, cpu_t *, cyc_func_t, void *); - -static cyc_backend_t be = { - NULL, /* cyb_configure */ - NULL, /* cyb_unconfigure */ - enable, - disable, - reprogram, - xcall, - NULL /* cyb_arg_t cyb_arg */ -}; - -static void -cyclic_ap_start(void *dummy) -{ - /* Initialise the rest of the CPUs. */ - cyclic_mp_init(); -} - -SYSINIT(cyclic_ap_start, SI_SUB_SMP, SI_ORDER_ANY, cyclic_ap_start, NULL); - -/* - * Machine dependent cyclic subsystem initialisation. - */ -static void -cyclic_machdep_init(void) -{ - /* Register the cyclic backend. */ - cyclic_init(&be); -} - -static void -cyclic_machdep_uninit(void) -{ - int i; - - for (i = 0; i <= mp_maxid; i++) - /* Reset the cyclic clock callback hook. */ - lapic_cyclic_clock_func[i] = NULL; - - /* De-register the cyclic backend. */ - cyclic_uninit(); -} - -static hrtime_t exp_due[MAXCPU]; - -/* - * This function is the one registered by the machine dependent - * initialiser as the callback for high speed timer events. - */ -static void -cyclic_clock(struct trapframe *frame) -{ - cpu_t *c = &solaris_cpu[curcpu]; - - if (c->cpu_cyclic != NULL && gethrtime() >= exp_due[curcpu]) { - if (TRAPF_USERMODE(frame)) { - c->cpu_profile_pc = 0; - c->cpu_profile_upc = TRAPF_PC(frame); - } else { - c->cpu_profile_pc = TRAPF_PC(frame); - c->cpu_profile_upc = 0; - } - - c->cpu_intr_actv = 1; - - /* Fire any timers that are due. */ - cyclic_fire(c); - - c->cpu_intr_actv = 0; - } -} - -static void enable(cyb_arg_t arg) -{ - /* Register the cyclic clock callback function. */ - lapic_cyclic_clock_func[curcpu] = cyclic_clock; -} - -static void disable(cyb_arg_t arg) -{ - /* Reset the cyclic clock callback function. 
*/ - lapic_cyclic_clock_func[curcpu] = NULL; -} - -static void reprogram(cyb_arg_t arg, hrtime_t exp) -{ - exp_due[curcpu] = exp; -} - -static void xcall(cyb_arg_t arg, cpu_t *c, cyc_func_t func, void *param) -{ - /* - * If the target CPU is the current one, just call the - * function. This covers the non-SMP case. - */ - if (c == &solaris_cpu[curcpu]) - (*func)(param); - else - smp_rendezvous_cpus((cpumask_t) (1 << c->cpuid), NULL, - func, smp_no_rendevous_barrier, param); -} diff --git a/sys/cddl/dev/cyclic/i386/cyclic_machdep.c b/sys/cddl/dev/cyclic/i386/cyclic_machdep.c index fa40db9329c..0b6ab5936e2 100644 --- a/sys/cddl/dev/cyclic/i386/cyclic_machdep.c +++ b/sys/cddl/dev/cyclic/i386/cyclic_machdep.c @@ -67,7 +67,7 @@ cyclic_machdep_uninit(void) for (i = 0; i <= mp_maxid; i++) /* Reset the cyclic clock callback hook. */ - lapic_cyclic_clock_func[i] = NULL; + cyclic_clock_func[i] = NULL; /* De-register the cyclic backend. */ cyclic_uninit(); @@ -105,13 +105,13 @@ cyclic_clock(struct trapframe *frame) static void enable(cyb_arg_t arg) { /* Register the cyclic clock callback function. */ - lapic_cyclic_clock_func[curcpu] = cyclic_clock; + cyclic_clock_func[curcpu] = cyclic_clock; } static void disable(cyb_arg_t arg) { /* Reset the cyclic clock callback function. 
*/ - lapic_cyclic_clock_func[curcpu] = NULL; + cyclic_clock_func[curcpu] = NULL; } static void reprogram(cyb_arg_t arg, hrtime_t exp) diff --git a/sys/compat/freebsd32/freebsd32.h b/sys/compat/freebsd32/freebsd32.h index e74da64ecf1..aad550e16b5 100644 --- a/sys/compat/freebsd32/freebsd32.h +++ b/sys/compat/freebsd32/freebsd32.h @@ -31,6 +31,7 @@ #include #include +#include #define PTRIN(v) (void *)(uintptr_t) (v) #define PTROUT(v) (u_int32_t)(uintptr_t) (v) @@ -229,4 +230,85 @@ struct mq_attr32 { int __reserved[4]; }; +struct kinfo_proc32 { + int ki_structsize; + int ki_layout; + uint32_t ki_args; + uint32_t ki_paddr; + uint32_t ki_addr; + uint32_t ki_tracep; + uint32_t ki_textvp; + uint32_t ki_fd; + uint32_t ki_vmspace; + uint32_t ki_wchan; + pid_t ki_pid; + pid_t ki_ppid; + pid_t ki_pgid; + pid_t ki_tpgid; + pid_t ki_sid; + pid_t ki_tsid; + short ki_jobc; + short ki_spare_short1; + dev_t ki_tdev; + sigset_t ki_siglist; + sigset_t ki_sigmask; + sigset_t ki_sigignore; + sigset_t ki_sigcatch; + uid_t ki_uid; + uid_t ki_ruid; + uid_t ki_svuid; + gid_t ki_rgid; + gid_t ki_svgid; + short ki_ngroups; + short ki_spare_short2; + gid_t ki_groups[KI_NGROUPS]; + uint32_t ki_size; + int32_t ki_rssize; + int32_t ki_swrss; + int32_t ki_tsize; + int32_t ki_dsize; + int32_t ki_ssize; + u_short ki_xstat; + u_short ki_acflag; + fixpt_t ki_pctcpu; + u_int ki_estcpu; + u_int ki_slptime; + u_int ki_swtime; + int ki_spareint1; + u_int64_t ki_runtime; + struct timeval32 ki_start; + struct timeval32 ki_childtime; + int ki_flag; + int ki_kiflag; + int ki_traceflag; + char ki_stat; + signed char ki_nice; + char ki_lock; + char ki_rqindex; + u_char ki_oncpu; + u_char ki_lastcpu; + char ki_ocomm[OCOMMLEN+1]; + char ki_wmesg[WMESGLEN+1]; + char ki_login[LOGNAMELEN+1]; + char ki_lockname[LOCKNAMELEN+1]; + char ki_comm[COMMLEN+1]; + char ki_emul[KI_EMULNAMELEN+1]; + char ki_sparestrings[68]; + int ki_spareints[KI_NSPARE_INT]; + u_int ki_cr_flags; + int ki_jid; + int ki_numthreads; + lwpid_t 
ki_tid; + struct priority ki_pri; + struct rusage32 ki_rusage; + struct rusage32 ki_rusage_ch; + uint32_t ki_pcb; + uint32_t ki_kstack; + uint32_t ki_udata; + uint32_t ki_spareptrs[KI_NSPARE_PTR]; /* spare room for growth */ + int ki_sparelongs[KI_NSPARE_LONG]; + int ki_sflag; + int ki_tdflags; +}; + #endif /* !_COMPAT_FREEBSD32_FREEBSD32_H_ */ diff --git a/sys/compat/freebsd32/freebsd32_misc.c b/sys/compat/freebsd32/freebsd32_misc.c index 7cc27be5ff3..f0fde2b3c14 100644 --- a/sys/compat/freebsd32/freebsd32_misc.c +++ b/sys/compat/freebsd32/freebsd32_misc.c @@ -129,6 +129,28 @@ static int freebsd32_kevent_copyin(void *arg, struct kevent *kevp, int count); #define RETVAL_LO 0 #endif +void +freebsd32_rusage_out(const struct rusage *s, struct rusage32 *s32) +{ + + TV_CP(*s, *s32, ru_utime); + TV_CP(*s, *s32, ru_stime); + CP(*s, *s32, ru_maxrss); + CP(*s, *s32, ru_ixrss); + CP(*s, *s32, ru_idrss); + CP(*s, *s32, ru_isrss); + CP(*s, *s32, ru_minflt); + CP(*s, *s32, ru_majflt); + CP(*s, *s32, ru_nswap); + CP(*s, *s32, ru_inblock); + CP(*s, *s32, ru_oublock); + CP(*s, *s32, ru_msgsnd); + CP(*s, *s32, ru_msgrcv); + CP(*s, *s32, ru_nsignals); + CP(*s, *s32, ru_nvcsw); + CP(*s, *s32, ru_nivcsw); +} + int freebsd32_wait4(struct thread *td, struct freebsd32_wait4_args *uap) { @@ -146,22 +168,7 @@ freebsd32_wait4(struct thread *td, struct freebsd32_wait4_args *uap) if (uap->status != NULL) error = copyout(&status, uap->status, sizeof(status)); if (uap->rusage != NULL && error == 0) { - TV_CP(ru, ru32, ru_utime); - TV_CP(ru, ru32, ru_stime); - CP(ru, ru32, ru_maxrss); - CP(ru, ru32, ru_ixrss); - CP(ru, ru32, ru_idrss); - CP(ru, ru32, ru_isrss); - CP(ru, ru32, ru_minflt); - CP(ru, ru32, ru_majflt); - CP(ru, ru32, ru_nswap); - CP(ru, ru32, ru_inblock); - CP(ru, ru32, ru_oublock); - CP(ru, ru32, ru_msgsnd); - CP(ru, ru32, ru_msgrcv); - CP(ru, ru32, ru_nsignals); - CP(ru, ru32, ru_nvcsw); - CP(ru, ru32, ru_nivcsw); + freebsd32_rusage_out(&ru, &ru32); error = copyout(&ru32, 
uap->rusage, sizeof(ru32)); } return (error); @@ -755,22 +762,7 @@ freebsd32_getrusage(struct thread *td, struct freebsd32_getrusage_args *uap) if (error) return (error); if (uap->rusage != NULL) { - TV_CP(s, s32, ru_utime); - TV_CP(s, s32, ru_stime); - CP(s, s32, ru_maxrss); - CP(s, s32, ru_ixrss); - CP(s, s32, ru_idrss); - CP(s, s32, ru_isrss); - CP(s, s32, ru_minflt); - CP(s, s32, ru_majflt); - CP(s, s32, ru_nswap); - CP(s, s32, ru_inblock); - CP(s, s32, ru_oublock); - CP(s, s32, ru_msgsnd); - CP(s, s32, ru_msgrcv); - CP(s, s32, ru_nsignals); - CP(s, s32, ru_nvcsw); - CP(s, s32, ru_nivcsw); + freebsd32_rusage_out(&s, &s32); error = copyout(&s32, uap->rusage, sizeof(s32)); } return (error); diff --git a/sys/compat/freebsd32/freebsd32_util.h b/sys/compat/freebsd32/freebsd32_util.h index f5ccbf16e04..3ac676d9902 100644 --- a/sys/compat/freebsd32/freebsd32_util.h +++ b/sys/compat/freebsd32/freebsd32_util.h @@ -94,9 +94,12 @@ int syscall32_module_handler(struct module *mod, int what, void *arg); int syscall32_helper_register(struct syscall_helper_data *sd); int syscall32_helper_unregister(struct syscall_helper_data *sd); -register_t *freebsd32_copyout_strings(struct image_params *imgp); struct iovec32; +struct rusage32; +register_t *freebsd32_copyout_strings(struct image_params *imgp); int freebsd32_copyiniov(struct iovec32 *iovp, u_int iovcnt, struct iovec **iov, int error); +void freebsd32_rusage_out(const struct rusage *s, struct rusage32 *s32); + #endif /* !_COMPAT_FREEBSD32_FREEBSD32_UTIL_H_ */ diff --git a/sys/compat/linprocfs/linprocfs.c b/sys/compat/linprocfs/linprocfs.c index e6a4f16564a..58d897e9d08 100644 --- a/sys/compat/linprocfs/linprocfs.c +++ b/sys/compat/linprocfs/linprocfs.c @@ -274,7 +274,7 @@ linprocfs_docpuinfo(PFS_FILL_ARGS) "cpu family\t: %d\n" "model\t\t: %d\n" "model name\t: %s\n" - "stepping\t: %d\n", + "stepping\t: %d\n\n", i, cpu_vendor, class, cpu, model, cpu_id & 0xf); /* XXX per-cpu vendor / class / model / id? 
*/ } diff --git a/sys/compat/linux/linux_ioctl.c b/sys/compat/linux/linux_ioctl.c index 66009763404..261126bf706 100644 --- a/sys/compat/linux/linux_ioctl.c +++ b/sys/compat/linux/linux_ioctl.c @@ -2628,6 +2628,7 @@ bsd_to_linux_v4l_tuner(struct video_tuner *vt, struct l_video_tuner *lvt) return (0); } +#ifdef COMPAT_LINUX_V4L_CLIPLIST static int linux_to_bsd_v4l_clip(struct l_video_clip *lvc, struct video_clip *vc) { @@ -2638,6 +2639,7 @@ linux_to_bsd_v4l_clip(struct l_video_clip *lvc, struct video_clip *vc) vc->next = PTRIN(lvc->next); /* possible pointer size conversion */ return (0); } +#endif static int linux_to_bsd_v4l_window(struct l_video_window *lvw, struct video_window *vw) @@ -2698,6 +2700,7 @@ linux_to_bsd_v4l_code(struct l_video_code *lvc, struct video_code *vc) return (0); } +#ifdef COMPAT_LINUX_V4L_CLIPLIST static int linux_v4l_clip_copy(void *lvc, struct video_clip **ppvc) { @@ -2772,15 +2775,18 @@ linux_v4l_cliplist_copy(struct l_video_window *lvw, struct video_window *vw) * example of cliplist use. 
*/ plvc = PTRIN(lvw->clips); + vw->clips = NULL; ppvc = &(vw->clips); while (clipcount-- > 0) { - if (plvc == 0) + if (plvc == 0) { error = EFAULT; - if (!error) - error = linux_v4l_clip_copy(plvc, ppvc); - if (error) { - linux_v4l_cliplist_free(vw); break; + } else { + error = linux_v4l_clip_copy(plvc, ppvc); + if (error) { + linux_v4l_cliplist_free(vw); + break; + } } ppvc = &((*ppvc)->next); plvc = PTRIN(((struct l_video_clip *) plvc)->next); @@ -2795,6 +2801,7 @@ linux_v4l_cliplist_copy(struct l_video_window *lvw, struct video_window *vw) } return (error); } +#endif static int linux_ioctl_v4l(struct thread *td, struct linux_ioctl_args *args) @@ -2818,6 +2825,12 @@ linux_ioctl_v4l(struct thread *td, struct linux_ioctl_args *args) case LINUX_VIDIOCGTUNER: if ((error = fget(td, args->fd, &fp)) != 0) return (error); + error = copyin((void *) args->arg, &l_vtun, sizeof(l_vtun)); + if (error) { + fdrop(fp, td); + return (error); + } + linux_to_bsd_v4l_tuner(&l_vtun, &vtun); error = fo_ioctl(fp, VIDIOCGTUNER, &vtun, td->td_ucred, td); if (!error) { bsd_to_linux_v4l_tuner(&vtun, &l_vtun); @@ -2836,7 +2849,7 @@ linux_ioctl_v4l(struct thread *td, struct linux_ioctl_args *args) return (error); } linux_to_bsd_v4l_tuner(&l_vtun, &vtun); - error = fo_ioctl(fp, VIDIOCSMICROCODE, &vtun, td->td_ucred, td); + error = fo_ioctl(fp, VIDIOCSTUNER, &vtun, td->td_ucred, td); fdrop(fp, td); return (error); @@ -2865,14 +2878,18 @@ linux_ioctl_v4l(struct thread *td, struct linux_ioctl_args *args) return (error); } linux_to_bsd_v4l_window(&l_vwin, &vwin); +#ifdef COMPAT_LINUX_V4L_CLIPLIST error = linux_v4l_cliplist_copy(&l_vwin, &vwin); if (error) { fdrop(fp, td); return (error); } +#endif error = fo_ioctl(fp, VIDIOCSWIN, &vwin, td->td_ucred, td); fdrop(fp, td); +#ifdef COMPAT_LINUX_V4L_CLIPLIST linux_v4l_cliplist_free(&vwin); +#endif return (error); case LINUX_VIDIOCGFBUF: @@ -2924,7 +2941,7 @@ linux_ioctl_v4l(struct thread *td, struct linux_ioctl_args *args) return (error); } 
linux_to_bsd_v4l_code(&l_vcode, &vcode); - error = fo_ioctl(fp, VIDIOCSTUNER, &vcode, td->td_ucred, td); + error = fo_ioctl(fp, VIDIOCSMICROCODE, &vcode, td->td_ucred, td); fdrop(fp, td); return (error); diff --git a/sys/compat/x86bios/x86bios.c b/sys/compat/x86bios/x86bios.c index d5512fc43f3..cb8b85e17ab 100644 --- a/sys/compat/x86bios/x86bios.c +++ b/sys/compat/x86bios/x86bios.c @@ -47,11 +47,15 @@ __FBSDID("$FreeBSD$"); #include #include -#include +#include #include #include +#if defined(__amd64__) || defined(__i386__) +#define X86BIOS_NATIVE_ARCH +#endif + #define X86BIOS_PAGE_SIZE 0x00001000 /* 4K */ #define X86BIOS_IVT_SIZE 0x00000500 /* 1K + 256 (BDA) */ @@ -236,27 +240,49 @@ x86bios_emu_inb(struct x86emu *emu, uint16_t port) if (port >= 0x80 && port < 0x88) /* POST status register */ return (0); - return (inb(port)); + return (iodev_read_1(port)); } static uint16_t x86bios_emu_inw(struct x86emu *emu, uint16_t port) { + uint16_t val; if (port >= 0x80 && port < 0x88) /* POST status register */ return (0); - return (inw(port)); +#ifndef X86BIOS_NATIVE_ARCH + if ((port & 1) != 0) { + val = iodev_read_1(port); + val |= iodev_read_1(port + 1) << 8; + } else +#endif + val = iodev_read_2(port); + + return (val); } static uint32_t x86bios_emu_inl(struct x86emu *emu, uint16_t port) { + uint32_t val; if (port >= 0x80 && port < 0x88) /* POST status register */ return (0); - return (inl(port)); +#ifndef X86BIOS_NATIVE_ARCH + if ((port & 1) != 0) { + val = iodev_read_1(port); + val |= iodev_read_2(port + 1) << 8; + val |= iodev_read_1(port + 3) << 24; + } else if ((port & 2) != 0) { + val = iodev_read_2(port); + val |= iodev_read_2(port + 2) << 16; + } else +#endif + val = iodev_read_4(port); + + return (val); } static void @@ -268,7 +294,7 @@ x86bios_emu_outb(struct x86emu *emu, uint16_t port, uint8_t val) if (port >= 0x80 && port < 0x88) /* POST status register */ return; - outb(port, val); + iodev_write_1(port, val); } static void @@ -278,7 +304,13 @@ 
x86bios_emu_outw(struct x86emu *emu, uint16_t port, uint16_t val) if (port >= 0x80 && port < 0x88) /* POST status register */ return; - outw(port, val); +#ifndef X86BIOS_NATIVE_ARCH + if ((port & 1) != 0) { + iodev_write_1(port, val); + iodev_write_1(port + 1, val >> 8); + } else +#endif + iodev_write_2(port, val); } static void @@ -288,7 +320,17 @@ x86bios_emu_outl(struct x86emu *emu, uint16_t port, uint32_t val) if (port >= 0x80 && port < 0x88) /* POST status register */ return; - outl(port, val); +#ifndef X86BIOS_NATIVE_ARCH + if ((port & 1) != 0) { + iodev_write_1(port, val); + iodev_write_2(port + 1, val >> 8); + iodev_write_1(port + 3, val >> 24); + } else if ((port & 2) != 0) { + iodev_write_2(port, val); + iodev_write_2(port + 2, val >> 16); + } else +#endif + iodev_write_4(port, val); } static void @@ -484,45 +526,53 @@ x86bios_match_device(uint32_t offset, device_t dev) return (1); } -#if defined(__amd64__) || (defined(__i386__) && !defined(PC98)) -#define PROBE_EBDA 1 +static __inline void +x86bios_unmap_mem(void) +{ + + if (x86bios_ivt != NULL) +#ifdef X86BIOS_NATIVE_ARCH + pmap_unmapdev((vm_offset_t)x86bios_ivt, X86BIOS_IVT_SIZE); #else -#define PROBE_EBDA 0 + free(x86bios_ivt, M_DEVBUF); #endif + if (x86bios_rom != NULL) + pmap_unmapdev((vm_offset_t)x86bios_rom, X86BIOS_ROM_SIZE); + if (x86bios_seg != NULL) + contigfree(x86bios_seg, X86BIOS_SEG_SIZE, M_DEVBUF); +} static __inline int x86bios_map_mem(void) { +#ifdef X86BIOS_NATIVE_ARCH x86bios_ivt = pmap_mapbios(X86BIOS_IVT_BASE, X86BIOS_IVT_SIZE); - if (x86bios_ivt == NULL) - return (1); -#if PROBE_EBDA +#ifndef PC98 /* Probe EBDA via BDA. 
*/ - x86bios_rom_phys = *(uint16_t *)((vm_offset_t)x86bios_ivt + 0x40e); - x86bios_rom_phys = le16toh(x86bios_rom_phys) << 4; + x86bios_rom_phys = *(uint16_t *)((caddr_t)x86bios_ivt + 0x40e); + x86bios_rom_phys = x86bios_rom_phys << 4; if (x86bios_rom_phys != 0 && x86bios_rom_phys < X86BIOS_ROM_BASE && X86BIOS_ROM_BASE - x86bios_rom_phys <= 128 * 1024) x86bios_rom_phys = rounddown(x86bios_rom_phys, X86BIOS_PAGE_SIZE); else #endif +#else + x86bios_ivt = malloc(X86BIOS_IVT_SIZE, M_DEVBUF, M_ZERO | M_WAITOK); +#endif + x86bios_rom_phys = X86BIOS_ROM_BASE; x86bios_rom = pmap_mapdev(x86bios_rom_phys, X86BIOS_ROM_SIZE); - if (x86bios_rom == NULL) { - pmap_unmapdev((vm_offset_t)x86bios_ivt, X86BIOS_IVT_SIZE); - return (1); - } -#if PROBE_EBDA + if (x86bios_rom == NULL) + goto fail; +#if defined(X86BIOS_NATIVE_ARCH) && !defined(PC98) /* Change attribute for EBDA. */ if (x86bios_rom_phys < X86BIOS_ROM_BASE && pmap_change_attr((vm_offset_t)x86bios_rom, - X86BIOS_ROM_BASE - x86bios_rom_phys, PAT_WRITE_BACK) != 0) { - pmap_unmapdev((vm_offset_t)x86bios_ivt, X86BIOS_IVT_SIZE); - pmap_unmapdev((vm_offset_t)x86bios_rom, X86BIOS_ROM_SIZE); - return (1); - } + X86BIOS_ROM_BASE - x86bios_rom_phys, PAT_WRITE_BACK) != 0) + goto fail; #endif x86bios_seg = contigmalloc(X86BIOS_SEG_SIZE, M_DEVBUF, M_WAITOK, @@ -537,12 +587,10 @@ x86bios_map_mem(void) (uint32_t)x86bios_seg_phys, X86BIOS_SEG_SIZE + (uint32_t)x86bios_seg_phys - 1, x86bios_seg); -#if PROBE_EBDA if (x86bios_rom_phys < X86BIOS_ROM_BASE) printf("x86bios: EBDA 0x%06x-0x%06x at %p\n", (uint32_t)x86bios_rom_phys, X86BIOS_ROM_BASE - 1, x86bios_rom); -#endif printf("x86bios: ROM 0x%06x-0x%06x at %p\n", X86BIOS_ROM_BASE, X86BIOS_MEM_SIZE - X86BIOS_SEG_SIZE - 1, (void *)((vm_offset_t)x86bios_rom + X86BIOS_ROM_BASE - @@ -550,28 +598,22 @@ x86bios_map_mem(void) } return (0); + +fail: + x86bios_unmap_mem(); + + return (1); } -#undef PROBE_EBDA - -static __inline void -x86bios_unmap_mem(void) -{ - - pmap_unmapdev((vm_offset_t)x86bios_ivt, 
X86BIOS_IVT_SIZE); - pmap_unmapdev((vm_offset_t)x86bios_rom, X86BIOS_ROM_SIZE); - contigfree(x86bios_seg, X86BIOS_SEG_SIZE, M_DEVBUF); -} - -static void -x86bios_init(void *arg __unused) +static int +x86bios_init(void) { int i; - mtx_init(&x86bios_lock, "x86bios lock", NULL, MTX_SPIN); - if (x86bios_map_mem() != 0) - return; + return (ENOMEM); + + mtx_init(&x86bios_lock, "x86bios lock", NULL, MTX_SPIN); x86bios_map = malloc(sizeof(*x86bios_map) * X86BIOS_PAGES, M_DEVBUF, M_WAITOK | M_ZERO); @@ -600,10 +642,12 @@ x86bios_init(void *arg __unused) for (i = 0; i < 256; i++) x86bios_emu._x86emu_intrTab[i] = x86bios_emu_get_intr; + + return (0); } -static void -x86bios_uninit(void *arg __unused) +static int +x86bios_uninit(void) { vm_offset_t *map = x86bios_map; @@ -618,6 +662,8 @@ x86bios_uninit(void *arg __unused) x86bios_unmap_mem(); mtx_destroy(&x86bios_lock); + + return (0); } static int @@ -626,16 +672,12 @@ x86bios_modevent(module_t mod __unused, int type, void *data __unused) switch (type) { case MOD_LOAD: - x86bios_init(NULL); - break; + return (x86bios_init()); case MOD_UNLOAD: - x86bios_uninit(NULL); - break; + return (x86bios_uninit()); default: return (ENOTSUP); } - - return (0); } static moduledata_t x86bios_mod = { diff --git a/sys/conf/NOTES b/sys/conf/NOTES index 8a2023ddc1c..1ec82a9c83c 100644 --- a/sys/conf/NOTES +++ b/sys/conf/NOTES @@ -1660,12 +1660,14 @@ device twe # 3ware ATA RAID # Serial ATA host controllers: # # ahci: Advanced Host Controller Interface (AHCI) compatible +# mvs: Marvell 88SX50XX/88SX60XX/88SX70XX/SoC controllers # siis: SiliconImage SiI3124/SiI3132/SiI3531 controllers # # These drivers are part of cam(4) subsystem. They supersede less featured # ata(4) subsystem drivers, supporting same hardware. device ahci +device mvs device siis # @@ -1952,6 +1954,7 @@ device xmphy # XaQti XMAC II # This includes dual and quad port cards, as well as one 100baseFX card. 
# Most of these are 64-bit PCI devices, except for one single port # card which is 32-bit. +# sge: Silicon Integrated Systems SiS190/191 Fast/Gigabit Ethernet adapter # sis: Support for NICs based on the Silicon Integrated Systems SiS 900, # SiS 7016 and NS DP83815 PCI fast ethernet controller chips. # sk: Support for the SysKonnect SK-984x series PCI gigabit ethernet NICs. @@ -2046,6 +2049,7 @@ device re # RealTek 8139C+/8169/8169S/8110S device rl # RealTek 8129/8139 device pcn # AMD Am79C97x PCI 10/100 NICs device sf # Adaptec AIC-6915 (``Starfire'') +device sge # Silicon Integrated Systems SiS190/191 device sis # Silicon Integrated Systems SiS 900/SiS 7016 device sk # SysKonnect SK-984x & SK-982x gigabit Ethernet device ste # Sundance ST201 (D-Link DFE-550TX) @@ -2089,6 +2093,15 @@ device ath_hal # pci/cardbus chip support #device ath_rf5413 #device ath_ar5416 # AR5416 chips options AH_SUPPORT_AR5416 # enable AR5416 tx/rx descriptors +# All of the AR5212 parts have a problem when paired with the AR71xx +# CPUS. These parts have a bug that triggers a fatal bus error on the AR71xx +# only. Details of the exact nature of the bug are sketchy, but some can be +# found at https://forum.openwrt.org/viewtopic.php?pid=70060 on pages 4, 5 and +# 6. This option enables this workaround. There is a performance penalty +# for this work around, but without it things don't work at all. The DMA +# from the card usually bursts 128 bytes, but on the affected CPUs, only +# 4 are safe. 
+options AH_RXCFG_SDMAMW_4BYTES #device ath_ar9160 # AR9160 chips #device ath_ar9280 # AR9280 chips #device ath_ar9285 # AR9285 chips diff --git a/sys/conf/files b/sys/conf/files index 7dd1b0ed3cd..a485f2f0796 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -174,6 +174,8 @@ contrib/dev/acpica/dispatcher/dswstate.c optional acpi contrib/dev/acpica/events/evevent.c optional acpi contrib/dev/acpica/events/evgpe.c optional acpi contrib/dev/acpica/events/evgpeblk.c optional acpi +contrib/dev/acpica/events/evgpeinit.c optional acpi +contrib/dev/acpica/events/evgpeutil.c optional acpi contrib/dev/acpica/events/evmisc.c optional acpi contrib/dev/acpica/events/evregion.c optional acpi contrib/dev/acpica/events/evrgnini.c optional acpi @@ -1291,6 +1293,9 @@ dev/mpt/mpt_pci.c optional mpt pci dev/mpt/mpt_raid.c optional mpt dev/mpt/mpt_user.c optional mpt dev/msk/if_msk.c optional msk inet +dev/mvs/mvs.c optional mvs +dev/mvs/mvs_if.m standard +dev/mvs/mvs_pci.c optional mvs pci dev/mwl/if_mwl.c optional mwl dev/mwl/if_mwl_pci.c optional mwl pci dev/mwl/mwlhal.c optional mwl @@ -1490,6 +1495,7 @@ dev/scd/scd.c optional scd isa dev/scd/scd_isa.c optional scd isa dev/sdhci/sdhci.c optional sdhci pci dev/sf/if_sf.c optional sf pci +dev/sge/if_sge.c optional sge pci dev/si/si.c optional si dev/si/si2_z280.c optional si dev/si/si3_t225.c optional si diff --git a/sys/conf/files.mips b/sys/conf/files.mips index 60ab0c2102d..f88949ce9d8 100644 --- a/sys/conf/files.mips +++ b/sys/conf/files.mips @@ -50,7 +50,6 @@ mips/mips/bus_space_generic.c standard mips/mips/busdma_machdep.c standard mips/mips/cache.c standard mips/mips/cache_mipsNN.c standard -#mips/mips/copystr.S standard mips/mips/db_disasm.c optional ddb mips/mips/db_interface.c optional ddb mips/mips/db_trace.c optional ddb diff --git a/sys/conf/files.sparc64 b/sys/conf/files.sparc64 index a822a0ed817..3a0620a2d09 100644 --- a/sys/conf/files.sparc64 +++ b/sys/conf/files.sparc64 @@ -22,7 +22,7 @@ ukbdmap.h optional 
ukbd_dflt_keymap \ no-obj no-implicit-rule before-depend \ clean "ukbdmap.h" # -crypto/blowfish/bf_enc.c optional crypto | ipsec +crypto/blowfish/bf_enc.c optional crypto | ipsec crypto/des/des_enc.c optional crypto | ipsec | netsmb dev/atkbdc/atkbd.c optional atkbd atkbdc dev/atkbdc/atkbd_atkbdc.c optional atkbd atkbdc @@ -42,11 +42,11 @@ dev/kbd/kbd.c optional atkbd | sc | ukbd dev/le/if_le_lebuffer.c optional le sbus dev/le/if_le_ledma.c optional le sbus dev/le/lebuffer_sbus.c optional le sbus -dev/ofw/ofw_if.m standard dev/ofw/ofw_bus_if.m standard dev/ofw/ofw_bus_subr.c standard -dev/ofw/ofw_standard.c standard dev/ofw/ofw_console.c optional ofw_console +dev/ofw/ofw_if.m standard +dev/ofw/ofw_standard.c standard dev/ofw/openfirm.c standard dev/ofw/openfirmio.c standard dev/ofw/openpromio.c standard @@ -79,7 +79,7 @@ sparc64/pci/ofw_pcib.c optional pci sparc64/pci/ofw_pcib_subr.c optional pci sparc64/pci/ofw_pcibus.c optional pci sparc64/pci/psycho.c optional pci -sparc64/pci/sbbc.c optional uart sbbc +sparc64/pci/sbbc.c optional sbbc uart sparc64/pci/schizo.c optional pci sparc64/sbus/dma_sbus.c optional sbus sparc64/sbus/sbus.c optional sbus @@ -138,3 +138,4 @@ sparc64/sparc64/tsb.c standard sparc64/sparc64/uio_machdep.c standard sparc64/sparc64/upa.c optional creator sparc64/sparc64/vm_machdep.c standard +sparc64/sparc64/zeus.c standard diff --git a/sys/conf/files.sun4v b/sys/conf/files.sun4v index 5964f5659fc..62e9e9be4a5 100644 --- a/sys/conf/files.sun4v +++ b/sys/conf/files.sun4v @@ -18,12 +18,12 @@ ukbdmap.h optional ukbd_dflt_keymap \ clean "ukbdmap.h" # # -crypto/blowfish/bf_enc.c optional crypto | ipsec +crypto/blowfish/bf_enc.c optional crypto | ipsec crypto/des/des_enc.c optional crypto | ipsec | netsmb dev/ofw/ofw_bus_if.m standard -dev/ofw/ofw_if.m standard dev/ofw/ofw_bus_subr.c standard dev/ofw/ofw_console.c optional ofw_console +dev/ofw/ofw_if.m standard dev/ofw/ofw_standard.c standard dev/ofw/openfirm.c standard dev/ofw/openfirmio.c standard 
diff --git a/sys/conf/kern.pre.mk b/sys/conf/kern.pre.mk index 1852cb7885c..ac275d4d3d0 100644 --- a/sys/conf/kern.pre.mk +++ b/sys/conf/kern.pre.mk @@ -128,7 +128,8 @@ NORMAL_C_NOWERROR= ${CC} -c ${CFLAGS} ${PROF} ${.IMPSRC} NORMAL_M= ${AWK} -f $S/tools/makeobjops.awk ${.IMPSRC} -c ; \ ${CC} -c ${CFLAGS} ${WERROR} ${PROF} ${.PREFIX}.c -NORMAL_CTFCONVERT= @[ -z "${CTFCONVERT}" -o -n "${NO_CTF}" ] || ${CTFCONVERT} ${CTFFLAGS} ${.TARGET} +NORMAL_CTFCONVERT= [ -z "${CTFCONVERT}" -o -n "${NO_CTF}" ] || \ + ${CTFCONVERT} ${CTFFLAGS} ${.TARGET} NORMAL_LINT= ${LINT} ${LINTFLAGS} ${CFLAGS:M-[DIU]*} ${.IMPSRC} diff --git a/sys/conf/kmod.mk b/sys/conf/kmod.mk index ec11e9ed066..77da7c871c5 100644 --- a/sys/conf/kmod.mk +++ b/sys/conf/kmod.mk @@ -137,6 +137,10 @@ CFLAGS+= -mlongcall -fno-omit-frame-pointer CFLAGS+= -G0 -fno-pic -mno-abicalls -mlong-calls .endif +.if defined(DEBUG) || defined(DEBUG_FLAGS) +CTFFLAGS+= -g +.endif + .if defined(FIRMWS) .if !exists(@) ${KMOD:S/$/.c/}: @ @@ -204,6 +208,7 @@ ${KMOD}.kld: ${OBJS} ${FULLPROG}: ${OBJS} .endif ${LD} ${LDFLAGS} -r -d -o ${.TARGET} ${OBJS} + @[ -z "${CTFMERGE}" -o -n "${NO_CTF}" ] || ${CTFMERGE} ${CTFFLAGS} -o ${.TARGET} ${OBJS} .if defined(EXPORT_SYMS) .if ${EXPORT_SYMS} != YES .if ${EXPORT_SYMS} == NO @@ -340,7 +345,7 @@ MFILES?= dev/acpica/acpi_if.m dev/acpi_support/acpi_wmi_if.m \ dev/agp/agp_if.m dev/ata/ata_if.m dev/eisa/eisa_if.m \ dev/iicbus/iicbb_if.m dev/iicbus/iicbus_if.m \ dev/mmc/mmcbr_if.m dev/mmc/mmcbus_if.m \ - dev/mii/miibus_if.m dev/ofw/ofw_bus_if.m \ + dev/mii/miibus_if.m dev/mvs/mvs_if.m dev/ofw/ofw_bus_if.m \ dev/pccard/card_if.m dev/pccard/power_if.m dev/pci/pci_if.m \ dev/pci/pcib_if.m dev/ppbus/ppbus_if.m dev/smbus/smbus_if.m \ dev/sound/pcm/ac97_if.m dev/sound/pcm/channel_if.m \ diff --git a/sys/conf/options b/sys/conf/options index fabc0bf772f..8f1dcaaa90b 100644 --- a/sys/conf/options +++ b/sys/conf/options @@ -771,6 +771,7 @@ AH_PRIVATE_DIAG opt_ah.h AH_NEED_DESC_SWAP opt_ah.h AH_USE_INIPDGAIN 
opt_ah.h AH_MAXCHAN opt_ah.h +AH_RXCFG_SDMAMW_4BYTES opt_ah.h # options for the Broadcom BCM43xx driver (bwi) BWI_DEBUG opt_bwi.h diff --git a/sys/conf/options.arm b/sys/conf/options.arm index 922d38fda0c..6b1928195c7 100644 --- a/sys/conf/options.arm +++ b/sys/conf/options.arm @@ -37,3 +37,4 @@ AT91_BWCT opt_at91.h AT91_TSC opt_at91.h AT91_KWIKBYTE opt_at91.h CPU_FA526 opt_global.h +CPU_FA626TE opt_global.h diff --git a/sys/contrib/dev/acpica/acpica_prep.sh b/sys/contrib/dev/acpica/acpica_prep.sh index 3a17041846f..ebbf8cff148 100755 --- a/sys/contrib/dev/acpica/acpica_prep.sh +++ b/sys/contrib/dev/acpica/acpica_prep.sh @@ -19,7 +19,7 @@ fulldirs="common compiler debugger disassembler dispatcher events \ tools utilities" # files to remove -stripdirs="acpisrc acpixtract examples generate os_specific" +stripdirs="acpisrc acpixtract examples generate os_specific tests" stripfiles="Makefile README acintel.h aclinux.h acmsvc.h acnetbsd.h \ acos2.h accygwin.h acefi.h acwin.h acwin64.h aeexec.c \ aehandlers.c aemain.c aetables.c osunixdir.c readme.txt \ diff --git a/sys/contrib/dev/acpica/changes.txt b/sys/contrib/dev/acpica/changes.txt index d2c8c0e41ed..fed0395e61a 100644 --- a/sys/contrib/dev/acpica/changes.txt +++ b/sys/contrib/dev/acpica/changes.txt @@ -1,3 +1,69 @@ +---------------------------------------- +28 April 2010. Summary of changes for version 20100428: + +1) ACPI CA Core Subsystem: + +Implemented GPE support for dynamically loaded ACPI tables. For all GPEs, +including FADT-based and GPE Block Devices, execute any _PRW methods in the +new table, and process any _Lxx/_Exx GPE methods in the new table. Any +runtime GPE that is referenced by an _Lxx/_Exx method in the new table is +immediately enabled. Handles the FADT-defined GPEs as well as GPE Block +Devices. Provides compatibility with other ACPI implementations. Two new +files added, evgpeinit.c and evgpeutil.c. ACPICA BZ 833. Lin Ming, Bob Moore. 
+ +Fixed a regression introduced in version 20100331 within the table manager +where initial table loading could fail. This was introduced in the fix for +AcpiReallocateRootTable. Also, renamed some of fields in the table manager +data structures to clarify their meaning and use. + +Fixed a possible allocation overrun during internal object copy in +AcpiUtCopySimpleObject. The original code did not correctly handle the case +where the object to be copied was a namespace node. Lin Ming. ACPICA BZ 847. + +Updated the allocation dump routine, AcpiUtDumpAllocation and fixed a +possible access beyond end-of-allocation. Also, now fully validate descriptor +(size and type) before output. Lin Ming, Bob Moore. ACPICA BZ 847 + +Example Code and Data Size: These are the sizes for the OS-independent +acpica.lib produced by the Microsoft Visual C++ 6.0 32-bit compiler. The +debug version of the code includes the debug output trace mechanism and has a +much larger code and data size. + + Previous Release: + Non-Debug Version: 87.9K Code, 18.6K Data, 106.5K Total + Debug Version: 163.5K Code, 51.3K Data, 214.8K Total + Current Release: + Non-Debug Version: 88.4K Code, 18.8K Data, 107.2K Total + Debug Version: 164.2K Code, 51.5K Data, 215.7K Total + +2) iASL Compiler/Disassembler and Tools: + +iASL: Implemented Min/Max/Len/Gran validation for address resource +descriptors. This change implements validation for the address fields that +are common to all address-type resource descriptors. These checks are +implemented: Checks for valid Min/Max, length within the Min/Max window, +valid granularity, Min/Max a multiple of granularity, and _MIF/_MAF as per +table 6-40 in the ACPI 4.0a specification. Also split the large aslrestype1.c +and aslrestype2.c files into five new files. ACPICA BZ 840. + +iASL: Added support for the _Wxx predefined names. This support was missing +and these names were not recognized by the compiler as valid predefined +names. ACPICA BZ 851. 
+ +iASL: Added an error for all predefined names that are defined to return no +value and thus must be implemented as Control Methods. These include all of +the _Lxx, _Exx, _Wxx, and _Qxx names, as well as some other miscellaneous +names such as _DIS, _INI, _IRC, _OFF, _ON, and _PSx. ACPICA BZ 850, 856. + +iASL: Implemented the -ts option to emit hex AML data in ASL format, as an +ASL Buffer. Allows ACPI tables to be easily included within ASL files, to be +dynamically loaded via the Load() operator. Also cleaned up output for the - +ta and -tc options. ACPICA BZ 853. + +Tests: Added a new file with examples of extended iASL error checking. +Demonstrates the advanced error checking ability of the iASL compiler. +Available at tests/misc/badcode.asl. + ---------------------------------------- 31 March 2010. Summary of changes for version 20100331: diff --git a/sys/contrib/dev/acpica/common/adisasm.c b/sys/contrib/dev/acpica/common/adisasm.c index fe5ae35731d..91559c2cf2d 100644 --- a/sys/contrib/dev/acpica/common/adisasm.c +++ b/sys/contrib/dev/acpica/common/adisasm.c @@ -282,8 +282,8 @@ AdInitialize ( /* Setup the Table Manager (cheat - there is no RSDT) */ - AcpiGbl_RootTableList.Size = 1; - AcpiGbl_RootTableList.Count = 0; + AcpiGbl_RootTableList.MaxTableCount = 1; + AcpiGbl_RootTableList.CurrentTableCount = 0; AcpiGbl_RootTableList.Tables = LocalTables; return (Status); @@ -1156,7 +1156,7 @@ AdParseTable ( /* If LoadTable is FALSE, we are parsing the last loaded table */ - TableIndex = AcpiGbl_RootTableList.Count - 1; + TableIndex = AcpiGbl_RootTableList.CurrentTableCount - 1; /* Pass 2 */ diff --git a/sys/contrib/dev/acpica/compiler/aslcompile.c b/sys/contrib/dev/acpica/compiler/aslcompile.c index 443273c62c7..439c21af438 100644 --- a/sys/contrib/dev/acpica/compiler/aslcompile.c +++ b/sys/contrib/dev/acpica/compiler/aslcompile.c @@ -177,7 +177,8 @@ AslCompilerSignon ( { Prefix = "; "; } - else if (Gbl_HexOutputFlag == HEX_OUTPUT_C) + else if ((Gbl_HexOutputFlag 
== HEX_OUTPUT_C) || + (Gbl_HexOutputFlag == HEX_OUTPUT_ASL)) { FlPrintFile (ASL_FILE_HEX_OUTPUT, "/*\n"); Prefix = " * "; @@ -265,7 +266,8 @@ AslCompilerFileHeader ( { Prefix = "; "; } - else if (Gbl_HexOutputFlag == HEX_OUTPUT_C) + else if ((Gbl_HexOutputFlag == HEX_OUTPUT_C) || + (Gbl_HexOutputFlag == HEX_OUTPUT_ASL)) { Prefix = " * "; } diff --git a/sys/contrib/dev/acpica/compiler/aslcompiler.h b/sys/contrib/dev/acpica/compiler/aslcompiler.h index dbc2709725b..a277c67c885 100644 --- a/sys/contrib/dev/acpica/compiler/aslcompiler.h +++ b/sys/contrib/dev/acpica/compiler/aslcompiler.h @@ -592,6 +592,10 @@ FlFileError ( UINT32 FileId, UINT8 ErrorId); +UINT32 +FlGetFileSize ( + UINT32 FileId); + ACPI_STATUS FlReadFile ( UINT32 FileId, @@ -750,6 +754,34 @@ UtDoConstant ( /* * aslresource - Resource template generation utilities */ +void +RsSmallAddressCheck ( + UINT8 Type, + UINT32 Minimum, + UINT32 Maximum, + UINT32 Length, + UINT32 Alignment, + ACPI_PARSE_OBJECT *MinOp, + ACPI_PARSE_OBJECT *MaxOp, + ACPI_PARSE_OBJECT *LengthOp, + ACPI_PARSE_OBJECT *AlignOp); + +void +RsLargeAddressCheck ( + UINT64 Minimum, + UINT64 Maximum, + UINT64 Length, + UINT64 Granularity, + UINT8 Flags, + ACPI_PARSE_OBJECT *MinOp, + ACPI_PARSE_OBJECT *MaxOp, + ACPI_PARSE_OBJECT *LengthOp, + ACPI_PARSE_OBJECT *GranOp); + +UINT16 +RsGetStringDataLength ( + ACPI_PARSE_OBJECT *InitializerOp); + ASL_RESOURCE_NODE * RsAllocateResourceNode ( UINT32 Size); @@ -805,43 +837,18 @@ RsDoResourceTemplate ( /* - * aslrestype1 - generate Small descriptors + * aslrestype1 - Miscellaneous Small descriptors */ ASL_RESOURCE_NODE * RsDoEndTagDescriptor ( ACPI_PARSE_OBJECT *Op, UINT32 CurrentByteOffset); -ASL_RESOURCE_NODE * -RsDoDmaDescriptor ( - ACPI_PARSE_OBJECT *Op, - UINT32 CurrentByteOffset); - ASL_RESOURCE_NODE * RsDoEndDependentDescriptor ( ACPI_PARSE_OBJECT *Op, UINT32 CurrentByteOffset); -ASL_RESOURCE_NODE * -RsDoFixedIoDescriptor ( - ACPI_PARSE_OBJECT *Op, - UINT32 CurrentByteOffset); - 
-ASL_RESOURCE_NODE * -RsDoIoDescriptor ( - ACPI_PARSE_OBJECT *Op, - UINT32 CurrentByteOffset); - -ASL_RESOURCE_NODE * -RsDoIrqDescriptor ( - ACPI_PARSE_OBJECT *Op, - UINT32 CurrentByteOffset); - -ASL_RESOURCE_NODE * -RsDoIrqNoFlagsDescriptor ( - ACPI_PARSE_OBJECT *Op, - UINT32 CurrentByteOffset); - ASL_RESOURCE_NODE * RsDoMemory24Descriptor ( ACPI_PARSE_OBJECT *Op, @@ -874,7 +881,36 @@ RsDoVendorSmallDescriptor ( /* - * aslrestype2 - generate Large descriptors + * aslrestype1i - I/O-related Small descriptors + */ +ASL_RESOURCE_NODE * +RsDoDmaDescriptor ( + ACPI_PARSE_OBJECT *Op, + UINT32 CurrentByteOffset); + +ASL_RESOURCE_NODE * +RsDoFixedIoDescriptor ( + ACPI_PARSE_OBJECT *Op, + UINT32 CurrentByteOffset); + +ASL_RESOURCE_NODE * +RsDoIoDescriptor ( + ACPI_PARSE_OBJECT *Op, + UINT32 CurrentByteOffset); + +ASL_RESOURCE_NODE * +RsDoIrqDescriptor ( + ACPI_PARSE_OBJECT *Op, + UINT32 CurrentByteOffset); + +ASL_RESOURCE_NODE * +RsDoIrqNoFlagsDescriptor ( + ACPI_PARSE_OBJECT *Op, + UINT32 CurrentByteOffset); + + +/* + * aslrestype2 - Large resource descriptors */ ASL_RESOURCE_NODE * RsDoInterruptDescriptor ( @@ -882,6 +918,20 @@ RsDoInterruptDescriptor ( UINT32 CurrentByteOffset); ASL_RESOURCE_NODE * +RsDoVendorLargeDescriptor ( + ACPI_PARSE_OBJECT *Op, + UINT32 CurrentByteOffset); + +ASL_RESOURCE_NODE * +RsDoGeneralRegisterDescriptor ( + ACPI_PARSE_OBJECT *Op, + UINT32 CurrentByteOffset); + + +/* + * aslrestype2d - DWord address descriptors + */ +ASL_RESOURCE_NODE * RsDoDwordIoDescriptor ( ACPI_PARSE_OBJECT *Op, UINT32 CurrentByteOffset); @@ -896,6 +946,10 @@ RsDoDwordSpaceDescriptor ( ACPI_PARSE_OBJECT *Op, UINT32 CurrentByteOffset); + +/* + * aslrestype2e - Extended address descriptors + */ ASL_RESOURCE_NODE * RsDoExtendedIoDescriptor ( ACPI_PARSE_OBJECT *Op, @@ -911,6 +965,10 @@ RsDoExtendedSpaceDescriptor ( ACPI_PARSE_OBJECT *Op, UINT32 CurrentByteOffset); + +/* + * aslrestype2q - QWord address descriptors + */ ASL_RESOURCE_NODE * RsDoQwordIoDescriptor ( 
ACPI_PARSE_OBJECT *Op, @@ -926,6 +984,10 @@ RsDoQwordSpaceDescriptor ( ACPI_PARSE_OBJECT *Op, UINT32 CurrentByteOffset); + +/* + * aslrestype2w - Word address descriptors + */ ASL_RESOURCE_NODE * RsDoWordIoDescriptor ( ACPI_PARSE_OBJECT *Op, @@ -941,15 +1003,5 @@ RsDoWordBusNumberDescriptor ( ACPI_PARSE_OBJECT *Op, UINT32 CurrentByteOffset); -ASL_RESOURCE_NODE * -RsDoVendorLargeDescriptor ( - ACPI_PARSE_OBJECT *Op, - UINT32 CurrentByteOffset); - -ASL_RESOURCE_NODE * -RsDoGeneralRegisterDescriptor ( - ACPI_PARSE_OBJECT *Op, - UINT32 CurrentByteOffset); - #endif /* __ASLCOMPILER_H */ diff --git a/sys/contrib/dev/acpica/compiler/asldefine.h b/sys/contrib/dev/acpica/compiler/asldefine.h index 75345a5d7f4..cff4ecf410e 100644 --- a/sys/contrib/dev/acpica/compiler/asldefine.h +++ b/sys/contrib/dev/acpica/compiler/asldefine.h @@ -128,7 +128,7 @@ #define CompilerId "ASL Optimizing Compiler" #define DisassemblerId "AML Disassembler" #define CompilerCopyright "Copyright (c) 2000 - 2010 Intel Corporation" -#define CompilerCompliance "Supports ACPI Specification Revision 4.0" +#define CompilerCompliance "Supports ACPI Specification Revision 4.0a" #define CompilerName "iasl" #define CompilerCreatorId "INTL" diff --git a/sys/contrib/dev/acpica/compiler/aslfiles.c b/sys/contrib/dev/acpica/compiler/aslfiles.c index f2e2dca3fc3..ca23834c0c9 100644 --- a/sys/contrib/dev/acpica/compiler/aslfiles.c +++ b/sys/contrib/dev/acpica/compiler/aslfiles.c @@ -234,6 +234,36 @@ FlOpenFile ( } +/******************************************************************************* + * + * FUNCTION: FlGetFileSize + * + * PARAMETERS: FileId - Index into file info array + * + * RETURN: File Size + * + * DESCRIPTION: Get current file size. Uses seek-to-EOF. File must be open. 
+ * + ******************************************************************************/ + +UINT32 +FlGetFileSize ( + UINT32 FileId) +{ + FILE *fp; + UINT32 FileSize; + + + fp = Gbl_Files[FileId].Handle; + + fseek (fp, 0, SEEK_END); + FileSize = (UINT32) ftell (fp); + fseek (fp, 0, SEEK_SET); + + return (FileSize); +} + + /******************************************************************************* * * FUNCTION: FlReadFile diff --git a/sys/contrib/dev/acpica/compiler/aslglobal.h b/sys/contrib/dev/acpica/compiler/aslglobal.h index 01ab931e0d0..85bb3e2d9e6 100644 --- a/sys/contrib/dev/acpica/compiler/aslglobal.h +++ b/sys/contrib/dev/acpica/compiler/aslglobal.h @@ -188,6 +188,7 @@ ASL_EXTERN BOOLEAN ASL_INIT_GLOBAL (Gbl_CompileTimesFlag, FALSE ASL_EXTERN BOOLEAN ASL_INIT_GLOBAL (Gbl_FoldConstants, TRUE); ASL_EXTERN BOOLEAN ASL_INIT_GLOBAL (Gbl_VerboseErrors, TRUE); ASL_EXTERN BOOLEAN ASL_INIT_GLOBAL (Gbl_NoErrors, FALSE); +ASL_EXTERN BOOLEAN ASL_INIT_GLOBAL (Gbl_NoResourceChecking, FALSE); ASL_EXTERN BOOLEAN ASL_INIT_GLOBAL (Gbl_DisasmFlag, FALSE); ASL_EXTERN BOOLEAN ASL_INIT_GLOBAL (Gbl_GetAllTables, FALSE); ASL_EXTERN BOOLEAN ASL_INIT_GLOBAL (Gbl_IntegerOptimizationFlag, TRUE); @@ -200,6 +201,8 @@ ASL_EXTERN UINT8 ASL_INIT_GLOBAL (Gbl_WarningLevel, ASL_WARNI #define HEX_OUTPUT_NONE 0 #define HEX_OUTPUT_C 1 #define HEX_OUTPUT_ASM 2 +#define HEX_OUTPUT_ASL 3 + ASL_EXTERN BOOLEAN ASL_INIT_GLOBAL (Gbl_HexOutputFlag, HEX_OUTPUT_NONE); diff --git a/sys/contrib/dev/acpica/compiler/asllisting.c b/sys/contrib/dev/acpica/compiler/asllisting.c index ebac9ca4996..9458b929215 100644 --- a/sys/contrib/dev/acpica/compiler/asllisting.c +++ b/sys/contrib/dev/acpica/compiler/asllisting.c @@ -198,6 +198,10 @@ static void LsDoHexOutputAsm ( void); +static void +LsDoHexOutputAsl ( + void); + ACPI_STATUS LsTreeWriteWalk ( ACPI_PARSE_OBJECT *Op, @@ -1337,6 +1341,11 @@ LsDoHexOutput ( LsDoHexOutputAsm (); break; + case HEX_OUTPUT_ASL: + + LsDoHexOutputAsl (); + break; + default: /* No 
other output types supported */ break; @@ -1362,60 +1371,160 @@ static void LsDoHexOutputC ( void) { - UINT32 j; - UINT8 FileByte[HEX_TABLE_LINE_SIZE]; - UINT8 Buffer[4]; + UINT8 FileData[HEX_TABLE_LINE_SIZE]; + UINT32 LineLength; UINT32 Offset = 0; + UINT32 AmlFileSize; + UINT32 i; - FlPrintFile (ASL_FILE_HEX_OUTPUT, " * C source code output\n *\n */\n"); + /* Get AML size, seek back to start */ + + AmlFileSize = FlGetFileSize (ASL_FILE_AML_OUTPUT); + + FlPrintFile (ASL_FILE_HEX_OUTPUT, " * C source code output\n"); + FlPrintFile (ASL_FILE_HEX_OUTPUT, " * AML code block contains 0x%X bytes\n *\n */\n", + AmlFileSize); FlPrintFile (ASL_FILE_HEX_OUTPUT, "unsigned char AmlCode[] =\n{\n"); - /* Start at the beginning of the AML file */ - - FlSeekFile (ASL_FILE_AML_OUTPUT, 0); - - /* Process all AML bytes in the AML file */ - - j = 0; - while (FlReadFile (ASL_FILE_AML_OUTPUT, &FileByte[j], 1) == AE_OK) + while (Offset < AmlFileSize) { - if (j == 0) + /* Read enough bytes needed for one output line */ + + LineLength = fread (FileData, 1, HEX_TABLE_LINE_SIZE, + Gbl_Files[ASL_FILE_AML_OUTPUT].Handle); + if (!LineLength) { - FlPrintFile (ASL_FILE_HEX_OUTPUT, " "); + break; } - /* Convert each AML byte to hex */ + FlPrintFile (ASL_FILE_HEX_OUTPUT, " "); - UtConvertByteToHex (FileByte[j], Buffer); - FlWriteFile (ASL_FILE_HEX_OUTPUT, Buffer, 4); - FlPrintFile (ASL_FILE_HEX_OUTPUT, ","); - - /* An occasional linefeed improves readability */ - - Offset++; - j++; - - if (j >= HEX_TABLE_LINE_SIZE) + for (i = 0; i < LineLength; i++) { - /* End of line, emit the ascii dump of the entire line */ - - FlPrintFile (ASL_FILE_HEX_OUTPUT, - " /* %8.8X", Offset - HEX_TABLE_LINE_SIZE); - - /* Write the ASCII character associated with each of the bytes */ - - LsDumpAsciiInComment (ASL_FILE_HEX_OUTPUT, - HEX_TABLE_LINE_SIZE, FileByte); - FlPrintFile (ASL_FILE_HEX_OUTPUT, " */\n"); - - /* Start new line */ - - j = 0; + /* + * Print each hex byte. 
+ * Add a comma until the very last byte of the AML file + * (Some C compilers complain about a trailing comma) + */ + FlPrintFile (ASL_FILE_HEX_OUTPUT, "0x%2.2X", FileData[i]); + if ((Offset + i + 1) < AmlFileSize) + { + FlPrintFile (ASL_FILE_HEX_OUTPUT, ","); + } + else + { + FlPrintFile (ASL_FILE_HEX_OUTPUT, " "); + } } + + /* Add fill spaces if needed for last line */ + + if (LineLength < HEX_TABLE_LINE_SIZE) + { + FlPrintFile (ASL_FILE_HEX_OUTPUT, "%*s", + 5 * (HEX_TABLE_LINE_SIZE - LineLength), " "); + } + + /* Emit the offset and ascii dump for the entire line */ + + FlPrintFile (ASL_FILE_HEX_OUTPUT, " /* %8.8X", Offset); + LsDumpAsciiInComment (ASL_FILE_HEX_OUTPUT, LineLength, FileData); + FlPrintFile (ASL_FILE_HEX_OUTPUT, "%*s*/\n", + HEX_TABLE_LINE_SIZE - LineLength + 1, " "); + + Offset += LineLength; } - FlPrintFile (ASL_FILE_HEX_OUTPUT, "\n};\n"); + FlPrintFile (ASL_FILE_HEX_OUTPUT, "};\n"); + FlCloseFile (ASL_FILE_HEX_OUTPUT); +} + + +/******************************************************************************* + * + * FUNCTION: LsDoHexOutputAsl + * + * PARAMETERS: None + * + * RETURN: None. + * + * DESCRIPTION: Create the hex output file. This is the same data as the AML + * output file, but formatted into hex/ascii bytes suitable for + * inclusion into a C source file. 
+ * + ******************************************************************************/ + +static void +LsDoHexOutputAsl ( + void) +{ + UINT8 FileData[HEX_TABLE_LINE_SIZE]; + UINT32 LineLength; + UINT32 Offset = 0; + UINT32 AmlFileSize; + UINT32 i; + + + /* Get AML size, seek back to start */ + + AmlFileSize = FlGetFileSize (ASL_FILE_AML_OUTPUT); + + FlPrintFile (ASL_FILE_HEX_OUTPUT, " * ASL source code output\n"); + FlPrintFile (ASL_FILE_HEX_OUTPUT, " * AML code block contains 0x%X bytes\n *\n */\n", + AmlFileSize); + FlPrintFile (ASL_FILE_HEX_OUTPUT, " Name (BUF1, Buffer()\n {\n"); + + while (Offset < AmlFileSize) + { + /* Read enough bytes needed for one output line */ + + LineLength = fread (FileData, 1, HEX_TABLE_LINE_SIZE, + Gbl_Files[ASL_FILE_AML_OUTPUT].Handle); + if (!LineLength) + { + break; + } + + FlPrintFile (ASL_FILE_HEX_OUTPUT, " "); + + for (i = 0; i < LineLength; i++) + { + /* + * Print each hex byte. + * Add a comma until the very last byte of the AML file + * (Some C compilers complain about a trailing comma) + */ + FlPrintFile (ASL_FILE_HEX_OUTPUT, "0x%2.2X", FileData[i]); + if ((Offset + i + 1) < AmlFileSize) + { + FlPrintFile (ASL_FILE_HEX_OUTPUT, ","); + } + else + { + FlPrintFile (ASL_FILE_HEX_OUTPUT, " "); + } + } + + /* Add fill spaces if needed for last line */ + + if (LineLength < HEX_TABLE_LINE_SIZE) + { + FlPrintFile (ASL_FILE_HEX_OUTPUT, "%*s", + 5 * (HEX_TABLE_LINE_SIZE - LineLength), " "); + } + + /* Emit the offset and ascii dump for the entire line */ + + FlPrintFile (ASL_FILE_HEX_OUTPUT, " /* %8.8X", Offset); + LsDumpAsciiInComment (ASL_FILE_HEX_OUTPUT, LineLength, FileData); + FlPrintFile (ASL_FILE_HEX_OUTPUT, "%*s*/\n", + HEX_TABLE_LINE_SIZE - LineLength + 1, " "); + + Offset += LineLength; + } + + FlPrintFile (ASL_FILE_HEX_OUTPUT, " })\n"); FlCloseFile (ASL_FILE_HEX_OUTPUT); } @@ -1438,58 +1547,64 @@ static void LsDoHexOutputAsm ( void) { - UINT32 j; - UINT8 FileByte[HEX_TABLE_LINE_SIZE]; - UINT8 Buffer[4]; + UINT8 
FileData[HEX_TABLE_LINE_SIZE]; + UINT32 LineLength; UINT32 Offset = 0; - BOOLEAN DoComma = FALSE; + UINT32 AmlFileSize; + UINT32 i; - FlPrintFile (ASL_FILE_HEX_OUTPUT, "; Assembly code source output\n;\n"); + /* Get AML size, seek back to start */ - /* Start at the beginning of the AML file */ + AmlFileSize = FlGetFileSize (ASL_FILE_AML_OUTPUT); - FlSeekFile (ASL_FILE_AML_OUTPUT, 0); + FlPrintFile (ASL_FILE_HEX_OUTPUT, "; Assembly code source output\n"); + FlPrintFile (ASL_FILE_HEX_OUTPUT, "; AML code block contains 0x%X bytes\n;\n", + AmlFileSize); - /* Process all AML bytes in the AML file */ - - j = 0; - while (FlReadFile (ASL_FILE_AML_OUTPUT, &FileByte[j], 1) == AE_OK) + while (Offset < AmlFileSize) { - if (j == 0) + /* Read enough bytes needed for one output line */ + + LineLength = fread (FileData, 1, HEX_TABLE_LINE_SIZE, + Gbl_Files[ASL_FILE_AML_OUTPUT].Handle); + if (!LineLength) { - FlPrintFile (ASL_FILE_HEX_OUTPUT, " db "); - } - else if (DoComma) - { - FlPrintFile (ASL_FILE_HEX_OUTPUT, ","); - DoComma = FALSE; + break; } - /* Convert each AML byte to hex */ + FlPrintFile (ASL_FILE_HEX_OUTPUT, " db "); - UtConvertByteToAsmHex (FileByte[j], Buffer); - FlWriteFile (ASL_FILE_HEX_OUTPUT, Buffer, 4); - - /* An occasional linefeed improves readability */ - - Offset++; - j++; - if (j >= HEX_TABLE_LINE_SIZE) + for (i = 0; i < LineLength; i++) { - FlPrintFile (ASL_FILE_HEX_OUTPUT, - " ;%8.8X", Offset - HEX_TABLE_LINE_SIZE); - - /* Write the ASCII character associated with each of the bytes */ - - LsDumpAscii (ASL_FILE_HEX_OUTPUT, HEX_TABLE_LINE_SIZE, FileByte); - FlPrintFile (ASL_FILE_HEX_OUTPUT, "\n"); - j = 0; + /* + * Print each hex byte. 
+ * Add a comma until the last byte of the line + */ + FlPrintFile (ASL_FILE_HEX_OUTPUT, "0%2.2Xh", FileData[i]); + if ((i + 1) < LineLength) + { + FlPrintFile (ASL_FILE_HEX_OUTPUT, ","); + } } - else + + FlPrintFile (ASL_FILE_HEX_OUTPUT, " "); + + /* Add fill spaces if needed for last line */ + + if (LineLength < HEX_TABLE_LINE_SIZE) { - DoComma = TRUE; + FlPrintFile (ASL_FILE_HEX_OUTPUT, "%*s", + 5 * (HEX_TABLE_LINE_SIZE - LineLength), " "); } + + /* Emit the offset and ascii dump for the entire line */ + + FlPrintFile (ASL_FILE_HEX_OUTPUT, " ; %8.8X", Offset); + LsDumpAsciiInComment (ASL_FILE_HEX_OUTPUT, LineLength, FileData); + FlPrintFile (ASL_FILE_HEX_OUTPUT, "\n"); + + Offset += LineLength; } FlPrintFile (ASL_FILE_HEX_OUTPUT, "\n"); diff --git a/sys/contrib/dev/acpica/compiler/aslmain.c b/sys/contrib/dev/acpica/compiler/aslmain.c index e8b0c457ce5..76be1af0ecd 100644 --- a/sys/contrib/dev/acpica/compiler/aslmain.c +++ b/sys/contrib/dev/acpica/compiler/aslmain.c @@ -167,7 +167,7 @@ AslDoResponseFile ( #define ASL_TOKEN_SEPARATORS " \t\n" -#define ASL_SUPPORTED_OPTIONS "@:2b:cd^e:fgh^i^I:l^o:p:r:s:t:v:w:x:" +#define ASL_SUPPORTED_OPTIONS "@:2b:c:d^e:fgh^i^I:l^no:p:r:s:t:v:w:x:" /******************************************************************************* @@ -203,13 +203,14 @@ Options ( printf ("\nAML Output Files:\n"); printf (" -s Create AML in assembler or C source file (*.asm or *.c)\n"); printf (" -i Create assembler or C include file (*.inc or *.h)\n"); - printf (" -t Create AML in assembler or C hex table (*.hex)\n"); + printf (" -t Create AML in assembler, C, or ASL hex table (*.hex)\n"); printf ("\nAML Code Generation:\n"); printf (" -oa Disable all optimizations (compatibility mode)\n"); printf (" -of Disable constant folding\n"); printf (" -oi Disable integer optimization to Zero/One/Ones\n"); printf (" -on Disable named reference string optimization\n"); + printf (" -cr Disable Resource Descriptor error checking\n"); printf (" -r Override table 
header Revision (1-255)\n"); printf ("\nListings:\n"); @@ -264,7 +265,7 @@ HelpMessage ( printf (" -b Create compiler debug/trace file (*.txt)\n"); printf (" Types: Parse/Tree/Both\n"); printf (" -f Ignore errors, force creation of AML output file(s)\n"); - printf (" -c Parse only, no output generation\n"); + printf (" -n Parse only, no output generation\n"); printf (" -ot Display compile times\n"); printf (" -x Set debug level for trace output\n"); } @@ -507,10 +508,16 @@ AslDoOptions ( case 'c': + switch (AcpiGbl_Optarg[0]) + { + case 'r': + Gbl_NoResourceChecking = TRUE; + break; - /* Parse only */ - - Gbl_ParseOnlyFlag = TRUE; + default: + printf ("Unknown option: -c%s\n", AcpiGbl_Optarg); + return (-1); + } break; @@ -688,6 +695,14 @@ AslDoOptions ( break; + case 'n': + + /* Parse only */ + + Gbl_ParseOnlyFlag = TRUE; + break; + + case 'p': /* Override default AML output filename */ @@ -741,6 +756,10 @@ AslDoOptions ( Gbl_HexOutputFlag = HEX_OUTPUT_C; break; + case 's': + Gbl_HexOutputFlag = HEX_OUTPUT_ASL; + break; + default: printf ("Unknown option: -t%s\n", AcpiGbl_Optarg); return (-1); diff --git a/sys/contrib/dev/acpica/compiler/aslpredef.c b/sys/contrib/dev/acpica/compiler/aslpredef.c index b9632f9c6e1..c3e49cd05b0 100644 --- a/sys/contrib/dev/acpica/compiler/aslpredef.c +++ b/sys/contrib/dev/acpica/compiler/aslpredef.c @@ -243,11 +243,11 @@ ApCheckForPredefinedMethod ( break; - case ACPI_EVENT_RESERVED_NAME: /* _Lxx, _Exx, and _Qxx methods */ + case ACPI_EVENT_RESERVED_NAME: /* _Lxx/_Exx/_Wxx/_Qxx methods */ Gbl_ReservedMethods++; - /* NumArguments must be zero for all _Lxx, _Exx, and _Qxx methods */ + /* NumArguments must be zero for all _Lxx/_Exx/_Wxx/_Qxx methods */ if (MethodInfo->NumArguments != 0) { @@ -346,12 +346,12 @@ ApCheckPredefinedReturnValue ( case ACPI_NOT_RESERVED_NAME: /* No underscore or _Txx or _xxx name not matched */ case ACPI_PREDEFINED_NAME: /* Resource Name or reserved scope name */ case ACPI_COMPILER_RESERVED_NAME: /* A _Txx 
that was not emitted by compiler */ - case ACPI_EVENT_RESERVED_NAME: /* _Lxx, _Exx, and _Qxx methods */ + case ACPI_EVENT_RESERVED_NAME: /* _Lxx/_Exx/_Wxx/_Qxx methods */ /* Just return, nothing to do */ return; - default: /* a real predefined ACPI name */ + default: /* A standard predefined ACPI name */ /* Exit if no return value expected */ @@ -425,29 +425,59 @@ ApCheckForPredefinedObject ( * or a predefined scope name */ Index = ApCheckForPredefinedName (Op, Name); - if (Index > ACPI_VALID_RESERVED_NAME_MAX) - { - return; - } - /* - * We found a matching predefind name. - * Check if this predefined name requires input arguments - */ - if (PredefinedNames[Index].Info.ParamCount > 0) + switch (Index) { + case ACPI_NOT_RESERVED_NAME: /* No underscore or _Txx or _xxx name not matched */ + case ACPI_PREDEFINED_NAME: /* Resource Name or reserved scope name */ + case ACPI_COMPILER_RESERVED_NAME: /* A _Txx that was not emitted by compiler */ + + /* Nothing to do */ + return; + + case ACPI_EVENT_RESERVED_NAME: /* _Lxx/_Exx/_Wxx/_Qxx methods */ + /* - * This predefined name must always be defined as a control - * method because it is required to have input arguments. + * These names must be control methods, by definition in ACPI spec. + * Also because they are defined to return no value. None of them + * require any arguments. 
*/ AslError (ASL_ERROR, ASL_MSG_RESERVED_METHOD, Op, - "with arguments"); + "with zero arguments"); + return; + + default: /* A standard predefined ACPI name */ + + /* + * If this predefined name requires input arguments, then + * it must be implemented as a control method + */ + if (PredefinedNames[Index].Info.ParamCount > 0) + { + AslError (ASL_ERROR, ASL_MSG_RESERVED_METHOD, Op, + "with arguments"); + return; + } + + /* + * If no return value is expected from this predefined name, then + * it follows that it must be implemented as a control method + * (with zero args, because the args > 0 case was handled above) + * Examples are: _DIS, _INI, _IRC, _OFF, _ON, _PSx + */ + if (!PredefinedNames[Index].Info.ExpectedBtypes) + { + AslError (ASL_ERROR, ASL_MSG_RESERVED_METHOD, Op, + "with zero arguments"); + return; + } + + /* Typecheck the actual object, it is the next argument */ + + ApCheckObjectType (Op->Asl.Child->Asl.Next, + PredefinedNames[Index].Info.ExpectedBtypes); + return; } - - /* Typecheck the actual object, it is the next argument */ - - ApCheckObjectType (Op->Asl.Child->Asl.Next, - PredefinedNames[Index].Info.ExpectedBtypes); } @@ -514,7 +544,7 @@ ApCheckForPredefinedName ( } } - /* Check for _Lxx, _Exx, _Qxx, _T_x. Warning if unknown predefined name */ + /* Check for _Lxx/_Exx/_Wxx/_Qxx/_T_x. Warning if unknown predefined name */ return (ApCheckForSpecialName (Op, Name)); } @@ -530,7 +560,7 @@ ApCheckForPredefinedName ( * RETURN: None * * DESCRIPTION: Check for the "special" predefined names - - * _Lxx, _Exx, _Qxx, and _T_x + * _Lxx, _Exx, _Qxx, _Wxx, and _T_x * ******************************************************************************/ @@ -541,14 +571,16 @@ ApCheckForSpecialName ( { /* - * Check for the "special" predefined names. We know the first char is an - * underscore already. + * Check for the "special" predefined names. We already know that the + * first character is an underscore. 
* GPE: _Lxx * GPE: _Exx + * GPE: _Wxx * EC: _Qxx */ if ((Name[1] == 'L') || (Name[1] == 'E') || + (Name[1] == 'W') || (Name[1] == 'Q')) { /* The next two characters must be hex digits */ diff --git a/sys/contrib/dev/acpica/compiler/aslresource.c b/sys/contrib/dev/acpica/compiler/aslresource.c index 3242ba700d6..25289f54a21 100644 --- a/sys/contrib/dev/acpica/compiler/aslresource.c +++ b/sys/contrib/dev/acpica/compiler/aslresource.c @@ -1,7 +1,7 @@ /****************************************************************************** * - * Module Name: aslresource - Resource templates and descriptors + * Module Name: aslresource - Resource template/descriptor utilities * *****************************************************************************/ @@ -124,6 +124,298 @@ ACPI_MODULE_NAME ("aslresource") +/******************************************************************************* + * + * FUNCTION: RsSmallAddressCheck + * + * PARAMETERS: Minimum - Address Min value + * Maximum - Address Max value + * Length - Address range value + * Alignment - Address alignment value + * MinOp - Original Op for Address Min + * MaxOp - Original Op for Address Max + * LengthOp - Original Op for address range + * AlignOp - Original Op for address alignment. If + * NULL, means "zero value for alignment is + * OK, and means 64K alignment" (for + * Memory24 descriptor) + * + * RETURN: None. Adds error messages to error log if necessary + * + * DESCRIPTION: Perform common value checks for "small" address descriptors. 
+ * Currently: + * Io, Memory24, Memory32 + * + ******************************************************************************/ + +void +RsSmallAddressCheck ( + UINT8 Type, + UINT32 Minimum, + UINT32 Maximum, + UINT32 Length, + UINT32 Alignment, + ACPI_PARSE_OBJECT *MinOp, + ACPI_PARSE_OBJECT *MaxOp, + ACPI_PARSE_OBJECT *LengthOp, + ACPI_PARSE_OBJECT *AlignOp) +{ + + if (Gbl_NoResourceChecking) + { + return; + } + + /* Special case for Memory24, values are compressed */ + + if (Type == ACPI_RESOURCE_NAME_MEMORY24) + { + if (!Alignment) /* Alignment==0 means 64K - no invalid alignment */ + { + Alignment = ACPI_UINT16_MAX + 1; + } + + Minimum <<= 8; + Maximum <<= 8; + Length *= 256; + } + + /* IO descriptor has different definition of min/max, don't check */ + + if (Type != ACPI_RESOURCE_NAME_IO) + { + /* Basic checks on Min/Max/Length */ + + if (Minimum > Maximum) + { + AslError (ASL_ERROR, ASL_MSG_INVALID_MIN_MAX, MinOp, NULL); + } + else if (Length > (Maximum - Minimum + 1)) + { + AslError (ASL_ERROR, ASL_MSG_INVALID_LENGTH, LengthOp, NULL); + } + } + + /* Alignment of zero is not in ACPI spec, but is used to mean byte acc */ + + if (!Alignment) + { + Alignment = 1; + } + + /* Addresses must be an exact multiple of the alignment value */ + + if (Minimum % Alignment) + { + AslError (ASL_ERROR, ASL_MSG_ALIGNMENT, MinOp, NULL); + } + if (Maximum % Alignment) + { + AslError (ASL_ERROR, ASL_MSG_ALIGNMENT, MaxOp, NULL); + } +} + + +/******************************************************************************* + * + * FUNCTION: RsLargeAddressCheck + * + * PARAMETERS: Minimum - Address Min value + * Maximum - Address Max value + * Length - Address range value + * Granularity - Address granularity value + * Flags - General flags for address descriptors: + * _MIF, _MAF, _DEC + * MinOp - Original Op for Address Min + * MaxOp - Original Op for Address Max + * LengthOp - Original Op for address range + * GranOp - Original Op for address granularity + * + * RETURN: None. 
Adds error messages to error log if necessary + * + * DESCRIPTION: Perform common value checks for "large" address descriptors. + * Currently: + * WordIo, WordBusNumber, WordSpace + * DWordIo, DWordMemory, DWordSpace + * QWordIo, QWordMemory, QWordSpace + * ExtendedIo, ExtendedMemory, ExtendedSpace + * + * _MIF flag set means that the minimum address is fixed and is not relocatable + * _MAF flag set means that the maximum address is fixed and is not relocatable + * Length of zero means that the record size is variable + * + * This function implements the LEN/MIF/MAF/MIN/MAX/GRA rules within Table 6-40 + * of the ACPI 4.0a specification. Added 04/2010. + * + ******************************************************************************/ + +void +RsLargeAddressCheck ( + UINT64 Minimum, + UINT64 Maximum, + UINT64 Length, + UINT64 Granularity, + UINT8 Flags, + ACPI_PARSE_OBJECT *MinOp, + ACPI_PARSE_OBJECT *MaxOp, + ACPI_PARSE_OBJECT *LengthOp, + ACPI_PARSE_OBJECT *GranOp) +{ + + if (Gbl_NoResourceChecking) + { + return; + } + + /* Basic checks on Min/Max/Length */ + + if (Minimum > Maximum) + { + AslError (ASL_ERROR, ASL_MSG_INVALID_MIN_MAX, MinOp, NULL); + return; + } + else if (Length > (Maximum - Minimum + 1)) + { + AslError (ASL_ERROR, ASL_MSG_INVALID_LENGTH, LengthOp, NULL); + return; + } + + /* If specified (non-zero), ensure granularity is a power-of-two minus one */ + + if (Granularity) + { + if ((Granularity + 1) & + Granularity) + { + AslError (ASL_ERROR, ASL_MSG_INVALID_GRANULARITY, GranOp, NULL); + return; + } + } + + /* + * Check the various combinations of Length, MinFixed, and MaxFixed + */ + if (Length) + { + /* Fixed non-zero length */ + + switch (Flags & (ACPI_RESOURCE_FLAG_MIF | ACPI_RESOURCE_FLAG_MAF)) + { + case 0: + /* + * Fixed length, variable locations (both _MIN and _MAX). 
+ * Length must be a multiple of granularity + */ + if (Granularity & Length) + { + AslError (ASL_ERROR, ASL_MSG_ALIGNMENT, LengthOp, NULL); + } + break; + + case (ACPI_RESOURCE_FLAG_MIF | ACPI_RESOURCE_FLAG_MAF): + + /* Fixed length, fixed location. Granularity must be zero */ + + if (Granularity != 0) + { + AslError (ASL_ERROR, ASL_MSG_INVALID_GRAN_FIXED, GranOp, NULL); + } + + /* Length must be exactly the size of the min/max window */ + + if (Length != (Maximum - Minimum + 1)) + { + AslError (ASL_ERROR, ASL_MSG_INVALID_LENGTH_FIXED, LengthOp, NULL); + } + break; + + /* All other combinations are invalid */ + + case ACPI_RESOURCE_FLAG_MIF: + case ACPI_RESOURCE_FLAG_MAF: + default: + AslError (ASL_ERROR, ASL_MSG_INVALID_ADDR_FLAGS, LengthOp, NULL); + } + } + else + { + /* Variable length (length==0) */ + + switch (Flags & (ACPI_RESOURCE_FLAG_MIF | ACPI_RESOURCE_FLAG_MAF)) + { + case 0: + /* + * Both _MIN and _MAX are variable. + * No additional requirements, just exit + */ + break; + + case ACPI_RESOURCE_FLAG_MIF: + + /* _MIN is fixed. _MIN must be multiple of _GRA */ + + /* + * The granularity is defined by the ACPI specification to be a + * power-of-two minus one, therefore the granularity is a + * bitmask which can be used to easily validate the addresses. + */ + if (Granularity & Minimum) + { + AslError (ASL_ERROR, ASL_MSG_ALIGNMENT, MinOp, NULL); + } + break; + + case ACPI_RESOURCE_FLAG_MAF: + + /* _MAX is fixed. 
(_MAX + 1) must be multiple of _GRA */ + + if (Granularity & (Maximum + 1)) + { + AslError (ASL_ERROR, ASL_MSG_ALIGNMENT, MaxOp, "-1"); + } + break; + + /* Both MIF/MAF set is invalid if length is zero */ + + case (ACPI_RESOURCE_FLAG_MIF | ACPI_RESOURCE_FLAG_MAF): + default: + AslError (ASL_ERROR, ASL_MSG_INVALID_ADDR_FLAGS, LengthOp, NULL); + } + } +} + + +/******************************************************************************* + * + * FUNCTION: RsGetStringDataLength + * + * PARAMETERS: InitializerOp - Start of a subtree of init nodes + * + * RETURN: Valid string length if a string node is found (otherwise 0) + * + * DESCRIPTION: In a list of peer nodes, find the first one that contains a + * string and return the length of the string. + * + ******************************************************************************/ + +UINT16 +RsGetStringDataLength ( + ACPI_PARSE_OBJECT *InitializerOp) +{ + + while (InitializerOp) + { + if (InitializerOp->Asl.ParseOpcode == PARSEOP_STRING_LITERAL) + { + return ((UINT16) (strlen (InitializerOp->Asl.Value.String) + 1)); + } + InitializerOp = ASL_GET_PEER_NODE (InitializerOp); + } + + return 0; +} + + /******************************************************************************* * * FUNCTION: RsAllocateResourceNode diff --git a/sys/contrib/dev/acpica/compiler/aslrestype1.c b/sys/contrib/dev/acpica/compiler/aslrestype1.c index 036abdf116f..32f2cfc1458 100644 --- a/sys/contrib/dev/acpica/compiler/aslrestype1.c +++ b/sys/contrib/dev/acpica/compiler/aslrestype1.c @@ -1,7 +1,7 @@ /****************************************************************************** * - * Module Name: aslrestype1 - Short (type1) resource templates and descriptors + * Module Name: aslrestype1 - Miscellaneous small resource descriptors * *****************************************************************************/ @@ -121,6 +121,18 @@ #define _COMPONENT ACPI_COMPILER ACPI_MODULE_NAME ("aslrestype1") +/* + * This module contains miscellaneous small 
resource descriptors: + * + * EndTag + * EndDependentFn + * Memory24 + * Memory32 + * Memory32Fixed + * StartDependentFn + * StartDependentFnNoPri + * VendorShort + */ /******************************************************************************* * @@ -156,127 +168,6 @@ RsDoEndTagDescriptor ( } -/******************************************************************************* - * - * FUNCTION: RsDoDmaDescriptor - * - * PARAMETERS: Op - Parent resource descriptor parse node - * CurrentByteOffset - Offset into the resource template AML - * buffer (to track references to the desc) - * - * RETURN: Completed resource node - * - * DESCRIPTION: Construct a short "DMA" descriptor - * - ******************************************************************************/ - -ASL_RESOURCE_NODE * -RsDoDmaDescriptor ( - ACPI_PARSE_OBJECT *Op, - UINT32 CurrentByteOffset) -{ - AML_RESOURCE *Descriptor; - ACPI_PARSE_OBJECT *InitializerOp; - ASL_RESOURCE_NODE *Rnode; - UINT32 i; - UINT8 DmaChannelMask = 0; - UINT8 DmaChannels = 0; - - - InitializerOp = Op->Asl.Child; - Rnode = RsAllocateResourceNode (sizeof (AML_RESOURCE_DMA)); - - Descriptor = Rnode->Buffer; - Descriptor->Dma.DescriptorType = ACPI_RESOURCE_NAME_DMA | - ASL_RDESC_DMA_SIZE; - - /* Process all child initialization nodes */ - - for (i = 0; InitializerOp; i++) - { - switch (i) - { - case 0: /* DMA type */ - - RsSetFlagBits (&Descriptor->Dma.Flags, InitializerOp, 5, 0); - RsCreateBitField (InitializerOp, ACPI_RESTAG_DMATYPE, - CurrentByteOffset + ASL_RESDESC_OFFSET (Dma.Flags), 5); - break; - - case 1: /* Bus Master */ - - RsSetFlagBits (&Descriptor->Dma.Flags, InitializerOp, 2, 0); - RsCreateBitField (InitializerOp, ACPI_RESTAG_BUSMASTER, - CurrentByteOffset + ASL_RESDESC_OFFSET (Dma.Flags), 2); - break; - - case 2: /* Xfer Type (transfer width) */ - - RsSetFlagBits (&Descriptor->Dma.Flags, InitializerOp, 0, 0); - RsCreateBitField (InitializerOp, ACPI_RESTAG_XFERTYPE, - CurrentByteOffset + ASL_RESDESC_OFFSET (Dma.Flags), 
0); - break; - - case 3: /* Name */ - - UtAttachNamepathToOwner (Op, InitializerOp); - break; - - default: - - /* All DMA channel bytes are handled here, after flags and name */ - - if (InitializerOp->Asl.ParseOpcode != PARSEOP_DEFAULT_ARG) - { - /* Up to 8 channels can be specified in the list */ - - DmaChannels++; - if (DmaChannels > 8) - { - AslError (ASL_ERROR, ASL_MSG_DMA_LIST, - InitializerOp, NULL); - return (Rnode); - } - - /* Only DMA channels 0-7 are allowed (mask is 8 bits) */ - - if (InitializerOp->Asl.Value.Integer > 7) - { - AslError (ASL_ERROR, ASL_MSG_DMA_CHANNEL, - InitializerOp, NULL); - } - - /* Build the mask */ - - DmaChannelMask |= - (1 << ((UINT8) InitializerOp->Asl.Value.Integer)); - } - - if (i == 4) /* case 4: First DMA byte */ - { - /* Check now for duplicates in list */ - - RsCheckListForDuplicates (InitializerOp); - - /* Create a named field at the start of the list */ - - RsCreateByteField (InitializerOp, ACPI_RESTAG_DMA, - CurrentByteOffset + - ASL_RESDESC_OFFSET (Dma.DmaChannelMask)); - } - break; - } - - InitializerOp = RsCompleteNodeAndGetNext (InitializerOp); - } - - /* Now we can set the channel mask */ - - Descriptor->Dma.DmaChannelMask = DmaChannelMask; - return (Rnode); -} - - /******************************************************************************* * * FUNCTION: RsDoEndDependentDescriptor @@ -309,396 +200,6 @@ RsDoEndDependentDescriptor ( } -/******************************************************************************* - * - * FUNCTION: RsDoFixedIoDescriptor - * - * PARAMETERS: Op - Parent resource descriptor parse node - * CurrentByteOffset - Offset into the resource template AML - * buffer (to track references to the desc) - * - * RETURN: Completed resource node - * - * DESCRIPTION: Construct a short "FixedIO" descriptor - * - ******************************************************************************/ - -ASL_RESOURCE_NODE * -RsDoFixedIoDescriptor ( - ACPI_PARSE_OBJECT *Op, - UINT32 CurrentByteOffset) -{ - 
AML_RESOURCE *Descriptor; - ACPI_PARSE_OBJECT *InitializerOp; - ASL_RESOURCE_NODE *Rnode; - UINT32 i; - - - InitializerOp = Op->Asl.Child; - Rnode = RsAllocateResourceNode (sizeof (AML_RESOURCE_FIXED_IO)); - - Descriptor = Rnode->Buffer; - Descriptor->Io.DescriptorType = ACPI_RESOURCE_NAME_FIXED_IO | - ASL_RDESC_FIXED_IO_SIZE; - - /* Process all child initialization nodes */ - - for (i = 0; InitializerOp; i++) - { - switch (i) - { - case 0: /* Base Address */ - - Descriptor->FixedIo.Address = - (UINT16) InitializerOp->Asl.Value.Integer; - RsCreateByteField (InitializerOp, ACPI_RESTAG_BASEADDRESS, - CurrentByteOffset + ASL_RESDESC_OFFSET (FixedIo.Address)); - break; - - case 1: /* Length */ - - Descriptor->FixedIo.AddressLength = - (UINT8) InitializerOp->Asl.Value.Integer; - RsCreateByteField (InitializerOp, ACPI_RESTAG_LENGTH, - CurrentByteOffset + ASL_RESDESC_OFFSET (FixedIo.AddressLength)); - break; - - case 2: /* Name */ - - UtAttachNamepathToOwner (Op, InitializerOp); - break; - - default: - - AslError (ASL_ERROR, ASL_MSG_RESOURCE_LIST, InitializerOp, NULL); - break; - } - - InitializerOp = RsCompleteNodeAndGetNext (InitializerOp); - } - - return (Rnode); -} - - -/******************************************************************************* - * - * FUNCTION: RsDoIoDescriptor - * - * PARAMETERS: Op - Parent resource descriptor parse node - * CurrentByteOffset - Offset into the resource template AML - * buffer (to track references to the desc) - * - * RETURN: Completed resource node - * - * DESCRIPTION: Construct a short "IO" descriptor - * - ******************************************************************************/ - -ASL_RESOURCE_NODE * -RsDoIoDescriptor ( - ACPI_PARSE_OBJECT *Op, - UINT32 CurrentByteOffset) -{ - AML_RESOURCE *Descriptor; - ACPI_PARSE_OBJECT *InitializerOp; - ASL_RESOURCE_NODE *Rnode; - UINT32 i; - - - InitializerOp = Op->Asl.Child; - Rnode = RsAllocateResourceNode (sizeof (AML_RESOURCE_IO)); - - Descriptor = Rnode->Buffer; - 
Descriptor->Io.DescriptorType = ACPI_RESOURCE_NAME_IO | - ASL_RDESC_IO_SIZE; - - /* Process all child initialization nodes */ - - for (i = 0; InitializerOp; i++) - { - switch (i) - { - case 0: /* Decode size */ - - RsSetFlagBits (&Descriptor->Io.Flags, InitializerOp, 0, 1); - RsCreateBitField (InitializerOp, ACPI_RESTAG_DECODE, - CurrentByteOffset + ASL_RESDESC_OFFSET (Io.Flags), 0); - break; - - case 1: /* Min Address */ - - Descriptor->Io.Minimum = - (UINT16) InitializerOp->Asl.Value.Integer; - RsCreateByteField (InitializerOp, ACPI_RESTAG_MINADDR, - CurrentByteOffset + ASL_RESDESC_OFFSET (Io.Minimum)); - break; - - case 2: /* Max Address */ - - Descriptor->Io.Maximum = - (UINT16) InitializerOp->Asl.Value.Integer; - RsCreateByteField (InitializerOp, ACPI_RESTAG_MAXADDR, - CurrentByteOffset + ASL_RESDESC_OFFSET (Io.Maximum)); - break; - - case 3: /* Alignment */ - - Descriptor->Io.Alignment = - (UINT8) InitializerOp->Asl.Value.Integer; - RsCreateByteField (InitializerOp, ACPI_RESTAG_ALIGNMENT, - CurrentByteOffset + ASL_RESDESC_OFFSET (Io.Alignment)); - break; - - case 4: /* Length */ - - Descriptor->Io.AddressLength = - (UINT8) InitializerOp->Asl.Value.Integer; - RsCreateByteField (InitializerOp, ACPI_RESTAG_LENGTH, - CurrentByteOffset + ASL_RESDESC_OFFSET (Io.AddressLength)); - break; - - case 5: /* Name */ - - UtAttachNamepathToOwner (Op, InitializerOp); - break; - - default: - - AslError (ASL_ERROR, ASL_MSG_RESOURCE_LIST, InitializerOp, NULL); - break; - } - - InitializerOp = RsCompleteNodeAndGetNext (InitializerOp); - } - - return (Rnode); -} - - -/******************************************************************************* - * - * FUNCTION: RsDoIrqDescriptor - * - * PARAMETERS: Op - Parent resource descriptor parse node - * CurrentByteOffset - Offset into the resource template AML - * buffer (to track references to the desc) - * - * RETURN: Completed resource node - * - * DESCRIPTION: Construct a short "IRQ" descriptor - * - 
******************************************************************************/ - -ASL_RESOURCE_NODE * -RsDoIrqDescriptor ( - ACPI_PARSE_OBJECT *Op, - UINT32 CurrentByteOffset) -{ - AML_RESOURCE *Descriptor; - ACPI_PARSE_OBJECT *InitializerOp; - ASL_RESOURCE_NODE *Rnode; - UINT32 Interrupts = 0; - UINT16 IrqMask = 0; - UINT32 i; - - - InitializerOp = Op->Asl.Child; - Rnode = RsAllocateResourceNode (sizeof (AML_RESOURCE_IRQ)); - - /* Length = 3 (with flag byte) */ - - Descriptor = Rnode->Buffer; - Descriptor->Irq.DescriptorType = ACPI_RESOURCE_NAME_IRQ | - (ASL_RDESC_IRQ_SIZE + 0x01); - - /* Process all child initialization nodes */ - - for (i = 0; InitializerOp; i++) - { - switch (i) - { - case 0: /* Interrupt Type (or Mode - edge/level) */ - - RsSetFlagBits (&Descriptor->Irq.Flags, InitializerOp, 0, 1); - RsCreateBitField (InitializerOp, ACPI_RESTAG_INTERRUPTTYPE, - CurrentByteOffset + ASL_RESDESC_OFFSET (Irq.Flags), 0); - break; - - case 1: /* Interrupt Level (or Polarity - Active high/low) */ - - RsSetFlagBits (&Descriptor->Irq.Flags, InitializerOp, 3, 0); - RsCreateBitField (InitializerOp, ACPI_RESTAG_INTERRUPTLEVEL, - CurrentByteOffset + ASL_RESDESC_OFFSET (Irq.Flags), 3); - break; - - case 2: /* Share Type - Default: exclusive (0) */ - - RsSetFlagBits (&Descriptor->Irq.Flags, InitializerOp, 4, 0); - RsCreateBitField (InitializerOp, ACPI_RESTAG_INTERRUPTSHARE, - CurrentByteOffset + ASL_RESDESC_OFFSET (Irq.Flags), 4); - break; - - case 3: /* Name */ - - UtAttachNamepathToOwner (Op, InitializerOp); - break; - - default: - - /* All IRQ bytes are handled here, after the flags and name */ - - if (InitializerOp->Asl.ParseOpcode != PARSEOP_DEFAULT_ARG) - { - /* Up to 16 interrupts can be specified in the list */ - - Interrupts++; - if (Interrupts > 16) - { - AslError (ASL_ERROR, ASL_MSG_INTERRUPT_LIST, - InitializerOp, NULL); - return (Rnode); - } - - /* Only interrupts 0-15 are allowed (mask is 16 bits) */ - - if (InitializerOp->Asl.Value.Integer > 15) - { - 
AslError (ASL_ERROR, ASL_MSG_INTERRUPT_NUMBER, - InitializerOp, NULL); - } - else - { - IrqMask |= (1 << (UINT8) InitializerOp->Asl.Value.Integer); - } - } - - /* Case 4: First IRQ value in list */ - - if (i == 4) - { - /* Check now for duplicates in list */ - - RsCheckListForDuplicates (InitializerOp); - - /* Create a named field at the start of the list */ - - RsCreateByteField (InitializerOp, ACPI_RESTAG_INTERRUPT, - CurrentByteOffset + ASL_RESDESC_OFFSET (Irq.IrqMask)); - } - break; - } - - InitializerOp = RsCompleteNodeAndGetNext (InitializerOp); - } - - /* Now we can set the channel mask */ - - Descriptor->Irq.IrqMask = IrqMask; - return (Rnode); -} - - -/******************************************************************************* - * - * FUNCTION: RsDoIrqNoFlagsDescriptor - * - * PARAMETERS: Op - Parent resource descriptor parse node - * CurrentByteOffset - Offset into the resource template AML - * buffer (to track references to the desc) - * - * RETURN: Completed resource node - * - * DESCRIPTION: Construct a short "IRQNoFlags" descriptor - * - ******************************************************************************/ - -ASL_RESOURCE_NODE * -RsDoIrqNoFlagsDescriptor ( - ACPI_PARSE_OBJECT *Op, - UINT32 CurrentByteOffset) -{ - AML_RESOURCE *Descriptor; - ACPI_PARSE_OBJECT *InitializerOp; - ASL_RESOURCE_NODE *Rnode; - UINT16 IrqMask = 0; - UINT32 Interrupts = 0; - UINT32 i; - - - InitializerOp = Op->Asl.Child; - Rnode = RsAllocateResourceNode (sizeof (AML_RESOURCE_IRQ_NOFLAGS)); - - Descriptor = Rnode->Buffer; - Descriptor->Irq.DescriptorType = ACPI_RESOURCE_NAME_IRQ | - ASL_RDESC_IRQ_SIZE; - - /* Process all child initialization nodes */ - - for (i = 0; InitializerOp; i++) - { - switch (i) - { - case 0: /* Name */ - - UtAttachNamepathToOwner (Op, InitializerOp); - break; - - default: - - /* IRQ bytes are handled here, after the flags and name */ - - if (InitializerOp->Asl.ParseOpcode != PARSEOP_DEFAULT_ARG) - { - /* Up to 16 interrupts can be 
specified in the list */ - - Interrupts++; - if (Interrupts > 16) - { - AslError (ASL_ERROR, ASL_MSG_INTERRUPT_LIST, - InitializerOp, NULL); - return (Rnode); - } - - /* Only interrupts 0-15 are allowed (mask is 16 bits) */ - - if (InitializerOp->Asl.Value.Integer > 15) - { - AslError (ASL_ERROR, ASL_MSG_INTERRUPT_NUMBER, - InitializerOp, NULL); - } - else - { - IrqMask |= (1 << ((UINT8) InitializerOp->Asl.Value.Integer)); - } - } - - /* Case 1: First IRQ value in list */ - - if (i == 1) - { - /* Check now for duplicates in list */ - - RsCheckListForDuplicates (InitializerOp); - - /* Create a named field at the start of the list */ - - RsCreateByteField (InitializerOp, ACPI_RESTAG_INTERRUPT, - CurrentByteOffset + ASL_RESDESC_OFFSET (Irq.IrqMask)); - } - break; - } - - InitializerOp = RsCompleteNodeAndGetNext (InitializerOp); - } - - /* Now we can set the interrupt mask */ - - Descriptor->Irq.IrqMask = IrqMask; - return (Rnode); -} - - /******************************************************************************* * * FUNCTION: RsDoMemory24Descriptor @@ -720,6 +221,10 @@ RsDoMemory24Descriptor ( { AML_RESOURCE *Descriptor; ACPI_PARSE_OBJECT *InitializerOp; + ACPI_PARSE_OBJECT *MinOp = NULL; + ACPI_PARSE_OBJECT *MaxOp = NULL; + ACPI_PARSE_OBJECT *LengthOp = NULL; + ACPI_PARSE_OBJECT *AlignOp = NULL; ASL_RESOURCE_NODE *Rnode; UINT32 i; @@ -749,6 +254,7 @@ RsDoMemory24Descriptor ( Descriptor->Memory24.Minimum = (UINT16) InitializerOp->Asl.Value.Integer; RsCreateByteField (InitializerOp, ACPI_RESTAG_MINADDR, CurrentByteOffset + ASL_RESDESC_OFFSET (Memory24.Minimum)); + MinOp = InitializerOp; break; case 2: /* Max Address */ @@ -756,6 +262,7 @@ RsDoMemory24Descriptor ( Descriptor->Memory24.Maximum = (UINT16) InitializerOp->Asl.Value.Integer; RsCreateByteField (InitializerOp, ACPI_RESTAG_MAXADDR, CurrentByteOffset + ASL_RESDESC_OFFSET (Memory24.Maximum)); + MaxOp = InitializerOp; break; case 3: /* Alignment */ @@ -763,6 +270,7 @@ RsDoMemory24Descriptor ( 
Descriptor->Memory24.Alignment = (UINT16) InitializerOp->Asl.Value.Integer; RsCreateByteField (InitializerOp, ACPI_RESTAG_ALIGNMENT, CurrentByteOffset + ASL_RESDESC_OFFSET (Memory24.Alignment)); + AlignOp = InitializerOp; break; case 4: /* Length */ @@ -770,6 +278,7 @@ RsDoMemory24Descriptor ( Descriptor->Memory24.AddressLength = (UINT16) InitializerOp->Asl.Value.Integer; RsCreateByteField (InitializerOp, ACPI_RESTAG_LENGTH, CurrentByteOffset + ASL_RESDESC_OFFSET (Memory24.AddressLength)); + LengthOp = InitializerOp; break; case 5: /* Name */ @@ -786,6 +295,15 @@ RsDoMemory24Descriptor ( InitializerOp = RsCompleteNodeAndGetNext (InitializerOp); } + /* Validate the Min/Max/Len/Align values (Alignment==0 means 64K) */ + + RsSmallAddressCheck (ACPI_RESOURCE_NAME_MEMORY24, + Descriptor->Memory24.Minimum, + Descriptor->Memory24.Maximum, + Descriptor->Memory24.AddressLength, + Descriptor->Memory24.Alignment, + MinOp, MaxOp, LengthOp, NULL); + return (Rnode); } @@ -811,6 +329,10 @@ RsDoMemory32Descriptor ( { AML_RESOURCE *Descriptor; ACPI_PARSE_OBJECT *InitializerOp; + ACPI_PARSE_OBJECT *MinOp = NULL; + ACPI_PARSE_OBJECT *MaxOp = NULL; + ACPI_PARSE_OBJECT *LengthOp = NULL; + ACPI_PARSE_OBJECT *AlignOp = NULL; ASL_RESOURCE_NODE *Rnode; UINT32 i; @@ -840,6 +362,7 @@ RsDoMemory32Descriptor ( Descriptor->Memory32.Minimum = (UINT32) InitializerOp->Asl.Value.Integer; RsCreateByteField (InitializerOp, ACPI_RESTAG_MINADDR, CurrentByteOffset + ASL_RESDESC_OFFSET (Memory32.Minimum)); + MinOp = InitializerOp; break; case 2: /* Max Address */ @@ -847,6 +370,7 @@ RsDoMemory32Descriptor ( Descriptor->Memory32.Maximum = (UINT32) InitializerOp->Asl.Value.Integer; RsCreateByteField (InitializerOp, ACPI_RESTAG_MAXADDR, CurrentByteOffset + ASL_RESDESC_OFFSET (Memory32.Maximum)); + MaxOp = InitializerOp; break; case 3: /* Alignment */ @@ -854,6 +378,7 @@ RsDoMemory32Descriptor ( Descriptor->Memory32.Alignment = (UINT32) InitializerOp->Asl.Value.Integer; RsCreateByteField (InitializerOp, 
ACPI_RESTAG_ALIGNMENT, CurrentByteOffset + ASL_RESDESC_OFFSET (Memory32.Alignment)); + AlignOp = InitializerOp; break; case 4: /* Length */ @@ -861,6 +386,7 @@ RsDoMemory32Descriptor ( Descriptor->Memory32.AddressLength = (UINT32) InitializerOp->Asl.Value.Integer; RsCreateByteField (InitializerOp, ACPI_RESTAG_LENGTH, CurrentByteOffset + ASL_RESDESC_OFFSET (Memory32.AddressLength)); + LengthOp = InitializerOp; break; case 5: /* Name */ @@ -877,6 +403,15 @@ RsDoMemory32Descriptor ( InitializerOp = RsCompleteNodeAndGetNext (InitializerOp); } + /* Validate the Min/Max/Len/Align values */ + + RsSmallAddressCheck (ACPI_RESOURCE_NAME_MEMORY32, + Descriptor->Memory32.Minimum, + Descriptor->Memory32.Maximum, + Descriptor->Memory32.AddressLength, + Descriptor->Memory32.Alignment, + MinOp, MaxOp, LengthOp, AlignOp); + return (Rnode); } @@ -1027,6 +562,7 @@ RsDoStartDependentDescriptor ( break; default: + NextRnode = RsDoOneResourceDescriptor (InitializerOp, CurrentByteOffset, &State); @@ -1036,7 +572,6 @@ RsDoStartDependentDescriptor ( * must keep track of the offset of not only each descriptor, but each * element (field) within each descriptor as well. */ - CurrentByteOffset += RsLinkDescriptorChain (&PreviousRnode, NextRnode); break; @@ -1182,4 +717,3 @@ RsDoVendorSmallDescriptor ( return (Rnode); } - diff --git a/sys/contrib/dev/acpica/compiler/aslrestype1i.c b/sys/contrib/dev/acpica/compiler/aslrestype1i.c new file mode 100644 index 00000000000..a2b80cbc5f3 --- /dev/null +++ b/sys/contrib/dev/acpica/compiler/aslrestype1i.c @@ -0,0 +1,668 @@ + +/****************************************************************************** + * + * Module Name: aslrestype1i - Small I/O-related resource descriptors + * + *****************************************************************************/ + +/****************************************************************************** + * + * 1. Copyright Notice + * + * Some or all of this work - Copyright (c) 1999 - 2010, Intel Corp. 
+ * All rights reserved. + * + * 2. License + * + * 2.1. This is your license from Intel Corp. under its intellectual property + * rights. You may have additional license terms from the party that provided + * you this software, covering your right to use that party's intellectual + * property rights. + * + * 2.2. Intel grants, free of charge, to any person ("Licensee") obtaining a + * copy of the source code appearing in this file ("Covered Code") an + * irrevocable, perpetual, worldwide license under Intel's copyrights in the + * base code distributed originally by Intel ("Original Intel Code") to copy, + * make derivatives, distribute, use and display any portion of the Covered + * Code in any form, with the right to sublicense such rights; and + * + * 2.3. Intel grants Licensee a non-exclusive and non-transferable patent + * license (with the right to sublicense), under only those claims of Intel + * patents that are infringed by the Original Intel Code, to make, use, sell, + * offer to sell, and import the Covered Code and derivative works thereof + * solely to the minimum extent necessary to exercise the above copyright + * license, and in no event shall the patent license extend to any additions + * to or modifications of the Original Intel Code. No other license or right + * is granted directly or by implication, estoppel or otherwise; + * + * The above copyright and patent license is granted only if the following + * conditions are met: + * + * 3. Conditions + * + * 3.1. Redistribution of Source with Rights to Further Distribute Source. + * Redistribution of source code of any substantial portion of the Covered + * Code or modification with rights to further distribute source must include + * the above Copyright Notice, the above License, this list of Conditions, + * and the following Disclaimer and Export Compliance provision. 
In addition, + * Licensee must cause all Covered Code to which Licensee contributes to + * contain a file documenting the changes Licensee made to create that Covered + * Code and the date of any change. Licensee must include in that file the + * documentation of any changes made by any predecessor Licensee. Licensee + * must include a prominent statement that the modification is derived, + * directly or indirectly, from Original Intel Code. + * + * 3.2. Redistribution of Source with no Rights to Further Distribute Source. + * Redistribution of source code of any substantial portion of the Covered + * Code or modification without rights to further distribute source must + * include the following Disclaimer and Export Compliance provision in the + * documentation and/or other materials provided with distribution. In + * addition, Licensee may not authorize further sublicense of source of any + * portion of the Covered Code, and must include terms to the effect that the + * license from Licensee to its licensee is limited to the intellectual + * property embodied in the software Licensee provides to its licensee, and + * not to intellectual property embodied in modifications its licensee may + * make. + * + * 3.3. Redistribution of Executable. Redistribution in executable form of any + * substantial portion of the Covered Code or modification must reproduce the + * above Copyright Notice, and the following Disclaimer and Export Compliance + * provision in the documentation and/or other materials provided with the + * distribution. + * + * 3.4. Intel retains all right, title, and interest in and to the Original + * Intel Code. + * + * 3.5. Neither the name Intel nor any other trademark owned or controlled by + * Intel shall be used in advertising or otherwise to promote the sale, use or + * other dealings in products derived from or relating to the Covered Code + * without prior written authorization from Intel. + * + * 4. Disclaimer and Export Compliance + * + * 4.1. 
INTEL MAKES NO WARRANTY OF ANY KIND REGARDING ANY SOFTWARE PROVIDED + * HERE. ANY SOFTWARE ORIGINATING FROM INTEL OR DERIVED FROM INTEL SOFTWARE + * IS PROVIDED "AS IS," AND INTEL WILL NOT PROVIDE ANY SUPPORT, ASSISTANCE, + * INSTALLATION, TRAINING OR OTHER SERVICES. INTEL WILL NOT PROVIDE ANY + * UPDATES, ENHANCEMENTS OR EXTENSIONS. INTEL SPECIFICALLY DISCLAIMS ANY + * IMPLIED WARRANTIES OF MERCHANTABILITY, NONINFRINGEMENT AND FITNESS FOR A + * PARTICULAR PURPOSE. + * + * 4.2. IN NO EVENT SHALL INTEL HAVE ANY LIABILITY TO LICENSEE, ITS LICENSEES + * OR ANY OTHER THIRD PARTY, FOR ANY LOST PROFITS, LOST DATA, LOSS OF USE OR + * COSTS OF PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES, OR FOR ANY INDIRECT, + * SPECIAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THIS AGREEMENT, UNDER ANY + * CAUSE OF ACTION OR THEORY OF LIABILITY, AND IRRESPECTIVE OF WHETHER INTEL + * HAS ADVANCE NOTICE OF THE POSSIBILITY OF SUCH DAMAGES. THESE LIMITATIONS + * SHALL APPLY NOTWITHSTANDING THE FAILURE OF THE ESSENTIAL PURPOSE OF ANY + * LIMITED REMEDY. + * + * 4.3. Licensee shall not export, either directly or indirectly, any of this + * software or system incorporating such software without first obtaining any + * required license or other approval from the U. S. Department of Commerce or + * any other agency or department of the United States Government. In the + * event Licensee exports any such software from the United States or + * re-exports any such software from a foreign destination, Licensee shall + * ensure that the distribution and export/re-export of the software is in + * compliance with all laws, regulations, orders, or other restrictions of the + * U.S. Export Administration Regulations. 
Licensee agrees that neither it nor + * any of its subsidiaries will export/re-export any technical data, process, + * software, or service, directly or indirectly, to any country for which the + * United States government or any agency thereof requires an export license, + * other governmental approval, or letter of assurance, without first obtaining + * such license, approval or letter. + * + *****************************************************************************/ + + +#include +#include "aslcompiler.y.h" + +#define _COMPONENT ACPI_COMPILER + ACPI_MODULE_NAME ("aslrestype1i") + +/* + * This module contains the I/O-related small resource descriptors: + * + * DMA + * FixedIO + * IO + * IRQ + * IRQNoFlags + */ + +/******************************************************************************* + * + * FUNCTION: RsDoDmaDescriptor + * + * PARAMETERS: Op - Parent resource descriptor parse node + * CurrentByteOffset - Offset into the resource template AML + * buffer (to track references to the desc) + * + * RETURN: Completed resource node + * + * DESCRIPTION: Construct a short "DMA" descriptor + * + ******************************************************************************/ + +ASL_RESOURCE_NODE * +RsDoDmaDescriptor ( + ACPI_PARSE_OBJECT *Op, + UINT32 CurrentByteOffset) +{ + AML_RESOURCE *Descriptor; + ACPI_PARSE_OBJECT *InitializerOp; + ASL_RESOURCE_NODE *Rnode; + UINT32 i; + UINT8 DmaChannelMask = 0; + UINT8 DmaChannels = 0; + + + InitializerOp = Op->Asl.Child; + Rnode = RsAllocateResourceNode (sizeof (AML_RESOURCE_DMA)); + + Descriptor = Rnode->Buffer; + Descriptor->Dma.DescriptorType = ACPI_RESOURCE_NAME_DMA | + ASL_RDESC_DMA_SIZE; + + /* Process all child initialization nodes */ + + for (i = 0; InitializerOp; i++) + { + switch (i) + { + case 0: /* DMA type */ + + RsSetFlagBits (&Descriptor->Dma.Flags, InitializerOp, 5, 0); + RsCreateBitField (InitializerOp, ACPI_RESTAG_DMATYPE, + CurrentByteOffset + ASL_RESDESC_OFFSET (Dma.Flags), 5); + break; + + case 1: 
/* Bus Master */ + + RsSetFlagBits (&Descriptor->Dma.Flags, InitializerOp, 2, 0); + RsCreateBitField (InitializerOp, ACPI_RESTAG_BUSMASTER, + CurrentByteOffset + ASL_RESDESC_OFFSET (Dma.Flags), 2); + break; + + case 2: /* Xfer Type (transfer width) */ + + RsSetFlagBits (&Descriptor->Dma.Flags, InitializerOp, 0, 0); + RsCreateBitField (InitializerOp, ACPI_RESTAG_XFERTYPE, + CurrentByteOffset + ASL_RESDESC_OFFSET (Dma.Flags), 0); + break; + + case 3: /* Name */ + + UtAttachNamepathToOwner (Op, InitializerOp); + break; + + default: + + /* All DMA channel bytes are handled here, after flags and name */ + + if (InitializerOp->Asl.ParseOpcode != PARSEOP_DEFAULT_ARG) + { + /* Up to 8 channels can be specified in the list */ + + DmaChannels++; + if (DmaChannels > 8) + { + AslError (ASL_ERROR, ASL_MSG_DMA_LIST, + InitializerOp, NULL); + return (Rnode); + } + + /* Only DMA channels 0-7 are allowed (mask is 8 bits) */ + + if (InitializerOp->Asl.Value.Integer > 7) + { + AslError (ASL_ERROR, ASL_MSG_DMA_CHANNEL, + InitializerOp, NULL); + } + + /* Build the mask */ + + DmaChannelMask |= + (1 << ((UINT8) InitializerOp->Asl.Value.Integer)); + } + + if (i == 4) /* case 4: First DMA byte */ + { + /* Check now for duplicates in list */ + + RsCheckListForDuplicates (InitializerOp); + + /* Create a named field at the start of the list */ + + RsCreateByteField (InitializerOp, ACPI_RESTAG_DMA, + CurrentByteOffset + + ASL_RESDESC_OFFSET (Dma.DmaChannelMask)); + } + break; + } + + InitializerOp = RsCompleteNodeAndGetNext (InitializerOp); + } + + /* Now we can set the channel mask */ + + Descriptor->Dma.DmaChannelMask = DmaChannelMask; + return (Rnode); +} + + +/******************************************************************************* + * + * FUNCTION: RsDoFixedIoDescriptor + * + * PARAMETERS: Op - Parent resource descriptor parse node + * CurrentByteOffset - Offset into the resource template AML + * buffer (to track references to the desc) + * + * RETURN: Completed resource node + * 
+ * DESCRIPTION: Construct a short "FixedIO" descriptor + * + ******************************************************************************/ + +ASL_RESOURCE_NODE * +RsDoFixedIoDescriptor ( + ACPI_PARSE_OBJECT *Op, + UINT32 CurrentByteOffset) +{ + AML_RESOURCE *Descriptor; + ACPI_PARSE_OBJECT *InitializerOp; + ACPI_PARSE_OBJECT *AddressOp = NULL; + ASL_RESOURCE_NODE *Rnode; + UINT32 i; + + + InitializerOp = Op->Asl.Child; + Rnode = RsAllocateResourceNode (sizeof (AML_RESOURCE_FIXED_IO)); + + Descriptor = Rnode->Buffer; + Descriptor->Io.DescriptorType = ACPI_RESOURCE_NAME_FIXED_IO | + ASL_RDESC_FIXED_IO_SIZE; + + /* Process all child initialization nodes */ + + for (i = 0; InitializerOp; i++) + { + switch (i) + { + case 0: /* Base Address */ + + Descriptor->FixedIo.Address = + (UINT16) InitializerOp->Asl.Value.Integer; + RsCreateByteField (InitializerOp, ACPI_RESTAG_BASEADDRESS, + CurrentByteOffset + ASL_RESDESC_OFFSET (FixedIo.Address)); + AddressOp = InitializerOp; + break; + + case 1: /* Length */ + + Descriptor->FixedIo.AddressLength = + (UINT8) InitializerOp->Asl.Value.Integer; + RsCreateByteField (InitializerOp, ACPI_RESTAG_LENGTH, + CurrentByteOffset + ASL_RESDESC_OFFSET (FixedIo.AddressLength)); + break; + + case 2: /* Name */ + + UtAttachNamepathToOwner (Op, InitializerOp); + break; + + default: + + AslError (ASL_ERROR, ASL_MSG_RESOURCE_LIST, InitializerOp, NULL); + break; + } + + InitializerOp = RsCompleteNodeAndGetNext (InitializerOp); + } + + /* Error checks */ + + if (Descriptor->FixedIo.Address > 0x03FF) + { + AslError (ASL_WARNING, ASL_MSG_ISA_ADDRESS, AddressOp, NULL); + } + + return (Rnode); +} + + +/******************************************************************************* + * + * FUNCTION: RsDoIoDescriptor + * + * PARAMETERS: Op - Parent resource descriptor parse node + * CurrentByteOffset - Offset into the resource template AML + * buffer (to track references to the desc) + * + * RETURN: Completed resource node + * + * DESCRIPTION: Construct 
a short "IO" descriptor + * + ******************************************************************************/ + +ASL_RESOURCE_NODE * +RsDoIoDescriptor ( + ACPI_PARSE_OBJECT *Op, + UINT32 CurrentByteOffset) +{ + AML_RESOURCE *Descriptor; + ACPI_PARSE_OBJECT *InitializerOp; + ACPI_PARSE_OBJECT *MinOp = NULL; + ACPI_PARSE_OBJECT *MaxOp = NULL; + ACPI_PARSE_OBJECT *LengthOp = NULL; + ACPI_PARSE_OBJECT *AlignOp = NULL; + ASL_RESOURCE_NODE *Rnode; + UINT32 i; + + + InitializerOp = Op->Asl.Child; + Rnode = RsAllocateResourceNode (sizeof (AML_RESOURCE_IO)); + + Descriptor = Rnode->Buffer; + Descriptor->Io.DescriptorType = ACPI_RESOURCE_NAME_IO | + ASL_RDESC_IO_SIZE; + + /* Process all child initialization nodes */ + + for (i = 0; InitializerOp; i++) + { + switch (i) + { + case 0: /* Decode size */ + + RsSetFlagBits (&Descriptor->Io.Flags, InitializerOp, 0, 1); + RsCreateBitField (InitializerOp, ACPI_RESTAG_DECODE, + CurrentByteOffset + ASL_RESDESC_OFFSET (Io.Flags), 0); + break; + + case 1: /* Min Address */ + + Descriptor->Io.Minimum = + (UINT16) InitializerOp->Asl.Value.Integer; + RsCreateByteField (InitializerOp, ACPI_RESTAG_MINADDR, + CurrentByteOffset + ASL_RESDESC_OFFSET (Io.Minimum)); + MinOp = InitializerOp; + break; + + case 2: /* Max Address */ + + Descriptor->Io.Maximum = + (UINT16) InitializerOp->Asl.Value.Integer; + RsCreateByteField (InitializerOp, ACPI_RESTAG_MAXADDR, + CurrentByteOffset + ASL_RESDESC_OFFSET (Io.Maximum)); + MaxOp = InitializerOp; + break; + + case 3: /* Alignment */ + + Descriptor->Io.Alignment = + (UINT8) InitializerOp->Asl.Value.Integer; + RsCreateByteField (InitializerOp, ACPI_RESTAG_ALIGNMENT, + CurrentByteOffset + ASL_RESDESC_OFFSET (Io.Alignment)); + AlignOp = InitializerOp; + break; + + case 4: /* Length */ + + Descriptor->Io.AddressLength = + (UINT8) InitializerOp->Asl.Value.Integer; + RsCreateByteField (InitializerOp, ACPI_RESTAG_LENGTH, + CurrentByteOffset + ASL_RESDESC_OFFSET (Io.AddressLength)); + LengthOp = InitializerOp; + 
break; + + case 5: /* Name */ + + UtAttachNamepathToOwner (Op, InitializerOp); + break; + + default: + + AslError (ASL_ERROR, ASL_MSG_RESOURCE_LIST, InitializerOp, NULL); + break; + } + + InitializerOp = RsCompleteNodeAndGetNext (InitializerOp); + } + + /* Validate the Min/Max/Len/Align values */ + + RsSmallAddressCheck (ACPI_RESOURCE_NAME_IO, + Descriptor->Io.Minimum, + Descriptor->Io.Maximum, + Descriptor->Io.AddressLength, + Descriptor->Io.Alignment, + MinOp, MaxOp, LengthOp, AlignOp); + + return (Rnode); +} + + +/******************************************************************************* + * + * FUNCTION: RsDoIrqDescriptor + * + * PARAMETERS: Op - Parent resource descriptor parse node + * CurrentByteOffset - Offset into the resource template AML + * buffer (to track references to the desc) + * + * RETURN: Completed resource node + * + * DESCRIPTION: Construct a short "IRQ" descriptor + * + ******************************************************************************/ + +ASL_RESOURCE_NODE * +RsDoIrqDescriptor ( + ACPI_PARSE_OBJECT *Op, + UINT32 CurrentByteOffset) +{ + AML_RESOURCE *Descriptor; + ACPI_PARSE_OBJECT *InitializerOp; + ASL_RESOURCE_NODE *Rnode; + UINT32 Interrupts = 0; + UINT16 IrqMask = 0; + UINT32 i; + + + InitializerOp = Op->Asl.Child; + Rnode = RsAllocateResourceNode (sizeof (AML_RESOURCE_IRQ)); + + /* Length = 3 (with flag byte) */ + + Descriptor = Rnode->Buffer; + Descriptor->Irq.DescriptorType = ACPI_RESOURCE_NAME_IRQ | + (ASL_RDESC_IRQ_SIZE + 0x01); + + /* Process all child initialization nodes */ + + for (i = 0; InitializerOp; i++) + { + switch (i) + { + case 0: /* Interrupt Type (or Mode - edge/level) */ + + RsSetFlagBits (&Descriptor->Irq.Flags, InitializerOp, 0, 1); + RsCreateBitField (InitializerOp, ACPI_RESTAG_INTERRUPTTYPE, + CurrentByteOffset + ASL_RESDESC_OFFSET (Irq.Flags), 0); + break; + + case 1: /* Interrupt Level (or Polarity - Active high/low) */ + + RsSetFlagBits (&Descriptor->Irq.Flags, InitializerOp, 3, 0); + 
RsCreateBitField (InitializerOp, ACPI_RESTAG_INTERRUPTLEVEL, + CurrentByteOffset + ASL_RESDESC_OFFSET (Irq.Flags), 3); + break; + + case 2: /* Share Type - Default: exclusive (0) */ + + RsSetFlagBits (&Descriptor->Irq.Flags, InitializerOp, 4, 0); + RsCreateBitField (InitializerOp, ACPI_RESTAG_INTERRUPTSHARE, + CurrentByteOffset + ASL_RESDESC_OFFSET (Irq.Flags), 4); + break; + + case 3: /* Name */ + + UtAttachNamepathToOwner (Op, InitializerOp); + break; + + default: + + /* All IRQ bytes are handled here, after the flags and name */ + + if (InitializerOp->Asl.ParseOpcode != PARSEOP_DEFAULT_ARG) + { + /* Up to 16 interrupts can be specified in the list */ + + Interrupts++; + if (Interrupts > 16) + { + AslError (ASL_ERROR, ASL_MSG_INTERRUPT_LIST, + InitializerOp, NULL); + return (Rnode); + } + + /* Only interrupts 0-15 are allowed (mask is 16 bits) */ + + if (InitializerOp->Asl.Value.Integer > 15) + { + AslError (ASL_ERROR, ASL_MSG_INTERRUPT_NUMBER, + InitializerOp, NULL); + } + else + { + IrqMask |= (1 << (UINT8) InitializerOp->Asl.Value.Integer); + } + } + + /* Case 4: First IRQ value in list */ + + if (i == 4) + { + /* Check now for duplicates in list */ + + RsCheckListForDuplicates (InitializerOp); + + /* Create a named field at the start of the list */ + + RsCreateByteField (InitializerOp, ACPI_RESTAG_INTERRUPT, + CurrentByteOffset + ASL_RESDESC_OFFSET (Irq.IrqMask)); + } + break; + } + + InitializerOp = RsCompleteNodeAndGetNext (InitializerOp); + } + + /* Now we can set the channel mask */ + + Descriptor->Irq.IrqMask = IrqMask; + return (Rnode); +} + + +/******************************************************************************* + * + * FUNCTION: RsDoIrqNoFlagsDescriptor + * + * PARAMETERS: Op - Parent resource descriptor parse node + * CurrentByteOffset - Offset into the resource template AML + * buffer (to track references to the desc) + * + * RETURN: Completed resource node + * + * DESCRIPTION: Construct a short "IRQNoFlags" descriptor + * + 
******************************************************************************/ + +ASL_RESOURCE_NODE * +RsDoIrqNoFlagsDescriptor ( + ACPI_PARSE_OBJECT *Op, + UINT32 CurrentByteOffset) +{ + AML_RESOURCE *Descriptor; + ACPI_PARSE_OBJECT *InitializerOp; + ASL_RESOURCE_NODE *Rnode; + UINT16 IrqMask = 0; + UINT32 Interrupts = 0; + UINT32 i; + + + InitializerOp = Op->Asl.Child; + Rnode = RsAllocateResourceNode (sizeof (AML_RESOURCE_IRQ_NOFLAGS)); + + Descriptor = Rnode->Buffer; + Descriptor->Irq.DescriptorType = ACPI_RESOURCE_NAME_IRQ | + ASL_RDESC_IRQ_SIZE; + + /* Process all child initialization nodes */ + + for (i = 0; InitializerOp; i++) + { + switch (i) + { + case 0: /* Name */ + + UtAttachNamepathToOwner (Op, InitializerOp); + break; + + default: + + /* IRQ bytes are handled here, after the flags and name */ + + if (InitializerOp->Asl.ParseOpcode != PARSEOP_DEFAULT_ARG) + { + /* Up to 16 interrupts can be specified in the list */ + + Interrupts++; + if (Interrupts > 16) + { + AslError (ASL_ERROR, ASL_MSG_INTERRUPT_LIST, + InitializerOp, NULL); + return (Rnode); + } + + /* Only interrupts 0-15 are allowed (mask is 16 bits) */ + + if (InitializerOp->Asl.Value.Integer > 15) + { + AslError (ASL_ERROR, ASL_MSG_INTERRUPT_NUMBER, + InitializerOp, NULL); + } + else + { + IrqMask |= (1 << ((UINT8) InitializerOp->Asl.Value.Integer)); + } + } + + /* Case 1: First IRQ value in list */ + + if (i == 1) + { + /* Check now for duplicates in list */ + + RsCheckListForDuplicates (InitializerOp); + + /* Create a named field at the start of the list */ + + RsCreateByteField (InitializerOp, ACPI_RESTAG_INTERRUPT, + CurrentByteOffset + ASL_RESDESC_OFFSET (Irq.IrqMask)); + } + break; + } + + InitializerOp = RsCompleteNodeAndGetNext (InitializerOp); + } + + /* Now we can set the interrupt mask */ + + Descriptor->Irq.IrqMask = IrqMask; + return (Rnode); +} diff --git a/sys/contrib/dev/acpica/compiler/aslrestype2.c b/sys/contrib/dev/acpica/compiler/aslrestype2.c index 
48be8536600..e1537018f74 100644 --- a/sys/contrib/dev/acpica/compiler/aslrestype2.c +++ b/sys/contrib/dev/acpica/compiler/aslrestype2.c @@ -1,7 +1,7 @@ /****************************************************************************** * - * Module Name: aslrestype2 - Long (type2) resource templates and descriptors + * Module Name: aslrestype2 - Miscellaneous Large resource descriptors * *****************************************************************************/ @@ -117,51 +117,22 @@ #include #include "aslcompiler.y.h" +#include #define _COMPONENT ACPI_COMPILER ACPI_MODULE_NAME ("aslrestype2") -/* Local prototypes */ - -static UINT16 -RsGetStringDataLength ( - ACPI_PARSE_OBJECT *InitializerOp); - +/* + * This module contains miscellaneous large resource descriptors: + * + * Register + * Interrupt + * VendorLong + */ /******************************************************************************* * - * FUNCTION: RsGetStringDataLength - * - * PARAMETERS: InitializerOp - Start of a subtree of init nodes - * - * RETURN: Valid string length if a string node is found (otherwise 0) - * - * DESCRIPTION: In a list of peer nodes, find the first one that contains a - * string and return the length of the string. 
- * - ******************************************************************************/ - -static UINT16 -RsGetStringDataLength ( - ACPI_PARSE_OBJECT *InitializerOp) -{ - - while (InitializerOp) - { - if (InitializerOp->Asl.ParseOpcode == PARSEOP_STRING_LITERAL) - { - return ((UINT16) (strlen (InitializerOp->Asl.Value.String) + 1)); - } - InitializerOp = ASL_GET_PEER_NODE (InitializerOp); - } - - return 0; -} - - -/******************************************************************************* - * - * FUNCTION: RsDoDwordIoDescriptor + * FUNCTION: RsDoGeneralRegisterDescriptor * * PARAMETERS: Op - Parent resource descriptor parse node * CurrentByteOffset - Offset into the resource template AML @@ -169,43 +140,27 @@ RsGetStringDataLength ( * * RETURN: Completed resource node * - * DESCRIPTION: Construct a long "DwordIO" descriptor + * DESCRIPTION: Construct a long "Register" descriptor * ******************************************************************************/ ASL_RESOURCE_NODE * -RsDoDwordIoDescriptor ( +RsDoGeneralRegisterDescriptor ( ACPI_PARSE_OBJECT *Op, UINT32 CurrentByteOffset) { AML_RESOURCE *Descriptor; ACPI_PARSE_OBJECT *InitializerOp; ASL_RESOURCE_NODE *Rnode; - UINT16 StringLength = 0; - UINT32 OptionIndex = 0; - UINT8 *OptionalFields; UINT32 i; - BOOLEAN ResSourceIndex = FALSE; InitializerOp = Op->Asl.Child; - StringLength = RsGetStringDataLength (InitializerOp); - - Rnode = RsAllocateResourceNode ( - sizeof (AML_RESOURCE_ADDRESS32) + 1 + StringLength); + Rnode = RsAllocateResourceNode (sizeof (AML_RESOURCE_GENERIC_REGISTER)); Descriptor = Rnode->Buffer; - Descriptor->Address32.DescriptorType = ACPI_RESOURCE_NAME_ADDRESS32; - Descriptor->Address32.ResourceType = ACPI_ADDRESS_TYPE_IO_RANGE; - - /* - * Initial descriptor length -- may be enlarged if there are - * optional fields present - */ - OptionalFields = ((UINT8 *) Descriptor) + sizeof (AML_RESOURCE_ADDRESS32); - Descriptor->Address32.ResourceLength = (UINT16) - (sizeof (AML_RESOURCE_ADDRESS32) - 
- sizeof (AML_RESOURCE_LARGE_HEADER)); + Descriptor->GenericReg.DescriptorType = ACPI_RESOURCE_NAME_GENERIC_REGISTER; + Descriptor->GenericReg.ResourceLength = 12; /* Process all child initialization nodes */ @@ -213,1637 +168,48 @@ RsDoDwordIoDescriptor ( { switch (i) { - case 0: /* Resource Usage */ + case 0: /* Address space */ - RsSetFlagBits (&Descriptor->Address32.Flags, InitializerOp, 0, 1); - break; - - case 1: /* MinType */ - - RsSetFlagBits (&Descriptor->Address32.Flags, InitializerOp, 2, 0); - RsCreateBitField (InitializerOp, ACPI_RESTAG_MINTYPE, - CurrentByteOffset + ASL_RESDESC_OFFSET (Address32.Flags), 2); - break; - - case 2: /* MaxType */ - - RsSetFlagBits (&Descriptor->Address32.Flags, InitializerOp, 3, 0); - RsCreateBitField (InitializerOp, ACPI_RESTAG_MAXTYPE, - CurrentByteOffset + ASL_RESDESC_OFFSET (Address32.Flags), 3); - break; - - case 3: /* DecodeType */ - - RsSetFlagBits (&Descriptor->Address32.Flags, InitializerOp, 1, 0); - RsCreateBitField (InitializerOp, ACPI_RESTAG_DECODE, - CurrentByteOffset + ASL_RESDESC_OFFSET (Address32.Flags), 1); - break; - - case 4: /* Range Type */ - - RsSetFlagBits (&Descriptor->Address32.SpecificFlags, InitializerOp, 0, 3); - RsCreateBitField (InitializerOp, ACPI_RESTAG_RANGETYPE, - CurrentByteOffset + ASL_RESDESC_OFFSET (Address32.SpecificFlags), 0); - break; - - case 5: /* Address Granularity */ - - Descriptor->Address32.Granularity = - (UINT32) InitializerOp->Asl.Value.Integer; - RsCreateByteField (InitializerOp, ACPI_RESTAG_GRANULARITY, - CurrentByteOffset + ASL_RESDESC_OFFSET (Address32.Granularity)); - break; - - case 6: /* Address Min */ - - Descriptor->Address32.Minimum = - (UINT32) InitializerOp->Asl.Value.Integer; - RsCreateByteField (InitializerOp, ACPI_RESTAG_MINADDR, - CurrentByteOffset + ASL_RESDESC_OFFSET (Address32.Minimum)); - break; - - case 7: /* Address Max */ - - Descriptor->Address32.Maximum = - (UINT32) InitializerOp->Asl.Value.Integer; - RsCreateByteField (InitializerOp, 
ACPI_RESTAG_MAXADDR, - CurrentByteOffset + ASL_RESDESC_OFFSET (Address32.Maximum)); - break; - - case 8: /* Translation Offset */ - - Descriptor->Address32.TranslationOffset = - (UINT32) InitializerOp->Asl.Value.Integer; - RsCreateByteField (InitializerOp, ACPI_RESTAG_TRANSLATION, - CurrentByteOffset + ASL_RESDESC_OFFSET (Address32.TranslationOffset)); - break; - - case 9: /* Address Length */ - - Descriptor->Address32.AddressLength = - (UINT32) InitializerOp->Asl.Value.Integer; - RsCreateByteField (InitializerOp, ACPI_RESTAG_LENGTH, - CurrentByteOffset + ASL_RESDESC_OFFSET (Address32.AddressLength)); - break; - - case 10: /* ResSourceIndex [Optional Field - BYTE] */ - - if (InitializerOp->Asl.ParseOpcode != PARSEOP_DEFAULT_ARG) - { - /* Found a valid ResourceSourceIndex */ - - OptionalFields[0] = (UINT8) InitializerOp->Asl.Value.Integer; - OptionIndex++; - Descriptor->Address32.ResourceLength++; - ResSourceIndex = TRUE; - } - break; - - case 11: /* ResSource [Optional Field - STRING] */ - - if ((InitializerOp->Asl.ParseOpcode != PARSEOP_DEFAULT_ARG) && - (InitializerOp->Asl.Value.String)) - { - if (StringLength) - { - /* Found a valid ResourceSource */ - - Descriptor->Address32.ResourceLength = (UINT16) - (Descriptor->Address32.ResourceLength + StringLength); - - strcpy ((char *) - &OptionalFields[OptionIndex], - InitializerOp->Asl.Value.String); - - /* ResourceSourceIndex must also be valid */ - - if (!ResSourceIndex) - { - AslError (ASL_ERROR, ASL_MSG_RESOURCE_INDEX, - InitializerOp, NULL); - } - } - } - -#if 0 - /* - * Not a valid ResourceSource, ResourceSourceIndex must also - * be invalid - */ - else if (ResSourceIndex) - { - AslError (ASL_ERROR, ASL_MSG_RESOURCE_SOURCE, - InitializerOp, NULL); - } -#endif - break; - - case 12: /* ResourceTag */ - - UtAttachNamepathToOwner (Op, InitializerOp); - break; - - case 13: /* Type */ - - RsSetFlagBits (&Descriptor->Address32.SpecificFlags, InitializerOp, 4, 0); - RsCreateBitField (InitializerOp, ACPI_RESTAG_TYPE, - 
CurrentByteOffset + ASL_RESDESC_OFFSET (Address32.SpecificFlags), 4); - break; - - case 14: /* Translation Type */ - - RsSetFlagBits (&Descriptor->Address32.SpecificFlags, InitializerOp, 5, 0); - RsCreateBitField (InitializerOp, ACPI_RESTAG_TRANSTYPE, - CurrentByteOffset + ASL_RESDESC_OFFSET (Address32.SpecificFlags), 5); - break; - - default: - - AslError (ASL_ERROR, ASL_MSG_RESOURCE_LIST, InitializerOp, NULL); - break; - } - - InitializerOp = RsCompleteNodeAndGetNext (InitializerOp); - } - - Rnode->BufferLength = sizeof (AML_RESOURCE_ADDRESS32) + - OptionIndex + StringLength; - return (Rnode); -} - - -/******************************************************************************* - * - * FUNCTION: RsDoDwordMemoryDescriptor - * - * PARAMETERS: Op - Parent resource descriptor parse node - * CurrentByteOffset - Offset into the resource template AML - * buffer (to track references to the desc) - * - * RETURN: Completed resource node - * - * DESCRIPTION: Construct a long "DwordMemory" descriptor - * - ******************************************************************************/ - -ASL_RESOURCE_NODE * -RsDoDwordMemoryDescriptor ( - ACPI_PARSE_OBJECT *Op, - UINT32 CurrentByteOffset) -{ - AML_RESOURCE *Descriptor; - ACPI_PARSE_OBJECT *InitializerOp; - ASL_RESOURCE_NODE *Rnode; - UINT8 *OptionalFields; - UINT16 StringLength = 0; - UINT32 OptionIndex = 0; - UINT32 i; - BOOLEAN ResSourceIndex = FALSE; - - - InitializerOp = Op->Asl.Child; - StringLength = RsGetStringDataLength (InitializerOp); - - Rnode = RsAllocateResourceNode ( - sizeof (AML_RESOURCE_ADDRESS32) + 1 + StringLength); - - Descriptor = Rnode->Buffer; - Descriptor->Address32.DescriptorType = ACPI_RESOURCE_NAME_ADDRESS32; - Descriptor->Address32.ResourceType = ACPI_ADDRESS_TYPE_MEMORY_RANGE; - - /* - * Initial descriptor length -- may be enlarged if there are - * optional fields present - */ - OptionalFields = ((UINT8 *) Descriptor) + sizeof (AML_RESOURCE_ADDRESS32); - Descriptor->Address32.ResourceLength = 
(UINT16) - (sizeof (AML_RESOURCE_ADDRESS32) - - sizeof (AML_RESOURCE_LARGE_HEADER)); - - - /* Process all child initialization nodes */ - - for (i = 0; InitializerOp; i++) - { - switch (i) - { - case 0: /* Resource Usage */ - - RsSetFlagBits (&Descriptor->Address32.Flags, InitializerOp, 0, 1); - break; - - case 1: /* DecodeType */ - - RsSetFlagBits (&Descriptor->Address32.Flags, InitializerOp, 1, 0); - RsCreateBitField (InitializerOp, ACPI_RESTAG_DECODE, - CurrentByteOffset + ASL_RESDESC_OFFSET (Address32.Flags), 1); - break; - - case 2: /* MinType */ - - RsSetFlagBits (&Descriptor->Address32.Flags, InitializerOp, 2, 0); - RsCreateBitField (InitializerOp, ACPI_RESTAG_MINTYPE, - CurrentByteOffset + ASL_RESDESC_OFFSET (Address32.Flags), 2); - break; - - case 3: /* MaxType */ - - RsSetFlagBits (&Descriptor->Address32.Flags, InitializerOp, 3, 0); - RsCreateBitField (InitializerOp, ACPI_RESTAG_MAXTYPE, - CurrentByteOffset + ASL_RESDESC_OFFSET (Address32.Flags), 3); - break; - - case 4: /* Memory Type */ - - RsSetFlagBits (&Descriptor->Address32.SpecificFlags, InitializerOp, 1, 0); - RsCreateBitField (InitializerOp, ACPI_RESTAG_MEMTYPE, - CurrentByteOffset + ASL_RESDESC_OFFSET (Address32.SpecificFlags), 1); - break; - - case 5: /* Read/Write Type */ - - RsSetFlagBits (&Descriptor->Address32.SpecificFlags, InitializerOp, 0, 1); - RsCreateBitField (InitializerOp, ACPI_RESTAG_READWRITETYPE, - CurrentByteOffset + ASL_RESDESC_OFFSET (Address32.SpecificFlags), 0); - break; - - case 6: /* Address Granularity */ - - Descriptor->Address32.Granularity = - (UINT32) InitializerOp->Asl.Value.Integer; - RsCreateByteField (InitializerOp, ACPI_RESTAG_GRANULARITY, - CurrentByteOffset + ASL_RESDESC_OFFSET (Address32.Granularity)); - break; - - case 7: /* Min Address */ - - Descriptor->Address32.Minimum = - (UINT32) InitializerOp->Asl.Value.Integer; - RsCreateByteField (InitializerOp, ACPI_RESTAG_MINADDR, - CurrentByteOffset + ASL_RESDESC_OFFSET (Address32.Minimum)); - break; - - case 8: 
/* Max Address */ - - Descriptor->Address32.Maximum = - (UINT32) InitializerOp->Asl.Value.Integer; - RsCreateByteField (InitializerOp, ACPI_RESTAG_MAXADDR, - CurrentByteOffset + ASL_RESDESC_OFFSET (Address32.Maximum)); - break; - - case 9: /* Translation Offset */ - - Descriptor->Address32.TranslationOffset = - (UINT32) InitializerOp->Asl.Value.Integer; - RsCreateByteField (InitializerOp, ACPI_RESTAG_TRANSLATION, - CurrentByteOffset + ASL_RESDESC_OFFSET (Address32.TranslationOffset)); - break; - - case 10: /* Address Length */ - - Descriptor->Address32.AddressLength = - (UINT32) InitializerOp->Asl.Value.Integer; - RsCreateByteField (InitializerOp, ACPI_RESTAG_LENGTH, - CurrentByteOffset + ASL_RESDESC_OFFSET (Address32.AddressLength)); - break; - - case 11: /* ResSourceIndex [Optional Field - BYTE] */ - - if (InitializerOp->Asl.ParseOpcode != PARSEOP_DEFAULT_ARG) - { - OptionalFields[0] = (UINT8) InitializerOp->Asl.Value.Integer; - OptionIndex++; - Descriptor->Address32.ResourceLength++; - ResSourceIndex = TRUE; - } - break; - - case 12: /* ResSource [Optional Field - STRING] */ - - if ((InitializerOp->Asl.ParseOpcode != PARSEOP_DEFAULT_ARG) && - (InitializerOp->Asl.Value.String)) - { - if (StringLength) - { - Descriptor->Address32.ResourceLength = (UINT16) - (Descriptor->Address32.ResourceLength + StringLength); - - strcpy ((char *) - &OptionalFields[OptionIndex], - InitializerOp->Asl.Value.String); - - /* ResourceSourceIndex must also be valid */ - - if (!ResSourceIndex) - { - AslError (ASL_ERROR, ASL_MSG_RESOURCE_INDEX, - InitializerOp, NULL); - } - } - } - -#if 0 - /* - * Not a valid ResourceSource, ResourceSourceIndex must also - * be invalid - */ - else if (ResSourceIndex) - { - AslError (ASL_ERROR, ASL_MSG_RESOURCE_SOURCE, - InitializerOp, NULL); - } -#endif - break; - - case 13: /* ResourceTag */ - - UtAttachNamepathToOwner (Op, InitializerOp); - break; - - - case 14: /* Address Range */ - - RsSetFlagBits (&Descriptor->Address32.SpecificFlags, InitializerOp, 
3, 0); - RsCreateBitField (InitializerOp, ACPI_RESTAG_MEMATTRIBUTES, - CurrentByteOffset + ASL_RESDESC_OFFSET (Address32.SpecificFlags), 3); - break; - - case 15: /* Type */ - - RsSetFlagBits (&Descriptor->Address32.SpecificFlags, InitializerOp, 5, 0); - RsCreateBitField (InitializerOp, ACPI_RESTAG_TYPE, - CurrentByteOffset + ASL_RESDESC_OFFSET (Address32.SpecificFlags), 5); - break; - - default: - - AslError (ASL_ERROR, ASL_MSG_RESOURCE_LIST, InitializerOp, NULL); - break; - } - - InitializerOp = RsCompleteNodeAndGetNext (InitializerOp); - } - - Rnode->BufferLength = sizeof (AML_RESOURCE_ADDRESS32) + - OptionIndex + StringLength; - return (Rnode); -} - - -/******************************************************************************* - * - * FUNCTION: RsDoDwordSpaceDescriptor - * - * PARAMETERS: Op - Parent resource descriptor parse node - * CurrentByteOffset - Offset into the resource template AML - * buffer (to track references to the desc) - * - * RETURN: Completed resource node - * - * DESCRIPTION: Construct a long "DwordSpace" descriptor - * - ******************************************************************************/ - -ASL_RESOURCE_NODE * -RsDoDwordSpaceDescriptor ( - ACPI_PARSE_OBJECT *Op, - UINT32 CurrentByteOffset) -{ - AML_RESOURCE *Descriptor; - ACPI_PARSE_OBJECT *InitializerOp; - ASL_RESOURCE_NODE *Rnode; - UINT8 *OptionalFields; - UINT16 StringLength = 0; - UINT32 OptionIndex = 0; - UINT32 i; - BOOLEAN ResSourceIndex = FALSE; - - - InitializerOp = Op->Asl.Child; - StringLength = RsGetStringDataLength (InitializerOp); - - Rnode = RsAllocateResourceNode ( - sizeof (AML_RESOURCE_ADDRESS32) + 1 + StringLength); - - Descriptor = Rnode->Buffer; - Descriptor->Address32.DescriptorType = ACPI_RESOURCE_NAME_ADDRESS32; - - /* - * Initial descriptor length -- may be enlarged if there are - * optional fields present - */ - OptionalFields = ((UINT8 *) Descriptor) + sizeof (AML_RESOURCE_ADDRESS32); - Descriptor->Address32.ResourceLength = (UINT16) - (sizeof 
(AML_RESOURCE_ADDRESS32) - - sizeof (AML_RESOURCE_LARGE_HEADER)); - - /* Process all child initialization nodes */ - - for (i = 0; InitializerOp; i++) - { - switch (i) - { - case 0: /* Resource Type */ - - Descriptor->Address32.ResourceType = - (UINT8) InitializerOp->Asl.Value.Integer; - break; - - case 1: /* Resource Usage */ - - RsSetFlagBits (&Descriptor->Address32.Flags, InitializerOp, 0, 1); - break; - - case 2: /* DecodeType */ - - RsSetFlagBits (&Descriptor->Address32.Flags, InitializerOp, 1, 0); - RsCreateBitField (InitializerOp, ACPI_RESTAG_DECODE, - CurrentByteOffset + ASL_RESDESC_OFFSET (Address32.Flags), 1); - break; - - case 3: /* MinType */ - - RsSetFlagBits (&Descriptor->Address32.Flags, InitializerOp, 2, 0); - RsCreateBitField (InitializerOp, ACPI_RESTAG_MINTYPE, - CurrentByteOffset + ASL_RESDESC_OFFSET (Address32.Flags), 2); - break; - - case 4: /* MaxType */ - - RsSetFlagBits (&Descriptor->Address32.Flags, InitializerOp, 3, 0); - RsCreateBitField (InitializerOp, ACPI_RESTAG_MAXTYPE, - CurrentByteOffset + ASL_RESDESC_OFFSET (Address32.Flags), 3); - break; - - case 5: /* Type-Specific flags */ - - Descriptor->Address32.SpecificFlags = - (UINT8) InitializerOp->Asl.Value.Integer; - break; - - case 6: /* Address Granularity */ - - Descriptor->Address32.Granularity = - (UINT32) InitializerOp->Asl.Value.Integer; - RsCreateByteField (InitializerOp, ACPI_RESTAG_GRANULARITY, - CurrentByteOffset + ASL_RESDESC_OFFSET (Address32.Granularity)); - break; - - case 7: /* Min Address */ - - Descriptor->Address32.Minimum = - (UINT32) InitializerOp->Asl.Value.Integer; - RsCreateByteField (InitializerOp, ACPI_RESTAG_MINADDR, - CurrentByteOffset + ASL_RESDESC_OFFSET (Address32.Minimum)); - break; - - case 8: /* Max Address */ - - Descriptor->Address32.Maximum = - (UINT32) InitializerOp->Asl.Value.Integer; - RsCreateByteField (InitializerOp, ACPI_RESTAG_MAXADDR, - CurrentByteOffset + ASL_RESDESC_OFFSET (Address32.Maximum)); - break; - - case 9: /* Translation Offset */ 
- - Descriptor->Address32.TranslationOffset = - (UINT32) InitializerOp->Asl.Value.Integer; - RsCreateByteField (InitializerOp, ACPI_RESTAG_TRANSLATION, - CurrentByteOffset + ASL_RESDESC_OFFSET (Address32.TranslationOffset)); - break; - - case 10: /* Address Length */ - - Descriptor->Address32.AddressLength = - (UINT32) InitializerOp->Asl.Value.Integer; - RsCreateByteField (InitializerOp, ACPI_RESTAG_LENGTH, - CurrentByteOffset + ASL_RESDESC_OFFSET (Address32.AddressLength)); - break; - - case 11: /* ResSourceIndex [Optional Field - BYTE] */ - - if (InitializerOp->Asl.ParseOpcode != PARSEOP_DEFAULT_ARG) - { - OptionalFields[0] = (UINT8) InitializerOp->Asl.Value.Integer; - OptionIndex++; - Descriptor->Address32.ResourceLength++; - ResSourceIndex = TRUE; - } - break; - - case 12: /* ResSource [Optional Field - STRING] */ - - if ((InitializerOp->Asl.ParseOpcode != PARSEOP_DEFAULT_ARG) && - (InitializerOp->Asl.Value.String)) - { - if (StringLength) - { - Descriptor->Address32.ResourceLength = (UINT16) - (Descriptor->Address32.ResourceLength + StringLength); - - strcpy ((char *) - &OptionalFields[OptionIndex], - InitializerOp->Asl.Value.String); - - /* ResourceSourceIndex must also be valid */ - - if (!ResSourceIndex) - { - AslError (ASL_ERROR, ASL_MSG_RESOURCE_INDEX, - InitializerOp, NULL); - } - } - } - -#if 0 - /* - * Not a valid ResourceSource, ResourceSourceIndex must also - * be invalid - */ - else if (ResSourceIndex) - { - AslError (ASL_ERROR, ASL_MSG_RESOURCE_SOURCE, - InitializerOp, NULL); - } -#endif - break; - - case 13: /* ResourceTag */ - - UtAttachNamepathToOwner (Op, InitializerOp); - break; - - default: - - AslError (ASL_ERROR, ASL_MSG_RESOURCE_LIST, - InitializerOp, NULL); - break; - } - - InitializerOp = RsCompleteNodeAndGetNext (InitializerOp); - } - - Rnode->BufferLength = sizeof (AML_RESOURCE_ADDRESS32) + - OptionIndex + StringLength; - return (Rnode); -} - - -/******************************************************************************* - * - * 
FUNCTION: RsDoExtendedIoDescriptor - * - * PARAMETERS: Op - Parent resource descriptor parse node - * CurrentByteOffset - Offset into the resource template AML - * buffer (to track references to the desc) - * - * RETURN: Completed resource node - * - * DESCRIPTION: Construct a long "ExtendedIO" descriptor - * - ******************************************************************************/ - -ASL_RESOURCE_NODE * -RsDoExtendedIoDescriptor ( - ACPI_PARSE_OBJECT *Op, - UINT32 CurrentByteOffset) -{ - AML_RESOURCE *Descriptor; - ACPI_PARSE_OBJECT *InitializerOp; - ASL_RESOURCE_NODE *Rnode; - UINT16 StringLength = 0; - UINT32 i; - - - InitializerOp = Op->Asl.Child; - StringLength = RsGetStringDataLength (InitializerOp); - - Rnode = RsAllocateResourceNode ( - sizeof (AML_RESOURCE_EXTENDED_ADDRESS64) + 1 + StringLength); - - Descriptor = Rnode->Buffer; - Descriptor->ExtAddress64.DescriptorType = ACPI_RESOURCE_NAME_EXTENDED_ADDRESS64; - Descriptor->ExtAddress64.ResourceType = ACPI_ADDRESS_TYPE_IO_RANGE; - Descriptor->ExtAddress64.RevisionID = AML_RESOURCE_EXTENDED_ADDRESS_REVISION; - - Descriptor->ExtAddress64.ResourceLength = (UINT16) - (sizeof (AML_RESOURCE_EXTENDED_ADDRESS64) - - sizeof (AML_RESOURCE_LARGE_HEADER)); - - /* Process all child initialization nodes */ - - for (i = 0; InitializerOp; i++) - { - switch (i) - { - case 0: /* Resource Usage */ - - RsSetFlagBits (&Descriptor->ExtAddress64.Flags, InitializerOp, 0, 1); - break; - - case 1: /* MinType */ - - RsSetFlagBits (&Descriptor->ExtAddress64.Flags, InitializerOp, 2, 0); - RsCreateBitField (InitializerOp, ACPI_RESTAG_MINTYPE, - CurrentByteOffset + ASL_RESDESC_OFFSET (ExtAddress64.Flags), 2); - break; - - case 2: /* MaxType */ - - RsSetFlagBits (&Descriptor->ExtAddress64.Flags, InitializerOp, 3, 0); - RsCreateBitField (InitializerOp, ACPI_RESTAG_MAXTYPE, - CurrentByteOffset + ASL_RESDESC_OFFSET (ExtAddress64.Flags), 3); - break; - - case 3: /* DecodeType */ - - RsSetFlagBits (&Descriptor->ExtAddress64.Flags, 
InitializerOp, 1, 0); - RsCreateBitField (InitializerOp, ACPI_RESTAG_DECODE, - CurrentByteOffset + ASL_RESDESC_OFFSET (ExtAddress64.Flags), 1); - break; - - case 4: /* Range Type */ - - RsSetFlagBits (&Descriptor->ExtAddress64.SpecificFlags, InitializerOp, 0, 3); - RsCreateBitField (InitializerOp, ACPI_RESTAG_RANGETYPE, - CurrentByteOffset + ASL_RESDESC_OFFSET (ExtAddress64.SpecificFlags), 0); - break; - - case 5: /* Address Granularity */ - - Descriptor->ExtAddress64.Granularity = InitializerOp->Asl.Value.Integer; - RsCreateByteField (InitializerOp, ACPI_RESTAG_GRANULARITY, - CurrentByteOffset + ASL_RESDESC_OFFSET (ExtAddress64.Granularity)); + Descriptor->GenericReg.AddressSpaceId = (UINT8) InitializerOp->Asl.Value.Integer; + RsCreateByteField (InitializerOp, ACPI_RESTAG_ADDRESSSPACE, + CurrentByteOffset + ASL_RESDESC_OFFSET (GenericReg.AddressSpaceId)); break; - case 6: /* Address Min */ + case 1: /* Register Bit Width */ - Descriptor->ExtAddress64.Minimum = InitializerOp->Asl.Value.Integer; - RsCreateByteField (InitializerOp, ACPI_RESTAG_MINADDR, - CurrentByteOffset + ASL_RESDESC_OFFSET (ExtAddress64.Minimum)); + Descriptor->GenericReg.BitWidth = (UINT8) InitializerOp->Asl.Value.Integer; + RsCreateByteField (InitializerOp, ACPI_RESTAG_REGISTERBITWIDTH, + CurrentByteOffset + ASL_RESDESC_OFFSET (GenericReg.BitWidth)); break; - case 7: /* Address Max */ + case 2: /* Register Bit Offset */ - Descriptor->ExtAddress64.Maximum = InitializerOp->Asl.Value.Integer; - RsCreateByteField (InitializerOp, ACPI_RESTAG_MAXADDR, - CurrentByteOffset + ASL_RESDESC_OFFSET (ExtAddress64.Maximum)); + Descriptor->GenericReg.BitOffset = (UINT8) InitializerOp->Asl.Value.Integer; + RsCreateByteField (InitializerOp, ACPI_RESTAG_REGISTERBITOFFSET, + CurrentByteOffset + ASL_RESDESC_OFFSET (GenericReg.BitOffset)); break; - case 8: /* Translation Offset */ + case 3: /* Register Address */ - Descriptor->ExtAddress64.TranslationOffset = InitializerOp->Asl.Value.Integer; - RsCreateByteField 
(InitializerOp, ACPI_RESTAG_TRANSLATION, - CurrentByteOffset + ASL_RESDESC_OFFSET (ExtAddress64.TranslationOffset)); + Descriptor->GenericReg.Address = InitializerOp->Asl.Value.Integer; + RsCreateByteField (InitializerOp, ACPI_RESTAG_ADDRESS, + CurrentByteOffset + ASL_RESDESC_OFFSET (GenericReg.Address)); break; - case 9: /* Address Length */ + case 4: /* Access Size (ACPI 3.0) */ - Descriptor->ExtAddress64.AddressLength = InitializerOp->Asl.Value.Integer; - RsCreateByteField (InitializerOp, ACPI_RESTAG_LENGTH, - CurrentByteOffset + ASL_RESDESC_OFFSET (ExtAddress64.AddressLength)); - break; + Descriptor->GenericReg.AccessSize = (UINT8) InitializerOp->Asl.Value.Integer; + RsCreateByteField (InitializerOp, ACPI_RESTAG_ACCESSSIZE, + CurrentByteOffset + ASL_RESDESC_OFFSET (GenericReg.AccessSize)); - case 10: /* Type-Specific Attributes */ - - Descriptor->ExtAddress64.TypeSpecific = InitializerOp->Asl.Value.Integer; - RsCreateByteField (InitializerOp, ACPI_RESTAG_TYPESPECIFICATTRIBUTES, - CurrentByteOffset + ASL_RESDESC_OFFSET (ExtAddress64.TypeSpecific)); - break; - - case 11: /* ResourceTag */ - - UtAttachNamepathToOwner (Op, InitializerOp); - break; - - case 12: /* Type */ - - RsSetFlagBits (&Descriptor->ExtAddress64.SpecificFlags, InitializerOp, 4, 0); - RsCreateBitField (InitializerOp, ACPI_RESTAG_TYPE, - CurrentByteOffset + ASL_RESDESC_OFFSET (ExtAddress64.SpecificFlags), 4); - break; - - case 13: /* Translation Type */ - - RsSetFlagBits (&Descriptor->ExtAddress64.SpecificFlags, InitializerOp, 5, 0); - RsCreateBitField (InitializerOp, ACPI_RESTAG_TRANSTYPE, - CurrentByteOffset + ASL_RESDESC_OFFSET (ExtAddress64.SpecificFlags), 5); - break; - - default: - - AslError (ASL_ERROR, ASL_MSG_RESOURCE_LIST, InitializerOp, NULL); - break; - } - - InitializerOp = RsCompleteNodeAndGetNext (InitializerOp); - } - - Rnode->BufferLength = sizeof (AML_RESOURCE_EXTENDED_ADDRESS64) + StringLength; - return (Rnode); -} - - 
-/******************************************************************************* - * - * FUNCTION: RsDoExtendedMemoryDescriptor - * - * PARAMETERS: Op - Parent resource descriptor parse node - * CurrentByteOffset - Offset into the resource template AML - * buffer (to track references to the desc) - * - * RETURN: Completed resource node - * - * DESCRIPTION: Construct a long "ExtendedMemory" descriptor - * - ******************************************************************************/ - -ASL_RESOURCE_NODE * -RsDoExtendedMemoryDescriptor ( - ACPI_PARSE_OBJECT *Op, - UINT32 CurrentByteOffset) -{ - AML_RESOURCE *Descriptor; - ACPI_PARSE_OBJECT *InitializerOp; - ASL_RESOURCE_NODE *Rnode; - UINT16 StringLength = 0; - UINT32 i; - - - InitializerOp = Op->Asl.Child; - StringLength = RsGetStringDataLength (InitializerOp); - - Rnode = RsAllocateResourceNode ( - sizeof (AML_RESOURCE_EXTENDED_ADDRESS64) + 1 + StringLength); - - Descriptor = Rnode->Buffer; - Descriptor->ExtAddress64.DescriptorType = ACPI_RESOURCE_NAME_EXTENDED_ADDRESS64; - Descriptor->ExtAddress64.ResourceType = ACPI_ADDRESS_TYPE_MEMORY_RANGE; - Descriptor->ExtAddress64.RevisionID = AML_RESOURCE_EXTENDED_ADDRESS_REVISION; - - Descriptor->ExtAddress64.ResourceLength = (UINT16) - (sizeof (AML_RESOURCE_EXTENDED_ADDRESS64) - - sizeof (AML_RESOURCE_LARGE_HEADER)); - - /* Process all child initialization nodes */ - - for (i = 0; InitializerOp; i++) - { - switch (i) - { - case 0: /* Resource Usage */ - - RsSetFlagBits (&Descriptor->ExtAddress64.Flags, InitializerOp, 0, 1); - break; - - case 1: /* DecodeType */ - - RsSetFlagBits (&Descriptor->ExtAddress64.Flags, InitializerOp, 1, 0); - RsCreateBitField (InitializerOp, ACPI_RESTAG_DECODE, - CurrentByteOffset + ASL_RESDESC_OFFSET (ExtAddress64.Flags), 1); - break; - - case 2: /* MinType */ - - RsSetFlagBits (&Descriptor->ExtAddress64.Flags, InitializerOp, 2, 0); - RsCreateBitField (InitializerOp, ACPI_RESTAG_MINTYPE, - CurrentByteOffset + ASL_RESDESC_OFFSET 
(ExtAddress64.Flags), 2); - break; - - case 3: /* MaxType */ - - RsSetFlagBits (&Descriptor->ExtAddress64.Flags, InitializerOp, 3, 0); - RsCreateBitField (InitializerOp, ACPI_RESTAG_MAXTYPE, - CurrentByteOffset + ASL_RESDESC_OFFSET (ExtAddress64.Flags), 3); - break; - - case 4: /* Memory Type */ - - RsSetFlagBits (&Descriptor->ExtAddress64.SpecificFlags, InitializerOp, 1, 0); - RsCreateBitField (InitializerOp, ACPI_RESTAG_MEMTYPE, - CurrentByteOffset + ASL_RESDESC_OFFSET (ExtAddress64.SpecificFlags), 1); - break; - - case 5: /* Read/Write Type */ - - RsSetFlagBits (&Descriptor->ExtAddress64.SpecificFlags, InitializerOp, 0, 1); - RsCreateBitField (InitializerOp, ACPI_RESTAG_READWRITETYPE, - CurrentByteOffset + ASL_RESDESC_OFFSET (ExtAddress64.SpecificFlags), 0); - break; - - case 6: /* Address Granularity */ - - Descriptor->ExtAddress64.Granularity = InitializerOp->Asl.Value.Integer; - RsCreateByteField (InitializerOp, ACPI_RESTAG_GRANULARITY, - CurrentByteOffset + ASL_RESDESC_OFFSET (ExtAddress64.Granularity)); - break; - - case 7: /* Min Address */ - - Descriptor->ExtAddress64.Minimum = InitializerOp->Asl.Value.Integer; - RsCreateByteField (InitializerOp, ACPI_RESTAG_MINADDR, - CurrentByteOffset + ASL_RESDESC_OFFSET (ExtAddress64.Minimum)); - break; - - case 8: /* Max Address */ - - Descriptor->ExtAddress64.Maximum = InitializerOp->Asl.Value.Integer; - RsCreateByteField (InitializerOp, ACPI_RESTAG_MAXADDR, - CurrentByteOffset + ASL_RESDESC_OFFSET (ExtAddress64.Maximum)); - break; - - case 9: /* Translation Offset */ - - Descriptor->ExtAddress64.TranslationOffset = InitializerOp->Asl.Value.Integer; - RsCreateByteField (InitializerOp, ACPI_RESTAG_TRANSLATION, - CurrentByteOffset + ASL_RESDESC_OFFSET (ExtAddress64.TranslationOffset)); - break; - - case 10: /* Address Length */ - - Descriptor->ExtAddress64.AddressLength = InitializerOp->Asl.Value.Integer; - RsCreateByteField (InitializerOp, ACPI_RESTAG_LENGTH, - CurrentByteOffset + ASL_RESDESC_OFFSET 
(ExtAddress64.AddressLength)); - break; - - case 11: /* Type-Specific Attributes */ - - Descriptor->ExtAddress64.TypeSpecific = InitializerOp->Asl.Value.Integer; - RsCreateByteField (InitializerOp, ACPI_RESTAG_TYPESPECIFICATTRIBUTES, - CurrentByteOffset + ASL_RESDESC_OFFSET (ExtAddress64.TypeSpecific)); - break; - - case 12: /* ResourceTag */ - - UtAttachNamepathToOwner (Op, InitializerOp); - break; - - - case 13: /* Address Range */ - - RsSetFlagBits (&Descriptor->ExtAddress64.SpecificFlags, InitializerOp, 3, 0); - RsCreateBitField (InitializerOp, ACPI_RESTAG_MEMATTRIBUTES, - CurrentByteOffset + ASL_RESDESC_OFFSET (ExtAddress64.SpecificFlags), 3); - break; - - case 14: /* Type */ - - RsSetFlagBits (&Descriptor->ExtAddress64.SpecificFlags, InitializerOp, 5, 0); - RsCreateBitField (InitializerOp, ACPI_RESTAG_TYPE, - CurrentByteOffset + ASL_RESDESC_OFFSET (ExtAddress64.SpecificFlags), 5); - break; - - default: - - AslError (ASL_ERROR, ASL_MSG_RESOURCE_LIST, InitializerOp, NULL); - break; - } - - InitializerOp = RsCompleteNodeAndGetNext (InitializerOp); - } - - Rnode->BufferLength = sizeof (AML_RESOURCE_EXTENDED_ADDRESS64) + StringLength; - return (Rnode); -} - - -/******************************************************************************* - * - * FUNCTION: RsDoExtendedSpaceDescriptor - * - * PARAMETERS: Op - Parent resource descriptor parse node - * CurrentByteOffset - Offset into the resource template AML - * buffer (to track references to the desc) - * - * RETURN: Completed resource node - * - * DESCRIPTION: Construct a long "ExtendedSpace" descriptor - * - ******************************************************************************/ - -ASL_RESOURCE_NODE * -RsDoExtendedSpaceDescriptor ( - ACPI_PARSE_OBJECT *Op, - UINT32 CurrentByteOffset) -{ - AML_RESOURCE *Descriptor; - ACPI_PARSE_OBJECT *InitializerOp; - ASL_RESOURCE_NODE *Rnode; - UINT16 StringLength = 0; - UINT32 i; - - - InitializerOp = Op->Asl.Child; - StringLength = RsGetStringDataLength 
(InitializerOp); - - Rnode = RsAllocateResourceNode ( - sizeof (AML_RESOURCE_EXTENDED_ADDRESS64) + 1 + StringLength); - - Descriptor = Rnode->Buffer; - Descriptor->ExtAddress64.DescriptorType = ACPI_RESOURCE_NAME_EXTENDED_ADDRESS64; - Descriptor->ExtAddress64.RevisionID = AML_RESOURCE_EXTENDED_ADDRESS_REVISION; - - Descriptor->ExtAddress64.ResourceLength = (UINT16) - (sizeof (AML_RESOURCE_EXTENDED_ADDRESS64) - - sizeof (AML_RESOURCE_LARGE_HEADER)); - - /* Process all child initialization nodes */ - - for (i = 0; InitializerOp; i++) - { - switch (i) - { - case 0: /* Resource Type */ - - Descriptor->ExtAddress64.ResourceType = - (UINT8) InitializerOp->Asl.Value.Integer; - break; - - case 1: /* Resource Usage */ - - RsSetFlagBits (&Descriptor->ExtAddress64.Flags, InitializerOp, 0, 1); - break; - - case 2: /* DecodeType */ - - RsSetFlagBits (&Descriptor->ExtAddress64.Flags, InitializerOp, 1, 0); - RsCreateBitField (InitializerOp, ACPI_RESTAG_DECODE, - CurrentByteOffset + ASL_RESDESC_OFFSET (ExtAddress64.Flags), 1); - break; - - case 3: /* MinType */ - - RsSetFlagBits (&Descriptor->ExtAddress64.Flags, InitializerOp, 2, 0); - RsCreateBitField (InitializerOp, ACPI_RESTAG_MINTYPE, - CurrentByteOffset + ASL_RESDESC_OFFSET (ExtAddress64.Flags), 2); - break; - - case 4: /* MaxType */ - - RsSetFlagBits (&Descriptor->ExtAddress64.Flags, InitializerOp, 3, 0); - RsCreateBitField (InitializerOp, ACPI_RESTAG_MAXTYPE, - CurrentByteOffset + ASL_RESDESC_OFFSET (ExtAddress64.Flags), 3); - break; - - case 5: /* Type-Specific flags */ - - Descriptor->ExtAddress64.SpecificFlags = - (UINT8) InitializerOp->Asl.Value.Integer; - break; - - case 6: /* Address Granularity */ - - Descriptor->ExtAddress64.Granularity = InitializerOp->Asl.Value.Integer; - RsCreateByteField (InitializerOp, ACPI_RESTAG_GRANULARITY, - CurrentByteOffset + ASL_RESDESC_OFFSET (ExtAddress64.Granularity)); - break; - - case 7: /* Min Address */ - - Descriptor->ExtAddress64.Minimum = InitializerOp->Asl.Value.Integer; - 
RsCreateByteField (InitializerOp, ACPI_RESTAG_MINADDR, - CurrentByteOffset + ASL_RESDESC_OFFSET (ExtAddress64.Minimum)); - break; - - case 8: /* Max Address */ - - Descriptor->ExtAddress64.Maximum = InitializerOp->Asl.Value.Integer; - RsCreateByteField (InitializerOp, ACPI_RESTAG_MAXADDR, - CurrentByteOffset + ASL_RESDESC_OFFSET (ExtAddress64.Maximum)); - break; - - case 9: /* Translation Offset */ - - Descriptor->ExtAddress64.TranslationOffset = InitializerOp->Asl.Value.Integer; - RsCreateByteField (InitializerOp, ACPI_RESTAG_TRANSLATION, - CurrentByteOffset + ASL_RESDESC_OFFSET (ExtAddress64.TranslationOffset)); - break; - - case 10: /* Address Length */ - - Descriptor->ExtAddress64.AddressLength = InitializerOp->Asl.Value.Integer; - RsCreateByteField (InitializerOp, ACPI_RESTAG_LENGTH, - CurrentByteOffset + ASL_RESDESC_OFFSET (ExtAddress64.AddressLength)); - break; - - case 11: /* Type-Specific Attributes */ - - Descriptor->ExtAddress64.TypeSpecific = InitializerOp->Asl.Value.Integer; - RsCreateByteField (InitializerOp, ACPI_RESTAG_TYPESPECIFICATTRIBUTES, - CurrentByteOffset + ASL_RESDESC_OFFSET (ExtAddress64.TypeSpecific)); - break; - - case 12: /* ResourceTag */ - - UtAttachNamepathToOwner (Op, InitializerOp); - break; - - default: - - AslError (ASL_ERROR, ASL_MSG_RESOURCE_LIST, InitializerOp, NULL); - break; - } - - InitializerOp = RsCompleteNodeAndGetNext (InitializerOp); - } - - Rnode->BufferLength = sizeof (AML_RESOURCE_EXTENDED_ADDRESS64) + StringLength; - return (Rnode); -} - - -/******************************************************************************* - * - * FUNCTION: RsDoQwordIoDescriptor - * - * PARAMETERS: Op - Parent resource descriptor parse node - * CurrentByteOffset - Offset into the resource template AML - * buffer (to track references to the desc) - * - * RETURN: Completed resource node - * - * DESCRIPTION: Construct a long "QwordIO" descriptor - * - ******************************************************************************/ - 
-ASL_RESOURCE_NODE * -RsDoQwordIoDescriptor ( - ACPI_PARSE_OBJECT *Op, - UINT32 CurrentByteOffset) -{ - AML_RESOURCE *Descriptor; - ACPI_PARSE_OBJECT *InitializerOp; - ASL_RESOURCE_NODE *Rnode; - UINT8 *OptionalFields; - UINT16 StringLength = 0; - UINT32 OptionIndex = 0; - UINT32 i; - BOOLEAN ResSourceIndex = FALSE; - - - InitializerOp = Op->Asl.Child; - StringLength = RsGetStringDataLength (InitializerOp); - - Rnode = RsAllocateResourceNode ( - sizeof (AML_RESOURCE_ADDRESS64) + 1 + StringLength); - - Descriptor = Rnode->Buffer; - Descriptor->Address64.DescriptorType = ACPI_RESOURCE_NAME_ADDRESS64; - Descriptor->Address64.ResourceType = ACPI_ADDRESS_TYPE_IO_RANGE; - - /* - * Initial descriptor length -- may be enlarged if there are - * optional fields present - */ - OptionalFields = ((UINT8 *) Descriptor) + sizeof (AML_RESOURCE_ADDRESS64); - Descriptor->Address64.ResourceLength = (UINT16) - (sizeof (AML_RESOURCE_ADDRESS64) - - sizeof (AML_RESOURCE_LARGE_HEADER)); - - /* Process all child initialization nodes */ - - for (i = 0; InitializerOp; i++) - { - switch (i) - { - case 0: /* Resource Usage */ - - RsSetFlagBits (&Descriptor->Address64.Flags, InitializerOp, 0, 1); - break; - - case 1: /* MinType */ - - RsSetFlagBits (&Descriptor->Address64.Flags, InitializerOp, 2, 0); - RsCreateBitField (InitializerOp, ACPI_RESTAG_MINTYPE, - CurrentByteOffset + ASL_RESDESC_OFFSET (Address64.Flags), 2); - break; - - case 2: /* MaxType */ - - RsSetFlagBits (&Descriptor->Address64.Flags, InitializerOp, 3, 0); - RsCreateBitField (InitializerOp, ACPI_RESTAG_MAXTYPE, - CurrentByteOffset + ASL_RESDESC_OFFSET (Address64.Flags), 3); - break; - - case 3: /* DecodeType */ - - RsSetFlagBits (&Descriptor->Address64.Flags, InitializerOp, 1, 0); - RsCreateBitField (InitializerOp, ACPI_RESTAG_DECODE, - CurrentByteOffset + ASL_RESDESC_OFFSET (Address64.Flags), 1); - break; - - case 4: /* Range Type */ - - RsSetFlagBits (&Descriptor->Address64.SpecificFlags, InitializerOp, 0, 3); - 
RsCreateBitField (InitializerOp, ACPI_RESTAG_RANGETYPE, - CurrentByteOffset + ASL_RESDESC_OFFSET (Address64.SpecificFlags), 0); - break; - - case 5: /* Address Granularity */ - - Descriptor->Address64.Granularity = InitializerOp->Asl.Value.Integer; - RsCreateByteField (InitializerOp, ACPI_RESTAG_GRANULARITY, - CurrentByteOffset + ASL_RESDESC_OFFSET (Address64.Granularity)); - break; - - case 6: /* Address Min */ - - Descriptor->Address64.Minimum = InitializerOp->Asl.Value.Integer; - RsCreateByteField (InitializerOp, ACPI_RESTAG_MINADDR, - CurrentByteOffset + ASL_RESDESC_OFFSET (Address64.Minimum)); - break; - - case 7: /* Address Max */ - - Descriptor->Address64.Maximum = InitializerOp->Asl.Value.Integer; - RsCreateByteField (InitializerOp, ACPI_RESTAG_MAXADDR, - CurrentByteOffset + ASL_RESDESC_OFFSET (Address64.Maximum)); - break; - - case 8: /* Translation Offset */ - - Descriptor->Address64.TranslationOffset = InitializerOp->Asl.Value.Integer; - RsCreateByteField (InitializerOp, ACPI_RESTAG_TRANSLATION, - CurrentByteOffset + ASL_RESDESC_OFFSET (Address64.TranslationOffset)); - break; - - case 9: /* Address Length */ - - Descriptor->Address64.AddressLength = InitializerOp->Asl.Value.Integer; - RsCreateByteField (InitializerOp, ACPI_RESTAG_LENGTH, - CurrentByteOffset + ASL_RESDESC_OFFSET (Address64.AddressLength)); - break; - - case 10: /* ResSourceIndex [Optional Field - BYTE] */ - - if (InitializerOp->Asl.ParseOpcode != PARSEOP_DEFAULT_ARG) + if (Descriptor->GenericReg.AccessSize > AML_FIELD_ACCESS_QWORD) { - OptionalFields[0] = (UINT8) InitializerOp->Asl.Value.Integer; - OptionIndex++; - Descriptor->Address64.ResourceLength++; - ResSourceIndex = TRUE; - } - break; - - case 11: /* ResSource [Optional Field - STRING] */ - - if ((InitializerOp->Asl.ParseOpcode != PARSEOP_DEFAULT_ARG) && - (InitializerOp->Asl.Value.String)) - { - if (StringLength) - { - Descriptor->Address64.ResourceLength = (UINT16) - (Descriptor->Address64.ResourceLength + StringLength); - - 
strcpy ((char *) - &OptionalFields[OptionIndex], - InitializerOp->Asl.Value.String); - - /* ResourceSourceIndex must also be valid */ - - if (!ResSourceIndex) - { - AslError (ASL_ERROR, ASL_MSG_RESOURCE_INDEX, - InitializerOp, NULL); - } - } - } - -#if 0 - /* - * Not a valid ResourceSource, ResourceSourceIndex must also - * be invalid - */ - else if (ResSourceIndex) - { - AslError (ASL_ERROR, ASL_MSG_RESOURCE_SOURCE, + AslError (ASL_ERROR, ASL_MSG_INVALID_ACCESS_SIZE, InitializerOp, NULL); } -#endif break; - case 12: /* ResourceTag */ - - UtAttachNamepathToOwner (Op, InitializerOp); - break; - - case 13: /* Type */ - - RsSetFlagBits (&Descriptor->Address64.SpecificFlags, InitializerOp, 4, 0); - RsCreateBitField (InitializerOp, ACPI_RESTAG_TYPE, - CurrentByteOffset + ASL_RESDESC_OFFSET (Address64.SpecificFlags), 4); - break; - - case 14: /* Translation Type */ - - RsSetFlagBits (&Descriptor->Address64.SpecificFlags, InitializerOp, 5, 0); - RsCreateBitField (InitializerOp, ACPI_RESTAG_TRANSTYPE, - CurrentByteOffset + ASL_RESDESC_OFFSET (Address64.SpecificFlags), 5); - break; - - default: - - AslError (ASL_ERROR, ASL_MSG_RESOURCE_LIST, InitializerOp, NULL); - break; - } - - InitializerOp = RsCompleteNodeAndGetNext (InitializerOp); - } - - Rnode->BufferLength = sizeof (AML_RESOURCE_ADDRESS64) + - OptionIndex + StringLength; - return (Rnode); -} - - -/******************************************************************************* - * - * FUNCTION: RsDoQwordMemoryDescriptor - * - * PARAMETERS: Op - Parent resource descriptor parse node - * CurrentByteOffset - Offset into the resource template AML - * buffer (to track references to the desc) - * - * RETURN: Completed resource node - * - * DESCRIPTION: Construct a long "QwordMemory" descriptor - * - ******************************************************************************/ - -ASL_RESOURCE_NODE * -RsDoQwordMemoryDescriptor ( - ACPI_PARSE_OBJECT *Op, - UINT32 CurrentByteOffset) -{ - AML_RESOURCE *Descriptor; - 
ACPI_PARSE_OBJECT *InitializerOp; - ASL_RESOURCE_NODE *Rnode; - UINT8 *OptionalFields; - UINT16 StringLength = 0; - UINT32 OptionIndex = 0; - UINT32 i; - BOOLEAN ResSourceIndex = FALSE; - - - InitializerOp = Op->Asl.Child; - StringLength = RsGetStringDataLength (InitializerOp); - - Rnode = RsAllocateResourceNode ( - sizeof (AML_RESOURCE_ADDRESS64) + 1 + StringLength); - - Descriptor = Rnode->Buffer; - Descriptor->Address64.DescriptorType = ACPI_RESOURCE_NAME_ADDRESS64; - Descriptor->Address64.ResourceType = ACPI_ADDRESS_TYPE_MEMORY_RANGE; - - /* - * Initial descriptor length -- may be enlarged if there are - * optional fields present - */ - OptionalFields = ((UINT8 *) Descriptor) + sizeof (AML_RESOURCE_ADDRESS64); - Descriptor->Address64.ResourceLength = (UINT16) - (sizeof (AML_RESOURCE_ADDRESS64) - - sizeof (AML_RESOURCE_LARGE_HEADER)); - - /* Process all child initialization nodes */ - - for (i = 0; InitializerOp; i++) - { - switch (i) - { - case 0: /* Resource Usage */ - - RsSetFlagBits (&Descriptor->Address64.Flags, InitializerOp, 0, 1); - break; - - case 1: /* DecodeType */ - - RsSetFlagBits (&Descriptor->Address64.Flags, InitializerOp, 1, 0); - RsCreateBitField (InitializerOp, ACPI_RESTAG_DECODE, - CurrentByteOffset + ASL_RESDESC_OFFSET (Address64.Flags), 1); - break; - - case 2: /* MinType */ - - RsSetFlagBits (&Descriptor->Address64.Flags, InitializerOp, 2, 0); - RsCreateBitField (InitializerOp, ACPI_RESTAG_MINTYPE, - CurrentByteOffset + ASL_RESDESC_OFFSET (Address64.Flags), 2); - break; - - case 3: /* MaxType */ - - RsSetFlagBits (&Descriptor->Address64.Flags, InitializerOp, 3, 0); - RsCreateBitField (InitializerOp, ACPI_RESTAG_MAXTYPE, - CurrentByteOffset + ASL_RESDESC_OFFSET (Address64.Flags), 3); - break; - - case 4: /* Memory Type */ - - RsSetFlagBits (&Descriptor->Address64.SpecificFlags, InitializerOp, 1, 0); - RsCreateBitField (InitializerOp, ACPI_RESTAG_MEMTYPE, - CurrentByteOffset + ASL_RESDESC_OFFSET (Address64.SpecificFlags), 1); - break; - - 
case 5: /* Read/Write Type */ - - RsSetFlagBits (&Descriptor->Address64.SpecificFlags, InitializerOp, 0, 1); - RsCreateBitField (InitializerOp, ACPI_RESTAG_READWRITETYPE, - CurrentByteOffset + ASL_RESDESC_OFFSET (Address64.SpecificFlags), 0); - break; - - case 6: /* Address Granularity */ - - Descriptor->Address64.Granularity = InitializerOp->Asl.Value.Integer; - RsCreateByteField (InitializerOp, ACPI_RESTAG_GRANULARITY, - CurrentByteOffset + ASL_RESDESC_OFFSET (Address64.Granularity)); - break; - - case 7: /* Min Address */ - - Descriptor->Address64.Minimum = InitializerOp->Asl.Value.Integer; - RsCreateByteField (InitializerOp, ACPI_RESTAG_MINADDR, - CurrentByteOffset + ASL_RESDESC_OFFSET (Address64.Minimum)); - break; - - case 8: /* Max Address */ - - Descriptor->Address64.Maximum = InitializerOp->Asl.Value.Integer; - RsCreateByteField (InitializerOp, ACPI_RESTAG_MAXADDR, - CurrentByteOffset + ASL_RESDESC_OFFSET (Address64.Maximum)); - break; - - case 9: /* Translation Offset */ - - Descriptor->Address64.TranslationOffset = InitializerOp->Asl.Value.Integer; - RsCreateByteField (InitializerOp, ACPI_RESTAG_TRANSLATION, - CurrentByteOffset + ASL_RESDESC_OFFSET (Address64.TranslationOffset)); - break; - - case 10: /* Address Length */ - - Descriptor->Address64.AddressLength = InitializerOp->Asl.Value.Integer; - RsCreateByteField (InitializerOp, ACPI_RESTAG_LENGTH, - CurrentByteOffset + ASL_RESDESC_OFFSET (Address64.AddressLength)); - break; - - case 11: /* ResSourceIndex [Optional Field - BYTE] */ - - if (InitializerOp->Asl.ParseOpcode != PARSEOP_DEFAULT_ARG) - { - OptionalFields[0] = (UINT8) InitializerOp->Asl.Value.Integer; - OptionIndex++; - Descriptor->Address64.ResourceLength++; - ResSourceIndex = TRUE; - } - break; - - case 12: /* ResSource [Optional Field - STRING] */ - - if ((InitializerOp->Asl.ParseOpcode != PARSEOP_DEFAULT_ARG) && - (InitializerOp->Asl.Value.String)) - { - if (StringLength) - { - Descriptor->Address64.ResourceLength = (UINT16) - 
(Descriptor->Address64.ResourceLength + StringLength); - - strcpy ((char *) - &OptionalFields[OptionIndex], - InitializerOp->Asl.Value.String); - - /* ResourceSourceIndex must also be valid */ - - if (!ResSourceIndex) - { - AslError (ASL_ERROR, ASL_MSG_RESOURCE_INDEX, - InitializerOp, NULL); - } - } - } - -#if 0 - /* - * Not a valid ResourceSource, ResourceSourceIndex must also - * be invalid - */ - else if (ResSourceIndex) - { - AslError (ASL_ERROR, ASL_MSG_RESOURCE_SOURCE, - InitializerOp, NULL); - } -#endif - break; - - case 13: /* ResourceTag */ - - UtAttachNamepathToOwner (Op, InitializerOp); - break; - - - case 14: /* Address Range */ - - RsSetFlagBits (&Descriptor->Address64.SpecificFlags, InitializerOp, 3, 0); - RsCreateBitField (InitializerOp, ACPI_RESTAG_MEMATTRIBUTES, - CurrentByteOffset + ASL_RESDESC_OFFSET (Address64.SpecificFlags), 3); - break; - - case 15: /* Type */ - - RsSetFlagBits (&Descriptor->Address64.SpecificFlags, InitializerOp, 5, 0); - RsCreateBitField (InitializerOp, ACPI_RESTAG_TYPE, - CurrentByteOffset + ASL_RESDESC_OFFSET (Address64.SpecificFlags), 5); - break; - - default: - - AslError (ASL_ERROR, ASL_MSG_RESOURCE_LIST, InitializerOp, NULL); - break; - } - - InitializerOp = RsCompleteNodeAndGetNext (InitializerOp); - } - - Rnode->BufferLength = sizeof (AML_RESOURCE_ADDRESS64) + - OptionIndex + StringLength; - return (Rnode); -} - - -/******************************************************************************* - * - * FUNCTION: RsDoQwordSpaceDescriptor - * - * PARAMETERS: Op - Parent resource descriptor parse node - * CurrentByteOffset - Offset into the resource template AML - * buffer (to track references to the desc) - * - * RETURN: Completed resource node - * - * DESCRIPTION: Construct a long "QwordSpace" descriptor - * - ******************************************************************************/ - -ASL_RESOURCE_NODE * -RsDoQwordSpaceDescriptor ( - ACPI_PARSE_OBJECT *Op, - UINT32 CurrentByteOffset) -{ - AML_RESOURCE 
*Descriptor; - ACPI_PARSE_OBJECT *InitializerOp; - ASL_RESOURCE_NODE *Rnode; - UINT8 *OptionalFields; - UINT16 StringLength = 0; - UINT32 OptionIndex = 0; - UINT32 i; - BOOLEAN ResSourceIndex = FALSE; - - - InitializerOp = Op->Asl.Child; - StringLength = RsGetStringDataLength (InitializerOp); - - Rnode = RsAllocateResourceNode ( - sizeof (AML_RESOURCE_ADDRESS64) + 1 + StringLength); - - Descriptor = Rnode->Buffer; - Descriptor->Address64.DescriptorType = ACPI_RESOURCE_NAME_ADDRESS64; - - /* - * Initial descriptor length -- may be enlarged if there are - * optional fields present - */ - OptionalFields = ((UINT8 *) Descriptor) + sizeof (AML_RESOURCE_ADDRESS64); - Descriptor->Address64.ResourceLength = (UINT16) - (sizeof (AML_RESOURCE_ADDRESS64) - - sizeof (AML_RESOURCE_LARGE_HEADER)); - - /* Process all child initialization nodes */ - - for (i = 0; InitializerOp; i++) - { - switch (i) - { - case 0: /* Resource Type */ - - Descriptor->Address64.ResourceType = - (UINT8) InitializerOp->Asl.Value.Integer; - break; - - case 1: /* Resource Usage */ - - RsSetFlagBits (&Descriptor->Address64.Flags, InitializerOp, 0, 1); - break; - - case 2: /* DecodeType */ - - RsSetFlagBits (&Descriptor->Address64.Flags, InitializerOp, 1, 0); - RsCreateBitField (InitializerOp, ACPI_RESTAG_DECODE, - CurrentByteOffset + ASL_RESDESC_OFFSET (Address64.Flags), 1); - break; - - case 3: /* MinType */ - - RsSetFlagBits (&Descriptor->Address64.Flags, InitializerOp, 2, 0); - RsCreateBitField (InitializerOp, ACPI_RESTAG_MINTYPE, - CurrentByteOffset + ASL_RESDESC_OFFSET (Address64.Flags), 2); - break; - - case 4: /* MaxType */ - - RsSetFlagBits (&Descriptor->Address64.Flags, InitializerOp, 3, 0); - RsCreateBitField (InitializerOp, ACPI_RESTAG_MAXTYPE, - CurrentByteOffset + ASL_RESDESC_OFFSET (Address64.Flags), 3); - break; - - case 5: /* Type-Specific flags */ - - Descriptor->Address64.SpecificFlags = - (UINT8) InitializerOp->Asl.Value.Integer; - break; - - case 6: /* Address Granularity */ - - 
Descriptor->Address64.Granularity = InitializerOp->Asl.Value.Integer; - RsCreateByteField (InitializerOp, ACPI_RESTAG_GRANULARITY, - CurrentByteOffset + ASL_RESDESC_OFFSET (Address64.Granularity)); - break; - - case 7: /* Min Address */ - - Descriptor->Address64.Minimum = InitializerOp->Asl.Value.Integer; - RsCreateByteField (InitializerOp, ACPI_RESTAG_MINADDR, - CurrentByteOffset + ASL_RESDESC_OFFSET (Address64.Minimum)); - break; - - case 8: /* Max Address */ - - Descriptor->Address64.Maximum = InitializerOp->Asl.Value.Integer; - RsCreateByteField (InitializerOp, ACPI_RESTAG_MAXADDR, - CurrentByteOffset + ASL_RESDESC_OFFSET (Address64.Maximum)); - break; - - case 9: /* Translation Offset */ - - Descriptor->Address64.TranslationOffset = InitializerOp->Asl.Value.Integer; - RsCreateByteField (InitializerOp, ACPI_RESTAG_TRANSLATION, - CurrentByteOffset + ASL_RESDESC_OFFSET (Address64.TranslationOffset)); - break; - - case 10: /* Address Length */ - - Descriptor->Address64.AddressLength = InitializerOp->Asl.Value.Integer; - RsCreateByteField (InitializerOp, ACPI_RESTAG_LENGTH, - CurrentByteOffset + ASL_RESDESC_OFFSET (Address64.AddressLength)); - break; - - case 11: /* ResSourceIndex [Optional Field - BYTE] */ - - if (InitializerOp->Asl.ParseOpcode != PARSEOP_DEFAULT_ARG) - { - OptionalFields[0] = (UINT8) InitializerOp->Asl.Value.Integer; - OptionIndex++; - Descriptor->Address64.ResourceLength++; - ResSourceIndex = TRUE; - } - break; - - case 12: /* ResSource [Optional Field - STRING] */ - - if ((InitializerOp->Asl.ParseOpcode != PARSEOP_DEFAULT_ARG) && - (InitializerOp->Asl.Value.String)) - { - if (StringLength) - { - Descriptor->Address64.ResourceLength = (UINT16) - (Descriptor->Address64.ResourceLength + StringLength); - - strcpy ((char *) - &OptionalFields[OptionIndex], - InitializerOp->Asl.Value.String); - - /* ResourceSourceIndex must also be valid */ - - if (!ResSourceIndex) - { - AslError (ASL_ERROR, ASL_MSG_RESOURCE_INDEX, - InitializerOp, NULL); - } - } - } 
- -#if 0 - /* - * Not a valid ResourceSource, ResourceSourceIndex must also - * be invalid - */ - else if (ResSourceIndex) - { - AslError (ASL_ERROR, ASL_MSG_RESOURCE_SOURCE, - InitializerOp, NULL); - } -#endif - break; - - case 13: /* ResourceTag */ + case 5: /* ResourceTag (ACPI 3.0b) */ UtAttachNamepathToOwner (Op, InitializerOp); break; @@ -1856,600 +222,6 @@ RsDoQwordSpaceDescriptor ( InitializerOp = RsCompleteNodeAndGetNext (InitializerOp); } - - Rnode->BufferLength = sizeof (AML_RESOURCE_ADDRESS64) + - OptionIndex + StringLength; - return (Rnode); -} - - -/******************************************************************************* - * - * FUNCTION: RsDoWordIoDescriptor - * - * PARAMETERS: Op - Parent resource descriptor parse node - * CurrentByteOffset - Offset into the resource template AML - * buffer (to track references to the desc) - * - * RETURN: Completed resource node - * - * DESCRIPTION: Construct a long "WordIO" descriptor - * - ******************************************************************************/ - -ASL_RESOURCE_NODE * -RsDoWordIoDescriptor ( - ACPI_PARSE_OBJECT *Op, - UINT32 CurrentByteOffset) -{ - AML_RESOURCE *Descriptor; - ACPI_PARSE_OBJECT *InitializerOp; - ASL_RESOURCE_NODE *Rnode; - UINT8 *OptionalFields; - UINT16 StringLength = 0; - UINT32 OptionIndex = 0; - UINT32 i; - BOOLEAN ResSourceIndex = FALSE; - - - InitializerOp = Op->Asl.Child; - StringLength = RsGetStringDataLength (InitializerOp); - - Rnode = RsAllocateResourceNode ( - sizeof (AML_RESOURCE_ADDRESS16) + 1 + StringLength); - - Descriptor = Rnode->Buffer; - Descriptor->Address16.DescriptorType = ACPI_RESOURCE_NAME_ADDRESS16; - Descriptor->Address16.ResourceType = ACPI_ADDRESS_TYPE_IO_RANGE; - - /* - * Initial descriptor length -- may be enlarged if there are - * optional fields present - */ - OptionalFields = ((UINT8 *) Descriptor) + sizeof (AML_RESOURCE_ADDRESS16); - Descriptor->Address16.ResourceLength = (UINT16) - (sizeof (AML_RESOURCE_ADDRESS16) - - sizeof 
(AML_RESOURCE_LARGE_HEADER)); - - /* Process all child initialization nodes */ - - for (i = 0; InitializerOp; i++) - { - switch (i) - { - case 0: /* Resource Usage */ - - RsSetFlagBits (&Descriptor->Address16.Flags, InitializerOp, 0, 1); - break; - - case 1: /* MinType */ - - RsSetFlagBits (&Descriptor->Address16.Flags, InitializerOp, 2, 0); - RsCreateBitField (InitializerOp, ACPI_RESTAG_MINTYPE, - CurrentByteOffset + ASL_RESDESC_OFFSET (Address16.Flags), 2); - break; - - case 2: /* MaxType */ - - RsSetFlagBits (&Descriptor->Address16.Flags, InitializerOp, 3, 0); - RsCreateBitField (InitializerOp, ACPI_RESTAG_MAXTYPE, - CurrentByteOffset + ASL_RESDESC_OFFSET (Address16.Flags), 3); - break; - - case 3: /* DecodeType */ - - RsSetFlagBits (&Descriptor->Address16.Flags, InitializerOp, 1, 0); - RsCreateBitField (InitializerOp, ACPI_RESTAG_DECODE, - CurrentByteOffset + ASL_RESDESC_OFFSET (Address16.Flags), 1); - break; - - case 4: /* Range Type */ - - RsSetFlagBits (&Descriptor->Address16.SpecificFlags, InitializerOp, 0, 3); - RsCreateBitField (InitializerOp, ACPI_RESTAG_RANGETYPE, - CurrentByteOffset + ASL_RESDESC_OFFSET (Address16.SpecificFlags), 0); - break; - - case 5: /* Address Granularity */ - - Descriptor->Address16.Granularity = (UINT16) InitializerOp->Asl.Value.Integer; - RsCreateByteField (InitializerOp, ACPI_RESTAG_GRANULARITY, - CurrentByteOffset + ASL_RESDESC_OFFSET (Address16.Granularity)); - break; - - case 6: /* Address Min */ - - Descriptor->Address16.Minimum = (UINT16) InitializerOp->Asl.Value.Integer; - RsCreateByteField (InitializerOp, ACPI_RESTAG_MINADDR, - CurrentByteOffset + ASL_RESDESC_OFFSET (Address16.Minimum)); - break; - - case 7: /* Address Max */ - - Descriptor->Address16.Maximum = (UINT16) InitializerOp->Asl.Value.Integer; - RsCreateByteField (InitializerOp, ACPI_RESTAG_MAXADDR, - CurrentByteOffset + ASL_RESDESC_OFFSET (Address16.Maximum)); - break; - - case 8: /* Translation Offset */ - - Descriptor->Address16.TranslationOffset = (UINT16) 
InitializerOp->Asl.Value.Integer; - RsCreateByteField (InitializerOp, ACPI_RESTAG_TRANSLATION, - CurrentByteOffset + ASL_RESDESC_OFFSET (Address16.TranslationOffset)); - break; - - case 9: /* Address Length */ - - Descriptor->Address16.AddressLength = (UINT16) InitializerOp->Asl.Value.Integer; - RsCreateByteField (InitializerOp, ACPI_RESTAG_LENGTH, - CurrentByteOffset + ASL_RESDESC_OFFSET (Address16.AddressLength)); - break; - - case 10: /* ResSourceIndex [Optional Field - BYTE] */ - - if (InitializerOp->Asl.ParseOpcode != PARSEOP_DEFAULT_ARG) - { - OptionalFields[0] = (UINT8) InitializerOp->Asl.Value.Integer; - OptionIndex++; - Descriptor->Address16.ResourceLength++; - ResSourceIndex = TRUE; - } - break; - - case 11: /* ResSource [Optional Field - STRING] */ - - if ((InitializerOp->Asl.ParseOpcode != PARSEOP_DEFAULT_ARG) && - (InitializerOp->Asl.Value.String)) - { - if (StringLength) - { - Descriptor->Address16.ResourceLength = (UINT16) - (Descriptor->Address16.ResourceLength + StringLength); - - strcpy ((char *) - &OptionalFields[OptionIndex], - InitializerOp->Asl.Value.String); - - /* ResourceSourceIndex must also be valid */ - - if (!ResSourceIndex) - { - AslError (ASL_ERROR, ASL_MSG_RESOURCE_INDEX, - InitializerOp, NULL); - } - } - } - -#if 0 - /* - * Not a valid ResourceSource, ResourceSourceIndex must also - * be invalid - */ - else if (ResSourceIndex) - { - AslError (ASL_ERROR, ASL_MSG_RESOURCE_SOURCE, - InitializerOp, NULL); - } -#endif - break; - - case 12: /* ResourceTag */ - - UtAttachNamepathToOwner (Op, InitializerOp); - break; - - case 13: /* Type */ - - RsSetFlagBits (&Descriptor->Address16.SpecificFlags, InitializerOp, 4, 0); - RsCreateBitField (InitializerOp, ACPI_RESTAG_TYPE, - CurrentByteOffset + ASL_RESDESC_OFFSET (Address16.SpecificFlags), 4); - break; - - case 14: /* Translation Type */ - - RsSetFlagBits (&Descriptor->Address16.SpecificFlags, InitializerOp, 5, 0); - RsCreateBitField (InitializerOp, ACPI_RESTAG_TRANSTYPE, - CurrentByteOffset + 
ASL_RESDESC_OFFSET (Address16.SpecificFlags), 5); - break; - - default: - - AslError (ASL_ERROR, ASL_MSG_RESOURCE_LIST, InitializerOp, NULL); - break; - } - - InitializerOp = RsCompleteNodeAndGetNext (InitializerOp); - } - - Rnode->BufferLength = sizeof (AML_RESOURCE_ADDRESS16) + - OptionIndex + StringLength; - return (Rnode); -} - - -/******************************************************************************* - * - * FUNCTION: RsDoWordBusNumberDescriptor - * - * PARAMETERS: Op - Parent resource descriptor parse node - * CurrentByteOffset - Offset into the resource template AML - * buffer (to track references to the desc) - * - * RETURN: Completed resource node - * - * DESCRIPTION: Construct a long "WordBusNumber" descriptor - * - ******************************************************************************/ - -ASL_RESOURCE_NODE * -RsDoWordBusNumberDescriptor ( - ACPI_PARSE_OBJECT *Op, - UINT32 CurrentByteOffset) -{ - AML_RESOURCE *Descriptor; - ACPI_PARSE_OBJECT *InitializerOp; - ASL_RESOURCE_NODE *Rnode; - UINT8 *OptionalFields; - UINT16 StringLength = 0; - UINT32 OptionIndex = 0; - UINT32 i; - BOOLEAN ResSourceIndex = FALSE; - - - InitializerOp = Op->Asl.Child; - StringLength = RsGetStringDataLength (InitializerOp); - - Rnode = RsAllocateResourceNode ( - sizeof (AML_RESOURCE_ADDRESS16) + 1 + StringLength); - - Descriptor = Rnode->Buffer; - Descriptor->Address16.DescriptorType = ACPI_RESOURCE_NAME_ADDRESS16; - Descriptor->Address16.ResourceType = ACPI_ADDRESS_TYPE_BUS_NUMBER_RANGE; - - /* - * Initial descriptor length -- may be enlarged if there are - * optional fields present - */ - OptionalFields = ((UINT8 *) Descriptor) + sizeof (AML_RESOURCE_ADDRESS16); - Descriptor->Address16.ResourceLength = (UINT16) - (sizeof (AML_RESOURCE_ADDRESS16) - - sizeof (AML_RESOURCE_LARGE_HEADER)); - - /* Process all child initialization nodes */ - - for (i = 0; InitializerOp; i++) - { - switch (i) - { - case 0: /* Resource Usage */ - - RsSetFlagBits 
(&Descriptor->Address16.Flags, InitializerOp, 0, 1); - break; - - case 1: /* MinType */ - - RsSetFlagBits (&Descriptor->Address16.Flags, InitializerOp, 2, 0); - RsCreateBitField (InitializerOp, ACPI_RESTAG_MINTYPE, - CurrentByteOffset + ASL_RESDESC_OFFSET (Address16.Flags), 2); - break; - - case 2: /* MaxType */ - - RsSetFlagBits (&Descriptor->Address16.Flags, InitializerOp, 3, 0); - RsCreateBitField (InitializerOp, ACPI_RESTAG_MAXTYPE, - CurrentByteOffset + ASL_RESDESC_OFFSET (Address16.Flags), 3); - break; - - case 3: /* DecodeType */ - - RsSetFlagBits (&Descriptor->Address16.Flags, InitializerOp, 1, 0); - RsCreateBitField (InitializerOp, ACPI_RESTAG_DECODE, - CurrentByteOffset + ASL_RESDESC_OFFSET (Address16.Flags), 1); - break; - - case 4: /* Address Granularity */ - - Descriptor->Address16.Granularity = - (UINT16) InitializerOp->Asl.Value.Integer; - RsCreateByteField (InitializerOp, ACPI_RESTAG_GRANULARITY, - CurrentByteOffset + ASL_RESDESC_OFFSET (Address16.Granularity)); - break; - - case 5: /* Min Address */ - - Descriptor->Address16.Minimum = - (UINT16) InitializerOp->Asl.Value.Integer; - RsCreateByteField (InitializerOp, ACPI_RESTAG_MINADDR, - CurrentByteOffset + ASL_RESDESC_OFFSET (Address16.Minimum)); - break; - - case 6: /* Max Address */ - - Descriptor->Address16.Maximum = - (UINT16) InitializerOp->Asl.Value.Integer; - RsCreateByteField (InitializerOp, ACPI_RESTAG_MAXADDR, - CurrentByteOffset + ASL_RESDESC_OFFSET (Address16.Maximum)); - break; - - case 7: /* Translation Offset */ - - Descriptor->Address16.TranslationOffset = - (UINT16) InitializerOp->Asl.Value.Integer; - RsCreateByteField (InitializerOp, ACPI_RESTAG_TRANSLATION, - CurrentByteOffset + ASL_RESDESC_OFFSET (Address16.TranslationOffset)); - break; - - case 8: /* Address Length */ - - Descriptor->Address16.AddressLength = - (UINT16) InitializerOp->Asl.Value.Integer; - RsCreateByteField (InitializerOp, ACPI_RESTAG_LENGTH, - CurrentByteOffset + ASL_RESDESC_OFFSET (Address16.AddressLength)); - 
break; - - case 9: /* ResSourceIndex [Optional Field - BYTE] */ - - if (InitializerOp->Asl.ParseOpcode != PARSEOP_DEFAULT_ARG) - { - OptionalFields[0] = (UINT8) InitializerOp->Asl.Value.Integer; - OptionIndex++; - Descriptor->Address16.ResourceLength++; - ResSourceIndex = TRUE; - } - break; - - case 10: /* ResSource [Optional Field - STRING] */ - - if ((InitializerOp->Asl.ParseOpcode != PARSEOP_DEFAULT_ARG) && - (InitializerOp->Asl.Value.String)) - { - if (StringLength) - { - Descriptor->Address16.ResourceLength = (UINT16) - (Descriptor->Address16.ResourceLength + StringLength); - - strcpy ((char *) - &OptionalFields[OptionIndex], - InitializerOp->Asl.Value.String); - - /* ResourceSourceIndex must also be valid */ - - if (!ResSourceIndex) - { - AslError (ASL_ERROR, ASL_MSG_RESOURCE_INDEX, - InitializerOp, NULL); - } - } - } - -#if 0 - /* - * Not a valid ResourceSource, ResourceSourceIndex must also - * be invalid - */ - else if (ResSourceIndex) - { - AslError (ASL_ERROR, ASL_MSG_RESOURCE_SOURCE, - InitializerOp, NULL); - } -#endif - break; - - case 11: /* ResourceTag */ - - UtAttachNamepathToOwner (Op, InitializerOp); - break; - - default: - - AslError (ASL_ERROR, ASL_MSG_RESOURCE_LIST, InitializerOp, NULL); - break; - } - - InitializerOp = RsCompleteNodeAndGetNext (InitializerOp); - } - - Rnode->BufferLength = sizeof (AML_RESOURCE_ADDRESS16) + - OptionIndex + StringLength; - return (Rnode); -} - - -/******************************************************************************* - * - * FUNCTION: RsDoWordSpaceDescriptor - * - * PARAMETERS: Op - Parent resource descriptor parse node - * CurrentByteOffset - Offset into the resource template AML - * buffer (to track references to the desc) - * - * RETURN: Completed resource node - * - * DESCRIPTION: Construct a long "WordSpace" descriptor - * - ******************************************************************************/ - -ASL_RESOURCE_NODE * -RsDoWordSpaceDescriptor ( - ACPI_PARSE_OBJECT *Op, - UINT32 
CurrentByteOffset) -{ - AML_RESOURCE *Descriptor; - ACPI_PARSE_OBJECT *InitializerOp; - ASL_RESOURCE_NODE *Rnode; - UINT8 *OptionalFields; - UINT16 StringLength = 0; - UINT32 OptionIndex = 0; - UINT32 i; - BOOLEAN ResSourceIndex = FALSE; - - - InitializerOp = Op->Asl.Child; - StringLength = RsGetStringDataLength (InitializerOp); - - Rnode = RsAllocateResourceNode ( - sizeof (AML_RESOURCE_ADDRESS16) + 1 + StringLength); - - Descriptor = Rnode->Buffer; - Descriptor->Address16.DescriptorType = ACPI_RESOURCE_NAME_ADDRESS16; - - /* - * Initial descriptor length -- may be enlarged if there are - * optional fields present - */ - OptionalFields = ((UINT8 *) Descriptor) + sizeof (AML_RESOURCE_ADDRESS16); - Descriptor->Address16.ResourceLength = (UINT16) - (sizeof (AML_RESOURCE_ADDRESS16) - - sizeof (AML_RESOURCE_LARGE_HEADER)); - - /* Process all child initialization nodes */ - - for (i = 0; InitializerOp; i++) - { - switch (i) - { - case 0: /* Resource Type */ - - Descriptor->Address16.ResourceType = - (UINT8) InitializerOp->Asl.Value.Integer; - break; - - case 1: /* Resource Usage */ - - RsSetFlagBits (&Descriptor->Address16.Flags, InitializerOp, 0, 1); - break; - - case 2: /* DecodeType */ - - RsSetFlagBits (&Descriptor->Address16.Flags, InitializerOp, 1, 0); - RsCreateBitField (InitializerOp, ACPI_RESTAG_DECODE, - CurrentByteOffset + ASL_RESDESC_OFFSET (Address16.Flags), 1); - break; - - case 3: /* MinType */ - - RsSetFlagBits (&Descriptor->Address16.Flags, InitializerOp, 2, 0); - RsCreateBitField (InitializerOp, ACPI_RESTAG_MINTYPE, - CurrentByteOffset + ASL_RESDESC_OFFSET (Address16.Flags), 2); - break; - - case 4: /* MaxType */ - - RsSetFlagBits (&Descriptor->Address16.Flags, InitializerOp, 3, 0); - RsCreateBitField (InitializerOp, ACPI_RESTAG_MAXTYPE, - CurrentByteOffset + ASL_RESDESC_OFFSET (Address16.Flags), 3); - break; - - case 5: /* Type-Specific flags */ - - Descriptor->Address16.SpecificFlags = - (UINT8) InitializerOp->Asl.Value.Integer; - break; - - case 6: 
/* Address Granularity */ - - Descriptor->Address16.Granularity = - (UINT16) InitializerOp->Asl.Value.Integer; - RsCreateByteField (InitializerOp, ACPI_RESTAG_GRANULARITY, - CurrentByteOffset + ASL_RESDESC_OFFSET (Address16.Granularity)); - break; - - case 7: /* Min Address */ - - Descriptor->Address16.Minimum = - (UINT16) InitializerOp->Asl.Value.Integer; - RsCreateByteField (InitializerOp, ACPI_RESTAG_MINADDR, - CurrentByteOffset + ASL_RESDESC_OFFSET (Address16.Minimum)); - break; - - case 8: /* Max Address */ - - Descriptor->Address16.Maximum = - (UINT16) InitializerOp->Asl.Value.Integer; - RsCreateByteField (InitializerOp, ACPI_RESTAG_MAXADDR, - CurrentByteOffset + ASL_RESDESC_OFFSET (Address16.Maximum)); - break; - - case 9: /* Translation Offset */ - - Descriptor->Address16.TranslationOffset = - (UINT16) InitializerOp->Asl.Value.Integer; - RsCreateByteField (InitializerOp, ACPI_RESTAG_TRANSLATION, - CurrentByteOffset + ASL_RESDESC_OFFSET (Address16.TranslationOffset)); - break; - - case 10: /* Address Length */ - - Descriptor->Address16.AddressLength = - (UINT16) InitializerOp->Asl.Value.Integer; - RsCreateByteField (InitializerOp, ACPI_RESTAG_LENGTH, - CurrentByteOffset + ASL_RESDESC_OFFSET (Address16.AddressLength)); - break; - - case 11: /* ResSourceIndex [Optional Field - BYTE] */ - - if (InitializerOp->Asl.ParseOpcode != PARSEOP_DEFAULT_ARG) - { - OptionalFields[0] = (UINT8) InitializerOp->Asl.Value.Integer; - OptionIndex++; - Descriptor->Address16.ResourceLength++; - ResSourceIndex = TRUE; - } - break; - - case 12: /* ResSource [Optional Field - STRING] */ - - if ((InitializerOp->Asl.ParseOpcode != PARSEOP_DEFAULT_ARG) && - (InitializerOp->Asl.Value.String)) - { - if (StringLength) - { - Descriptor->Address16.ResourceLength = (UINT16) - (Descriptor->Address16.ResourceLength + StringLength); - - strcpy ((char *) - &OptionalFields[OptionIndex], - InitializerOp->Asl.Value.String); - - /* ResourceSourceIndex must also be valid */ - - if (!ResSourceIndex) - 
{ - AslError (ASL_ERROR, ASL_MSG_RESOURCE_INDEX, - InitializerOp, NULL); - } - } - } - -#if 0 - /* - * Not a valid ResourceSource, ResourceSourceIndex must also - * be invalid - */ - else if (ResSourceIndex) - { - AslError (ASL_ERROR, ASL_MSG_RESOURCE_SOURCE, - InitializerOp, NULL); - } -#endif - break; - - case 13: /* ResourceTag */ - - UtAttachNamepathToOwner (Op, InitializerOp); - break; - - default: - - AslError (ASL_ERROR, ASL_MSG_RESOURCE_LIST, InitializerOp, NULL); - break; - } - - InitializerOp = RsCompleteNodeAndGetNext (InitializerOp); - } - - Rnode->BufferLength = sizeof (AML_RESOURCE_ADDRESS16) + - OptionIndex + StringLength; return (Rnode); } @@ -2760,95 +532,3 @@ RsDoVendorLargeDescriptor ( return (Rnode); } - - -/******************************************************************************* - * - * FUNCTION: RsDoGeneralRegisterDescriptor - * - * PARAMETERS: Op - Parent resource descriptor parse node - * CurrentByteOffset - Offset into the resource template AML - * buffer (to track references to the desc) - * - * RETURN: Completed resource node - * - * DESCRIPTION: Construct a long "Register" descriptor - * - ******************************************************************************/ - -ASL_RESOURCE_NODE * -RsDoGeneralRegisterDescriptor ( - ACPI_PARSE_OBJECT *Op, - UINT32 CurrentByteOffset) -{ - AML_RESOURCE *Descriptor; - ACPI_PARSE_OBJECT *InitializerOp; - ASL_RESOURCE_NODE *Rnode; - UINT32 i; - - - InitializerOp = Op->Asl.Child; - Rnode = RsAllocateResourceNode (sizeof (AML_RESOURCE_GENERIC_REGISTER)); - - Descriptor = Rnode->Buffer; - Descriptor->GenericReg.DescriptorType = ACPI_RESOURCE_NAME_GENERIC_REGISTER; - Descriptor->GenericReg.ResourceLength = 12; - - /* Process all child initialization nodes */ - - for (i = 0; InitializerOp; i++) - { - switch (i) - { - case 0: /* Address space */ - - Descriptor->GenericReg.AddressSpaceId = (UINT8) InitializerOp->Asl.Value.Integer; - RsCreateByteField (InitializerOp, ACPI_RESTAG_ADDRESSSPACE, - 
CurrentByteOffset + ASL_RESDESC_OFFSET (GenericReg.AddressSpaceId)); - break; - - case 1: /* Register Bit Width */ - - Descriptor->GenericReg.BitWidth = (UINT8) InitializerOp->Asl.Value.Integer; - RsCreateByteField (InitializerOp, ACPI_RESTAG_REGISTERBITWIDTH, - CurrentByteOffset + ASL_RESDESC_OFFSET (GenericReg.BitWidth)); - break; - - case 2: /* Register Bit Offset */ - - Descriptor->GenericReg.BitOffset = (UINT8) InitializerOp->Asl.Value.Integer; - RsCreateByteField (InitializerOp, ACPI_RESTAG_REGISTERBITOFFSET, - CurrentByteOffset + ASL_RESDESC_OFFSET (GenericReg.BitOffset)); - break; - - case 3: /* Register Address */ - - Descriptor->GenericReg.Address = InitializerOp->Asl.Value.Integer; - RsCreateByteField (InitializerOp, ACPI_RESTAG_ADDRESS, - CurrentByteOffset + ASL_RESDESC_OFFSET (GenericReg.Address)); - break; - - case 4: /* Access Size (ACPI 3.0) */ - - Descriptor->GenericReg.AccessSize = (UINT8) InitializerOp->Asl.Value.Integer; - RsCreateByteField (InitializerOp, ACPI_RESTAG_ACCESSSIZE, - CurrentByteOffset + ASL_RESDESC_OFFSET (GenericReg.AccessSize)); - break; - - case 5: /* ResourceTag (ACPI 3.0b) */ - - UtAttachNamepathToOwner (Op, InitializerOp); - break; - - default: - - AslError (ASL_ERROR, ASL_MSG_RESOURCE_LIST, InitializerOp, NULL); - break; - } - - InitializerOp = RsCompleteNodeAndGetNext (InitializerOp); - } - return (Rnode); -} - - diff --git a/sys/contrib/dev/acpica/compiler/aslrestype2d.c b/sys/contrib/dev/acpica/compiler/aslrestype2d.c new file mode 100644 index 00000000000..db0a2a7b85d --- /dev/null +++ b/sys/contrib/dev/acpica/compiler/aslrestype2d.c @@ -0,0 +1,814 @@ + +/****************************************************************************** + * + * Module Name: aslrestype2d - Large DWord address resource descriptors + * + *****************************************************************************/ + +/****************************************************************************** + * + * 1. 
Copyright Notice + * + * Some or all of this work - Copyright (c) 1999 - 2010, Intel Corp. + * All rights reserved. + * + * 2. License + * + * 2.1. This is your license from Intel Corp. under its intellectual property + * rights. You may have additional license terms from the party that provided + * you this software, covering your right to use that party's intellectual + * property rights. + * + * 2.2. Intel grants, free of charge, to any person ("Licensee") obtaining a + * copy of the source code appearing in this file ("Covered Code") an + * irrevocable, perpetual, worldwide license under Intel's copyrights in the + * base code distributed originally by Intel ("Original Intel Code") to copy, + * make derivatives, distribute, use and display any portion of the Covered + * Code in any form, with the right to sublicense such rights; and + * + * 2.3. Intel grants Licensee a non-exclusive and non-transferable patent + * license (with the right to sublicense), under only those claims of Intel + * patents that are infringed by the Original Intel Code, to make, use, sell, + * offer to sell, and import the Covered Code and derivative works thereof + * solely to the minimum extent necessary to exercise the above copyright + * license, and in no event shall the patent license extend to any additions + * to or modifications of the Original Intel Code. No other license or right + * is granted directly or by implication, estoppel or otherwise; + * + * The above copyright and patent license is granted only if the following + * conditions are met: + * + * 3. Conditions + * + * 3.1. Redistribution of Source with Rights to Further Distribute Source. + * Redistribution of source code of any substantial portion of the Covered + * Code or modification with rights to further distribute source must include + * the above Copyright Notice, the above License, this list of Conditions, + * and the following Disclaimer and Export Compliance provision. 
In addition, + * Licensee must cause all Covered Code to which Licensee contributes to + * contain a file documenting the changes Licensee made to create that Covered + * Code and the date of any change. Licensee must include in that file the + * documentation of any changes made by any predecessor Licensee. Licensee + * must include a prominent statement that the modification is derived, + * directly or indirectly, from Original Intel Code. + * + * 3.2. Redistribution of Source with no Rights to Further Distribute Source. + * Redistribution of source code of any substantial portion of the Covered + * Code or modification without rights to further distribute source must + * include the following Disclaimer and Export Compliance provision in the + * documentation and/or other materials provided with distribution. In + * addition, Licensee may not authorize further sublicense of source of any + * portion of the Covered Code, and must include terms to the effect that the + * license from Licensee to its licensee is limited to the intellectual + * property embodied in the software Licensee provides to its licensee, and + * not to intellectual property embodied in modifications its licensee may + * make. + * + * 3.3. Redistribution of Executable. Redistribution in executable form of any + * substantial portion of the Covered Code or modification must reproduce the + * above Copyright Notice, and the following Disclaimer and Export Compliance + * provision in the documentation and/or other materials provided with the + * distribution. + * + * 3.4. Intel retains all right, title, and interest in and to the Original + * Intel Code. + * + * 3.5. Neither the name Intel nor any other trademark owned or controlled by + * Intel shall be used in advertising or otherwise to promote the sale, use or + * other dealings in products derived from or relating to the Covered Code + * without prior written authorization from Intel. + * + * 4. Disclaimer and Export Compliance + * + * 4.1. 
INTEL MAKES NO WARRANTY OF ANY KIND REGARDING ANY SOFTWARE PROVIDED + * HERE. ANY SOFTWARE ORIGINATING FROM INTEL OR DERIVED FROM INTEL SOFTWARE + * IS PROVIDED "AS IS," AND INTEL WILL NOT PROVIDE ANY SUPPORT, ASSISTANCE, + * INSTALLATION, TRAINING OR OTHER SERVICES. INTEL WILL NOT PROVIDE ANY + * UPDATES, ENHANCEMENTS OR EXTENSIONS. INTEL SPECIFICALLY DISCLAIMS ANY + * IMPLIED WARRANTIES OF MERCHANTABILITY, NONINFRINGEMENT AND FITNESS FOR A + * PARTICULAR PURPOSE. + * + * 4.2. IN NO EVENT SHALL INTEL HAVE ANY LIABILITY TO LICENSEE, ITS LICENSEES + * OR ANY OTHER THIRD PARTY, FOR ANY LOST PROFITS, LOST DATA, LOSS OF USE OR + * COSTS OF PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES, OR FOR ANY INDIRECT, + * SPECIAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THIS AGREEMENT, UNDER ANY + * CAUSE OF ACTION OR THEORY OF LIABILITY, AND IRRESPECTIVE OF WHETHER INTEL + * HAS ADVANCE NOTICE OF THE POSSIBILITY OF SUCH DAMAGES. THESE LIMITATIONS + * SHALL APPLY NOTWITHSTANDING THE FAILURE OF THE ESSENTIAL PURPOSE OF ANY + * LIMITED REMEDY. + * + * 4.3. Licensee shall not export, either directly or indirectly, any of this + * software or system incorporating such software without first obtaining any + * required license or other approval from the U. S. Department of Commerce or + * any other agency or department of the United States Government. In the + * event Licensee exports any such software from the United States or + * re-exports any such software from a foreign destination, Licensee shall + * ensure that the distribution and export/re-export of the software is in + * compliance with all laws, regulations, orders, or other restrictions of the + * U.S. Export Administration Regulations. 
Licensee agrees that neither it nor + * any of its subsidiaries will export/re-export any technical data, process, + * software, or service, directly or indirectly, to any country for which the + * United States government or any agency thereof requires an export license, + * other governmental approval, or letter of assurance, without first obtaining + * such license, approval or letter. + * + *****************************************************************************/ + + +#include +#include "aslcompiler.y.h" + +#define _COMPONENT ACPI_COMPILER + ACPI_MODULE_NAME ("aslrestype2d") + +/* + * This module contains the Dword (32-bit) address space descriptors: + * + * DwordIO + * DwordMemory + * DwordSpace + */ + +/******************************************************************************* + * + * FUNCTION: RsDoDwordIoDescriptor + * + * PARAMETERS: Op - Parent resource descriptor parse node + * CurrentByteOffset - Offset into the resource template AML + * buffer (to track references to the desc) + * + * RETURN: Completed resource node + * + * DESCRIPTION: Construct a long "DwordIO" descriptor + * + ******************************************************************************/ + +ASL_RESOURCE_NODE * +RsDoDwordIoDescriptor ( + ACPI_PARSE_OBJECT *Op, + UINT32 CurrentByteOffset) +{ + AML_RESOURCE *Descriptor; + ACPI_PARSE_OBJECT *InitializerOp; + ACPI_PARSE_OBJECT *MinOp = NULL; + ACPI_PARSE_OBJECT *MaxOp = NULL; + ACPI_PARSE_OBJECT *LengthOp = NULL; + ACPI_PARSE_OBJECT *GranOp = NULL; + ASL_RESOURCE_NODE *Rnode; + UINT16 StringLength = 0; + UINT32 OptionIndex = 0; + UINT8 *OptionalFields; + UINT32 i; + BOOLEAN ResSourceIndex = FALSE; + + + InitializerOp = Op->Asl.Child; + StringLength = RsGetStringDataLength (InitializerOp); + + Rnode = RsAllocateResourceNode ( + sizeof (AML_RESOURCE_ADDRESS32) + 1 + StringLength); + + Descriptor = Rnode->Buffer; + Descriptor->Address32.DescriptorType = ACPI_RESOURCE_NAME_ADDRESS32; + Descriptor->Address32.ResourceType = 
ACPI_ADDRESS_TYPE_IO_RANGE; + + /* + * Initial descriptor length -- may be enlarged if there are + * optional fields present + */ + OptionalFields = ((UINT8 *) Descriptor) + sizeof (AML_RESOURCE_ADDRESS32); + Descriptor->Address32.ResourceLength = (UINT16) + (sizeof (AML_RESOURCE_ADDRESS32) - + sizeof (AML_RESOURCE_LARGE_HEADER)); + + /* Process all child initialization nodes */ + + for (i = 0; InitializerOp; i++) + { + switch (i) + { + case 0: /* Resource Usage */ + + RsSetFlagBits (&Descriptor->Address32.Flags, InitializerOp, 0, 1); + break; + + case 1: /* MinType */ + + RsSetFlagBits (&Descriptor->Address32.Flags, InitializerOp, 2, 0); + RsCreateBitField (InitializerOp, ACPI_RESTAG_MINTYPE, + CurrentByteOffset + ASL_RESDESC_OFFSET (Address32.Flags), 2); + break; + + case 2: /* MaxType */ + + RsSetFlagBits (&Descriptor->Address32.Flags, InitializerOp, 3, 0); + RsCreateBitField (InitializerOp, ACPI_RESTAG_MAXTYPE, + CurrentByteOffset + ASL_RESDESC_OFFSET (Address32.Flags), 3); + break; + + case 3: /* DecodeType */ + + RsSetFlagBits (&Descriptor->Address32.Flags, InitializerOp, 1, 0); + RsCreateBitField (InitializerOp, ACPI_RESTAG_DECODE, + CurrentByteOffset + ASL_RESDESC_OFFSET (Address32.Flags), 1); + break; + + case 4: /* Range Type */ + + RsSetFlagBits (&Descriptor->Address32.SpecificFlags, InitializerOp, 0, 3); + RsCreateBitField (InitializerOp, ACPI_RESTAG_RANGETYPE, + CurrentByteOffset + ASL_RESDESC_OFFSET (Address32.SpecificFlags), 0); + break; + + case 5: /* Address Granularity */ + + Descriptor->Address32.Granularity = + (UINT32) InitializerOp->Asl.Value.Integer; + RsCreateByteField (InitializerOp, ACPI_RESTAG_GRANULARITY, + CurrentByteOffset + ASL_RESDESC_OFFSET (Address32.Granularity)); + GranOp = InitializerOp; + break; + + case 6: /* Address Min */ + + Descriptor->Address32.Minimum = + (UINT32) InitializerOp->Asl.Value.Integer; + RsCreateByteField (InitializerOp, ACPI_RESTAG_MINADDR, + CurrentByteOffset + ASL_RESDESC_OFFSET (Address32.Minimum)); + 
MinOp = InitializerOp; + break; + + case 7: /* Address Max */ + + Descriptor->Address32.Maximum = + (UINT32) InitializerOp->Asl.Value.Integer; + RsCreateByteField (InitializerOp, ACPI_RESTAG_MAXADDR, + CurrentByteOffset + ASL_RESDESC_OFFSET (Address32.Maximum)); + MaxOp = InitializerOp; + break; + + case 8: /* Translation Offset */ + + Descriptor->Address32.TranslationOffset = + (UINT32) InitializerOp->Asl.Value.Integer; + RsCreateByteField (InitializerOp, ACPI_RESTAG_TRANSLATION, + CurrentByteOffset + ASL_RESDESC_OFFSET (Address32.TranslationOffset)); + break; + + case 9: /* Address Length */ + + Descriptor->Address32.AddressLength = + (UINT32) InitializerOp->Asl.Value.Integer; + RsCreateByteField (InitializerOp, ACPI_RESTAG_LENGTH, + CurrentByteOffset + ASL_RESDESC_OFFSET (Address32.AddressLength)); + LengthOp = InitializerOp; + break; + + case 10: /* ResSourceIndex [Optional Field - BYTE] */ + + if (InitializerOp->Asl.ParseOpcode != PARSEOP_DEFAULT_ARG) + { + /* Found a valid ResourceSourceIndex */ + + OptionalFields[0] = (UINT8) InitializerOp->Asl.Value.Integer; + OptionIndex++; + Descriptor->Address32.ResourceLength++; + ResSourceIndex = TRUE; + } + break; + + case 11: /* ResSource [Optional Field - STRING] */ + + if ((InitializerOp->Asl.ParseOpcode != PARSEOP_DEFAULT_ARG) && + (InitializerOp->Asl.Value.String)) + { + if (StringLength) + { + /* Found a valid ResourceSource */ + + Descriptor->Address32.ResourceLength = (UINT16) + (Descriptor->Address32.ResourceLength + StringLength); + + strcpy ((char *) + &OptionalFields[OptionIndex], + InitializerOp->Asl.Value.String); + + /* ResourceSourceIndex must also be valid */ + + if (!ResSourceIndex) + { + AslError (ASL_ERROR, ASL_MSG_RESOURCE_INDEX, + InitializerOp, NULL); + } + } + } + +#if 0 + /* + * Not a valid ResourceSource, ResourceSourceIndex must also + * be invalid + */ + else if (ResSourceIndex) + { + AslError (ASL_ERROR, ASL_MSG_RESOURCE_SOURCE, + InitializerOp, NULL); + } +#endif + break; + + case 12: /* 
ResourceTag */ + + UtAttachNamepathToOwner (Op, InitializerOp); + break; + + case 13: /* Type */ + + RsSetFlagBits (&Descriptor->Address32.SpecificFlags, InitializerOp, 4, 0); + RsCreateBitField (InitializerOp, ACPI_RESTAG_TYPE, + CurrentByteOffset + ASL_RESDESC_OFFSET (Address32.SpecificFlags), 4); + break; + + case 14: /* Translation Type */ + + RsSetFlagBits (&Descriptor->Address32.SpecificFlags, InitializerOp, 5, 0); + RsCreateBitField (InitializerOp, ACPI_RESTAG_TRANSTYPE, + CurrentByteOffset + ASL_RESDESC_OFFSET (Address32.SpecificFlags), 5); + break; + + default: + + AslError (ASL_ERROR, ASL_MSG_RESOURCE_LIST, InitializerOp, NULL); + break; + } + + InitializerOp = RsCompleteNodeAndGetNext (InitializerOp); + } + + /* Validate the Min/Max/Len/Gran values */ + + RsLargeAddressCheck ( + Descriptor->Address32.Minimum, + Descriptor->Address32.Maximum, + Descriptor->Address32.AddressLength, + Descriptor->Address32.Granularity, + Descriptor->Address32.Flags, + MinOp, MaxOp, LengthOp, GranOp); + + Rnode->BufferLength = sizeof (AML_RESOURCE_ADDRESS32) + + OptionIndex + StringLength; + return (Rnode); +} + + +/******************************************************************************* + * + * FUNCTION: RsDoDwordMemoryDescriptor + * + * PARAMETERS: Op - Parent resource descriptor parse node + * CurrentByteOffset - Offset into the resource template AML + * buffer (to track references to the desc) + * + * RETURN: Completed resource node + * + * DESCRIPTION: Construct a long "DwordMemory" descriptor + * + ******************************************************************************/ + +ASL_RESOURCE_NODE * +RsDoDwordMemoryDescriptor ( + ACPI_PARSE_OBJECT *Op, + UINT32 CurrentByteOffset) +{ + AML_RESOURCE *Descriptor; + ACPI_PARSE_OBJECT *InitializerOp; + ACPI_PARSE_OBJECT *MinOp = NULL; + ACPI_PARSE_OBJECT *MaxOp = NULL; + ACPI_PARSE_OBJECT *LengthOp = NULL; + ACPI_PARSE_OBJECT *GranOp = NULL; + ASL_RESOURCE_NODE *Rnode; + UINT8 *OptionalFields; + UINT16 StringLength = 
0; + UINT32 OptionIndex = 0; + UINT32 i; + BOOLEAN ResSourceIndex = FALSE; + + + InitializerOp = Op->Asl.Child; + StringLength = RsGetStringDataLength (InitializerOp); + + Rnode = RsAllocateResourceNode ( + sizeof (AML_RESOURCE_ADDRESS32) + 1 + StringLength); + + Descriptor = Rnode->Buffer; + Descriptor->Address32.DescriptorType = ACPI_RESOURCE_NAME_ADDRESS32; + Descriptor->Address32.ResourceType = ACPI_ADDRESS_TYPE_MEMORY_RANGE; + + /* + * Initial descriptor length -- may be enlarged if there are + * optional fields present + */ + OptionalFields = ((UINT8 *) Descriptor) + sizeof (AML_RESOURCE_ADDRESS32); + Descriptor->Address32.ResourceLength = (UINT16) + (sizeof (AML_RESOURCE_ADDRESS32) - + sizeof (AML_RESOURCE_LARGE_HEADER)); + + + /* Process all child initialization nodes */ + + for (i = 0; InitializerOp; i++) + { + switch (i) + { + case 0: /* Resource Usage */ + + RsSetFlagBits (&Descriptor->Address32.Flags, InitializerOp, 0, 1); + break; + + case 1: /* DecodeType */ + + RsSetFlagBits (&Descriptor->Address32.Flags, InitializerOp, 1, 0); + RsCreateBitField (InitializerOp, ACPI_RESTAG_DECODE, + CurrentByteOffset + ASL_RESDESC_OFFSET (Address32.Flags), 1); + break; + + case 2: /* MinType */ + + RsSetFlagBits (&Descriptor->Address32.Flags, InitializerOp, 2, 0); + RsCreateBitField (InitializerOp, ACPI_RESTAG_MINTYPE, + CurrentByteOffset + ASL_RESDESC_OFFSET (Address32.Flags), 2); + break; + + case 3: /* MaxType */ + + RsSetFlagBits (&Descriptor->Address32.Flags, InitializerOp, 3, 0); + RsCreateBitField (InitializerOp, ACPI_RESTAG_MAXTYPE, + CurrentByteOffset + ASL_RESDESC_OFFSET (Address32.Flags), 3); + break; + + case 4: /* Memory Type */ + + RsSetFlagBits (&Descriptor->Address32.SpecificFlags, InitializerOp, 1, 0); + RsCreateBitField (InitializerOp, ACPI_RESTAG_MEMTYPE, + CurrentByteOffset + ASL_RESDESC_OFFSET (Address32.SpecificFlags), 1); + break; + + case 5: /* Read/Write Type */ + + RsSetFlagBits (&Descriptor->Address32.SpecificFlags, InitializerOp, 0, 1); + 
RsCreateBitField (InitializerOp, ACPI_RESTAG_READWRITETYPE, + CurrentByteOffset + ASL_RESDESC_OFFSET (Address32.SpecificFlags), 0); + break; + + case 6: /* Address Granularity */ + + Descriptor->Address32.Granularity = + (UINT32) InitializerOp->Asl.Value.Integer; + RsCreateByteField (InitializerOp, ACPI_RESTAG_GRANULARITY, + CurrentByteOffset + ASL_RESDESC_OFFSET (Address32.Granularity)); + GranOp = InitializerOp; + break; + + case 7: /* Min Address */ + + Descriptor->Address32.Minimum = + (UINT32) InitializerOp->Asl.Value.Integer; + RsCreateByteField (InitializerOp, ACPI_RESTAG_MINADDR, + CurrentByteOffset + ASL_RESDESC_OFFSET (Address32.Minimum)); + MinOp = InitializerOp; + break; + + case 8: /* Max Address */ + + Descriptor->Address32.Maximum = + (UINT32) InitializerOp->Asl.Value.Integer; + RsCreateByteField (InitializerOp, ACPI_RESTAG_MAXADDR, + CurrentByteOffset + ASL_RESDESC_OFFSET (Address32.Maximum)); + MaxOp = InitializerOp; + break; + + case 9: /* Translation Offset */ + + Descriptor->Address32.TranslationOffset = + (UINT32) InitializerOp->Asl.Value.Integer; + RsCreateByteField (InitializerOp, ACPI_RESTAG_TRANSLATION, + CurrentByteOffset + ASL_RESDESC_OFFSET (Address32.TranslationOffset)); + break; + + case 10: /* Address Length */ + + Descriptor->Address32.AddressLength = + (UINT32) InitializerOp->Asl.Value.Integer; + RsCreateByteField (InitializerOp, ACPI_RESTAG_LENGTH, + CurrentByteOffset + ASL_RESDESC_OFFSET (Address32.AddressLength)); + LengthOp = InitializerOp; + break; + + case 11: /* ResSourceIndex [Optional Field - BYTE] */ + + if (InitializerOp->Asl.ParseOpcode != PARSEOP_DEFAULT_ARG) + { + OptionalFields[0] = (UINT8) InitializerOp->Asl.Value.Integer; + OptionIndex++; + Descriptor->Address32.ResourceLength++; + ResSourceIndex = TRUE; + } + break; + + case 12: /* ResSource [Optional Field - STRING] */ + + if ((InitializerOp->Asl.ParseOpcode != PARSEOP_DEFAULT_ARG) && + (InitializerOp->Asl.Value.String)) + { + if (StringLength) + { + 
Descriptor->Address32.ResourceLength = (UINT16) + (Descriptor->Address32.ResourceLength + StringLength); + + strcpy ((char *) + &OptionalFields[OptionIndex], + InitializerOp->Asl.Value.String); + + /* ResourceSourceIndex must also be valid */ + + if (!ResSourceIndex) + { + AslError (ASL_ERROR, ASL_MSG_RESOURCE_INDEX, + InitializerOp, NULL); + } + } + } + +#if 0 + /* + * Not a valid ResourceSource, ResourceSourceIndex must also + * be invalid + */ + else if (ResSourceIndex) + { + AslError (ASL_ERROR, ASL_MSG_RESOURCE_SOURCE, + InitializerOp, NULL); + } +#endif + break; + + case 13: /* ResourceTag */ + + UtAttachNamepathToOwner (Op, InitializerOp); + break; + + + case 14: /* Address Range */ + + RsSetFlagBits (&Descriptor->Address32.SpecificFlags, InitializerOp, 3, 0); + RsCreateBitField (InitializerOp, ACPI_RESTAG_MEMATTRIBUTES, + CurrentByteOffset + ASL_RESDESC_OFFSET (Address32.SpecificFlags), 3); + break; + + case 15: /* Type */ + + RsSetFlagBits (&Descriptor->Address32.SpecificFlags, InitializerOp, 5, 0); + RsCreateBitField (InitializerOp, ACPI_RESTAG_TYPE, + CurrentByteOffset + ASL_RESDESC_OFFSET (Address32.SpecificFlags), 5); + break; + + default: + + AslError (ASL_ERROR, ASL_MSG_RESOURCE_LIST, InitializerOp, NULL); + break; + } + + InitializerOp = RsCompleteNodeAndGetNext (InitializerOp); + } + + /* Validate the Min/Max/Len/Gran values */ + + RsLargeAddressCheck ( + Descriptor->Address32.Minimum, + Descriptor->Address32.Maximum, + Descriptor->Address32.AddressLength, + Descriptor->Address32.Granularity, + Descriptor->Address32.Flags, + MinOp, MaxOp, LengthOp, GranOp); + + Rnode->BufferLength = sizeof (AML_RESOURCE_ADDRESS32) + + OptionIndex + StringLength; + return (Rnode); +} + + +/******************************************************************************* + * + * FUNCTION: RsDoDwordSpaceDescriptor + * + * PARAMETERS: Op - Parent resource descriptor parse node + * CurrentByteOffset - Offset into the resource template AML + * buffer (to track references 
to the desc) + * + * RETURN: Completed resource node + * + * DESCRIPTION: Construct a long "DwordSpace" descriptor + * + ******************************************************************************/ + +ASL_RESOURCE_NODE * +RsDoDwordSpaceDescriptor ( + ACPI_PARSE_OBJECT *Op, + UINT32 CurrentByteOffset) +{ + AML_RESOURCE *Descriptor; + ACPI_PARSE_OBJECT *InitializerOp; + ACPI_PARSE_OBJECT *MinOp = NULL; + ACPI_PARSE_OBJECT *MaxOp = NULL; + ACPI_PARSE_OBJECT *LengthOp = NULL; + ACPI_PARSE_OBJECT *GranOp = NULL; + ASL_RESOURCE_NODE *Rnode; + UINT8 *OptionalFields; + UINT16 StringLength = 0; + UINT32 OptionIndex = 0; + UINT32 i; + BOOLEAN ResSourceIndex = FALSE; + + + InitializerOp = Op->Asl.Child; + StringLength = RsGetStringDataLength (InitializerOp); + + Rnode = RsAllocateResourceNode ( + sizeof (AML_RESOURCE_ADDRESS32) + 1 + StringLength); + + Descriptor = Rnode->Buffer; + Descriptor->Address32.DescriptorType = ACPI_RESOURCE_NAME_ADDRESS32; + + /* + * Initial descriptor length -- may be enlarged if there are + * optional fields present + */ + OptionalFields = ((UINT8 *) Descriptor) + sizeof (AML_RESOURCE_ADDRESS32); + Descriptor->Address32.ResourceLength = (UINT16) + (sizeof (AML_RESOURCE_ADDRESS32) - + sizeof (AML_RESOURCE_LARGE_HEADER)); + + /* Process all child initialization nodes */ + + for (i = 0; InitializerOp; i++) + { + switch (i) + { + case 0: /* Resource Type */ + + Descriptor->Address32.ResourceType = + (UINT8) InitializerOp->Asl.Value.Integer; + break; + + case 1: /* Resource Usage */ + + RsSetFlagBits (&Descriptor->Address32.Flags, InitializerOp, 0, 1); + break; + + case 2: /* DecodeType */ + + RsSetFlagBits (&Descriptor->Address32.Flags, InitializerOp, 1, 0); + RsCreateBitField (InitializerOp, ACPI_RESTAG_DECODE, + CurrentByteOffset + ASL_RESDESC_OFFSET (Address32.Flags), 1); + break; + + case 3: /* MinType */ + + RsSetFlagBits (&Descriptor->Address32.Flags, InitializerOp, 2, 0); + RsCreateBitField (InitializerOp, ACPI_RESTAG_MINTYPE, + 
CurrentByteOffset + ASL_RESDESC_OFFSET (Address32.Flags), 2); + break; + + case 4: /* MaxType */ + + RsSetFlagBits (&Descriptor->Address32.Flags, InitializerOp, 3, 0); + RsCreateBitField (InitializerOp, ACPI_RESTAG_MAXTYPE, + CurrentByteOffset + ASL_RESDESC_OFFSET (Address32.Flags), 3); + break; + + case 5: /* Type-Specific flags */ + + Descriptor->Address32.SpecificFlags = + (UINT8) InitializerOp->Asl.Value.Integer; + break; + + case 6: /* Address Granularity */ + + Descriptor->Address32.Granularity = + (UINT32) InitializerOp->Asl.Value.Integer; + RsCreateByteField (InitializerOp, ACPI_RESTAG_GRANULARITY, + CurrentByteOffset + ASL_RESDESC_OFFSET (Address32.Granularity)); + GranOp = InitializerOp; + break; + + case 7: /* Min Address */ + + Descriptor->Address32.Minimum = + (UINT32) InitializerOp->Asl.Value.Integer; + RsCreateByteField (InitializerOp, ACPI_RESTAG_MINADDR, + CurrentByteOffset + ASL_RESDESC_OFFSET (Address32.Minimum)); + MinOp = InitializerOp; + break; + + case 8: /* Max Address */ + + Descriptor->Address32.Maximum = + (UINT32) InitializerOp->Asl.Value.Integer; + RsCreateByteField (InitializerOp, ACPI_RESTAG_MAXADDR, + CurrentByteOffset + ASL_RESDESC_OFFSET (Address32.Maximum)); + MaxOp = InitializerOp; + break; + + case 9: /* Translation Offset */ + + Descriptor->Address32.TranslationOffset = + (UINT32) InitializerOp->Asl.Value.Integer; + RsCreateByteField (InitializerOp, ACPI_RESTAG_TRANSLATION, + CurrentByteOffset + ASL_RESDESC_OFFSET (Address32.TranslationOffset)); + break; + + case 10: /* Address Length */ + + Descriptor->Address32.AddressLength = + (UINT32) InitializerOp->Asl.Value.Integer; + RsCreateByteField (InitializerOp, ACPI_RESTAG_LENGTH, + CurrentByteOffset + ASL_RESDESC_OFFSET (Address32.AddressLength)); + LengthOp = InitializerOp; + break; + + case 11: /* ResSourceIndex [Optional Field - BYTE] */ + + if (InitializerOp->Asl.ParseOpcode != PARSEOP_DEFAULT_ARG) + { + OptionalFields[0] = (UINT8) InitializerOp->Asl.Value.Integer; + 
OptionIndex++; + Descriptor->Address32.ResourceLength++; + ResSourceIndex = TRUE; + } + break; + + case 12: /* ResSource [Optional Field - STRING] */ + + if ((InitializerOp->Asl.ParseOpcode != PARSEOP_DEFAULT_ARG) && + (InitializerOp->Asl.Value.String)) + { + if (StringLength) + { + Descriptor->Address32.ResourceLength = (UINT16) + (Descriptor->Address32.ResourceLength + StringLength); + + strcpy ((char *) + &OptionalFields[OptionIndex], + InitializerOp->Asl.Value.String); + + /* ResourceSourceIndex must also be valid */ + + if (!ResSourceIndex) + { + AslError (ASL_ERROR, ASL_MSG_RESOURCE_INDEX, + InitializerOp, NULL); + } + } + } + +#if 0 + /* + * Not a valid ResourceSource, ResourceSourceIndex must also + * be invalid + */ + else if (ResSourceIndex) + { + AslError (ASL_ERROR, ASL_MSG_RESOURCE_SOURCE, + InitializerOp, NULL); + } +#endif + break; + + case 13: /* ResourceTag */ + + UtAttachNamepathToOwner (Op, InitializerOp); + break; + + default: + + AslError (ASL_ERROR, ASL_MSG_RESOURCE_LIST, + InitializerOp, NULL); + break; + } + + InitializerOp = RsCompleteNodeAndGetNext (InitializerOp); + } + + /* Validate the Min/Max/Len/Gran values */ + + RsLargeAddressCheck ( + Descriptor->Address32.Minimum, + Descriptor->Address32.Maximum, + Descriptor->Address32.AddressLength, + Descriptor->Address32.Granularity, + Descriptor->Address32.Flags, + MinOp, MaxOp, LengthOp, GranOp); + + Rnode->BufferLength = sizeof (AML_RESOURCE_ADDRESS32) + + OptionIndex + StringLength; + return (Rnode); +} diff --git a/sys/contrib/dev/acpica/compiler/aslrestype2e.c b/sys/contrib/dev/acpica/compiler/aslrestype2e.c new file mode 100644 index 00000000000..b75d174fac1 --- /dev/null +++ b/sys/contrib/dev/acpica/compiler/aslrestype2e.c @@ -0,0 +1,646 @@ + +/****************************************************************************** + * + * Module Name: aslrestype2e - Large Extended address resource descriptors + * + *****************************************************************************/ + 
+/****************************************************************************** + * + * 1. Copyright Notice + * + * Some or all of this work - Copyright (c) 1999 - 2010, Intel Corp. + * All rights reserved. + * + * 2. License + * + * 2.1. This is your license from Intel Corp. under its intellectual property + * rights. You may have additional license terms from the party that provided + * you this software, covering your right to use that party's intellectual + * property rights. + * + * 2.2. Intel grants, free of charge, to any person ("Licensee") obtaining a + * copy of the source code appearing in this file ("Covered Code") an + * irrevocable, perpetual, worldwide license under Intel's copyrights in the + * base code distributed originally by Intel ("Original Intel Code") to copy, + * make derivatives, distribute, use and display any portion of the Covered + * Code in any form, with the right to sublicense such rights; and + * + * 2.3. Intel grants Licensee a non-exclusive and non-transferable patent + * license (with the right to sublicense), under only those claims of Intel + * patents that are infringed by the Original Intel Code, to make, use, sell, + * offer to sell, and import the Covered Code and derivative works thereof + * solely to the minimum extent necessary to exercise the above copyright + * license, and in no event shall the patent license extend to any additions + * to or modifications of the Original Intel Code. No other license or right + * is granted directly or by implication, estoppel or otherwise; + * + * The above copyright and patent license is granted only if the following + * conditions are met: + * + * 3. Conditions + * + * 3.1. Redistribution of Source with Rights to Further Distribute Source. 
+ * Redistribution of source code of any substantial portion of the Covered + * Code or modification with rights to further distribute source must include + * the above Copyright Notice, the above License, this list of Conditions, + * and the following Disclaimer and Export Compliance provision. In addition, + * Licensee must cause all Covered Code to which Licensee contributes to + * contain a file documenting the changes Licensee made to create that Covered + * Code and the date of any change. Licensee must include in that file the + * documentation of any changes made by any predecessor Licensee. Licensee + * must include a prominent statement that the modification is derived, + * directly or indirectly, from Original Intel Code. + * + * 3.2. Redistribution of Source with no Rights to Further Distribute Source. + * Redistribution of source code of any substantial portion of the Covered + * Code or modification without rights to further distribute source must + * include the following Disclaimer and Export Compliance provision in the + * documentation and/or other materials provided with distribution. In + * addition, Licensee may not authorize further sublicense of source of any + * portion of the Covered Code, and must include terms to the effect that the + * license from Licensee to its licensee is limited to the intellectual + * property embodied in the software Licensee provides to its licensee, and + * not to intellectual property embodied in modifications its licensee may + * make. + * + * 3.3. Redistribution of Executable. Redistribution in executable form of any + * substantial portion of the Covered Code or modification must reproduce the + * above Copyright Notice, and the following Disclaimer and Export Compliance + * provision in the documentation and/or other materials provided with the + * distribution. + * + * 3.4. Intel retains all right, title, and interest in and to the Original + * Intel Code. + * + * 3.5. 
Neither the name Intel nor any other trademark owned or controlled by + * Intel shall be used in advertising or otherwise to promote the sale, use or + * other dealings in products derived from or relating to the Covered Code + * without prior written authorization from Intel. + * + * 4. Disclaimer and Export Compliance + * + * 4.1. INTEL MAKES NO WARRANTY OF ANY KIND REGARDING ANY SOFTWARE PROVIDED + * HERE. ANY SOFTWARE ORIGINATING FROM INTEL OR DERIVED FROM INTEL SOFTWARE + * IS PROVIDED "AS IS," AND INTEL WILL NOT PROVIDE ANY SUPPORT, ASSISTANCE, + * INSTALLATION, TRAINING OR OTHER SERVICES. INTEL WILL NOT PROVIDE ANY + * UPDATES, ENHANCEMENTS OR EXTENSIONS. INTEL SPECIFICALLY DISCLAIMS ANY + * IMPLIED WARRANTIES OF MERCHANTABILITY, NONINFRINGEMENT AND FITNESS FOR A + * PARTICULAR PURPOSE. + * + * 4.2. IN NO EVENT SHALL INTEL HAVE ANY LIABILITY TO LICENSEE, ITS LICENSEES + * OR ANY OTHER THIRD PARTY, FOR ANY LOST PROFITS, LOST DATA, LOSS OF USE OR + * COSTS OF PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES, OR FOR ANY INDIRECT, + * SPECIAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THIS AGREEMENT, UNDER ANY + * CAUSE OF ACTION OR THEORY OF LIABILITY, AND IRRESPECTIVE OF WHETHER INTEL + * HAS ADVANCE NOTICE OF THE POSSIBILITY OF SUCH DAMAGES. THESE LIMITATIONS + * SHALL APPLY NOTWITHSTANDING THE FAILURE OF THE ESSENTIAL PURPOSE OF ANY + * LIMITED REMEDY. + * + * 4.3. Licensee shall not export, either directly or indirectly, any of this + * software or system incorporating such software without first obtaining any + * required license or other approval from the U. S. Department of Commerce or + * any other agency or department of the United States Government. 
In the + * event Licensee exports any such software from the United States or + * re-exports any such software from a foreign destination, Licensee shall + * ensure that the distribution and export/re-export of the software is in + * compliance with all laws, regulations, orders, or other restrictions of the + * U.S. Export Administration Regulations. Licensee agrees that neither it nor + * any of its subsidiaries will export/re-export any technical data, process, + * software, or service, directly or indirectly, to any country for which the + * United States government or any agency thereof requires an export license, + * other governmental approval, or letter of assurance, without first obtaining + * such license, approval or letter. + * + *****************************************************************************/ + + +#include +#include "aslcompiler.y.h" + +#define _COMPONENT ACPI_COMPILER + ACPI_MODULE_NAME ("aslrestype2e") + +/* + * This module contains the Extended (64-bit) address space descriptors: + * + * ExtendedIO + * ExtendedMemory + * ExtendedSpace + */ + +/******************************************************************************* + * + * FUNCTION: RsDoExtendedIoDescriptor + * + * PARAMETERS: Op - Parent resource descriptor parse node + * CurrentByteOffset - Offset into the resource template AML + * buffer (to track references to the desc) + * + * RETURN: Completed resource node + * + * DESCRIPTION: Construct a long "ExtendedIO" descriptor + * + ******************************************************************************/ + +ASL_RESOURCE_NODE * +RsDoExtendedIoDescriptor ( + ACPI_PARSE_OBJECT *Op, + UINT32 CurrentByteOffset) +{ + AML_RESOURCE *Descriptor; + ACPI_PARSE_OBJECT *InitializerOp; + ACPI_PARSE_OBJECT *MinOp = NULL; + ACPI_PARSE_OBJECT *MaxOp = NULL; + ACPI_PARSE_OBJECT *LengthOp = NULL; + ACPI_PARSE_OBJECT *GranOp = NULL; + ASL_RESOURCE_NODE *Rnode; + UINT16 StringLength = 0; + UINT32 i; + + + InitializerOp = Op->Asl.Child; + 
StringLength = RsGetStringDataLength (InitializerOp); + + Rnode = RsAllocateResourceNode ( + sizeof (AML_RESOURCE_EXTENDED_ADDRESS64) + 1 + StringLength); + + Descriptor = Rnode->Buffer; + Descriptor->ExtAddress64.DescriptorType = ACPI_RESOURCE_NAME_EXTENDED_ADDRESS64; + Descriptor->ExtAddress64.ResourceType = ACPI_ADDRESS_TYPE_IO_RANGE; + Descriptor->ExtAddress64.RevisionID = AML_RESOURCE_EXTENDED_ADDRESS_REVISION; + + Descriptor->ExtAddress64.ResourceLength = (UINT16) + (sizeof (AML_RESOURCE_EXTENDED_ADDRESS64) - + sizeof (AML_RESOURCE_LARGE_HEADER)); + + /* Process all child initialization nodes */ + + for (i = 0; InitializerOp; i++) + { + switch (i) + { + case 0: /* Resource Usage */ + + RsSetFlagBits (&Descriptor->ExtAddress64.Flags, InitializerOp, 0, 1); + break; + + case 1: /* MinType */ + + RsSetFlagBits (&Descriptor->ExtAddress64.Flags, InitializerOp, 2, 0); + RsCreateBitField (InitializerOp, ACPI_RESTAG_MINTYPE, + CurrentByteOffset + ASL_RESDESC_OFFSET (ExtAddress64.Flags), 2); + break; + + case 2: /* MaxType */ + + RsSetFlagBits (&Descriptor->ExtAddress64.Flags, InitializerOp, 3, 0); + RsCreateBitField (InitializerOp, ACPI_RESTAG_MAXTYPE, + CurrentByteOffset + ASL_RESDESC_OFFSET (ExtAddress64.Flags), 3); + break; + + case 3: /* DecodeType */ + + RsSetFlagBits (&Descriptor->ExtAddress64.Flags, InitializerOp, 1, 0); + RsCreateBitField (InitializerOp, ACPI_RESTAG_DECODE, + CurrentByteOffset + ASL_RESDESC_OFFSET (ExtAddress64.Flags), 1); + break; + + case 4: /* Range Type */ + + RsSetFlagBits (&Descriptor->ExtAddress64.SpecificFlags, InitializerOp, 0, 3); + RsCreateBitField (InitializerOp, ACPI_RESTAG_RANGETYPE, + CurrentByteOffset + ASL_RESDESC_OFFSET (ExtAddress64.SpecificFlags), 0); + break; + + case 5: /* Address Granularity */ + + Descriptor->ExtAddress64.Granularity = InitializerOp->Asl.Value.Integer; + RsCreateByteField (InitializerOp, ACPI_RESTAG_GRANULARITY, + CurrentByteOffset + ASL_RESDESC_OFFSET (ExtAddress64.Granularity)); + GranOp = 
InitializerOp; + break; + + case 6: /* Address Min */ + + Descriptor->ExtAddress64.Minimum = InitializerOp->Asl.Value.Integer; + RsCreateByteField (InitializerOp, ACPI_RESTAG_MINADDR, + CurrentByteOffset + ASL_RESDESC_OFFSET (ExtAddress64.Minimum)); + MinOp = InitializerOp; + break; + + case 7: /* Address Max */ + + Descriptor->ExtAddress64.Maximum = InitializerOp->Asl.Value.Integer; + RsCreateByteField (InitializerOp, ACPI_RESTAG_MAXADDR, + CurrentByteOffset + ASL_RESDESC_OFFSET (ExtAddress64.Maximum)); + MaxOp = InitializerOp; + break; + + case 8: /* Translation Offset */ + + Descriptor->ExtAddress64.TranslationOffset = InitializerOp->Asl.Value.Integer; + RsCreateByteField (InitializerOp, ACPI_RESTAG_TRANSLATION, + CurrentByteOffset + ASL_RESDESC_OFFSET (ExtAddress64.TranslationOffset)); + break; + + case 9: /* Address Length */ + + Descriptor->ExtAddress64.AddressLength = InitializerOp->Asl.Value.Integer; + RsCreateByteField (InitializerOp, ACPI_RESTAG_LENGTH, + CurrentByteOffset + ASL_RESDESC_OFFSET (ExtAddress64.AddressLength)); + LengthOp = InitializerOp; + break; + + case 10: /* Type-Specific Attributes */ + + Descriptor->ExtAddress64.TypeSpecific = InitializerOp->Asl.Value.Integer; + RsCreateByteField (InitializerOp, ACPI_RESTAG_TYPESPECIFICATTRIBUTES, + CurrentByteOffset + ASL_RESDESC_OFFSET (ExtAddress64.TypeSpecific)); + break; + + case 11: /* ResourceTag */ + + UtAttachNamepathToOwner (Op, InitializerOp); + break; + + case 12: /* Type */ + + RsSetFlagBits (&Descriptor->ExtAddress64.SpecificFlags, InitializerOp, 4, 0); + RsCreateBitField (InitializerOp, ACPI_RESTAG_TYPE, + CurrentByteOffset + ASL_RESDESC_OFFSET (ExtAddress64.SpecificFlags), 4); + break; + + case 13: /* Translation Type */ + + RsSetFlagBits (&Descriptor->ExtAddress64.SpecificFlags, InitializerOp, 5, 0); + RsCreateBitField (InitializerOp, ACPI_RESTAG_TRANSTYPE, + CurrentByteOffset + ASL_RESDESC_OFFSET (ExtAddress64.SpecificFlags), 5); + break; + + default: + + AslError (ASL_ERROR, 
ASL_MSG_RESOURCE_LIST, InitializerOp, NULL); + break; + } + + InitializerOp = RsCompleteNodeAndGetNext (InitializerOp); + } + + /* Validate the Min/Max/Len/Gran values */ + + RsLargeAddressCheck ( + Descriptor->ExtAddress64.Minimum, + Descriptor->ExtAddress64.Maximum, + Descriptor->ExtAddress64.AddressLength, + Descriptor->ExtAddress64.Granularity, + Descriptor->ExtAddress64.Flags, + MinOp, MaxOp, LengthOp, GranOp); + + Rnode->BufferLength = sizeof (AML_RESOURCE_EXTENDED_ADDRESS64) + StringLength; + return (Rnode); +} + + +/******************************************************************************* + * + * FUNCTION: RsDoExtendedMemoryDescriptor + * + * PARAMETERS: Op - Parent resource descriptor parse node + * CurrentByteOffset - Offset into the resource template AML + * buffer (to track references to the desc) + * + * RETURN: Completed resource node + * + * DESCRIPTION: Construct a long "ExtendedMemory" descriptor + * + ******************************************************************************/ + +ASL_RESOURCE_NODE * +RsDoExtendedMemoryDescriptor ( + ACPI_PARSE_OBJECT *Op, + UINT32 CurrentByteOffset) +{ + AML_RESOURCE *Descriptor; + ACPI_PARSE_OBJECT *InitializerOp; + ACPI_PARSE_OBJECT *MinOp = NULL; + ACPI_PARSE_OBJECT *MaxOp = NULL; + ACPI_PARSE_OBJECT *LengthOp = NULL; + ACPI_PARSE_OBJECT *GranOp = NULL; + ASL_RESOURCE_NODE *Rnode; + UINT16 StringLength = 0; + UINT32 i; + + + InitializerOp = Op->Asl.Child; + StringLength = RsGetStringDataLength (InitializerOp); + + Rnode = RsAllocateResourceNode ( + sizeof (AML_RESOURCE_EXTENDED_ADDRESS64) + 1 + StringLength); /* NOTE(review): one byte more than the BufferLength reported at the end of this function -- confirm intent */ + + Descriptor = Rnode->Buffer; + Descriptor->ExtAddress64.DescriptorType = ACPI_RESOURCE_NAME_EXTENDED_ADDRESS64; + Descriptor->ExtAddress64.ResourceType = ACPI_ADDRESS_TYPE_MEMORY_RANGE; /* Resource type is fixed for this descriptor; it is not read from an initializer */ + Descriptor->ExtAddress64.RevisionID = AML_RESOURCE_EXTENDED_ADDRESS_REVISION; + + Descriptor->ExtAddress64.ResourceLength = (UINT16) + (sizeof (AML_RESOURCE_EXTENDED_ADDRESS64) - + sizeof 
(AML_RESOURCE_LARGE_HEADER)); + + /* Process all child initialization nodes */ + + for (i = 0; InitializerOp; i++) + { + switch (i) /* i is the zero-based position of the current initializer in the child list */ + { + case 0: /* Resource Usage */ + + RsSetFlagBits (&Descriptor->ExtAddress64.Flags, InitializerOp, 0, 1); + break; + + case 1: /* DecodeType */ + + RsSetFlagBits (&Descriptor->ExtAddress64.Flags, InitializerOp, 1, 0); + RsCreateBitField (InitializerOp, ACPI_RESTAG_DECODE, + CurrentByteOffset + ASL_RESDESC_OFFSET (ExtAddress64.Flags), 1); + break; + + case 2: /* MinType */ + + RsSetFlagBits (&Descriptor->ExtAddress64.Flags, InitializerOp, 2, 0); + RsCreateBitField (InitializerOp, ACPI_RESTAG_MINTYPE, + CurrentByteOffset + ASL_RESDESC_OFFSET (ExtAddress64.Flags), 2); + break; + + case 3: /* MaxType */ + + RsSetFlagBits (&Descriptor->ExtAddress64.Flags, InitializerOp, 3, 0); + RsCreateBitField (InitializerOp, ACPI_RESTAG_MAXTYPE, + CurrentByteOffset + ASL_RESDESC_OFFSET (ExtAddress64.Flags), 3); + break; + + case 4: /* Memory Type */ + + RsSetFlagBits (&Descriptor->ExtAddress64.SpecificFlags, InitializerOp, 1, 0); + RsCreateBitField (InitializerOp, ACPI_RESTAG_MEMTYPE, + CurrentByteOffset + ASL_RESDESC_OFFSET (ExtAddress64.SpecificFlags), 1); + break; + + case 5: /* Read/Write Type */ + + RsSetFlagBits (&Descriptor->ExtAddress64.SpecificFlags, InitializerOp, 0, 1); + RsCreateBitField (InitializerOp, ACPI_RESTAG_READWRITETYPE, + CurrentByteOffset + ASL_RESDESC_OFFSET (ExtAddress64.SpecificFlags), 0); + break; + + case 6: /* Address Granularity */ + + Descriptor->ExtAddress64.Granularity = InitializerOp->Asl.Value.Integer; + RsCreateByteField (InitializerOp, ACPI_RESTAG_GRANULARITY, + CurrentByteOffset + ASL_RESDESC_OFFSET (ExtAddress64.Granularity)); + GranOp = InitializerOp; + break; + + case 7: /* Min Address */ + + Descriptor->ExtAddress64.Minimum = InitializerOp->Asl.Value.Integer; + RsCreateByteField (InitializerOp, ACPI_RESTAG_MINADDR, + CurrentByteOffset + ASL_RESDESC_OFFSET (ExtAddress64.Minimum)); + MinOp = InitializerOp; + 
break; + + case 8: /* Max Address */ + + Descriptor->ExtAddress64.Maximum = InitializerOp->Asl.Value.Integer; + RsCreateByteField (InitializerOp, ACPI_RESTAG_MAXADDR, + CurrentByteOffset + ASL_RESDESC_OFFSET (ExtAddress64.Maximum)); + MaxOp = InitializerOp; + break; + + case 9: /* Translation Offset */ + + Descriptor->ExtAddress64.TranslationOffset = InitializerOp->Asl.Value.Integer; + RsCreateByteField (InitializerOp, ACPI_RESTAG_TRANSLATION, + CurrentByteOffset + ASL_RESDESC_OFFSET (ExtAddress64.TranslationOffset)); + break; + + case 10: /* Address Length */ + + Descriptor->ExtAddress64.AddressLength = InitializerOp->Asl.Value.Integer; + RsCreateByteField (InitializerOp, ACPI_RESTAG_LENGTH, + CurrentByteOffset + ASL_RESDESC_OFFSET (ExtAddress64.AddressLength)); + LengthOp = InitializerOp; + break; + + case 11: /* Type-Specific Attributes */ + + Descriptor->ExtAddress64.TypeSpecific = InitializerOp->Asl.Value.Integer; + RsCreateByteField (InitializerOp, ACPI_RESTAG_TYPESPECIFICATTRIBUTES, + CurrentByteOffset + ASL_RESDESC_OFFSET (ExtAddress64.TypeSpecific)); + break; + + case 12: /* ResourceTag */ + + UtAttachNamepathToOwner (Op, InitializerOp); + break; + + + case 13: /* Address Range */ + + RsSetFlagBits (&Descriptor->ExtAddress64.SpecificFlags, InitializerOp, 3, 0); + RsCreateBitField (InitializerOp, ACPI_RESTAG_MEMATTRIBUTES, + CurrentByteOffset + ASL_RESDESC_OFFSET (ExtAddress64.SpecificFlags), 3); + break; + + case 14: /* Type */ + + RsSetFlagBits (&Descriptor->ExtAddress64.SpecificFlags, InitializerOp, 5, 0); + RsCreateBitField (InitializerOp, ACPI_RESTAG_TYPE, + CurrentByteOffset + ASL_RESDESC_OFFSET (ExtAddress64.SpecificFlags), 5); + break; + + default: /* Reached when more initializers are present than the cases above handle */ + + AslError (ASL_ERROR, ASL_MSG_RESOURCE_LIST, InitializerOp, NULL); + break; + } + + InitializerOp = RsCompleteNodeAndGetNext (InitializerOp); + } + + /* Validate the Min/Max/Len/Gran values */ + + RsLargeAddressCheck ( + Descriptor->ExtAddress64.Minimum, + Descriptor->ExtAddress64.Maximum, + 
Descriptor->ExtAddress64.AddressLength, + Descriptor->ExtAddress64.Granularity, + Descriptor->ExtAddress64.Flags, + MinOp, MaxOp, LengthOp, GranOp); + + Rnode->BufferLength = sizeof (AML_RESOURCE_EXTENDED_ADDRESS64) + StringLength; + return (Rnode); +} + + +/******************************************************************************* + * + * FUNCTION: RsDoExtendedSpaceDescriptor + * + * PARAMETERS: Op - Parent resource descriptor parse node + * CurrentByteOffset - Offset into the resource template AML + * buffer (to track references to the desc) + * + * RETURN: Completed resource node + * + * DESCRIPTION: Construct a long "ExtendedSpace" descriptor + * + ******************************************************************************/ + +ASL_RESOURCE_NODE * +RsDoExtendedSpaceDescriptor ( + ACPI_PARSE_OBJECT *Op, + UINT32 CurrentByteOffset) +{ + AML_RESOURCE *Descriptor; + ACPI_PARSE_OBJECT *InitializerOp; + ACPI_PARSE_OBJECT *MinOp = NULL; + ACPI_PARSE_OBJECT *MaxOp = NULL; + ACPI_PARSE_OBJECT *LengthOp = NULL; + ACPI_PARSE_OBJECT *GranOp = NULL; + ASL_RESOURCE_NODE *Rnode; + UINT16 StringLength = 0; + UINT32 i; + + + InitializerOp = Op->Asl.Child; + StringLength = RsGetStringDataLength (InitializerOp); + + Rnode = RsAllocateResourceNode ( + sizeof (AML_RESOURCE_EXTENDED_ADDRESS64) + 1 + StringLength); + + Descriptor = Rnode->Buffer; + Descriptor->ExtAddress64.DescriptorType = ACPI_RESOURCE_NAME_EXTENDED_ADDRESS64; + Descriptor->ExtAddress64.RevisionID = AML_RESOURCE_EXTENDED_ADDRESS_REVISION; /* ResourceType is not preset here; initializer 0 supplies it in the loop below */ + + Descriptor->ExtAddress64.ResourceLength = (UINT16) + (sizeof (AML_RESOURCE_EXTENDED_ADDRESS64) - + sizeof (AML_RESOURCE_LARGE_HEADER)); + + /* Process all child initialization nodes */ + + for (i = 0; InitializerOp; i++) + { + switch (i) + { + case 0: /* Resource Type */ + + Descriptor->ExtAddress64.ResourceType = + (UINT8) InitializerOp->Asl.Value.Integer; + break; + + case 1: /* Resource Usage */ + + RsSetFlagBits (&Descriptor->ExtAddress64.Flags, InitializerOp, 0, 1); 
+ break; + + case 2: /* DecodeType */ + + RsSetFlagBits (&Descriptor->ExtAddress64.Flags, InitializerOp, 1, 0); + RsCreateBitField (InitializerOp, ACPI_RESTAG_DECODE, + CurrentByteOffset + ASL_RESDESC_OFFSET (ExtAddress64.Flags), 1); + break; + + case 3: /* MinType */ + + RsSetFlagBits (&Descriptor->ExtAddress64.Flags, InitializerOp, 2, 0); + RsCreateBitField (InitializerOp, ACPI_RESTAG_MINTYPE, + CurrentByteOffset + ASL_RESDESC_OFFSET (ExtAddress64.Flags), 2); + break; + + case 4: /* MaxType */ + + RsSetFlagBits (&Descriptor->ExtAddress64.Flags, InitializerOp, 3, 0); + RsCreateBitField (InitializerOp, ACPI_RESTAG_MAXTYPE, + CurrentByteOffset + ASL_RESDESC_OFFSET (ExtAddress64.Flags), 3); + break; + + case 5: /* Type-Specific flags */ + + Descriptor->ExtAddress64.SpecificFlags = + (UINT8) InitializerOp->Asl.Value.Integer; /* Whole flags byte comes from the initializer, truncated to 8 bits; no per-bit fields are created */ + break; + + case 6: /* Address Granularity */ + + Descriptor->ExtAddress64.Granularity = InitializerOp->Asl.Value.Integer; + RsCreateByteField (InitializerOp, ACPI_RESTAG_GRANULARITY, + CurrentByteOffset + ASL_RESDESC_OFFSET (ExtAddress64.Granularity)); + GranOp = InitializerOp; + break; + + case 7: /* Min Address */ + + Descriptor->ExtAddress64.Minimum = InitializerOp->Asl.Value.Integer; + RsCreateByteField (InitializerOp, ACPI_RESTAG_MINADDR, + CurrentByteOffset + ASL_RESDESC_OFFSET (ExtAddress64.Minimum)); + MinOp = InitializerOp; + break; + + case 8: /* Max Address */ + + Descriptor->ExtAddress64.Maximum = InitializerOp->Asl.Value.Integer; + RsCreateByteField (InitializerOp, ACPI_RESTAG_MAXADDR, + CurrentByteOffset + ASL_RESDESC_OFFSET (ExtAddress64.Maximum)); + MaxOp = InitializerOp; + break; + + case 9: /* Translation Offset */ + + Descriptor->ExtAddress64.TranslationOffset = InitializerOp->Asl.Value.Integer; + RsCreateByteField (InitializerOp, ACPI_RESTAG_TRANSLATION, + CurrentByteOffset + ASL_RESDESC_OFFSET (ExtAddress64.TranslationOffset)); + break; + + case 10: /* Address Length */ + + Descriptor->ExtAddress64.AddressLength = 
InitializerOp->Asl.Value.Integer; + RsCreateByteField (InitializerOp, ACPI_RESTAG_LENGTH, + CurrentByteOffset + ASL_RESDESC_OFFSET (ExtAddress64.AddressLength)); + LengthOp = InitializerOp; + break; + + case 11: /* Type-Specific Attributes */ + + Descriptor->ExtAddress64.TypeSpecific = InitializerOp->Asl.Value.Integer; + RsCreateByteField (InitializerOp, ACPI_RESTAG_TYPESPECIFICATTRIBUTES, + CurrentByteOffset + ASL_RESDESC_OFFSET (ExtAddress64.TypeSpecific)); + break; + + case 12: /* ResourceTag */ + + UtAttachNamepathToOwner (Op, InitializerOp); + break; + + default: + + AslError (ASL_ERROR, ASL_MSG_RESOURCE_LIST, InitializerOp, NULL); + break; + } + + InitializerOp = RsCompleteNodeAndGetNext (InitializerOp); + } + + /* Validate the Min/Max/Len/Gran values */ + + RsLargeAddressCheck ( + Descriptor->ExtAddress64.Minimum, + Descriptor->ExtAddress64.Maximum, + Descriptor->ExtAddress64.AddressLength, + Descriptor->ExtAddress64.Granularity, + Descriptor->ExtAddress64.Flags, + MinOp, MaxOp, LengthOp, GranOp); + + Rnode->BufferLength = sizeof (AML_RESOURCE_EXTENDED_ADDRESS64) + StringLength; + return (Rnode); +} diff --git a/sys/contrib/dev/acpica/compiler/aslrestype2q.c b/sys/contrib/dev/acpica/compiler/aslrestype2q.c new file mode 100644 index 00000000000..cea12aa4912 --- /dev/null +++ b/sys/contrib/dev/acpica/compiler/aslrestype2q.c @@ -0,0 +1,793 @@ + +/****************************************************************************** + * + * Module Name: aslrestype2q - Large QWord address resource descriptors + * + *****************************************************************************/ + +/****************************************************************************** + * + * 1. Copyright Notice + * + * Some or all of this work - Copyright (c) 1999 - 2010, Intel Corp. + * All rights reserved. + * + * 2. License + * + * 2.1. This is your license from Intel Corp. under its intellectual property + * rights. 
You may have additional license terms from the party that provided + * you this software, covering your right to use that party's intellectual + * property rights. + * + * 2.2. Intel grants, free of charge, to any person ("Licensee") obtaining a + * copy of the source code appearing in this file ("Covered Code") an + * irrevocable, perpetual, worldwide license under Intel's copyrights in the + * base code distributed originally by Intel ("Original Intel Code") to copy, + * make derivatives, distribute, use and display any portion of the Covered + * Code in any form, with the right to sublicense such rights; and + * + * 2.3. Intel grants Licensee a non-exclusive and non-transferable patent + * license (with the right to sublicense), under only those claims of Intel + * patents that are infringed by the Original Intel Code, to make, use, sell, + * offer to sell, and import the Covered Code and derivative works thereof + * solely to the minimum extent necessary to exercise the above copyright + * license, and in no event shall the patent license extend to any additions + * to or modifications of the Original Intel Code. No other license or right + * is granted directly or by implication, estoppel or otherwise; + * + * The above copyright and patent license is granted only if the following + * conditions are met: + * + * 3. Conditions + * + * 3.1. Redistribution of Source with Rights to Further Distribute Source. + * Redistribution of source code of any substantial portion of the Covered + * Code or modification with rights to further distribute source must include + * the above Copyright Notice, the above License, this list of Conditions, + * and the following Disclaimer and Export Compliance provision. In addition, + * Licensee must cause all Covered Code to which Licensee contributes to + * contain a file documenting the changes Licensee made to create that Covered + * Code and the date of any change. 
Licensee must include in that file the + * documentation of any changes made by any predecessor Licensee. Licensee + * must include a prominent statement that the modification is derived, + * directly or indirectly, from Original Intel Code. + * + * 3.2. Redistribution of Source with no Rights to Further Distribute Source. + * Redistribution of source code of any substantial portion of the Covered + * Code or modification without rights to further distribute source must + * include the following Disclaimer and Export Compliance provision in the + * documentation and/or other materials provided with distribution. In + * addition, Licensee may not authorize further sublicense of source of any + * portion of the Covered Code, and must include terms to the effect that the + * license from Licensee to its licensee is limited to the intellectual + * property embodied in the software Licensee provides to its licensee, and + * not to intellectual property embodied in modifications its licensee may + * make. + * + * 3.3. Redistribution of Executable. Redistribution in executable form of any + * substantial portion of the Covered Code or modification must reproduce the + * above Copyright Notice, and the following Disclaimer and Export Compliance + * provision in the documentation and/or other materials provided with the + * distribution. + * + * 3.4. Intel retains all right, title, and interest in and to the Original + * Intel Code. + * + * 3.5. Neither the name Intel nor any other trademark owned or controlled by + * Intel shall be used in advertising or otherwise to promote the sale, use or + * other dealings in products derived from or relating to the Covered Code + * without prior written authorization from Intel. + * + * 4. Disclaimer and Export Compliance + * + * 4.1. INTEL MAKES NO WARRANTY OF ANY KIND REGARDING ANY SOFTWARE PROVIDED + * HERE. 
ANY SOFTWARE ORIGINATING FROM INTEL OR DERIVED FROM INTEL SOFTWARE + * IS PROVIDED "AS IS," AND INTEL WILL NOT PROVIDE ANY SUPPORT, ASSISTANCE, + * INSTALLATION, TRAINING OR OTHER SERVICES. INTEL WILL NOT PROVIDE ANY + * UPDATES, ENHANCEMENTS OR EXTENSIONS. INTEL SPECIFICALLY DISCLAIMS ANY + * IMPLIED WARRANTIES OF MERCHANTABILITY, NONINFRINGEMENT AND FITNESS FOR A + * PARTICULAR PURPOSE. + * + * 4.2. IN NO EVENT SHALL INTEL HAVE ANY LIABILITY TO LICENSEE, ITS LICENSEES + * OR ANY OTHER THIRD PARTY, FOR ANY LOST PROFITS, LOST DATA, LOSS OF USE OR + * COSTS OF PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES, OR FOR ANY INDIRECT, + * SPECIAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THIS AGREEMENT, UNDER ANY + * CAUSE OF ACTION OR THEORY OF LIABILITY, AND IRRESPECTIVE OF WHETHER INTEL + * HAS ADVANCE NOTICE OF THE POSSIBILITY OF SUCH DAMAGES. THESE LIMITATIONS + * SHALL APPLY NOTWITHSTANDING THE FAILURE OF THE ESSENTIAL PURPOSE OF ANY + * LIMITED REMEDY. + * + * 4.3. Licensee shall not export, either directly or indirectly, any of this + * software or system incorporating such software without first obtaining any + * required license or other approval from the U. S. Department of Commerce or + * any other agency or department of the United States Government. In the + * event Licensee exports any such software from the United States or + * re-exports any such software from a foreign destination, Licensee shall + * ensure that the distribution and export/re-export of the software is in + * compliance with all laws, regulations, orders, or other restrictions of the + * U.S. Export Administration Regulations. 
Licensee agrees that neither it nor + * any of its subsidiaries will export/re-export any technical data, process, + * software, or service, directly or indirectly, to any country for which the + * United States government or any agency thereof requires an export license, + * other governmental approval, or letter of assurance, without first obtaining + * such license, approval or letter. + * + *****************************************************************************/ + + +#include +#include "aslcompiler.y.h" + +#define _COMPONENT ACPI_COMPILER + ACPI_MODULE_NAME ("aslrestype2q") + +/* + * This module contains the QWord (64-bit) address space descriptors: + * + * QWordIO + * QWordMemory + * QWordSpace + */ + +/******************************************************************************* + * + * FUNCTION: RsDoQwordIoDescriptor + * + * PARAMETERS: Op - Parent resource descriptor parse node + * CurrentByteOffset - Offset into the resource template AML + * buffer (to track references to the desc) + * + * RETURN: Completed resource node + * + * DESCRIPTION: Construct a long "QwordIO" descriptor + * + ******************************************************************************/ + +ASL_RESOURCE_NODE * +RsDoQwordIoDescriptor ( + ACPI_PARSE_OBJECT *Op, + UINT32 CurrentByteOffset) +{ + AML_RESOURCE *Descriptor; + ACPI_PARSE_OBJECT *InitializerOp; + ACPI_PARSE_OBJECT *MinOp = NULL; + ACPI_PARSE_OBJECT *MaxOp = NULL; + ACPI_PARSE_OBJECT *LengthOp = NULL; + ACPI_PARSE_OBJECT *GranOp = NULL; + ASL_RESOURCE_NODE *Rnode; + UINT8 *OptionalFields; + UINT16 StringLength = 0; + UINT32 OptionIndex = 0; + UINT32 i; + BOOLEAN ResSourceIndex = FALSE; + + + InitializerOp = Op->Asl.Child; + StringLength = RsGetStringDataLength (InitializerOp); + + Rnode = RsAllocateResourceNode ( + sizeof (AML_RESOURCE_ADDRESS64) + 1 + StringLength); + + Descriptor = Rnode->Buffer; + Descriptor->Address64.DescriptorType = ACPI_RESOURCE_NAME_ADDRESS64; + Descriptor->Address64.ResourceType = 
ACPI_ADDRESS_TYPE_IO_RANGE; + + /* + * Initial descriptor length -- may be enlarged if there are + * optional fields present + */ + OptionalFields = ((UINT8 *) Descriptor) + sizeof (AML_RESOURCE_ADDRESS64); /* Optional fields are written directly after the fixed-size descriptor */ + Descriptor->Address64.ResourceLength = (UINT16) + (sizeof (AML_RESOURCE_ADDRESS64) - + sizeof (AML_RESOURCE_LARGE_HEADER)); + + /* Process all child initialization nodes */ + + for (i = 0; InitializerOp; i++) + { + switch (i) /* i is the zero-based position of the current initializer in the child list */ + { + case 0: /* Resource Usage */ + + RsSetFlagBits (&Descriptor->Address64.Flags, InitializerOp, 0, 1); + break; + + case 1: /* MinType */ + + RsSetFlagBits (&Descriptor->Address64.Flags, InitializerOp, 2, 0); + RsCreateBitField (InitializerOp, ACPI_RESTAG_MINTYPE, + CurrentByteOffset + ASL_RESDESC_OFFSET (Address64.Flags), 2); + break; + + case 2: /* MaxType */ + + RsSetFlagBits (&Descriptor->Address64.Flags, InitializerOp, 3, 0); + RsCreateBitField (InitializerOp, ACPI_RESTAG_MAXTYPE, + CurrentByteOffset + ASL_RESDESC_OFFSET (Address64.Flags), 3); + break; + + case 3: /* DecodeType */ + + RsSetFlagBits (&Descriptor->Address64.Flags, InitializerOp, 1, 0); + RsCreateBitField (InitializerOp, ACPI_RESTAG_DECODE, + CurrentByteOffset + ASL_RESDESC_OFFSET (Address64.Flags), 1); + break; + + case 4: /* Range Type */ + + RsSetFlagBits (&Descriptor->Address64.SpecificFlags, InitializerOp, 0, 3); + RsCreateBitField (InitializerOp, ACPI_RESTAG_RANGETYPE, + CurrentByteOffset + ASL_RESDESC_OFFSET (Address64.SpecificFlags), 0); + break; + + case 5: /* Address Granularity */ + + Descriptor->Address64.Granularity = InitializerOp->Asl.Value.Integer; + RsCreateByteField (InitializerOp, ACPI_RESTAG_GRANULARITY, + CurrentByteOffset + ASL_RESDESC_OFFSET (Address64.Granularity)); + GranOp = InitializerOp; + break; + + case 6: /* Address Min */ + + Descriptor->Address64.Minimum = InitializerOp->Asl.Value.Integer; + RsCreateByteField (InitializerOp, ACPI_RESTAG_MINADDR, + CurrentByteOffset + ASL_RESDESC_OFFSET (Address64.Minimum)); + MinOp = InitializerOp; + 
break; + + case 7: /* Address Max */ + + Descriptor->Address64.Maximum = InitializerOp->Asl.Value.Integer; + RsCreateByteField (InitializerOp, ACPI_RESTAG_MAXADDR, + CurrentByteOffset + ASL_RESDESC_OFFSET (Address64.Maximum)); + MaxOp = InitializerOp; + break; + + case 8: /* Translation Offset */ + + Descriptor->Address64.TranslationOffset = InitializerOp->Asl.Value.Integer; + RsCreateByteField (InitializerOp, ACPI_RESTAG_TRANSLATION, + CurrentByteOffset + ASL_RESDESC_OFFSET (Address64.TranslationOffset)); + break; + + case 9: /* Address Length */ + + Descriptor->Address64.AddressLength = InitializerOp->Asl.Value.Integer; + RsCreateByteField (InitializerOp, ACPI_RESTAG_LENGTH, + CurrentByteOffset + ASL_RESDESC_OFFSET (Address64.AddressLength)); + LengthOp = InitializerOp; + break; + + case 10: /* ResSourceIndex [Optional Field - BYTE] */ + + if (InitializerOp->Asl.ParseOpcode != PARSEOP_DEFAULT_ARG) + { + OptionalFields[0] = (UINT8) InitializerOp->Asl.Value.Integer; /* The index byte always occupies the first optional slot */ + OptionIndex++; + Descriptor->Address64.ResourceLength++; + ResSourceIndex = TRUE; + } + break; + + case 11: /* ResSource [Optional Field - STRING] */ + + if ((InitializerOp->Asl.ParseOpcode != PARSEOP_DEFAULT_ARG) && + (InitializerOp->Asl.Value.String)) + { + if (StringLength) + { + Descriptor->Address64.ResourceLength = (UINT16) + (Descriptor->Address64.ResourceLength + StringLength); + + strcpy ((char *) + &OptionalFields[OptionIndex], + InitializerOp->Asl.Value.String); /* Written after the index byte when one was stored (OptionIndex == 1), otherwise at offset 0 */ + + /* ResourceSourceIndex must also be valid */ + + if (!ResSourceIndex) + { + AslError (ASL_ERROR, ASL_MSG_RESOURCE_INDEX, + InitializerOp, NULL); + } + } + } + +#if 0 + /* + * Not a valid ResourceSource, ResourceSourceIndex must also + * be invalid + */ + else if (ResSourceIndex) + { + AslError (ASL_ERROR, ASL_MSG_RESOURCE_SOURCE, + InitializerOp, NULL); + } +#endif + break; + + case 12: /* ResourceTag */ + + UtAttachNamepathToOwner (Op, InitializerOp); + break; + + case 13: /* Type */ + + RsSetFlagBits 
(&Descriptor->Address64.SpecificFlags, InitializerOp, 4, 0); + RsCreateBitField (InitializerOp, ACPI_RESTAG_TYPE, + CurrentByteOffset + ASL_RESDESC_OFFSET (Address64.SpecificFlags), 4); + break; + + case 14: /* Translation Type */ + + RsSetFlagBits (&Descriptor->Address64.SpecificFlags, InitializerOp, 5, 0); + RsCreateBitField (InitializerOp, ACPI_RESTAG_TRANSTYPE, + CurrentByteOffset + ASL_RESDESC_OFFSET (Address64.SpecificFlags), 5); + break; + + default: + + AslError (ASL_ERROR, ASL_MSG_RESOURCE_LIST, InitializerOp, NULL); + break; + } + + InitializerOp = RsCompleteNodeAndGetNext (InitializerOp); + } + + /* Validate the Min/Max/Len/Gran values */ + + RsLargeAddressCheck ( + Descriptor->Address64.Minimum, + Descriptor->Address64.Maximum, + Descriptor->Address64.AddressLength, + Descriptor->Address64.Granularity, + Descriptor->Address64.Flags, + MinOp, MaxOp, LengthOp, GranOp); + + Rnode->BufferLength = sizeof (AML_RESOURCE_ADDRESS64) + + OptionIndex + StringLength; /* Fixed-size descriptor, then the optional index byte (OptionIndex is 0 or 1), then StringLength */ + return (Rnode); +} + + +/******************************************************************************* + * + * FUNCTION: RsDoQwordMemoryDescriptor + * + * PARAMETERS: Op - Parent resource descriptor parse node + * CurrentByteOffset - Offset into the resource template AML + * buffer (to track references to the desc) + * + * RETURN: Completed resource node + * + * DESCRIPTION: Construct a long "QwordMemory" descriptor + * + ******************************************************************************/ + +ASL_RESOURCE_NODE * +RsDoQwordMemoryDescriptor ( + ACPI_PARSE_OBJECT *Op, + UINT32 CurrentByteOffset) +{ + AML_RESOURCE *Descriptor; + ACPI_PARSE_OBJECT *InitializerOp; + ACPI_PARSE_OBJECT *MinOp = NULL; + ACPI_PARSE_OBJECT *MaxOp = NULL; + ACPI_PARSE_OBJECT *LengthOp = NULL; + ACPI_PARSE_OBJECT *GranOp = NULL; + ASL_RESOURCE_NODE *Rnode; + UINT8 *OptionalFields; + UINT16 StringLength = 0; + UINT32 OptionIndex = 0; + UINT32 i; + BOOLEAN ResSourceIndex = FALSE; + + + InitializerOp = Op->Asl.Child; + 
StringLength = RsGetStringDataLength (InitializerOp); + + Rnode = RsAllocateResourceNode ( + sizeof (AML_RESOURCE_ADDRESS64) + 1 + StringLength); + + Descriptor = Rnode->Buffer; + Descriptor->Address64.DescriptorType = ACPI_RESOURCE_NAME_ADDRESS64; + Descriptor->Address64.ResourceType = ACPI_ADDRESS_TYPE_MEMORY_RANGE; /* Resource type is fixed for this descriptor; it is not read from an initializer */ + + /* + * Initial descriptor length -- may be enlarged if there are + * optional fields present + */ + OptionalFields = ((UINT8 *) Descriptor) + sizeof (AML_RESOURCE_ADDRESS64); + Descriptor->Address64.ResourceLength = (UINT16) + (sizeof (AML_RESOURCE_ADDRESS64) - + sizeof (AML_RESOURCE_LARGE_HEADER)); + + /* Process all child initialization nodes */ + + for (i = 0; InitializerOp; i++) + { + switch (i) + { + case 0: /* Resource Usage */ + + RsSetFlagBits (&Descriptor->Address64.Flags, InitializerOp, 0, 1); + break; + + case 1: /* DecodeType */ + + RsSetFlagBits (&Descriptor->Address64.Flags, InitializerOp, 1, 0); + RsCreateBitField (InitializerOp, ACPI_RESTAG_DECODE, + CurrentByteOffset + ASL_RESDESC_OFFSET (Address64.Flags), 1); + break; + + case 2: /* MinType */ + + RsSetFlagBits (&Descriptor->Address64.Flags, InitializerOp, 2, 0); + RsCreateBitField (InitializerOp, ACPI_RESTAG_MINTYPE, + CurrentByteOffset + ASL_RESDESC_OFFSET (Address64.Flags), 2); + break; + + case 3: /* MaxType */ + + RsSetFlagBits (&Descriptor->Address64.Flags, InitializerOp, 3, 0); + RsCreateBitField (InitializerOp, ACPI_RESTAG_MAXTYPE, + CurrentByteOffset + ASL_RESDESC_OFFSET (Address64.Flags), 3); + break; + + case 4: /* Memory Type */ + + RsSetFlagBits (&Descriptor->Address64.SpecificFlags, InitializerOp, 1, 0); + RsCreateBitField (InitializerOp, ACPI_RESTAG_MEMTYPE, + CurrentByteOffset + ASL_RESDESC_OFFSET (Address64.SpecificFlags), 1); + break; + + case 5: /* Read/Write Type */ + + RsSetFlagBits (&Descriptor->Address64.SpecificFlags, InitializerOp, 0, 1); + RsCreateBitField (InitializerOp, ACPI_RESTAG_READWRITETYPE, + CurrentByteOffset + ASL_RESDESC_OFFSET 
(Address64.SpecificFlags), 0); + break; + + case 6: /* Address Granularity */ + + Descriptor->Address64.Granularity = InitializerOp->Asl.Value.Integer; + RsCreateByteField (InitializerOp, ACPI_RESTAG_GRANULARITY, + CurrentByteOffset + ASL_RESDESC_OFFSET (Address64.Granularity)); + GranOp = InitializerOp; + break; + + case 7: /* Min Address */ + + Descriptor->Address64.Minimum = InitializerOp->Asl.Value.Integer; + RsCreateByteField (InitializerOp, ACPI_RESTAG_MINADDR, + CurrentByteOffset + ASL_RESDESC_OFFSET (Address64.Minimum)); + MinOp = InitializerOp; + break; + + case 8: /* Max Address */ + + Descriptor->Address64.Maximum = InitializerOp->Asl.Value.Integer; + RsCreateByteField (InitializerOp, ACPI_RESTAG_MAXADDR, + CurrentByteOffset + ASL_RESDESC_OFFSET (Address64.Maximum)); + MaxOp = InitializerOp; + break; + + case 9: /* Translation Offset */ + + Descriptor->Address64.TranslationOffset = InitializerOp->Asl.Value.Integer; + RsCreateByteField (InitializerOp, ACPI_RESTAG_TRANSLATION, + CurrentByteOffset + ASL_RESDESC_OFFSET (Address64.TranslationOffset)); + break; + + case 10: /* Address Length */ + + Descriptor->Address64.AddressLength = InitializerOp->Asl.Value.Integer; + RsCreateByteField (InitializerOp, ACPI_RESTAG_LENGTH, + CurrentByteOffset + ASL_RESDESC_OFFSET (Address64.AddressLength)); + LengthOp = InitializerOp; + break; + + case 11: /* ResSourceIndex [Optional Field - BYTE] */ + + if (InitializerOp->Asl.ParseOpcode != PARSEOP_DEFAULT_ARG) + { + OptionalFields[0] = (UINT8) InitializerOp->Asl.Value.Integer; + OptionIndex++; + Descriptor->Address64.ResourceLength++; + ResSourceIndex = TRUE; + } + break; + + case 12: /* ResSource [Optional Field - STRING] */ + + if ((InitializerOp->Asl.ParseOpcode != PARSEOP_DEFAULT_ARG) && + (InitializerOp->Asl.Value.String)) + { + if (StringLength) + { + Descriptor->Address64.ResourceLength = (UINT16) + (Descriptor->Address64.ResourceLength + StringLength); + + strcpy ((char *) + &OptionalFields[OptionIndex], + 
InitializerOp->Asl.Value.String); /* Written after the index byte when one was stored (OptionIndex == 1), otherwise at offset 0 */ + + /* ResourceSourceIndex must also be valid */ + + if (!ResSourceIndex) + { + AslError (ASL_ERROR, ASL_MSG_RESOURCE_INDEX, + InitializerOp, NULL); + } + } + } + +#if 0 + /* + * Not a valid ResourceSource, ResourceSourceIndex must also + * be invalid + */ + else if (ResSourceIndex) + { + AslError (ASL_ERROR, ASL_MSG_RESOURCE_SOURCE, + InitializerOp, NULL); + } +#endif + break; + + case 13: /* ResourceTag */ + + UtAttachNamepathToOwner (Op, InitializerOp); + break; + + + case 14: /* Address Range */ + + RsSetFlagBits (&Descriptor->Address64.SpecificFlags, InitializerOp, 3, 0); + RsCreateBitField (InitializerOp, ACPI_RESTAG_MEMATTRIBUTES, + CurrentByteOffset + ASL_RESDESC_OFFSET (Address64.SpecificFlags), 3); + break; + + case 15: /* Type */ + + RsSetFlagBits (&Descriptor->Address64.SpecificFlags, InitializerOp, 5, 0); + RsCreateBitField (InitializerOp, ACPI_RESTAG_TYPE, + CurrentByteOffset + ASL_RESDESC_OFFSET (Address64.SpecificFlags), 5); + break; + + default: + + AslError (ASL_ERROR, ASL_MSG_RESOURCE_LIST, InitializerOp, NULL); + break; + } + + InitializerOp = RsCompleteNodeAndGetNext (InitializerOp); + } + + /* Validate the Min/Max/Len/Gran values */ + + RsLargeAddressCheck ( + Descriptor->Address64.Minimum, + Descriptor->Address64.Maximum, + Descriptor->Address64.AddressLength, + Descriptor->Address64.Granularity, + Descriptor->Address64.Flags, + MinOp, MaxOp, LengthOp, GranOp); + + Rnode->BufferLength = sizeof (AML_RESOURCE_ADDRESS64) + + OptionIndex + StringLength; + return (Rnode); +} + + +/******************************************************************************* + * + * FUNCTION: RsDoQwordSpaceDescriptor + * + * PARAMETERS: Op - Parent resource descriptor parse node + * CurrentByteOffset - Offset into the resource template AML + * buffer (to track references to the desc) + * + * RETURN: Completed resource node + * + * DESCRIPTION: Construct a long "QwordSpace" descriptor + * + 
******************************************************************************/ + +ASL_RESOURCE_NODE * +RsDoQwordSpaceDescriptor ( + ACPI_PARSE_OBJECT *Op, + UINT32 CurrentByteOffset) +{ + AML_RESOURCE *Descriptor; + ACPI_PARSE_OBJECT *InitializerOp; + ACPI_PARSE_OBJECT *MinOp = NULL; + ACPI_PARSE_OBJECT *MaxOp = NULL; + ACPI_PARSE_OBJECT *LengthOp = NULL; + ACPI_PARSE_OBJECT *GranOp = NULL; + ASL_RESOURCE_NODE *Rnode; + UINT8 *OptionalFields; + UINT16 StringLength = 0; + UINT32 OptionIndex = 0; + UINT32 i; + BOOLEAN ResSourceIndex = FALSE; + + + InitializerOp = Op->Asl.Child; + StringLength = RsGetStringDataLength (InitializerOp); + + Rnode = RsAllocateResourceNode ( + sizeof (AML_RESOURCE_ADDRESS64) + 1 + StringLength); + + Descriptor = Rnode->Buffer; + Descriptor->Address64.DescriptorType = ACPI_RESOURCE_NAME_ADDRESS64; /* ResourceType is not preset here; initializer 0 supplies it in the loop below */ + + /* + * Initial descriptor length -- may be enlarged if there are + * optional fields present + */ + OptionalFields = ((UINT8 *) Descriptor) + sizeof (AML_RESOURCE_ADDRESS64); + Descriptor->Address64.ResourceLength = (UINT16) + (sizeof (AML_RESOURCE_ADDRESS64) - + sizeof (AML_RESOURCE_LARGE_HEADER)); + + /* Process all child initialization nodes */ + + for (i = 0; InitializerOp; i++) + { + switch (i) + { + case 0: /* Resource Type */ + + Descriptor->Address64.ResourceType = + (UINT8) InitializerOp->Asl.Value.Integer; + break; + + case 1: /* Resource Usage */ + + RsSetFlagBits (&Descriptor->Address64.Flags, InitializerOp, 0, 1); + break; + + case 2: /* DecodeType */ + + RsSetFlagBits (&Descriptor->Address64.Flags, InitializerOp, 1, 0); + RsCreateBitField (InitializerOp, ACPI_RESTAG_DECODE, + CurrentByteOffset + ASL_RESDESC_OFFSET (Address64.Flags), 1); + break; + + case 3: /* MinType */ + + RsSetFlagBits (&Descriptor->Address64.Flags, InitializerOp, 2, 0); + RsCreateBitField (InitializerOp, ACPI_RESTAG_MINTYPE, + CurrentByteOffset + ASL_RESDESC_OFFSET (Address64.Flags), 2); + break; + + case 4: /* MaxType */ + + RsSetFlagBits 
(&Descriptor->Address64.Flags, InitializerOp, 3, 0); + RsCreateBitField (InitializerOp, ACPI_RESTAG_MAXTYPE, + CurrentByteOffset + ASL_RESDESC_OFFSET (Address64.Flags), 3); + break; + + case 5: /* Type-Specific flags */ + + Descriptor->Address64.SpecificFlags = + (UINT8) InitializerOp->Asl.Value.Integer; /* Whole flags byte comes from the initializer, truncated to 8 bits; no per-bit fields are created */ + break; + + case 6: /* Address Granularity */ + + Descriptor->Address64.Granularity = InitializerOp->Asl.Value.Integer; + RsCreateByteField (InitializerOp, ACPI_RESTAG_GRANULARITY, + CurrentByteOffset + ASL_RESDESC_OFFSET (Address64.Granularity)); + GranOp = InitializerOp; + break; + + case 7: /* Min Address */ + + Descriptor->Address64.Minimum = InitializerOp->Asl.Value.Integer; + RsCreateByteField (InitializerOp, ACPI_RESTAG_MINADDR, + CurrentByteOffset + ASL_RESDESC_OFFSET (Address64.Minimum)); + MinOp = InitializerOp; + break; + + case 8: /* Max Address */ + + Descriptor->Address64.Maximum = InitializerOp->Asl.Value.Integer; + RsCreateByteField (InitializerOp, ACPI_RESTAG_MAXADDR, + CurrentByteOffset + ASL_RESDESC_OFFSET (Address64.Maximum)); + MaxOp = InitializerOp; + break; + + case 9: /* Translation Offset */ + + Descriptor->Address64.TranslationOffset = InitializerOp->Asl.Value.Integer; + RsCreateByteField (InitializerOp, ACPI_RESTAG_TRANSLATION, + CurrentByteOffset + ASL_RESDESC_OFFSET (Address64.TranslationOffset)); + break; + + case 10: /* Address Length */ + + Descriptor->Address64.AddressLength = InitializerOp->Asl.Value.Integer; + RsCreateByteField (InitializerOp, ACPI_RESTAG_LENGTH, + CurrentByteOffset + ASL_RESDESC_OFFSET (Address64.AddressLength)); + LengthOp = InitializerOp; + break; + + case 11: /* ResSourceIndex [Optional Field - BYTE] */ + + if (InitializerOp->Asl.ParseOpcode != PARSEOP_DEFAULT_ARG) + { + OptionalFields[0] = (UINT8) InitializerOp->Asl.Value.Integer; + OptionIndex++; + Descriptor->Address64.ResourceLength++; + ResSourceIndex = TRUE; + } + break; + + case 12: /* ResSource [Optional Field - STRING] */ + + if 
((InitializerOp->Asl.ParseOpcode != PARSEOP_DEFAULT_ARG) && + (InitializerOp->Asl.Value.String)) + { + if (StringLength) + { + Descriptor->Address64.ResourceLength = (UINT16) + (Descriptor->Address64.ResourceLength + StringLength); + + strcpy ((char *) + &OptionalFields[OptionIndex], + InitializerOp->Asl.Value.String); /* Written after the index byte when one was stored (OptionIndex == 1), otherwise at offset 0 */ + + /* ResourceSourceIndex must also be valid */ + + if (!ResSourceIndex) + { + AslError (ASL_ERROR, ASL_MSG_RESOURCE_INDEX, + InitializerOp, NULL); + } + } + } + +#if 0 + /* + * Not a valid ResourceSource, ResourceSourceIndex must also + * be invalid + */ + else if (ResSourceIndex) + { + AslError (ASL_ERROR, ASL_MSG_RESOURCE_SOURCE, + InitializerOp, NULL); + } +#endif + break; + + case 13: /* ResourceTag */ + + UtAttachNamepathToOwner (Op, InitializerOp); + break; + + default: + + AslError (ASL_ERROR, ASL_MSG_RESOURCE_LIST, InitializerOp, NULL); + break; + } + + InitializerOp = RsCompleteNodeAndGetNext (InitializerOp); + } + + /* Validate the Min/Max/Len/Gran values */ + + RsLargeAddressCheck ( + Descriptor->Address64.Minimum, + Descriptor->Address64.Maximum, + Descriptor->Address64.AddressLength, + Descriptor->Address64.Granularity, + Descriptor->Address64.Flags, + MinOp, MaxOp, LengthOp, GranOp); + + Rnode->BufferLength = sizeof (AML_RESOURCE_ADDRESS64) + + OptionIndex + StringLength; + return (Rnode); +} diff --git a/sys/contrib/dev/acpica/compiler/aslrestype2w.c b/sys/contrib/dev/acpica/compiler/aslrestype2w.c new file mode 100644 index 00000000000..fa0bb2226a1 --- /dev/null +++ b/sys/contrib/dev/acpica/compiler/aslrestype2w.c @@ -0,0 +1,774 @@ + +/****************************************************************************** + * + * Module Name: aslrestype2w - Large Word address resource descriptors + * + *****************************************************************************/ + +/****************************************************************************** + * + * 1. 
Copyright Notice + * + * Some or all of this work - Copyright (c) 1999 - 2010, Intel Corp. + * All rights reserved. + * + * 2. License + * + * 2.1. This is your license from Intel Corp. under its intellectual property + * rights. You may have additional license terms from the party that provided + * you this software, covering your right to use that party's intellectual + * property rights. + * + * 2.2. Intel grants, free of charge, to any person ("Licensee") obtaining a + * copy of the source code appearing in this file ("Covered Code") an + * irrevocable, perpetual, worldwide license under Intel's copyrights in the + * base code distributed originally by Intel ("Original Intel Code") to copy, + * make derivatives, distribute, use and display any portion of the Covered + * Code in any form, with the right to sublicense such rights; and + * + * 2.3. Intel grants Licensee a non-exclusive and non-transferable patent + * license (with the right to sublicense), under only those claims of Intel + * patents that are infringed by the Original Intel Code, to make, use, sell, + * offer to sell, and import the Covered Code and derivative works thereof + * solely to the minimum extent necessary to exercise the above copyright + * license, and in no event shall the patent license extend to any additions + * to or modifications of the Original Intel Code. No other license or right + * is granted directly or by implication, estoppel or otherwise; + * + * The above copyright and patent license is granted only if the following + * conditions are met: + * + * 3. Conditions + * + * 3.1. Redistribution of Source with Rights to Further Distribute Source. + * Redistribution of source code of any substantial portion of the Covered + * Code or modification with rights to further distribute source must include + * the above Copyright Notice, the above License, this list of Conditions, + * and the following Disclaimer and Export Compliance provision. 
In addition, + * Licensee must cause all Covered Code to which Licensee contributes to + * contain a file documenting the changes Licensee made to create that Covered + * Code and the date of any change. Licensee must include in that file the + * documentation of any changes made by any predecessor Licensee. Licensee + * must include a prominent statement that the modification is derived, + * directly or indirectly, from Original Intel Code. + * + * 3.2. Redistribution of Source with no Rights to Further Distribute Source. + * Redistribution of source code of any substantial portion of the Covered + * Code or modification without rights to further distribute source must + * include the following Disclaimer and Export Compliance provision in the + * documentation and/or other materials provided with distribution. In + * addition, Licensee may not authorize further sublicense of source of any + * portion of the Covered Code, and must include terms to the effect that the + * license from Licensee to its licensee is limited to the intellectual + * property embodied in the software Licensee provides to its licensee, and + * not to intellectual property embodied in modifications its licensee may + * make. + * + * 3.3. Redistribution of Executable. Redistribution in executable form of any + * substantial portion of the Covered Code or modification must reproduce the + * above Copyright Notice, and the following Disclaimer and Export Compliance + * provision in the documentation and/or other materials provided with the + * distribution. + * + * 3.4. Intel retains all right, title, and interest in and to the Original + * Intel Code. + * + * 3.5. Neither the name Intel nor any other trademark owned or controlled by + * Intel shall be used in advertising or otherwise to promote the sale, use or + * other dealings in products derived from or relating to the Covered Code + * without prior written authorization from Intel. + * + * 4. Disclaimer and Export Compliance + * + * 4.1. 
INTEL MAKES NO WARRANTY OF ANY KIND REGARDING ANY SOFTWARE PROVIDED + * HERE. ANY SOFTWARE ORIGINATING FROM INTEL OR DERIVED FROM INTEL SOFTWARE + * IS PROVIDED "AS IS," AND INTEL WILL NOT PROVIDE ANY SUPPORT, ASSISTANCE, + * INSTALLATION, TRAINING OR OTHER SERVICES. INTEL WILL NOT PROVIDE ANY + * UPDATES, ENHANCEMENTS OR EXTENSIONS. INTEL SPECIFICALLY DISCLAIMS ANY + * IMPLIED WARRANTIES OF MERCHANTABILITY, NONINFRINGEMENT AND FITNESS FOR A + * PARTICULAR PURPOSE. + * + * 4.2. IN NO EVENT SHALL INTEL HAVE ANY LIABILITY TO LICENSEE, ITS LICENSEES + * OR ANY OTHER THIRD PARTY, FOR ANY LOST PROFITS, LOST DATA, LOSS OF USE OR + * COSTS OF PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES, OR FOR ANY INDIRECT, + * SPECIAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THIS AGREEMENT, UNDER ANY + * CAUSE OF ACTION OR THEORY OF LIABILITY, AND IRRESPECTIVE OF WHETHER INTEL + * HAS ADVANCE NOTICE OF THE POSSIBILITY OF SUCH DAMAGES. THESE LIMITATIONS + * SHALL APPLY NOTWITHSTANDING THE FAILURE OF THE ESSENTIAL PURPOSE OF ANY + * LIMITED REMEDY. + * + * 4.3. Licensee shall not export, either directly or indirectly, any of this + * software or system incorporating such software without first obtaining any + * required license or other approval from the U. S. Department of Commerce or + * any other agency or department of the United States Government. In the + * event Licensee exports any such software from the United States or + * re-exports any such software from a foreign destination, Licensee shall + * ensure that the distribution and export/re-export of the software is in + * compliance with all laws, regulations, orders, or other restrictions of the + * U.S. Export Administration Regulations. 
Licensee agrees that neither it nor + * any of its subsidiaries will export/re-export any technical data, process, + * software, or service, directly or indirectly, to any country for which the + * United States government or any agency thereof requires an export license, + * other governmental approval, or letter of assurance, without first obtaining + * such license, approval or letter. + * + *****************************************************************************/ + + +#include +#include "aslcompiler.y.h" + +#define _COMPONENT ACPI_COMPILER + ACPI_MODULE_NAME ("aslrestype2w") + +/* + * This module contains the Word (16-bit) address space descriptors: + * + * WordIO + * WordBusNumber + * WordSpace + */ + +/******************************************************************************* + * + * FUNCTION: RsDoWordIoDescriptor + * + * PARAMETERS: Op - Parent resource descriptor parse node + * CurrentByteOffset - Offset into the resource template AML + * buffer (to track references to the desc) + * + * RETURN: Completed resource node + * + * DESCRIPTION: Construct a long "WordIO" descriptor + * + ******************************************************************************/ + +ASL_RESOURCE_NODE * +RsDoWordIoDescriptor ( + ACPI_PARSE_OBJECT *Op, + UINT32 CurrentByteOffset) +{ + AML_RESOURCE *Descriptor; + ACPI_PARSE_OBJECT *InitializerOp; + ACPI_PARSE_OBJECT *MinOp = NULL; + ACPI_PARSE_OBJECT *MaxOp = NULL; + ACPI_PARSE_OBJECT *LengthOp = NULL; + ACPI_PARSE_OBJECT *GranOp = NULL; + ASL_RESOURCE_NODE *Rnode; + UINT8 *OptionalFields; + UINT16 StringLength = 0; + UINT32 OptionIndex = 0; + UINT32 i; + BOOLEAN ResSourceIndex = FALSE; + + + InitializerOp = Op->Asl.Child; + StringLength = RsGetStringDataLength (InitializerOp); + + Rnode = RsAllocateResourceNode ( + sizeof (AML_RESOURCE_ADDRESS16) + 1 + StringLength); + + Descriptor = Rnode->Buffer; + Descriptor->Address16.DescriptorType = ACPI_RESOURCE_NAME_ADDRESS16; + Descriptor->Address16.ResourceType =
ACPI_ADDRESS_TYPE_IO_RANGE; + + /* + * Initial descriptor length -- may be enlarged if there are + * optional fields present + */ + OptionalFields = ((UINT8 *) Descriptor) + sizeof (AML_RESOURCE_ADDRESS16); + Descriptor->Address16.ResourceLength = (UINT16) + (sizeof (AML_RESOURCE_ADDRESS16) - + sizeof (AML_RESOURCE_LARGE_HEADER)); + + /* Process all child initialization nodes */ + + for (i = 0; InitializerOp; i++) + { + switch (i) + { + case 0: /* Resource Usage */ + + RsSetFlagBits (&Descriptor->Address16.Flags, InitializerOp, 0, 1); + break; + + case 1: /* MinType */ + + RsSetFlagBits (&Descriptor->Address16.Flags, InitializerOp, 2, 0); + RsCreateBitField (InitializerOp, ACPI_RESTAG_MINTYPE, + CurrentByteOffset + ASL_RESDESC_OFFSET (Address16.Flags), 2); + break; + + case 2: /* MaxType */ + + RsSetFlagBits (&Descriptor->Address16.Flags, InitializerOp, 3, 0); + RsCreateBitField (InitializerOp, ACPI_RESTAG_MAXTYPE, + CurrentByteOffset + ASL_RESDESC_OFFSET (Address16.Flags), 3); + break; + + case 3: /* DecodeType */ + + RsSetFlagBits (&Descriptor->Address16.Flags, InitializerOp, 1, 0); + RsCreateBitField (InitializerOp, ACPI_RESTAG_DECODE, + CurrentByteOffset + ASL_RESDESC_OFFSET (Address16.Flags), 1); + break; + + case 4: /* Range Type */ + + RsSetFlagBits (&Descriptor->Address16.SpecificFlags, InitializerOp, 0, 3); + RsCreateBitField (InitializerOp, ACPI_RESTAG_RANGETYPE, + CurrentByteOffset + ASL_RESDESC_OFFSET (Address16.SpecificFlags), 0); + break; + + case 5: /* Address Granularity */ + + Descriptor->Address16.Granularity = (UINT16) InitializerOp->Asl.Value.Integer; + RsCreateByteField (InitializerOp, ACPI_RESTAG_GRANULARITY, + CurrentByteOffset + ASL_RESDESC_OFFSET (Address16.Granularity)); + GranOp = InitializerOp; + break; + + case 6: /* Address Min */ + + Descriptor->Address16.Minimum = (UINT16) InitializerOp->Asl.Value.Integer; + RsCreateByteField (InitializerOp, ACPI_RESTAG_MINADDR, + CurrentByteOffset + ASL_RESDESC_OFFSET (Address16.Minimum)); + MinOp = 
InitializerOp; + break; + + case 7: /* Address Max */ + + Descriptor->Address16.Maximum = (UINT16) InitializerOp->Asl.Value.Integer; + RsCreateByteField (InitializerOp, ACPI_RESTAG_MAXADDR, + CurrentByteOffset + ASL_RESDESC_OFFSET (Address16.Maximum)); + MaxOp = InitializerOp; + break; + + case 8: /* Translation Offset */ + + Descriptor->Address16.TranslationOffset = (UINT16) InitializerOp->Asl.Value.Integer; + RsCreateByteField (InitializerOp, ACPI_RESTAG_TRANSLATION, + CurrentByteOffset + ASL_RESDESC_OFFSET (Address16.TranslationOffset)); + break; + + case 9: /* Address Length */ + + Descriptor->Address16.AddressLength = (UINT16) InitializerOp->Asl.Value.Integer; + RsCreateByteField (InitializerOp, ACPI_RESTAG_LENGTH, + CurrentByteOffset + ASL_RESDESC_OFFSET (Address16.AddressLength)); + LengthOp = InitializerOp; + break; + + case 10: /* ResSourceIndex [Optional Field - BYTE] */ + + if (InitializerOp->Asl.ParseOpcode != PARSEOP_DEFAULT_ARG) + { + OptionalFields[0] = (UINT8) InitializerOp->Asl.Value.Integer; + OptionIndex++; + Descriptor->Address16.ResourceLength++; + ResSourceIndex = TRUE; + } + break; + + case 11: /* ResSource [Optional Field - STRING] */ + + if ((InitializerOp->Asl.ParseOpcode != PARSEOP_DEFAULT_ARG) && + (InitializerOp->Asl.Value.String)) + { + if (StringLength) + { + Descriptor->Address16.ResourceLength = (UINT16) + (Descriptor->Address16.ResourceLength + StringLength); + + strcpy ((char *) + &OptionalFields[OptionIndex], + InitializerOp->Asl.Value.String); + + /* ResourceSourceIndex must also be valid */ + + if (!ResSourceIndex) + { + AslError (ASL_ERROR, ASL_MSG_RESOURCE_INDEX, + InitializerOp, NULL); + } + } + } + +#if 0 + /* + * Not a valid ResourceSource, ResourceSourceIndex must also + * be invalid + */ + else if (ResSourceIndex) + { + AslError (ASL_ERROR, ASL_MSG_RESOURCE_SOURCE, + InitializerOp, NULL); + } +#endif + break; + + case 12: /* ResourceTag */ + + UtAttachNamepathToOwner (Op, InitializerOp); + break; + + case 13: /* Type */ 
+ + RsSetFlagBits (&Descriptor->Address16.SpecificFlags, InitializerOp, 4, 0); + RsCreateBitField (InitializerOp, ACPI_RESTAG_TYPE, + CurrentByteOffset + ASL_RESDESC_OFFSET (Address16.SpecificFlags), 4); + break; + + case 14: /* Translation Type */ + + RsSetFlagBits (&Descriptor->Address16.SpecificFlags, InitializerOp, 5, 0); + RsCreateBitField (InitializerOp, ACPI_RESTAG_TRANSTYPE, + CurrentByteOffset + ASL_RESDESC_OFFSET (Address16.SpecificFlags), 5); + break; + + default: + + AslError (ASL_ERROR, ASL_MSG_RESOURCE_LIST, InitializerOp, NULL); + break; + } + + InitializerOp = RsCompleteNodeAndGetNext (InitializerOp); + } + + /* Validate the Min/Max/Len/Gran values */ + + RsLargeAddressCheck ( + Descriptor->Address16.Minimum, + Descriptor->Address16.Maximum, + Descriptor->Address16.AddressLength, + Descriptor->Address16.Granularity, + Descriptor->Address16.Flags, + MinOp, MaxOp, LengthOp, GranOp); + + Rnode->BufferLength = sizeof (AML_RESOURCE_ADDRESS16) + + OptionIndex + StringLength; + return (Rnode); +} + + +/******************************************************************************* + * + * FUNCTION: RsDoWordBusNumberDescriptor + * + * PARAMETERS: Op - Parent resource descriptor parse node + * CurrentByteOffset - Offset into the resource template AML + * buffer (to track references to the desc) + * + * RETURN: Completed resource node + * + * DESCRIPTION: Construct a long "WordBusNumber" descriptor + * + ******************************************************************************/ + +ASL_RESOURCE_NODE * +RsDoWordBusNumberDescriptor ( + ACPI_PARSE_OBJECT *Op, + UINT32 CurrentByteOffset) +{ + AML_RESOURCE *Descriptor; + ACPI_PARSE_OBJECT *InitializerOp; + ACPI_PARSE_OBJECT *MinOp = NULL; + ACPI_PARSE_OBJECT *MaxOp = NULL; + ACPI_PARSE_OBJECT *LengthOp = NULL; + ACPI_PARSE_OBJECT *GranOp = NULL; + ASL_RESOURCE_NODE *Rnode; + UINT8 *OptionalFields; + UINT16 StringLength = 0; + UINT32 OptionIndex = 0; + UINT32 i; + BOOLEAN ResSourceIndex = FALSE; + + + 
InitializerOp = Op->Asl.Child; + StringLength = RsGetStringDataLength (InitializerOp); + + Rnode = RsAllocateResourceNode ( + sizeof (AML_RESOURCE_ADDRESS16) + 1 + StringLength); + + Descriptor = Rnode->Buffer; + Descriptor->Address16.DescriptorType = ACPI_RESOURCE_NAME_ADDRESS16; + Descriptor->Address16.ResourceType = ACPI_ADDRESS_TYPE_BUS_NUMBER_RANGE; + + /* + * Initial descriptor length -- may be enlarged if there are + * optional fields present + */ + OptionalFields = ((UINT8 *) Descriptor) + sizeof (AML_RESOURCE_ADDRESS16); + Descriptor->Address16.ResourceLength = (UINT16) + (sizeof (AML_RESOURCE_ADDRESS16) - + sizeof (AML_RESOURCE_LARGE_HEADER)); + + /* Process all child initialization nodes */ + + for (i = 0; InitializerOp; i++) + { + switch (i) + { + case 0: /* Resource Usage */ + + RsSetFlagBits (&Descriptor->Address16.Flags, InitializerOp, 0, 1); + break; + + case 1: /* MinType */ + + RsSetFlagBits (&Descriptor->Address16.Flags, InitializerOp, 2, 0); + RsCreateBitField (InitializerOp, ACPI_RESTAG_MINTYPE, + CurrentByteOffset + ASL_RESDESC_OFFSET (Address16.Flags), 2); + break; + + case 2: /* MaxType */ + + RsSetFlagBits (&Descriptor->Address16.Flags, InitializerOp, 3, 0); + RsCreateBitField (InitializerOp, ACPI_RESTAG_MAXTYPE, + CurrentByteOffset + ASL_RESDESC_OFFSET (Address16.Flags), 3); + break; + + case 3: /* DecodeType */ + + RsSetFlagBits (&Descriptor->Address16.Flags, InitializerOp, 1, 0); + RsCreateBitField (InitializerOp, ACPI_RESTAG_DECODE, + CurrentByteOffset + ASL_RESDESC_OFFSET (Address16.Flags), 1); + break; + + case 4: /* Address Granularity */ + + Descriptor->Address16.Granularity = + (UINT16) InitializerOp->Asl.Value.Integer; + RsCreateByteField (InitializerOp, ACPI_RESTAG_GRANULARITY, + CurrentByteOffset + ASL_RESDESC_OFFSET (Address16.Granularity)); + GranOp = InitializerOp; + break; + + case 5: /* Min Address */ + + Descriptor->Address16.Minimum = + (UINT16) InitializerOp->Asl.Value.Integer; + RsCreateByteField (InitializerOp, 
ACPI_RESTAG_MINADDR, + CurrentByteOffset + ASL_RESDESC_OFFSET (Address16.Minimum)); + MinOp = InitializerOp; + break; + + case 6: /* Max Address */ + + Descriptor->Address16.Maximum = + (UINT16) InitializerOp->Asl.Value.Integer; + RsCreateByteField (InitializerOp, ACPI_RESTAG_MAXADDR, + CurrentByteOffset + ASL_RESDESC_OFFSET (Address16.Maximum)); + MaxOp = InitializerOp; + break; + + case 7: /* Translation Offset */ + + Descriptor->Address16.TranslationOffset = + (UINT16) InitializerOp->Asl.Value.Integer; + RsCreateByteField (InitializerOp, ACPI_RESTAG_TRANSLATION, + CurrentByteOffset + ASL_RESDESC_OFFSET (Address16.TranslationOffset)); + break; + + case 8: /* Address Length */ + + Descriptor->Address16.AddressLength = + (UINT16) InitializerOp->Asl.Value.Integer; + RsCreateByteField (InitializerOp, ACPI_RESTAG_LENGTH, + CurrentByteOffset + ASL_RESDESC_OFFSET (Address16.AddressLength)); + LengthOp = InitializerOp; + break; + + case 9: /* ResSourceIndex [Optional Field - BYTE] */ + + if (InitializerOp->Asl.ParseOpcode != PARSEOP_DEFAULT_ARG) + { + OptionalFields[0] = (UINT8) InitializerOp->Asl.Value.Integer; + OptionIndex++; + Descriptor->Address16.ResourceLength++; + ResSourceIndex = TRUE; + } + break; + + case 10: /* ResSource [Optional Field - STRING] */ + + if ((InitializerOp->Asl.ParseOpcode != PARSEOP_DEFAULT_ARG) && + (InitializerOp->Asl.Value.String)) + { + if (StringLength) + { + Descriptor->Address16.ResourceLength = (UINT16) + (Descriptor->Address16.ResourceLength + StringLength); + + strcpy ((char *) + &OptionalFields[OptionIndex], + InitializerOp->Asl.Value.String); + + /* ResourceSourceIndex must also be valid */ + + if (!ResSourceIndex) + { + AslError (ASL_ERROR, ASL_MSG_RESOURCE_INDEX, + InitializerOp, NULL); + } + } + } + +#if 0 + /* + * Not a valid ResourceSource, ResourceSourceIndex must also + * be invalid + */ + else if (ResSourceIndex) + { + AslError (ASL_ERROR, ASL_MSG_RESOURCE_SOURCE, + InitializerOp, NULL); + } +#endif + break; + + case 11: 
/* ResourceTag */ + + UtAttachNamepathToOwner (Op, InitializerOp); + break; + + default: + + AslError (ASL_ERROR, ASL_MSG_RESOURCE_LIST, InitializerOp, NULL); + break; + } + + InitializerOp = RsCompleteNodeAndGetNext (InitializerOp); + } + + /* Validate the Min/Max/Len/Gran values */ + + RsLargeAddressCheck ( + Descriptor->Address16.Minimum, + Descriptor->Address16.Maximum, + Descriptor->Address16.AddressLength, + Descriptor->Address16.Granularity, + Descriptor->Address16.Flags, + MinOp, MaxOp, LengthOp, GranOp); + + Rnode->BufferLength = sizeof (AML_RESOURCE_ADDRESS16) + + OptionIndex + StringLength; + return (Rnode); +} + + +/******************************************************************************* + * + * FUNCTION: RsDoWordSpaceDescriptor + * + * PARAMETERS: Op - Parent resource descriptor parse node + * CurrentByteOffset - Offset into the resource template AML + * buffer (to track references to the desc) + * + * RETURN: Completed resource node + * + * DESCRIPTION: Construct a long "WordSpace" descriptor + * + ******************************************************************************/ + +ASL_RESOURCE_NODE * +RsDoWordSpaceDescriptor ( + ACPI_PARSE_OBJECT *Op, + UINT32 CurrentByteOffset) +{ + AML_RESOURCE *Descriptor; + ACPI_PARSE_OBJECT *InitializerOp; + ACPI_PARSE_OBJECT *MinOp = NULL; + ACPI_PARSE_OBJECT *MaxOp = NULL; + ACPI_PARSE_OBJECT *LengthOp = NULL; + ACPI_PARSE_OBJECT *GranOp = NULL; + ASL_RESOURCE_NODE *Rnode; + UINT8 *OptionalFields; + UINT16 StringLength = 0; + UINT32 OptionIndex = 0; + UINT32 i; + BOOLEAN ResSourceIndex = FALSE; + + + InitializerOp = Op->Asl.Child; + StringLength = RsGetStringDataLength (InitializerOp); + + Rnode = RsAllocateResourceNode ( + sizeof (AML_RESOURCE_ADDRESS16) + 1 + StringLength); + + Descriptor = Rnode->Buffer; + Descriptor->Address16.DescriptorType = ACPI_RESOURCE_NAME_ADDRESS16; + + /* + * Initial descriptor length -- may be enlarged if there are + * optional fields present + */ + OptionalFields = ((UINT8 
*) Descriptor) + sizeof (AML_RESOURCE_ADDRESS16); + Descriptor->Address16.ResourceLength = (UINT16) + (sizeof (AML_RESOURCE_ADDRESS16) - + sizeof (AML_RESOURCE_LARGE_HEADER)); + + /* Process all child initialization nodes */ + + for (i = 0; InitializerOp; i++) + { + switch (i) + { + case 0: /* Resource Type */ + + Descriptor->Address16.ResourceType = + (UINT8) InitializerOp->Asl.Value.Integer; + break; + + case 1: /* Resource Usage */ + + RsSetFlagBits (&Descriptor->Address16.Flags, InitializerOp, 0, 1); + break; + + case 2: /* DecodeType */ + + RsSetFlagBits (&Descriptor->Address16.Flags, InitializerOp, 1, 0); + RsCreateBitField (InitializerOp, ACPI_RESTAG_DECODE, + CurrentByteOffset + ASL_RESDESC_OFFSET (Address16.Flags), 1); + break; + + case 3: /* MinType */ + + RsSetFlagBits (&Descriptor->Address16.Flags, InitializerOp, 2, 0); + RsCreateBitField (InitializerOp, ACPI_RESTAG_MINTYPE, + CurrentByteOffset + ASL_RESDESC_OFFSET (Address16.Flags), 2); + break; + + case 4: /* MaxType */ + + RsSetFlagBits (&Descriptor->Address16.Flags, InitializerOp, 3, 0); + RsCreateBitField (InitializerOp, ACPI_RESTAG_MAXTYPE, + CurrentByteOffset + ASL_RESDESC_OFFSET (Address16.Flags), 3); + break; + + case 5: /* Type-Specific flags */ + + Descriptor->Address16.SpecificFlags = + (UINT8) InitializerOp->Asl.Value.Integer; + break; + + case 6: /* Address Granularity */ + + Descriptor->Address16.Granularity = + (UINT16) InitializerOp->Asl.Value.Integer; + RsCreateByteField (InitializerOp, ACPI_RESTAG_GRANULARITY, + CurrentByteOffset + ASL_RESDESC_OFFSET (Address16.Granularity)); + GranOp = InitializerOp; + break; + + case 7: /* Min Address */ + + Descriptor->Address16.Minimum = + (UINT16) InitializerOp->Asl.Value.Integer; + RsCreateByteField (InitializerOp, ACPI_RESTAG_MINADDR, + CurrentByteOffset + ASL_RESDESC_OFFSET (Address16.Minimum)); + MinOp = InitializerOp; + break; + + case 8: /* Max Address */ + + Descriptor->Address16.Maximum = + (UINT16) InitializerOp->Asl.Value.Integer; + 
RsCreateByteField (InitializerOp, ACPI_RESTAG_MAXADDR, + CurrentByteOffset + ASL_RESDESC_OFFSET (Address16.Maximum)); + MaxOp = InitializerOp; + break; + + case 9: /* Translation Offset */ + + Descriptor->Address16.TranslationOffset = + (UINT16) InitializerOp->Asl.Value.Integer; + RsCreateByteField (InitializerOp, ACPI_RESTAG_TRANSLATION, + CurrentByteOffset + ASL_RESDESC_OFFSET (Address16.TranslationOffset)); + break; + + case 10: /* Address Length */ + + Descriptor->Address16.AddressLength = + (UINT16) InitializerOp->Asl.Value.Integer; + RsCreateByteField (InitializerOp, ACPI_RESTAG_LENGTH, + CurrentByteOffset + ASL_RESDESC_OFFSET (Address16.AddressLength)); + LengthOp = InitializerOp; + break; + + case 11: /* ResSourceIndex [Optional Field - BYTE] */ + + if (InitializerOp->Asl.ParseOpcode != PARSEOP_DEFAULT_ARG) + { + OptionalFields[0] = (UINT8) InitializerOp->Asl.Value.Integer; + OptionIndex++; + Descriptor->Address16.ResourceLength++; + ResSourceIndex = TRUE; + } + break; + + case 12: /* ResSource [Optional Field - STRING] */ + + if ((InitializerOp->Asl.ParseOpcode != PARSEOP_DEFAULT_ARG) && + (InitializerOp->Asl.Value.String)) + { + if (StringLength) + { + Descriptor->Address16.ResourceLength = (UINT16) + (Descriptor->Address16.ResourceLength + StringLength); + + strcpy ((char *) + &OptionalFields[OptionIndex], + InitializerOp->Asl.Value.String); + + /* ResourceSourceIndex must also be valid */ + + if (!ResSourceIndex) + { + AslError (ASL_ERROR, ASL_MSG_RESOURCE_INDEX, + InitializerOp, NULL); + } + } + } + +#if 0 + /* + * Not a valid ResourceSource, ResourceSourceIndex must also + * be invalid + */ + else if (ResSourceIndex) + { + AslError (ASL_ERROR, ASL_MSG_RESOURCE_SOURCE, + InitializerOp, NULL); + } +#endif + break; + + case 13: /* ResourceTag */ + + UtAttachNamepathToOwner (Op, InitializerOp); + break; + + default: + + AslError (ASL_ERROR, ASL_MSG_RESOURCE_LIST, InitializerOp, NULL); + break; + } + + InitializerOp = RsCompleteNodeAndGetNext 
(InitializerOp); + } + + /* Validate the Min/Max/Len/Gran values */ + + RsLargeAddressCheck ( + Descriptor->Address16.Minimum, + Descriptor->Address16.Maximum, + Descriptor->Address16.AddressLength, + Descriptor->Address16.Granularity, + Descriptor->Address16.Flags, + MinOp, MaxOp, LengthOp, GranOp); + + Rnode->BufferLength = sizeof (AML_RESOURCE_ADDRESS16) + + OptionIndex + StringLength; + return (Rnode); +} diff --git a/sys/contrib/dev/acpica/compiler/asltypes.h b/sys/contrib/dev/acpica/compiler/asltypes.h index 30b8fdfe7d0..815763499d5 100644 --- a/sys/contrib/dev/acpica/compiler/asltypes.h +++ b/sys/contrib/dev/acpica/compiler/asltypes.h @@ -420,7 +420,16 @@ typedef enum ASL_MSG_SERIALIZED, ASL_MSG_COMPILER_RESERVED, ASL_MSG_NAMED_OBJECT_IN_WHILE, - ASL_MSG_LOCAL_OUTSIDE_METHOD + ASL_MSG_LOCAL_OUTSIDE_METHOD, + ASL_MSG_ALIGNMENT, + ASL_MSG_ISA_ADDRESS, + ASL_MSG_INVALID_MIN_MAX, + ASL_MSG_INVALID_LENGTH, + ASL_MSG_INVALID_LENGTH_FIXED, + ASL_MSG_INVALID_GRANULARITY, + ASL_MSG_INVALID_GRAN_FIXED, + ASL_MSG_INVALID_ACCESS_SIZE, + ASL_MSG_INVALID_ADDR_FLAGS } ASL_MESSAGE_IDS; @@ -540,7 +549,16 @@ char *AslMessages [] = { /* ASL_MSG_SERIALIZED */ "Control Method marked Serialized", /* ASL_MSG_COMPILER_RESERVED */ "Use of compiler reserved name", /* ASL_MSG_NAMED_OBJECT_IN_WHILE */ "Creating a named object in a While loop", -/* ASL_MSG_LOCAL_OUTSIDE_METHOD */ "Local or Arg used outside a control method" +/* ASL_MSG_LOCAL_OUTSIDE_METHOD */ "Local or Arg used outside a control method", +/* ASL_MSG_ALIGNMENT */ "Must be a multiple of alignment/granularity value", +/* ASL_MSG_ISA_ADDRESS */ "Maximum 10-bit ISA address (0x3FF)", +/* ASL_MSG_INVALID_MIN_MAX */ "Address Min is greater than Address Max", +/* ASL_MSG_INVALID_LENGTH */ "Length is larger than Min/Max window", +/* ASL_MSG_INVALID_LENGTH_FIXED */ "Length is not equal to fixed Min/Max window", +/* ASL_MSG_INVALID_GRANULARITY */ "Granularity must be zero or a power of two minus one", +/* ASL_MSG_INVALID_GRAN_FIXED 
*/ "Granularity must be zero for fixed Min/Max", +/* ASL_MSG_INVALID_ACCESS_SIZE */ "Invalid AccessSize (Maximum is 4 - QWord access)", +/* ASL_MSG_INVALID_ADDR_FLAGS */ "Invalid combination of Length and Min/Max fixed flags" }; diff --git a/sys/contrib/dev/acpica/debugger/dbcmds.c b/sys/contrib/dev/acpica/debugger/dbcmds.c index 0c704e7e386..a5f594145ea 100644 --- a/sys/contrib/dev/acpica/debugger/dbcmds.c +++ b/sys/contrib/dev/acpica/debugger/dbcmds.c @@ -675,7 +675,7 @@ AcpiDbDisplayTableInfo ( /* Walk the entire root table list */ - for (i = 0; i < AcpiGbl_RootTableList.Count; i++) + for (i = 0; i < AcpiGbl_RootTableList.CurrentTableCount; i++) { TableDesc = &AcpiGbl_RootTableList.Tables[i]; AcpiOsPrintf ("%d ", i); diff --git a/sys/contrib/dev/acpica/debugger/dbdisply.c b/sys/contrib/dev/acpica/debugger/dbdisply.c index 41094891ad7..9c85200b31d 100644 --- a/sys/contrib/dev/acpica/debugger/dbdisply.c +++ b/sys/contrib/dev/acpica/debugger/dbdisply.c @@ -816,6 +816,7 @@ AcpiDbDisplayGpes ( ACPI_GPE_XRUPT_INFO *GpeXruptInfo; ACPI_GPE_EVENT_INFO *GpeEventInfo; ACPI_GPE_REGISTER_INFO *GpeRegisterInfo; + char *GpeType; UINT32 GpeIndex; UINT32 Block = 0; UINT32 i; @@ -844,8 +845,17 @@ AcpiDbDisplayGpes ( AcpiOsPrintf ("Could not convert name to pathname\n"); } - AcpiOsPrintf ("\nBlock %d - Info %p DeviceNode %p [%s]\n", - Block, GpeBlock, GpeBlock->Node, Buffer); + if (GpeBlock->Node == AcpiGbl_FadtGpeDevice) + { + GpeType = "FADT-defined GPE block"; + } + else + { + GpeType = "GPE Block Device"; + } + + AcpiOsPrintf ("\nBlock %d - Info %p DeviceNode %p [%s] - %s\n", + Block, GpeBlock, GpeBlock->Node, Buffer, GpeType); AcpiOsPrintf (" Registers: %u (%u GPEs)\n", GpeBlock->RegisterCount, GpeBlock->GpeCount); diff --git a/sys/contrib/dev/acpica/events/evgpe.c b/sys/contrib/dev/acpica/events/evgpe.c index 0ece12ca78c..892b2c37f52 100644 --- a/sys/contrib/dev/acpica/events/evgpe.c +++ b/sys/contrib/dev/acpica/events/evgpe.c @@ -370,7 +370,7 @@ AcpiEvGetGpeEventInfo ( 
ACPI_FUNCTION_ENTRY (); - /* A NULL GpeBlock means use the FADT-defined GPE block(s) */ + /* A NULL GpeDevice means use the FADT-defined GPE block(s) */ if (!GpeDevice) { diff --git a/sys/contrib/dev/acpica/events/evgpeblk.c b/sys/contrib/dev/acpica/events/evgpeblk.c index 29b0e711e5f..4c115fbebe5 100644 --- a/sys/contrib/dev/acpica/events/evgpeblk.c +++ b/sys/contrib/dev/acpica/events/evgpeblk.c @@ -123,28 +123,6 @@ /* Local prototypes */ -static ACPI_STATUS -AcpiEvMatchGpeMethod ( - ACPI_HANDLE ObjHandle, - UINT32 Level, - void *ObjDesc, - void **ReturnValue); - -static ACPI_STATUS -AcpiEvMatchPrwAndGpe ( - ACPI_HANDLE ObjHandle, - UINT32 Level, - void *Info, - void **ReturnValue); - -static ACPI_GPE_XRUPT_INFO * -AcpiEvGetGpeXruptBlock ( - UINT32 InterruptNumber); - -static ACPI_STATUS -AcpiEvDeleteGpeXrupt ( - ACPI_GPE_XRUPT_INFO *GpeXrupt); - static ACPI_STATUS AcpiEvInstallGpeBlock ( ACPI_GPE_BLOCK_INFO *GpeBlock, @@ -155,581 +133,6 @@ AcpiEvCreateGpeInfoBlocks ( ACPI_GPE_BLOCK_INFO *GpeBlock); -/******************************************************************************* - * - * FUNCTION: AcpiEvValidGpeEvent - * - * PARAMETERS: GpeEventInfo - Info for this GPE - * - * RETURN: TRUE if the GpeEvent is valid - * - * DESCRIPTION: Validate a GPE event. DO NOT CALL FROM INTERRUPT LEVEL. - * Should be called only when the GPE lists are semaphore locked - * and not subject to change. 
- * - ******************************************************************************/ - -BOOLEAN -AcpiEvValidGpeEvent ( - ACPI_GPE_EVENT_INFO *GpeEventInfo) -{ - ACPI_GPE_XRUPT_INFO *GpeXruptBlock; - ACPI_GPE_BLOCK_INFO *GpeBlock; - - - ACPI_FUNCTION_ENTRY (); - - - /* No need for spin lock since we are not changing any list elements */ - - /* Walk the GPE interrupt levels */ - - GpeXruptBlock = AcpiGbl_GpeXruptListHead; - while (GpeXruptBlock) - { - GpeBlock = GpeXruptBlock->GpeBlockListHead; - - /* Walk the GPE blocks on this interrupt level */ - - while (GpeBlock) - { - if ((&GpeBlock->EventInfo[0] <= GpeEventInfo) && - (&GpeBlock->EventInfo[GpeBlock->GpeCount] > GpeEventInfo)) - { - return (TRUE); - } - - GpeBlock = GpeBlock->Next; - } - - GpeXruptBlock = GpeXruptBlock->Next; - } - - return (FALSE); -} - - -/******************************************************************************* - * - * FUNCTION: AcpiEvWalkGpeList - * - * PARAMETERS: GpeWalkCallback - Routine called for each GPE block - * Context - Value passed to callback - * - * RETURN: Status - * - * DESCRIPTION: Walk the GPE lists. 
- * - ******************************************************************************/ - -ACPI_STATUS -AcpiEvWalkGpeList ( - ACPI_GPE_CALLBACK GpeWalkCallback, - void *Context) -{ - ACPI_GPE_BLOCK_INFO *GpeBlock; - ACPI_GPE_XRUPT_INFO *GpeXruptInfo; - ACPI_STATUS Status = AE_OK; - ACPI_CPU_FLAGS Flags; - - - ACPI_FUNCTION_TRACE (EvWalkGpeList); - - - Flags = AcpiOsAcquireLock (AcpiGbl_GpeLock); - - /* Walk the interrupt level descriptor list */ - - GpeXruptInfo = AcpiGbl_GpeXruptListHead; - while (GpeXruptInfo) - { - /* Walk all Gpe Blocks attached to this interrupt level */ - - GpeBlock = GpeXruptInfo->GpeBlockListHead; - while (GpeBlock) - { - /* One callback per GPE block */ - - Status = GpeWalkCallback (GpeXruptInfo, GpeBlock, Context); - if (ACPI_FAILURE (Status)) - { - if (Status == AE_CTRL_END) /* Callback abort */ - { - Status = AE_OK; - } - goto UnlockAndExit; - } - - GpeBlock = GpeBlock->Next; - } - - GpeXruptInfo = GpeXruptInfo->Next; - } - -UnlockAndExit: - AcpiOsReleaseLock (AcpiGbl_GpeLock, Flags); - return_ACPI_STATUS (Status); -} - - -/******************************************************************************* - * - * FUNCTION: AcpiEvDeleteGpeHandlers - * - * PARAMETERS: GpeXruptInfo - GPE Interrupt info - * GpeBlock - Gpe Block info - * - * RETURN: Status - * - * DESCRIPTION: Delete all Handler objects found in the GPE data structs. - * Used only prior to termination. 
- * - ******************************************************************************/ - -ACPI_STATUS -AcpiEvDeleteGpeHandlers ( - ACPI_GPE_XRUPT_INFO *GpeXruptInfo, - ACPI_GPE_BLOCK_INFO *GpeBlock, - void *Context) -{ - ACPI_GPE_EVENT_INFO *GpeEventInfo; - UINT32 i; - UINT32 j; - - - ACPI_FUNCTION_TRACE (EvDeleteGpeHandlers); - - - /* Examine each GPE Register within the block */ - - for (i = 0; i < GpeBlock->RegisterCount; i++) - { - /* Now look at the individual GPEs in this byte register */ - - for (j = 0; j < ACPI_GPE_REGISTER_WIDTH; j++) - { - GpeEventInfo = &GpeBlock->EventInfo[((ACPI_SIZE) i * - ACPI_GPE_REGISTER_WIDTH) + j]; - - if ((GpeEventInfo->Flags & ACPI_GPE_DISPATCH_MASK) == - ACPI_GPE_DISPATCH_HANDLER) - { - ACPI_FREE (GpeEventInfo->Dispatch.Handler); - GpeEventInfo->Dispatch.Handler = NULL; - GpeEventInfo->Flags &= ~ACPI_GPE_DISPATCH_MASK; - } - } - } - - return_ACPI_STATUS (AE_OK); -} - - -/******************************************************************************* - * - * FUNCTION: AcpiEvMatchGpeMethod - * - * PARAMETERS: Callback from WalkNamespace - * - * RETURN: Status - * - * DESCRIPTION: Called from AcpiWalkNamespace. Expects each object to be a - * control method under the _GPE portion of the namespace. 
- * Extract the name and GPE type from the object, saving this - * information for quick lookup during GPE dispatch - * - * The name of each GPE control method is of the form: - * "_Lxx" or "_Exx", where: - * L - means that the GPE is level triggered - * E - means that the GPE is edge triggered - * xx - is the GPE number [in HEX] - * - ******************************************************************************/ - -static ACPI_STATUS -AcpiEvMatchGpeMethod ( - ACPI_HANDLE ObjHandle, - UINT32 Level, - void *ObjDesc, - void **ReturnValue) -{ - ACPI_NAMESPACE_NODE *MethodNode = ACPI_CAST_PTR (ACPI_NAMESPACE_NODE, ObjHandle); - ACPI_GPE_BLOCK_INFO *GpeBlock = (void *) ObjDesc; - ACPI_GPE_EVENT_INFO *GpeEventInfo; - UINT32 GpeNumber; - char Name[ACPI_NAME_SIZE + 1]; - UINT8 Type; - - - ACPI_FUNCTION_TRACE (EvMatchGpeMethod); - - - /* - * Match and decode the _Lxx and _Exx GPE method names - * - * 1) Extract the method name and null terminate it - */ - ACPI_MOVE_32_TO_32 (Name, &MethodNode->Name.Integer); - Name[ACPI_NAME_SIZE] = 0; - - /* 2) Name must begin with an underscore */ - - if (Name[0] != '_') - { - return_ACPI_STATUS (AE_OK); /* Ignore this method */ - } - - /* - * 3) Edge/Level determination is based on the 2nd character - * of the method name - * - * NOTE: Default GPE type is RUNTIME only. Later, if a _PRW object is - * found that points to this GPE, the ACPI_GPE_CAN_WAKE flag is set. 
- */ - switch (Name[1]) - { - case 'L': - Type = ACPI_GPE_LEVEL_TRIGGERED; - break; - - case 'E': - Type = ACPI_GPE_EDGE_TRIGGERED; - break; - - default: - /* Unknown method type, just ignore it */ - - ACPI_DEBUG_PRINT ((ACPI_DB_LOAD, - "Ignoring unknown GPE method type: %s " - "(name not of form _Lxx or _Exx)", Name)); - return_ACPI_STATUS (AE_OK); - } - - /* 4) The last two characters of the name are the hex GPE Number */ - - GpeNumber = ACPI_STRTOUL (&Name[2], NULL, 16); - if (GpeNumber == ACPI_UINT32_MAX) - { - /* Conversion failed; invalid method, just ignore it */ - - ACPI_DEBUG_PRINT ((ACPI_DB_LOAD, - "Could not extract GPE number from name: %s " - "(name is not of form _Lxx or _Exx)", Name)); - return_ACPI_STATUS (AE_OK); - } - - /* Ensure that we have a valid GPE number for this GPE block */ - - GpeEventInfo = AcpiEvLowGetGpeInfo (GpeNumber, GpeBlock); - if (!GpeEventInfo) - { - /* - * This GpeNumber is not valid for this GPE block, just ignore it. - * However, it may be valid for a different GPE block, since GPE0 - * and GPE1 methods both appear under \_GPE. - */ - return_ACPI_STATUS (AE_OK); - } - - /* - * Add the GPE information from above to the GpeEventInfo block for - * use during dispatch of this GPE. - */ - GpeEventInfo->Flags = (UINT8) (Type | ACPI_GPE_DISPATCH_METHOD); - GpeEventInfo->Dispatch.MethodNode = MethodNode; - - ACPI_DEBUG_PRINT ((ACPI_DB_LOAD, - "Registered GPE method %s as GPE number 0x%.2X\n", - Name, GpeNumber)); - return_ACPI_STATUS (AE_OK); -} - - -/******************************************************************************* - * - * FUNCTION: AcpiEvMatchPrwAndGpe - * - * PARAMETERS: Callback from WalkNamespace - * - * RETURN: Status. NOTE: We ignore errors so that the _PRW walk is - * not aborted on a single _PRW failure. - * - * DESCRIPTION: Called from AcpiWalkNamespace. Expects each object to be a - * Device. Run the _PRW method. If present, extract the GPE - * number and mark the GPE as a CAN_WAKE GPE. 
- * - ******************************************************************************/ - -static ACPI_STATUS -AcpiEvMatchPrwAndGpe ( - ACPI_HANDLE ObjHandle, - UINT32 Level, - void *Info, - void **ReturnValue) -{ - ACPI_GPE_WALK_INFO *GpeInfo = (void *) Info; - ACPI_NAMESPACE_NODE *GpeDevice; - ACPI_GPE_BLOCK_INFO *GpeBlock; - ACPI_NAMESPACE_NODE *TargetGpeDevice; - ACPI_GPE_EVENT_INFO *GpeEventInfo; - ACPI_OPERAND_OBJECT *PkgDesc; - ACPI_OPERAND_OBJECT *ObjDesc; - UINT32 GpeNumber; - ACPI_STATUS Status; - - - ACPI_FUNCTION_TRACE (EvMatchPrwAndGpe); - - - /* Check for a _PRW method under this device */ - - Status = AcpiUtEvaluateObject (ObjHandle, METHOD_NAME__PRW, - ACPI_BTYPE_PACKAGE, &PkgDesc); - if (ACPI_FAILURE (Status)) - { - /* Ignore all errors from _PRW, we don't want to abort the walk */ - - return_ACPI_STATUS (AE_OK); - } - - /* The returned _PRW package must have at least two elements */ - - if (PkgDesc->Package.Count < 2) - { - goto Cleanup; - } - - /* Extract pointers from the input context */ - - GpeDevice = GpeInfo->GpeDevice; - GpeBlock = GpeInfo->GpeBlock; - - /* - * The _PRW object must return a package, we are only interested in the - * first element - */ - ObjDesc = PkgDesc->Package.Elements[0]; - - if (ObjDesc->Common.Type == ACPI_TYPE_INTEGER) - { - /* Use FADT-defined GPE device (from definition of _PRW) */ - - TargetGpeDevice = AcpiGbl_FadtGpeDevice; - - /* Integer is the GPE number in the FADT described GPE blocks */ - - GpeNumber = (UINT32) ObjDesc->Integer.Value; - } - else if (ObjDesc->Common.Type == ACPI_TYPE_PACKAGE) - { - /* Package contains a GPE reference and GPE number within a GPE block */ - - if ((ObjDesc->Package.Count < 2) || - ((ObjDesc->Package.Elements[0])->Common.Type != - ACPI_TYPE_LOCAL_REFERENCE) || - ((ObjDesc->Package.Elements[1])->Common.Type != - ACPI_TYPE_INTEGER)) - { - goto Cleanup; - } - - /* Get GPE block reference and decode */ - - TargetGpeDevice = ObjDesc->Package.Elements[0]->Reference.Node; - GpeNumber = 
(UINT32) ObjDesc->Package.Elements[1]->Integer.Value; - } - else - { - /* Unknown type, just ignore it */ - - goto Cleanup; - } - - /* - * Is this GPE within this block? - * - * TRUE if and only if these conditions are true: - * 1) The GPE devices match. - * 2) The GPE index(number) is within the range of the Gpe Block - * associated with the GPE device. - */ - if (GpeDevice != TargetGpeDevice) - { - goto Cleanup; - } - - GpeEventInfo = AcpiEvLowGetGpeInfo (GpeNumber, GpeBlock); - if (GpeEventInfo) - { - /* This GPE can wake the system */ - - GpeEventInfo->Flags |= ACPI_GPE_CAN_WAKE; - } - -Cleanup: - AcpiUtRemoveReference (PkgDesc); - return_ACPI_STATUS (AE_OK); -} - - -/******************************************************************************* - * - * FUNCTION: AcpiEvGetGpeXruptBlock - * - * PARAMETERS: InterruptNumber - Interrupt for a GPE block - * - * RETURN: A GPE interrupt block - * - * DESCRIPTION: Get or Create a GPE interrupt block. There is one interrupt - * block per unique interrupt level used for GPEs. Should be - * called only when the GPE lists are semaphore locked and not - * subject to change. 
- * - ******************************************************************************/ - -static ACPI_GPE_XRUPT_INFO * -AcpiEvGetGpeXruptBlock ( - UINT32 InterruptNumber) -{ - ACPI_GPE_XRUPT_INFO *NextGpeXrupt; - ACPI_GPE_XRUPT_INFO *GpeXrupt; - ACPI_STATUS Status; - ACPI_CPU_FLAGS Flags; - - - ACPI_FUNCTION_TRACE (EvGetGpeXruptBlock); - - - /* No need for lock since we are not changing any list elements here */ - - NextGpeXrupt = AcpiGbl_GpeXruptListHead; - while (NextGpeXrupt) - { - if (NextGpeXrupt->InterruptNumber == InterruptNumber) - { - return_PTR (NextGpeXrupt); - } - - NextGpeXrupt = NextGpeXrupt->Next; - } - - /* Not found, must allocate a new xrupt descriptor */ - - GpeXrupt = ACPI_ALLOCATE_ZEROED (sizeof (ACPI_GPE_XRUPT_INFO)); - if (!GpeXrupt) - { - return_PTR (NULL); - } - - GpeXrupt->InterruptNumber = InterruptNumber; - - /* Install new interrupt descriptor with spin lock */ - - Flags = AcpiOsAcquireLock (AcpiGbl_GpeLock); - if (AcpiGbl_GpeXruptListHead) - { - NextGpeXrupt = AcpiGbl_GpeXruptListHead; - while (NextGpeXrupt->Next) - { - NextGpeXrupt = NextGpeXrupt->Next; - } - - NextGpeXrupt->Next = GpeXrupt; - GpeXrupt->Previous = NextGpeXrupt; - } - else - { - AcpiGbl_GpeXruptListHead = GpeXrupt; - } - AcpiOsReleaseLock (AcpiGbl_GpeLock, Flags); - - /* Install new interrupt handler if not SCI_INT */ - - if (InterruptNumber != AcpiGbl_FADT.SciInterrupt) - { - Status = AcpiOsInstallInterruptHandler (InterruptNumber, - AcpiEvGpeXruptHandler, GpeXrupt); - if (ACPI_FAILURE (Status)) - { - ACPI_ERROR ((AE_INFO, - "Could not install GPE interrupt handler at level 0x%X", - InterruptNumber)); - return_PTR (NULL); - } - } - - return_PTR (GpeXrupt); -} - - -/******************************************************************************* - * - * FUNCTION: AcpiEvDeleteGpeXrupt - * - * PARAMETERS: GpeXrupt - A GPE interrupt info block - * - * RETURN: Status - * - * DESCRIPTION: Remove and free a GpeXrupt block. 
Remove an associated - * interrupt handler if not the SCI interrupt. - * - ******************************************************************************/ - -static ACPI_STATUS -AcpiEvDeleteGpeXrupt ( - ACPI_GPE_XRUPT_INFO *GpeXrupt) -{ - ACPI_STATUS Status; - ACPI_CPU_FLAGS Flags; - - - ACPI_FUNCTION_TRACE (EvDeleteGpeXrupt); - - - /* We never want to remove the SCI interrupt handler */ - - if (GpeXrupt->InterruptNumber == AcpiGbl_FADT.SciInterrupt) - { - GpeXrupt->GpeBlockListHead = NULL; - return_ACPI_STATUS (AE_OK); - } - - /* Disable this interrupt */ - - Status = AcpiOsRemoveInterruptHandler ( - GpeXrupt->InterruptNumber, AcpiEvGpeXruptHandler); - if (ACPI_FAILURE (Status)) - { - return_ACPI_STATUS (Status); - } - - /* Unlink the interrupt block with lock */ - - Flags = AcpiOsAcquireLock (AcpiGbl_GpeLock); - if (GpeXrupt->Previous) - { - GpeXrupt->Previous->Next = GpeXrupt->Next; - } - else - { - /* No previous, update list head */ - - AcpiGbl_GpeXruptListHead = GpeXrupt->Next; - } - - if (GpeXrupt->Next) - { - GpeXrupt->Next->Previous = GpeXrupt->Previous; - } - AcpiOsReleaseLock (AcpiGbl_GpeLock, Flags); - - /* Free the block */ - - ACPI_FREE (GpeXrupt); - return_ACPI_STATUS (AE_OK); -} - - /******************************************************************************* * * FUNCTION: AcpiEvInstallGpeBlock @@ -1041,6 +444,7 @@ AcpiEvCreateGpeBlock ( { ACPI_STATUS Status; ACPI_GPE_BLOCK_INFO *GpeBlock; + ACPI_GPE_WALK_INFO WalkInfo; ACPI_FUNCTION_TRACE (EvCreateGpeBlock); @@ -1089,11 +493,16 @@ AcpiEvCreateGpeBlock ( return_ACPI_STATUS (Status); } - /* Find all GPE methods (_Lxx, _Exx) for this block */ + /* Find all GPE methods (_Lxx or_Exx) for this block */ + + WalkInfo.GpeBlock = GpeBlock; + WalkInfo.GpeDevice = GpeDevice; + WalkInfo.EnableThisGpe = FALSE; + WalkInfo.ExecuteByOwnerId = FALSE; Status = AcpiNsWalkNamespace (ACPI_TYPE_METHOD, GpeDevice, ACPI_UINT32_MAX, ACPI_NS_WALK_NO_UNLOCK, - AcpiEvMatchGpeMethod, NULL, GpeBlock, NULL); + 
AcpiEvMatchGpeMethod, NULL, &WalkInfo, NULL); /* Return the new block */ @@ -1139,7 +548,7 @@ AcpiEvInitializeGpeBlock ( { ACPI_STATUS Status; ACPI_GPE_EVENT_INFO *GpeEventInfo; - ACPI_GPE_WALK_INFO GpeInfo; + ACPI_GPE_WALK_INFO WalkInfo; UINT32 WakeGpeCount; UINT32 GpeEnabledCount; UINT32 GpeIndex; @@ -1170,12 +579,13 @@ AcpiEvInitializeGpeBlock ( * definition a wake GPE and will not be enabled while the machine * is running. */ - GpeInfo.GpeBlock = GpeBlock; - GpeInfo.GpeDevice = GpeDevice; + WalkInfo.GpeBlock = GpeBlock; + WalkInfo.GpeDevice = GpeDevice; + WalkInfo.ExecuteByOwnerId = FALSE; Status = AcpiNsWalkNamespace (ACPI_TYPE_DEVICE, ACPI_ROOT_OBJECT, ACPI_UINT32_MAX, ACPI_NS_WALK_UNLOCK, - AcpiEvMatchPrwAndGpe, NULL, &GpeInfo, NULL); + AcpiEvMatchPrwAndGpe, NULL, &WalkInfo, NULL); if (ACPI_FAILURE (Status)) { ACPI_EXCEPTION ((AE_INFO, Status, "While executing _PRW methods")); @@ -1238,165 +648,13 @@ AcpiEvInitializeGpeBlock ( } } - ACPI_DEBUG_PRINT ((ACPI_DB_INIT, - "Found %u Wake, Enabled %u Runtime GPEs in this block\n", - WakeGpeCount, GpeEnabledCount)); - - return_ACPI_STATUS (AE_OK); -} - - -/******************************************************************************* - * - * FUNCTION: AcpiEvGpeInitialize - * - * PARAMETERS: None - * - * RETURN: Status - * - * DESCRIPTION: Initialize the GPE data structures - * - ******************************************************************************/ - -ACPI_STATUS -AcpiEvGpeInitialize ( - void) -{ - UINT32 RegisterCount0 = 0; - UINT32 RegisterCount1 = 0; - UINT32 GpeNumberMax = 0; - ACPI_STATUS Status; - - - ACPI_FUNCTION_TRACE (EvGpeInitialize); - - - Status = AcpiUtAcquireMutex (ACPI_MTX_NAMESPACE); - if (ACPI_FAILURE (Status)) + if (GpeEnabledCount || WakeGpeCount) { - return_ACPI_STATUS (Status); - } - - /* - * Initialize the GPE Block(s) defined in the FADT - * - * Why the GPE register block lengths are divided by 2: From the ACPI - * Spec, section "General-Purpose Event Registers", we have: - * - * 
"Each register block contains two registers of equal length - * GPEx_STS and GPEx_EN (where x is 0 or 1). The length of the - * GPE0_STS and GPE0_EN registers is equal to half the GPE0_LEN - * The length of the GPE1_STS and GPE1_EN registers is equal to - * half the GPE1_LEN. If a generic register block is not supported - * then its respective block pointer and block length values in the - * FADT table contain zeros. The GPE0_LEN and GPE1_LEN do not need - * to be the same size." - */ - - /* - * Determine the maximum GPE number for this machine. - * - * Note: both GPE0 and GPE1 are optional, and either can exist without - * the other. - * - * If EITHER the register length OR the block address are zero, then that - * particular block is not supported. - */ - if (AcpiGbl_FADT.Gpe0BlockLength && - AcpiGbl_FADT.XGpe0Block.Address) - { - /* GPE block 0 exists (has both length and address > 0) */ - - RegisterCount0 = (UINT16) (AcpiGbl_FADT.Gpe0BlockLength / 2); - - GpeNumberMax = (RegisterCount0 * ACPI_GPE_REGISTER_WIDTH) - 1; - - /* Install GPE Block 0 */ - - Status = AcpiEvCreateGpeBlock (AcpiGbl_FadtGpeDevice, - &AcpiGbl_FADT.XGpe0Block, RegisterCount0, 0, - AcpiGbl_FADT.SciInterrupt, &AcpiGbl_GpeFadtBlocks[0]); - - if (ACPI_FAILURE (Status)) - { - ACPI_EXCEPTION ((AE_INFO, Status, - "Could not create GPE Block 0")); - } - } - - if (AcpiGbl_FADT.Gpe1BlockLength && - AcpiGbl_FADT.XGpe1Block.Address) - { - /* GPE block 1 exists (has both length and address > 0) */ - - RegisterCount1 = (UINT16) (AcpiGbl_FADT.Gpe1BlockLength / 2); - - /* Check for GPE0/GPE1 overlap (if both banks exist) */ - - if ((RegisterCount0) && - (GpeNumberMax >= AcpiGbl_FADT.Gpe1Base)) - { - ACPI_ERROR ((AE_INFO, - "GPE0 block (GPE 0 to %u) overlaps the GPE1 block " - "(GPE %u to %u) - Ignoring GPE1", - GpeNumberMax, AcpiGbl_FADT.Gpe1Base, - AcpiGbl_FADT.Gpe1Base + - ((RegisterCount1 * ACPI_GPE_REGISTER_WIDTH) - 1))); - - /* Ignore GPE1 block by setting the register count to zero */ - - 
RegisterCount1 = 0; - } - else - { - /* Install GPE Block 1 */ - - Status = AcpiEvCreateGpeBlock (AcpiGbl_FadtGpeDevice, - &AcpiGbl_FADT.XGpe1Block, RegisterCount1, - AcpiGbl_FADT.Gpe1Base, - AcpiGbl_FADT.SciInterrupt, &AcpiGbl_GpeFadtBlocks[1]); - - if (ACPI_FAILURE (Status)) - { - ACPI_EXCEPTION ((AE_INFO, Status, - "Could not create GPE Block 1")); - } - - /* - * GPE0 and GPE1 do not have to be contiguous in the GPE number - * space. However, GPE0 always starts at GPE number zero. - */ - GpeNumberMax = AcpiGbl_FADT.Gpe1Base + - ((RegisterCount1 * ACPI_GPE_REGISTER_WIDTH) - 1); - } - } - - /* Exit if there are no GPE registers */ - - if ((RegisterCount0 + RegisterCount1) == 0) - { - /* GPEs are not required by ACPI, this is OK */ - ACPI_DEBUG_PRINT ((ACPI_DB_INIT, - "There are no GPE blocks defined in the FADT\n")); - Status = AE_OK; - goto Cleanup; + "Enabled %u Runtime GPEs, added %u Wake GPEs in this block\n", + GpeEnabledCount, WakeGpeCount)); } - /* Check for Max GPE number out-of-range */ - - if (GpeNumberMax > ACPI_GPE_MAX) - { - ACPI_ERROR ((AE_INFO, - "Maximum GPE number from FADT is too large: 0x%X", - GpeNumberMax)); - Status = AE_BAD_VALUE; - goto Cleanup; - } - -Cleanup: - (void) AcpiUtReleaseMutex (ACPI_MTX_NAMESPACE); return_ACPI_STATUS (AE_OK); } - diff --git a/sys/contrib/dev/acpica/events/evgpeinit.c b/sys/contrib/dev/acpica/events/evgpeinit.c new file mode 100644 index 00000000000..7f41eb7ecc7 --- /dev/null +++ b/sys/contrib/dev/acpica/events/evgpeinit.c @@ -0,0 +1,763 @@ +/****************************************************************************** + * + * Module Name: evgpeinit - System GPE initialization and update + * + *****************************************************************************/ + +/****************************************************************************** + * + * 1. Copyright Notice + * + * Some or all of this work - Copyright (c) 1999 - 2010, Intel Corp. + * All rights reserved. + * + * 2. License + * + * 2.1. 
This is your license from Intel Corp. under its intellectual property + * rights. You may have additional license terms from the party that provided + * you this software, covering your right to use that party's intellectual + * property rights. + * + * 2.2. Intel grants, free of charge, to any person ("Licensee") obtaining a + * copy of the source code appearing in this file ("Covered Code") an + * irrevocable, perpetual, worldwide license under Intel's copyrights in the + * base code distributed originally by Intel ("Original Intel Code") to copy, + * make derivatives, distribute, use and display any portion of the Covered + * Code in any form, with the right to sublicense such rights; and + * + * 2.3. Intel grants Licensee a non-exclusive and non-transferable patent + * license (with the right to sublicense), under only those claims of Intel + * patents that are infringed by the Original Intel Code, to make, use, sell, + * offer to sell, and import the Covered Code and derivative works thereof + * solely to the minimum extent necessary to exercise the above copyright + * license, and in no event shall the patent license extend to any additions + * to or modifications of the Original Intel Code. No other license or right + * is granted directly or by implication, estoppel or otherwise; + * + * The above copyright and patent license is granted only if the following + * conditions are met: + * + * 3. Conditions + * + * 3.1. Redistribution of Source with Rights to Further Distribute Source. + * Redistribution of source code of any substantial portion of the Covered + * Code or modification with rights to further distribute source must include + * the above Copyright Notice, the above License, this list of Conditions, + * and the following Disclaimer and Export Compliance provision. 
In addition, + * Licensee must cause all Covered Code to which Licensee contributes to + * contain a file documenting the changes Licensee made to create that Covered + * Code and the date of any change. Licensee must include in that file the + * documentation of any changes made by any predecessor Licensee. Licensee + * must include a prominent statement that the modification is derived, + * directly or indirectly, from Original Intel Code. + * + * 3.2. Redistribution of Source with no Rights to Further Distribute Source. + * Redistribution of source code of any substantial portion of the Covered + * Code or modification without rights to further distribute source must + * include the following Disclaimer and Export Compliance provision in the + * documentation and/or other materials provided with distribution. In + * addition, Licensee may not authorize further sublicense of source of any + * portion of the Covered Code, and must include terms to the effect that the + * license from Licensee to its licensee is limited to the intellectual + * property embodied in the software Licensee provides to its licensee, and + * not to intellectual property embodied in modifications its licensee may + * make. + * + * 3.3. Redistribution of Executable. Redistribution in executable form of any + * substantial portion of the Covered Code or modification must reproduce the + * above Copyright Notice, and the following Disclaimer and Export Compliance + * provision in the documentation and/or other materials provided with the + * distribution. + * + * 3.4. Intel retains all right, title, and interest in and to the Original + * Intel Code. + * + * 3.5. Neither the name Intel nor any other trademark owned or controlled by + * Intel shall be used in advertising or otherwise to promote the sale, use or + * other dealings in products derived from or relating to the Covered Code + * without prior written authorization from Intel. + * + * 4. Disclaimer and Export Compliance + * + * 4.1. 
INTEL MAKES NO WARRANTY OF ANY KIND REGARDING ANY SOFTWARE PROVIDED + * HERE. ANY SOFTWARE ORIGINATING FROM INTEL OR DERIVED FROM INTEL SOFTWARE + * IS PROVIDED "AS IS," AND INTEL WILL NOT PROVIDE ANY SUPPORT, ASSISTANCE, + * INSTALLATION, TRAINING OR OTHER SERVICES. INTEL WILL NOT PROVIDE ANY + * UPDATES, ENHANCEMENTS OR EXTENSIONS. INTEL SPECIFICALLY DISCLAIMS ANY + * IMPLIED WARRANTIES OF MERCHANTABILITY, NONINFRINGEMENT AND FITNESS FOR A + * PARTICULAR PURPOSE. + * + * 4.2. IN NO EVENT SHALL INTEL HAVE ANY LIABILITY TO LICENSEE, ITS LICENSEES + * OR ANY OTHER THIRD PARTY, FOR ANY LOST PROFITS, LOST DATA, LOSS OF USE OR + * COSTS OF PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES, OR FOR ANY INDIRECT, + * SPECIAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THIS AGREEMENT, UNDER ANY + * CAUSE OF ACTION OR THEORY OF LIABILITY, AND IRRESPECTIVE OF WHETHER INTEL + * HAS ADVANCE NOTICE OF THE POSSIBILITY OF SUCH DAMAGES. THESE LIMITATIONS + * SHALL APPLY NOTWITHSTANDING THE FAILURE OF THE ESSENTIAL PURPOSE OF ANY + * LIMITED REMEDY. + * + * 4.3. Licensee shall not export, either directly or indirectly, any of this + * software or system incorporating such software without first obtaining any + * required license or other approval from the U. S. Department of Commerce or + * any other agency or department of the United States Government. In the + * event Licensee exports any such software from the United States or + * re-exports any such software from a foreign destination, Licensee shall + * ensure that the distribution and export/re-export of the software is in + * compliance with all laws, regulations, orders, or other restrictions of the + * U.S. Export Administration Regulations. 
Licensee agrees that neither it nor + * any of its subsidiaries will export/re-export any technical data, process, + * software, or service, directly or indirectly, to any country for which the + * United States government or any agency thereof requires an export license, + * other governmental approval, or letter of assurance, without first obtaining + * such license, approval or letter. + * + *****************************************************************************/ + + +#include +#include +#include +#include +#include + +#define _COMPONENT ACPI_EVENTS + ACPI_MODULE_NAME ("evgpeinit") + + +/******************************************************************************* + * + * FUNCTION: AcpiEvGpeInitialize + * + * PARAMETERS: None + * + * RETURN: Status + * + * DESCRIPTION: Initialize the GPE data structures and the FADT GPE 0/1 blocks + * + ******************************************************************************/ + +ACPI_STATUS +AcpiEvGpeInitialize ( + void) +{ + UINT32 RegisterCount0 = 0; + UINT32 RegisterCount1 = 0; + UINT32 GpeNumberMax = 0; + ACPI_STATUS Status; + + + ACPI_FUNCTION_TRACE (EvGpeInitialize); + + + Status = AcpiUtAcquireMutex (ACPI_MTX_NAMESPACE); + if (ACPI_FAILURE (Status)) + { + return_ACPI_STATUS (Status); + } + + /* + * Initialize the GPE Block(s) defined in the FADT + * + * Why the GPE register block lengths are divided by 2: From the ACPI + * Spec, section "General-Purpose Event Registers", we have: + * + * "Each register block contains two registers of equal length + * GPEx_STS and GPEx_EN (where x is 0 or 1). The length of the + * GPE0_STS and GPE0_EN registers is equal to half the GPE0_LEN + * The length of the GPE1_STS and GPE1_EN registers is equal to + * half the GPE1_LEN. If a generic register block is not supported + * then its respective block pointer and block length values in the + * FADT table contain zeros. The GPE0_LEN and GPE1_LEN do not need + * to be the same size." 
+ */ + + /* + * Determine the maximum GPE number for this machine. + * + * Note: both GPE0 and GPE1 are optional, and either can exist without + * the other. + * + * If EITHER the register length OR the block address are zero, then that + * particular block is not supported. + */ + if (AcpiGbl_FADT.Gpe0BlockLength && + AcpiGbl_FADT.XGpe0Block.Address) + { + /* GPE block 0 exists (has both length and address > 0) */ + + RegisterCount0 = (UINT16) (AcpiGbl_FADT.Gpe0BlockLength / 2); + + GpeNumberMax = (RegisterCount0 * ACPI_GPE_REGISTER_WIDTH) - 1; + + /* Install GPE Block 0 */ + + Status = AcpiEvCreateGpeBlock (AcpiGbl_FadtGpeDevice, + &AcpiGbl_FADT.XGpe0Block, RegisterCount0, 0, + AcpiGbl_FADT.SciInterrupt, &AcpiGbl_GpeFadtBlocks[0]); + + if (ACPI_FAILURE (Status)) + { + ACPI_EXCEPTION ((AE_INFO, Status, + "Could not create GPE Block 0")); + } + } + + if (AcpiGbl_FADT.Gpe1BlockLength && + AcpiGbl_FADT.XGpe1Block.Address) + { + /* GPE block 1 exists (has both length and address > 0) */ + + RegisterCount1 = (UINT16) (AcpiGbl_FADT.Gpe1BlockLength / 2); + + /* Check for GPE0/GPE1 overlap (if both banks exist) */ + + if ((RegisterCount0) && + (GpeNumberMax >= AcpiGbl_FADT.Gpe1Base)) + { + ACPI_ERROR ((AE_INFO, + "GPE0 block (GPE 0 to %u) overlaps the GPE1 block " + "(GPE %u to %u) - Ignoring GPE1", + GpeNumberMax, AcpiGbl_FADT.Gpe1Base, + AcpiGbl_FADT.Gpe1Base + + ((RegisterCount1 * ACPI_GPE_REGISTER_WIDTH) - 1))); + + /* Ignore GPE1 block by setting the register count to zero */ + + RegisterCount1 = 0; + } + else + { + /* Install GPE Block 1 */ + + Status = AcpiEvCreateGpeBlock (AcpiGbl_FadtGpeDevice, + &AcpiGbl_FADT.XGpe1Block, RegisterCount1, + AcpiGbl_FADT.Gpe1Base, + AcpiGbl_FADT.SciInterrupt, &AcpiGbl_GpeFadtBlocks[1]); + + if (ACPI_FAILURE (Status)) + { + ACPI_EXCEPTION ((AE_INFO, Status, + "Could not create GPE Block 1")); + } + + /* + * GPE0 and GPE1 do not have to be contiguous in the GPE number + * space. However, GPE0 always starts at GPE number zero. 
+ */ + GpeNumberMax = AcpiGbl_FADT.Gpe1Base + + ((RegisterCount1 * ACPI_GPE_REGISTER_WIDTH) - 1); + } + } + + /* Exit if there are no GPE registers */ + + if ((RegisterCount0 + RegisterCount1) == 0) + { + /* GPEs are not required by ACPI, this is OK */ + + ACPI_DEBUG_PRINT ((ACPI_DB_INIT, + "There are no GPE blocks defined in the FADT\n")); + Status = AE_OK; + goto Cleanup; + } + + /* Check for Max GPE number out-of-range */ + + if (GpeNumberMax > ACPI_GPE_MAX) + { + ACPI_ERROR ((AE_INFO, + "Maximum GPE number from FADT is too large: 0x%X", + GpeNumberMax)); + Status = AE_BAD_VALUE; + goto Cleanup; + } + +Cleanup: + (void) AcpiUtReleaseMutex (ACPI_MTX_NAMESPACE); + return_ACPI_STATUS (AE_OK); +} + + +/******************************************************************************* + * + * FUNCTION: AcpiEvUpdateGpes + * + * PARAMETERS: TableOwnerId - ID of the newly-loaded ACPI table + * + * RETURN: None + * + * DESCRIPTION: Check for new GPE methods (_Lxx/_Exx) made available as a + * result of a Load() or LoadTable() operation. If new GPE + * methods have been installed, register the new methods and + * enable any runtime GPEs that are associated with them. Also, + * run any newly loaded _PRW methods in order to discover any + * new CAN_WAKE GPEs. + * + ******************************************************************************/ + +void +AcpiEvUpdateGpes ( + ACPI_OWNER_ID TableOwnerId) +{ + ACPI_GPE_XRUPT_INFO *GpeXruptInfo; + ACPI_GPE_BLOCK_INFO *GpeBlock; + ACPI_GPE_WALK_INFO WalkInfo; + ACPI_STATUS Status = AE_OK; + UINT32 NewWakeGpeCount = 0; + + + /* We will examine only _PRW/_Lxx/_Exx methods owned by this table */ + + WalkInfo.OwnerId = TableOwnerId; + WalkInfo.ExecuteByOwnerId = TRUE; + WalkInfo.Count = 0; + + if (AcpiGbl_LeaveWakeGpesDisabled) + { + /* + * 1) Run any newly-loaded _PRW methods to find any GPEs that + * can now be marked as CAN_WAKE GPEs. 
Note: We must run the + * _PRW methods before we process the _Lxx/_Exx methods because + * we will enable all runtime GPEs associated with the new + * _Lxx/_Exx methods at the time we process those methods. + * + * Unlock interpreter so that we can run the _PRW methods. + */ + WalkInfo.GpeBlock = NULL; + WalkInfo.GpeDevice = NULL; + + AcpiExExitInterpreter (); + + Status = AcpiNsWalkNamespace (ACPI_TYPE_DEVICE, ACPI_ROOT_OBJECT, + ACPI_UINT32_MAX, ACPI_NS_WALK_NO_UNLOCK, + AcpiEvMatchPrwAndGpe, NULL, &WalkInfo, NULL); + if (ACPI_FAILURE (Status)) + { + ACPI_EXCEPTION ((AE_INFO, Status, + "While executing _PRW methods")); + } + + AcpiExEnterInterpreter (); + NewWakeGpeCount = WalkInfo.Count; + } + + /* + * 2) Find any _Lxx/_Exx GPE methods that have just been loaded. + * + * Any GPEs that correspond to new _Lxx/_Exx methods and are not + * marked as CAN_WAKE are immediately enabled. + * + * Examine the namespace underneath each GpeDevice within the + * GpeBlock lists. + */ + Status = AcpiUtAcquireMutex (ACPI_MTX_EVENTS); + if (ACPI_FAILURE (Status)) + { + return; + } + + WalkInfo.Count = 0; + WalkInfo.EnableThisGpe = TRUE; + + /* Walk the interrupt level descriptor list */ + + GpeXruptInfo = AcpiGbl_GpeXruptListHead; + while (GpeXruptInfo) + { + /* Walk all Gpe Blocks attached to this interrupt level */ + + GpeBlock = GpeXruptInfo->GpeBlockListHead; + while (GpeBlock) + { + WalkInfo.GpeBlock = GpeBlock; + WalkInfo.GpeDevice = GpeBlock->Node; + + Status = AcpiNsWalkNamespace (ACPI_TYPE_METHOD, + WalkInfo.GpeDevice, ACPI_UINT32_MAX, + ACPI_NS_WALK_NO_UNLOCK, AcpiEvMatchGpeMethod, + NULL, &WalkInfo, NULL); + if (ACPI_FAILURE (Status)) + { + ACPI_EXCEPTION ((AE_INFO, Status, + "While decoding _Lxx/_Exx methods")); + } + + GpeBlock = GpeBlock->Next; + } + + GpeXruptInfo = GpeXruptInfo->Next; + } + + if (WalkInfo.Count || NewWakeGpeCount) + { + ACPI_INFO ((AE_INFO, + "Enabled %u new runtime GPEs, added %u new wakeup GPEs", + WalkInfo.Count, NewWakeGpeCount)); + } + + 
(void) AcpiUtReleaseMutex (ACPI_MTX_EVENTS); + return; +} + + +/******************************************************************************* + * + * FUNCTION: AcpiEvMatchGpeMethod + * + * PARAMETERS: Callback from WalkNamespace + * + * RETURN: Status + * + * DESCRIPTION: Called from AcpiWalkNamespace. Expects each object to be a + * control method under the _GPE portion of the namespace. + * Extract the name and GPE type from the object, saving this + * information for quick lookup during GPE dispatch. Allows a + * per-OwnerId evaluation if ExecuteByOwnerId is TRUE in the + * WalkInfo parameter block. + * + * The name of each GPE control method is of the form: + * "_Lxx" or "_Exx", where: + * L - means that the GPE is level triggered + * E - means that the GPE is edge triggered + * xx - is the GPE number [in HEX] + * + * If WalkInfo->ExecuteByOwnerId is TRUE, we only execute examine GPE methods + * with that owner. + * If WalkInfo->EnableThisGpe is TRUE, the GPE that is referred to by a GPE + * method is immediately enabled (Used for Load/LoadTable operators) + * + ******************************************************************************/ + +ACPI_STATUS +AcpiEvMatchGpeMethod ( + ACPI_HANDLE ObjHandle, + UINT32 Level, + void *Context, + void **ReturnValue) +{ + ACPI_NAMESPACE_NODE *MethodNode = ACPI_CAST_PTR (ACPI_NAMESPACE_NODE, ObjHandle); + ACPI_GPE_WALK_INFO *WalkInfo = ACPI_CAST_PTR (ACPI_GPE_WALK_INFO, Context); + ACPI_GPE_EVENT_INFO *GpeEventInfo; + ACPI_NAMESPACE_NODE *GpeDevice; + ACPI_STATUS Status; + UINT32 GpeNumber; + char Name[ACPI_NAME_SIZE + 1]; + UINT8 Type; + + + ACPI_FUNCTION_TRACE (EvMatchGpeMethod); + + + /* Check if requested OwnerId matches this OwnerId */ + + if ((WalkInfo->ExecuteByOwnerId) && + (MethodNode->OwnerId != WalkInfo->OwnerId)) + { + return_ACPI_STATUS (AE_OK); + } + + /* + * Match and decode the _Lxx and _Exx GPE method names + * + * 1) Extract the method name and null terminate it + */ + ACPI_MOVE_32_TO_32 (Name, 
&MethodNode->Name.Integer); + Name[ACPI_NAME_SIZE] = 0; + + /* 2) Name must begin with an underscore */ + + if (Name[0] != '_') + { + return_ACPI_STATUS (AE_OK); /* Ignore this method */ + } + + /* + * 3) Edge/Level determination is based on the 2nd character + * of the method name + * + * NOTE: Default GPE type is RUNTIME only. Later, if a _PRW object is + * found that points to this GPE, the ACPI_GPE_CAN_WAKE flag is set. + */ + switch (Name[1]) + { + case 'L': + Type = ACPI_GPE_LEVEL_TRIGGERED; + break; + + case 'E': + Type = ACPI_GPE_EDGE_TRIGGERED; + break; + + default: + /* Unknown method type, just ignore it */ + + ACPI_DEBUG_PRINT ((ACPI_DB_LOAD, + "Ignoring unknown GPE method type: %s " + "(name not of form _Lxx or _Exx)", Name)); + return_ACPI_STATUS (AE_OK); + } + + /* 4) The last two characters of the name are the hex GPE Number */ + + GpeNumber = ACPI_STRTOUL (&Name[2], NULL, 16); + if (GpeNumber == ACPI_UINT32_MAX) + { + /* Conversion failed; invalid method, just ignore it */ + + ACPI_DEBUG_PRINT ((ACPI_DB_LOAD, + "Could not extract GPE number from name: %s " + "(name is not of form _Lxx or _Exx)", Name)); + return_ACPI_STATUS (AE_OK); + } + + /* Ensure that we have a valid GPE number for this GPE block */ + + GpeEventInfo = AcpiEvLowGetGpeInfo (GpeNumber, WalkInfo->GpeBlock); + if (!GpeEventInfo) + { + /* + * This GpeNumber is not valid for this GPE block, just ignore it. + * However, it may be valid for a different GPE block, since GPE0 + * and GPE1 methods both appear under \_GPE. + */ + return_ACPI_STATUS (AE_OK); + } + + if ((GpeEventInfo->Flags & ACPI_GPE_DISPATCH_MASK) == + ACPI_GPE_DISPATCH_HANDLER) + { + /* If there is already a handler, ignore this GPE method */ + + return_ACPI_STATUS (AE_OK); + } + + if ((GpeEventInfo->Flags & ACPI_GPE_DISPATCH_MASK) == + ACPI_GPE_DISPATCH_METHOD) + { + /* + * If there is already a method, ignore this method. 
But check + * for a type mismatch (if both the _Lxx AND _Exx exist) + */ + if (Type != (GpeEventInfo->Flags & ACPI_GPE_XRUPT_TYPE_MASK)) + { + ACPI_ERROR ((AE_INFO, + "For GPE 0x%.2X, found both _L%2.2X and _E%2.2X methods", + GpeNumber, GpeNumber, GpeNumber)); + } + return_ACPI_STATUS (AE_OK); + } + + /* + * Add the GPE information from above to the GpeEventInfo block for + * use during dispatch of this GPE. + */ + GpeEventInfo->Flags |= (UINT8) (Type | ACPI_GPE_DISPATCH_METHOD); + GpeEventInfo->Dispatch.MethodNode = MethodNode; + + /* + * Enable this GPE if requested. This only happens when during the + * execution of a Load or LoadTable operator. We have found a new + * GPE method and want to immediately enable the GPE if it is a + * runtime GPE. + */ + if (WalkInfo->EnableThisGpe) + { + /* Ignore GPEs that can wake the system */ + + if (!(GpeEventInfo->Flags & ACPI_GPE_CAN_WAKE) || + !AcpiGbl_LeaveWakeGpesDisabled) + { + WalkInfo->Count++; + GpeDevice = WalkInfo->GpeDevice; + + if (GpeDevice == AcpiGbl_FadtGpeDevice) + { + GpeDevice = NULL; + } + + Status = AcpiEnableGpe (GpeDevice, GpeNumber, + ACPI_GPE_TYPE_RUNTIME); + if (ACPI_FAILURE (Status)) + { + ACPI_EXCEPTION ((AE_INFO, Status, + "Could not enable GPE 0x%02X", GpeNumber)); + } + } + } + + ACPI_DEBUG_PRINT ((ACPI_DB_LOAD, + "Registered GPE method %s as GPE number 0x%.2X\n", + Name, GpeNumber)); + return_ACPI_STATUS (AE_OK); +} + + +/******************************************************************************* + * + * FUNCTION: AcpiEvMatchPrwAndGpe + * + * PARAMETERS: Callback from WalkNamespace + * + * RETURN: Status. NOTE: We ignore errors so that the _PRW walk is + * not aborted on a single _PRW failure. + * + * DESCRIPTION: Called from AcpiWalkNamespace. Expects each object to be a + * Device. Run the _PRW method. If present, extract the GPE + * number and mark the GPE as a CAN_WAKE GPE. Allows a + * per-OwnerId execution if ExecuteByOwnerId is TRUE in the + * WalkInfo parameter block. 
+ * + * If WalkInfo->ExecuteByOwnerId is TRUE, we only execute _PRWs with that + * owner. + * If WalkInfo->GpeDevice is NULL, the GPE named by every executed _PRW is + * looked up and marked. Otherwise the _PRW is still executed, but its GPE + * is only marked when it refers to the input GpeDevice. + * + ******************************************************************************/ + +ACPI_STATUS +AcpiEvMatchPrwAndGpe ( + ACPI_HANDLE ObjHandle, + UINT32 Level, + void *Context, + void **ReturnValue) +{ + ACPI_GPE_WALK_INFO *WalkInfo = ACPI_CAST_PTR (ACPI_GPE_WALK_INFO, Context); + ACPI_NAMESPACE_NODE *GpeDevice; + ACPI_GPE_BLOCK_INFO *GpeBlock; + ACPI_NAMESPACE_NODE *TargetGpeDevice; + ACPI_NAMESPACE_NODE *PrwNode; + ACPI_GPE_EVENT_INFO *GpeEventInfo; + ACPI_OPERAND_OBJECT *PkgDesc; + ACPI_OPERAND_OBJECT *ObjDesc; + UINT32 GpeNumber; + ACPI_STATUS Status; + + + ACPI_FUNCTION_TRACE (EvMatchPrwAndGpe); + + + /* Check for a _PRW method under this device */ + + Status = AcpiNsGetNode (ObjHandle, METHOD_NAME__PRW, + ACPI_NS_NO_UPSEARCH, &PrwNode); + if (ACPI_FAILURE (Status)) + { + return_ACPI_STATUS (AE_OK); + } + + /* Check if requested OwnerId matches this OwnerId */ + + if ((WalkInfo->ExecuteByOwnerId) && + (PrwNode->OwnerId != WalkInfo->OwnerId)) + { + return_ACPI_STATUS (AE_OK); + } + + /* Execute the _PRW */ + + Status = AcpiUtEvaluateObject (PrwNode, NULL, + ACPI_BTYPE_PACKAGE, &PkgDesc); + if (ACPI_FAILURE (Status)) + { + return_ACPI_STATUS (AE_OK); + } + + /* The returned _PRW package must have at least two elements */ + + if (PkgDesc->Package.Count < 2) + { + goto Cleanup; + } + + /* Extract pointers from the input context */ + + GpeDevice = WalkInfo->GpeDevice; + GpeBlock = WalkInfo->GpeBlock; + + /* + * The _PRW object must return a package, we are only interested + * in the first element + */ + ObjDesc = PkgDesc->Package.Elements[0]; + + if (ObjDesc->Common.Type == ACPI_TYPE_INTEGER) + { + /* Use FADT-defined GPE device (from definition of _PRW) */ + + TargetGpeDevice = NULL; + if (GpeDevice) + { + TargetGpeDevice = 
AcpiGbl_FadtGpeDevice; + } + + /* Integer is the GPE number in the FADT described GPE blocks */ + + GpeNumber = (UINT32) ObjDesc->Integer.Value; + } + else if (ObjDesc->Common.Type == ACPI_TYPE_PACKAGE) + { + /* Package contains a GPE reference and GPE number within a GPE block */ + + if ((ObjDesc->Package.Count < 2) || + ((ObjDesc->Package.Elements[0])->Common.Type != + ACPI_TYPE_LOCAL_REFERENCE) || + ((ObjDesc->Package.Elements[1])->Common.Type != + ACPI_TYPE_INTEGER)) + { + goto Cleanup; + } + + /* Get GPE block reference and decode */ + + TargetGpeDevice = ObjDesc->Package.Elements[0]->Reference.Node; + GpeNumber = (UINT32) ObjDesc->Package.Elements[1]->Integer.Value; + } + else + { + /* Unknown type, just ignore it */ + + goto Cleanup; + } + + /* Get the GpeEventInfo for this GPE */ + + if (GpeDevice) + { + /* + * Is this GPE within this block? + * + * TRUE if and only if these conditions are true: + * 1) The GPE devices match. + * 2) The GPE index(number) is within the range of the Gpe Block + * associated with the GPE device. 
+ */ + if (GpeDevice != TargetGpeDevice) + { + goto Cleanup; + } + + GpeEventInfo = AcpiEvLowGetGpeInfo (GpeNumber, GpeBlock); + } + else + { + /* GpeDevice is NULL, just match the TargetDevice and GpeNumber */ + + GpeEventInfo = AcpiEvGetGpeEventInfo (TargetGpeDevice, GpeNumber); + } + + if (GpeEventInfo) + { + if (!(GpeEventInfo->Flags & ACPI_GPE_CAN_WAKE)) + { + /* This GPE can wake the system */ + + GpeEventInfo->Flags |= ACPI_GPE_CAN_WAKE; + WalkInfo->Count++; + } + } + +Cleanup: + AcpiUtRemoveReference (PkgDesc); /* release the package returned by the _PRW evaluation above */ + return_ACPI_STATUS (AE_OK); /* always AE_OK so that one bad _PRW does not abort the walk */ +} + diff --git a/sys/contrib/dev/acpica/events/evgpeutil.c b/sys/contrib/dev/acpica/events/evgpeutil.c new file mode 100644 index 00000000000..d67f77e7778 --- /dev/null +++ b/sys/contrib/dev/acpica/events/evgpeutil.c @@ -0,0 +1,452 @@ +/****************************************************************************** + * + * Module Name: evgpeutil - GPE utilities + * + *****************************************************************************/ + +/****************************************************************************** + * + * 1. Copyright Notice + * + * Some or all of this work - Copyright (c) 1999 - 2010, Intel Corp. + * All rights reserved. + * + * 2. License + * + * 2.1. This is your license from Intel Corp. under its intellectual property + * rights. You may have additional license terms from the party that provided + * you this software, covering your right to use that party's intellectual + * property rights. + * + * 2.2. Intel grants, free of charge, to any person ("Licensee") obtaining a + * copy of the source code appearing in this file ("Covered Code") an + * irrevocable, perpetual, worldwide license under Intel's copyrights in the + * base code distributed originally by Intel ("Original Intel Code") to copy, + * make derivatives, distribute, use and display any portion of the Covered + * Code in any form, with the right to sublicense such rights; and + * + * 2.3. 
Intel grants Licensee a non-exclusive and non-transferable patent + * license (with the right to sublicense), under only those claims of Intel + * patents that are infringed by the Original Intel Code, to make, use, sell, + * offer to sell, and import the Covered Code and derivative works thereof + * solely to the minimum extent necessary to exercise the above copyright + * license, and in no event shall the patent license extend to any additions + * to or modifications of the Original Intel Code. No other license or right + * is granted directly or by implication, estoppel or otherwise; + * + * The above copyright and patent license is granted only if the following + * conditions are met: + * + * 3. Conditions + * + * 3.1. Redistribution of Source with Rights to Further Distribute Source. + * Redistribution of source code of any substantial portion of the Covered + * Code or modification with rights to further distribute source must include + * the above Copyright Notice, the above License, this list of Conditions, + * and the following Disclaimer and Export Compliance provision. In addition, + * Licensee must cause all Covered Code to which Licensee contributes to + * contain a file documenting the changes Licensee made to create that Covered + * Code and the date of any change. Licensee must include in that file the + * documentation of any changes made by any predecessor Licensee. Licensee + * must include a prominent statement that the modification is derived, + * directly or indirectly, from Original Intel Code. + * + * 3.2. Redistribution of Source with no Rights to Further Distribute Source. + * Redistribution of source code of any substantial portion of the Covered + * Code or modification without rights to further distribute source must + * include the following Disclaimer and Export Compliance provision in the + * documentation and/or other materials provided with distribution. 
In + * addition, Licensee may not authorize further sublicense of source of any + * portion of the Covered Code, and must include terms to the effect that the + * license from Licensee to its licensee is limited to the intellectual + * property embodied in the software Licensee provides to its licensee, and + * not to intellectual property embodied in modifications its licensee may + * make. + * + * 3.3. Redistribution of Executable. Redistribution in executable form of any + * substantial portion of the Covered Code or modification must reproduce the + * above Copyright Notice, and the following Disclaimer and Export Compliance + * provision in the documentation and/or other materials provided with the + * distribution. + * + * 3.4. Intel retains all right, title, and interest in and to the Original + * Intel Code. + * + * 3.5. Neither the name Intel nor any other trademark owned or controlled by + * Intel shall be used in advertising or otherwise to promote the sale, use or + * other dealings in products derived from or relating to the Covered Code + * without prior written authorization from Intel. + * + * 4. Disclaimer and Export Compliance + * + * 4.1. INTEL MAKES NO WARRANTY OF ANY KIND REGARDING ANY SOFTWARE PROVIDED + * HERE. ANY SOFTWARE ORIGINATING FROM INTEL OR DERIVED FROM INTEL SOFTWARE + * IS PROVIDED "AS IS," AND INTEL WILL NOT PROVIDE ANY SUPPORT, ASSISTANCE, + * INSTALLATION, TRAINING OR OTHER SERVICES. INTEL WILL NOT PROVIDE ANY + * UPDATES, ENHANCEMENTS OR EXTENSIONS. INTEL SPECIFICALLY DISCLAIMS ANY + * IMPLIED WARRANTIES OF MERCHANTABILITY, NONINFRINGEMENT AND FITNESS FOR A + * PARTICULAR PURPOSE. + * + * 4.2. 
IN NO EVENT SHALL INTEL HAVE ANY LIABILITY TO LICENSEE, ITS LICENSEES + * OR ANY OTHER THIRD PARTY, FOR ANY LOST PROFITS, LOST DATA, LOSS OF USE OR + * COSTS OF PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES, OR FOR ANY INDIRECT, + * SPECIAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THIS AGREEMENT, UNDER ANY + * CAUSE OF ACTION OR THEORY OF LIABILITY, AND IRRESPECTIVE OF WHETHER INTEL + * HAS ADVANCE NOTICE OF THE POSSIBILITY OF SUCH DAMAGES. THESE LIMITATIONS + * SHALL APPLY NOTWITHSTANDING THE FAILURE OF THE ESSENTIAL PURPOSE OF ANY + * LIMITED REMEDY. + * + * 4.3. Licensee shall not export, either directly or indirectly, any of this + * software or system incorporating such software without first obtaining any + * required license or other approval from the U. S. Department of Commerce or + * any other agency or department of the United States Government. In the + * event Licensee exports any such software from the United States or + * re-exports any such software from a foreign destination, Licensee shall + * ensure that the distribution and export/re-export of the software is in + * compliance with all laws, regulations, orders, or other restrictions of the + * U.S. Export Administration Regulations. Licensee agrees that neither it nor + * any of its subsidiaries will export/re-export any technical data, process, + * software, or service, directly or indirectly, to any country for which the + * United States government or any agency thereof requires an export license, + * other governmental approval, or letter of assurance, without first obtaining + * such license, approval or letter. 
+ * + *****************************************************************************/ + + + +#include +#include +#include + +#define _COMPONENT ACPI_EVENTS + ACPI_MODULE_NAME ("evgpeutil") + + +/******************************************************************************* + * + * FUNCTION: AcpiEvWalkGpeList + * + * PARAMETERS: GpeWalkCallback - Routine called for each GPE block + * Context - Value passed to callback + * + * RETURN: Status + * + * DESCRIPTION: Walk the GPE lists. + * + ******************************************************************************/ + +ACPI_STATUS +AcpiEvWalkGpeList ( + ACPI_GPE_CALLBACK GpeWalkCallback, + void *Context) +{ + ACPI_GPE_BLOCK_INFO *GpeBlock; + ACPI_GPE_XRUPT_INFO *GpeXruptInfo; + ACPI_STATUS Status = AE_OK; + ACPI_CPU_FLAGS Flags; + + + ACPI_FUNCTION_TRACE (EvWalkGpeList); + + + Flags = AcpiOsAcquireLock (AcpiGbl_GpeLock); + + /* Walk the interrupt level descriptor list */ + + GpeXruptInfo = AcpiGbl_GpeXruptListHead; + while (GpeXruptInfo) + { + /* Walk all Gpe Blocks attached to this interrupt level */ + + GpeBlock = GpeXruptInfo->GpeBlockListHead; + while (GpeBlock) + { + /* One callback per GPE block */ + + Status = GpeWalkCallback (GpeXruptInfo, GpeBlock, Context); + if (ACPI_FAILURE (Status)) + { + if (Status == AE_CTRL_END) /* Callback abort */ + { + Status = AE_OK; + } + goto UnlockAndExit; + } + + GpeBlock = GpeBlock->Next; + } + + GpeXruptInfo = GpeXruptInfo->Next; + } + +UnlockAndExit: + AcpiOsReleaseLock (AcpiGbl_GpeLock, Flags); + return_ACPI_STATUS (Status); +} + + +/******************************************************************************* + * + * FUNCTION: AcpiEvValidGpeEvent + * + * PARAMETERS: GpeEventInfo - Info for this GPE + * + * RETURN: TRUE if the GpeEvent is valid + * + * DESCRIPTION: Validate a GPE event. DO NOT CALL FROM INTERRUPT LEVEL. + * Should be called only when the GPE lists are semaphore locked + * and not subject to change. 
+ *
+ ******************************************************************************/
+
+BOOLEAN
+AcpiEvValidGpeEvent (
+    ACPI_GPE_EVENT_INFO     *GpeEventInfo)
+{
+    ACPI_GPE_XRUPT_INFO     *Xrupt;
+    ACPI_GPE_BLOCK_INFO     *Block;
+
+
+    ACPI_FUNCTION_ENTRY ();
+
+
+    /* Read-only traversal of the lists, so the spin lock is not taken */
+
+    for (Xrupt = AcpiGbl_GpeXruptListHead; Xrupt; Xrupt = Xrupt->Next)
+    {
+        for (Block = Xrupt->GpeBlockListHead; Block; Block = Block->Next)
+        {
+            /* Valid when the pointer lies inside this block's EventInfo array */
+
+            if ((GpeEventInfo >= &Block->EventInfo[0]) &&
+                (GpeEventInfo < &Block->EventInfo[Block->GpeCount]))
+            {
+                return (TRUE);
+            }
+        }
+    }
+
+    /* Not inside the EventInfo array of any known GPE block */
+
+    return (FALSE);
+}
+
+
+/*******************************************************************************
+ *
+ * FUNCTION:    AcpiEvGetGpeXruptBlock
+ *
+ * PARAMETERS:  InterruptNumber     - Interrupt for a GPE block
+ *
+ * RETURN:      A GPE interrupt block
+ *
+ * DESCRIPTION: Get or Create a GPE interrupt block. There is one interrupt
+ *              block per unique interrupt level used for GPEs. Should be
+ *              called only when the GPE lists are semaphore locked and not
+ *              subject to change.
+ *
+ ******************************************************************************/
+
+ACPI_GPE_XRUPT_INFO *
+AcpiEvGetGpeXruptBlock (
+    UINT32                  InterruptNumber)
+{
+    ACPI_GPE_XRUPT_INFO     *NextGpeXrupt;
+    ACPI_GPE_XRUPT_INFO     *GpeXrupt;
+    ACPI_STATUS             Status;
+    ACPI_CPU_FLAGS          Flags;
+
+
+    ACPI_FUNCTION_TRACE (EvGetGpeXruptBlock);
+
+
+    /*
+     * Returns the existing descriptor for InterruptNumber if one is on the
+     * list; otherwise allocates a new one, appends it to the list and, for
+     * any interrupt other than the SCI, installs AcpiEvGpeXruptHandler.
+     * Returns NULL if the allocation or the handler installation fails; in
+     * both cases no new descriptor is left on the list.
+     */
+
+    /* No need for lock since we are not changing any list elements here */
+
+    NextGpeXrupt = AcpiGbl_GpeXruptListHead;
+    while (NextGpeXrupt)
+    {
+        if (NextGpeXrupt->InterruptNumber == InterruptNumber)
+        {
+            return_PTR (NextGpeXrupt);
+        }
+
+        NextGpeXrupt = NextGpeXrupt->Next;
+    }
+
+    /* Not found, must allocate a new xrupt descriptor */
+
+    GpeXrupt = ACPI_ALLOCATE_ZEROED (sizeof (ACPI_GPE_XRUPT_INFO));
+    if (!GpeXrupt)
+    {
+        return_PTR (NULL);
+    }
+
+    GpeXrupt->InterruptNumber = InterruptNumber;
+
+    /* Install new interrupt descriptor with spin lock */
+
+    Flags = AcpiOsAcquireLock (AcpiGbl_GpeLock);
+    if (AcpiGbl_GpeXruptListHead)
+    {
+        NextGpeXrupt = AcpiGbl_GpeXruptListHead;
+        while (NextGpeXrupt->Next)
+        {
+            NextGpeXrupt = NextGpeXrupt->Next;
+        }
+
+        NextGpeXrupt->Next = GpeXrupt;
+        GpeXrupt->Previous = NextGpeXrupt;
+    }
+    else
+    {
+        AcpiGbl_GpeXruptListHead = GpeXrupt;
+    }
+    AcpiOsReleaseLock (AcpiGbl_GpeLock, Flags);
+
+    /* Install new interrupt handler if not SCI_INT */
+
+    if (InterruptNumber != AcpiGbl_FADT.SciInterrupt)
+    {
+        Status = AcpiOsInstallInterruptHandler (InterruptNumber,
+                    AcpiEvGpeXruptHandler, GpeXrupt);
+        if (ACPI_FAILURE (Status))
+        {
+            ACPI_ERROR ((AE_INFO,
+                "Could not install GPE interrupt handler at level 0x%X",
+                InterruptNumber));
+
+            /*
+             * Back out the descriptor that was linked in above. If it were
+             * left on the list, the lookup at the top of this function
+             * would return it on a later call for the same interrupt even
+             * though no handler was ever installed, and the memory would
+             * never be released.
+             */
+            Flags = AcpiOsAcquireLock (AcpiGbl_GpeLock);
+            if (GpeXrupt->Previous)
+            {
+                GpeXrupt->Previous->Next = GpeXrupt->Next;
+            }
+            else
+            {
+                /* No previous, update list head */
+
+                AcpiGbl_GpeXruptListHead = GpeXrupt->Next;
+            }
+
+            if (GpeXrupt->Next)
+            {
+                GpeXrupt->Next->Previous = GpeXrupt->Previous;
+            }
+            AcpiOsReleaseLock (AcpiGbl_GpeLock, Flags);
+
+            ACPI_FREE (GpeXrupt);
+            return_PTR (NULL);
+        }
+    }
+
+    return_PTR (GpeXrupt);
+}
+
+
+/*******************************************************************************
+ *
+ * FUNCTION:    AcpiEvDeleteGpeXrupt
+ *
+ * PARAMETERS:  GpeXrupt            - A GPE interrupt info block
+ *
+ * RETURN:      Status
+ *
+ * DESCRIPTION: Remove and free a GpeXrupt block.
Remove an associated
+ *              interrupt handler if not the SCI interrupt.
+ *
+ ******************************************************************************/
+
+ACPI_STATUS
+AcpiEvDeleteGpeXrupt (
+    ACPI_GPE_XRUPT_INFO     *GpeXrupt)
+{
+    ACPI_CPU_FLAGS          LockFlags;
+    ACPI_STATUS             Status;
+
+
+    ACPI_FUNCTION_TRACE (EvDeleteGpeXrupt);
+
+
+    /* The SCI descriptor is never removed: only its block list is emptied */
+
+    if (GpeXrupt->InterruptNumber == AcpiGbl_FADT.SciInterrupt)
+    {
+        GpeXrupt->GpeBlockListHead = NULL;
+        return_ACPI_STATUS (AE_OK);
+    }
+
+    /* Detach the handler first; the descriptor is kept if that fails */
+
+    Status = AcpiOsRemoveInterruptHandler (
+                GpeXrupt->InterruptNumber, AcpiEvGpeXruptHandler);
+    if (ACPI_FAILURE (Status))
+    {
+        return_ACPI_STATUS (Status);
+    }
+
+    /* Splice the descriptor out of the doubly-linked list under the lock */
+
+    LockFlags = AcpiOsAcquireLock (AcpiGbl_GpeLock);
+
+    if (GpeXrupt->Next)
+    {
+        GpeXrupt->Next->Previous = GpeXrupt->Previous;
+    }
+
+    if (!GpeXrupt->Previous)
+    {
+        /* Removing the first entry, so the list head moves on */
+
+        AcpiGbl_GpeXruptListHead = GpeXrupt->Next;
+    }
+    else
+    {
+        GpeXrupt->Previous->Next = GpeXrupt->Next;
+    }
+
+    AcpiOsReleaseLock (AcpiGbl_GpeLock, LockFlags);
+
+    /* Nothing on the list points at the descriptor any more; release it */
+
+    ACPI_FREE (GpeXrupt);
+    return_ACPI_STATUS (AE_OK);
+}
+
+
+/*******************************************************************************
+ *
+ * FUNCTION:    AcpiEvDeleteGpeHandlers
+ *
+ * PARAMETERS:  GpeXruptInfo        - GPE Interrupt info
+ *              GpeBlock            - Gpe Block info
+ *
+ * RETURN:      Status
+ *
+ * DESCRIPTION: Delete all Handler objects found in the GPE data structs.
+ *              Used only prior to termination.
+ * + ******************************************************************************/ + +ACPI_STATUS +AcpiEvDeleteGpeHandlers ( + ACPI_GPE_XRUPT_INFO *GpeXruptInfo, + ACPI_GPE_BLOCK_INFO *GpeBlock, + void *Context) /* GpeXruptInfo and Context are not referenced in this routine */ +{ + ACPI_GPE_EVENT_INFO *GpeEventInfo; + UINT32 i; + UINT32 j; + + + ACPI_FUNCTION_TRACE (EvDeleteGpeHandlers); + + + /* Examine each GPE Register within the block */ + + for (i = 0; i < GpeBlock->RegisterCount; i++) + { + /* Now look at the individual GPEs in this register (ACPI_GPE_REGISTER_WIDTH per register) */ + + for (j = 0; j < ACPI_GPE_REGISTER_WIDTH; j++) + { + GpeEventInfo = &GpeBlock->EventInfo[((ACPI_SIZE) i * + ACPI_GPE_REGISTER_WIDTH) + j]; + + if ((GpeEventInfo->Flags & ACPI_GPE_DISPATCH_MASK) == + ACPI_GPE_DISPATCH_HANDLER) + { + ACPI_FREE (GpeEventInfo->Dispatch.Handler); + GpeEventInfo->Dispatch.Handler = NULL; + GpeEventInfo->Flags &= ~ACPI_GPE_DISPATCH_MASK; /* clear the dispatch-type bits now that the handler is freed */ + } + } + } + + return_ACPI_STATUS (AE_OK); +} + diff --git a/sys/contrib/dev/acpica/executer/exconfig.c b/sys/contrib/dev/acpica/executer/exconfig.c index 95b78eda3b8..918fc1cf996 100644 --- a/sys/contrib/dev/acpica/executer/exconfig.c +++ b/sys/contrib/dev/acpica/executer/exconfig.c @@ -164,8 +164,9 @@ AcpiExAddTable ( ACPI_NAMESPACE_NODE *ParentNode, ACPI_OPERAND_OBJECT **DdbHandle) { - ACPI_STATUS Status; ACPI_OPERAND_OBJECT *ObjDesc; + ACPI_STATUS Status; + ACPI_OWNER_ID OwnerId; ACPI_FUNCTION_TRACE (ExAddTable); @@ -205,7 +206,15 @@ AcpiExAddTable ( AcpiNsExecModuleCodeList (); AcpiExEnterInterpreter (); - return_ACPI_STATUS (Status); + /* Update GPEs for any new _PRW or _Lxx/_Exx methods. 
Ignore errors */ + + Status = AcpiTbGetOwnerId (TableIndex, &OwnerId); + if (ACPI_SUCCESS (Status)) + { + AcpiEvUpdateGpes (OwnerId); + } + + return_ACPI_STATUS (AE_OK); } @@ -347,9 +356,8 @@ AcpiExLoadTableOp ( Status = AcpiGetTableByIndex (TableIndex, &Table); if (ACPI_SUCCESS (Status)) { - ACPI_INFO ((AE_INFO, - "Dynamic OEM Table Load - [%.4s] OemId [%.6s] OemTableId [%.8s]", - Table->Signature, Table->OemId, Table->OemTableId)); + ACPI_INFO ((AE_INFO, "Dynamic OEM Table Load:")); + AcpiTbPrintTableHeader (0, Table); } /* Invoke table handler if present */ @@ -644,6 +652,9 @@ AcpiExLoadOp ( return_ACPI_STATUS (Status); } + ACPI_INFO ((AE_INFO, "Dynamic OEM Table Load:")); + AcpiTbPrintTableHeader (0, TableDesc.Pointer); + /* Remove the reference by added by AcpiExStore above */ AcpiUtRemoveReference (DdbHandle); diff --git a/sys/contrib/dev/acpica/executer/exoparg1.c b/sys/contrib/dev/acpica/executer/exoparg1.c index 550980f9f3b..f12b14f97b7 100644 --- a/sys/contrib/dev/acpica/executer/exoparg1.c +++ b/sys/contrib/dev/acpica/executer/exoparg1.c @@ -268,7 +268,7 @@ AcpiExOpcode_1A_0T_0R ( case AML_SLEEP_OP: /* Sleep (MsecTime) */ - Status = AcpiExSystemDoSuspend (Operand[0]->Integer.Value); + Status = AcpiExSystemDoSleep (Operand[0]->Integer.Value); break; diff --git a/sys/contrib/dev/acpica/executer/exsystem.c b/sys/contrib/dev/acpica/executer/exsystem.c index 96e3063903e..2bc96fcc7bb 100644 --- a/sys/contrib/dev/acpica/executer/exsystem.c +++ b/sys/contrib/dev/acpica/executer/exsystem.c @@ -280,19 +280,19 @@ AcpiExSystemDoStall ( /******************************************************************************* * - * FUNCTION: AcpiExSystemDoSuspend + * FUNCTION: AcpiExSystemDoSleep * - * PARAMETERS: HowLong - The amount of time to suspend, + * PARAMETERS: HowLong - The amount of time to sleep, * in milliseconds * * RETURN: None * - * DESCRIPTION: Suspend running thread for specified amount of time. 
+ * DESCRIPTION: Sleep the running thread for specified amount of time. * ******************************************************************************/ ACPI_STATUS -AcpiExSystemDoSuspend ( +AcpiExSystemDoSleep ( UINT64 HowLong) { ACPI_FUNCTION_ENTRY (); diff --git a/sys/contrib/dev/acpica/include/acevents.h b/sys/contrib/dev/acpica/include/acevents.h index 4d945eff8aa..40515579e5e 100644 --- a/sys/contrib/dev/acpica/include/acevents.h +++ b/sys/contrib/dev/acpica/include/acevents.h @@ -167,8 +167,12 @@ AcpiEvQueueNotifyRequest ( /* - * evgpe - GPE handling and dispatch + * evgpe - Low-level GPE support */ +UINT32 +AcpiEvGpeDetect ( + ACPI_GPE_XRUPT_INFO *GpeXruptList); + ACPI_STATUS AcpiEvUpdateGpeEnableMasks ( ACPI_GPE_EVENT_INFO *GpeEventInfo); @@ -193,23 +197,8 @@ AcpiEvLowGetGpeInfo ( /* - * evgpeblk + * evgpeblk - Upper-level GPE block support */ -BOOLEAN -AcpiEvValidGpeEvent ( - ACPI_GPE_EVENT_INFO *GpeEventInfo); - -ACPI_STATUS -AcpiEvWalkGpeList ( - ACPI_GPE_CALLBACK GpeWalkCallback, - void *Context); - -ACPI_STATUS -AcpiEvDeleteGpeHandlers ( - ACPI_GPE_XRUPT_INFO *GpeXruptInfo, - ACPI_GPE_BLOCK_INFO *GpeBlock, - void *Context); - ACPI_STATUS AcpiEvCreateGpeBlock ( ACPI_NAMESPACE_NODE *GpeDevice, @@ -233,14 +222,57 @@ AcpiEvGpeDispatch ( ACPI_GPE_EVENT_INFO *GpeEventInfo, UINT32 GpeNumber); -UINT32 -AcpiEvGpeDetect ( - ACPI_GPE_XRUPT_INFO *GpeXruptList); - +/* + * evgpeinit - GPE initialization and update + */ ACPI_STATUS AcpiEvGpeInitialize ( void); +void +AcpiEvUpdateGpes ( + ACPI_OWNER_ID TableOwnerId); + +ACPI_STATUS +AcpiEvMatchGpeMethod ( + ACPI_HANDLE ObjHandle, + UINT32 Level, + void *Context, + void **ReturnValue); + +ACPI_STATUS +AcpiEvMatchPrwAndGpe ( + ACPI_HANDLE ObjHandle, + UINT32 Level, + void *Context, + void **ReturnValue); + +/* + * evgpeutil - GPE utilities + */ +ACPI_STATUS +AcpiEvWalkGpeList ( + ACPI_GPE_CALLBACK GpeWalkCallback, + void *Context); + +BOOLEAN +AcpiEvValidGpeEvent ( + ACPI_GPE_EVENT_INFO *GpeEventInfo); + 
+ACPI_GPE_XRUPT_INFO * +AcpiEvGetGpeXruptBlock ( + UINT32 InterruptNumber); + +ACPI_STATUS +AcpiEvDeleteGpeXrupt ( + ACPI_GPE_XRUPT_INFO *GpeXrupt); + +ACPI_STATUS +AcpiEvDeleteGpeHandlers ( + ACPI_GPE_XRUPT_INFO *GpeXruptInfo, + ACPI_GPE_BLOCK_INFO *GpeBlock, + void *Context); + /* * evregion - Address Space handling diff --git a/sys/contrib/dev/acpica/include/acglobal.h b/sys/contrib/dev/acpica/include/acglobal.h index 4f0769e6f67..e0f8871fcd0 100644 --- a/sys/contrib/dev/acpica/include/acglobal.h +++ b/sys/contrib/dev/acpica/include/acglobal.h @@ -216,11 +216,10 @@ ACPI_NAME AcpiGbl_TraceMethodName; ****************************************************************************/ /* - * AcpiGbl_RootTableList is the master list of ACPI tables found in the - * RSDT/XSDT. - * + * AcpiGbl_RootTableList is the master list of ACPI tables that were + * found in the RSDT/XSDT. */ -ACPI_EXTERN ACPI_INTERNAL_RSDT AcpiGbl_RootTableList; +ACPI_EXTERN ACPI_TABLE_LIST AcpiGbl_RootTableList; ACPI_EXTERN ACPI_TABLE_FACS *AcpiGbl_FACS; /* These addresses are calculated from the FADT Event Block addresses */ diff --git a/sys/contrib/dev/acpica/include/acinterp.h b/sys/contrib/dev/acpica/include/acinterp.h index 67142c8911b..e3fa940a102 100644 --- a/sys/contrib/dev/acpica/include/acinterp.h +++ b/sys/contrib/dev/acpica/include/acinterp.h @@ -434,7 +434,7 @@ AcpiExSystemDoNotifyOp ( ACPI_OPERAND_OBJECT *ObjDesc); ACPI_STATUS -AcpiExSystemDoSuspend( +AcpiExSystemDoSleep( UINT64 Time); ACPI_STATUS diff --git a/sys/contrib/dev/acpica/include/aclocal.h b/sys/contrib/dev/acpica/include/aclocal.h index 8818a6ea9b5..1deced01136 100644 --- a/sys/contrib/dev/acpica/include/aclocal.h +++ b/sys/contrib/dev/acpica/include/aclocal.h @@ -307,16 +307,16 @@ typedef struct acpi_namespace_node #define ANOBJ_IS_REFERENCED 0x80 /* iASL only: Object was referenced */ -/* One internal RSDT for table management */ +/* Internal ACPI table management - master table list */ -typedef struct acpi_internal_rsdt 
+typedef struct acpi_table_list { - ACPI_TABLE_DESC *Tables; - UINT32 Count; - UINT32 Size; + ACPI_TABLE_DESC *Tables; /* Table descriptor array */ + UINT32 CurrentTableCount; /* Tables currently in the array */ + UINT32 MaxTableCount; /* Max tables array will hold */ UINT8 Flags; -} ACPI_INTERNAL_RSDT; +} ACPI_TABLE_LIST; /* Flags for above */ @@ -612,6 +612,10 @@ typedef struct acpi_gpe_walk_info { ACPI_NAMESPACE_NODE *GpeDevice; ACPI_GPE_BLOCK_INFO *GpeBlock; + UINT16 Count; + ACPI_OWNER_ID OwnerId; + BOOLEAN EnableThisGpe; + BOOLEAN ExecuteByOwnerId; } ACPI_GPE_WALK_INFO; diff --git a/sys/contrib/dev/acpica/include/acpiosxf.h b/sys/contrib/dev/acpica/include/acpiosxf.h index 13570b4f173..9924d4d786c 100644 --- a/sys/contrib/dev/acpica/include/acpiosxf.h +++ b/sys/contrib/dev/acpica/include/acpiosxf.h @@ -410,8 +410,8 @@ AcpiOsWritePciConfiguration ( */ void AcpiOsDerivePciId( - ACPI_HANDLE Rhandle, - ACPI_HANDLE Chandle, + ACPI_HANDLE Device, + ACPI_HANDLE Region, ACPI_PCI_ID **PciId); diff --git a/sys/contrib/dev/acpica/include/acpixf.h b/sys/contrib/dev/acpica/include/acpixf.h index 5562fa0215d..48d824f1192 100644 --- a/sys/contrib/dev/acpica/include/acpixf.h +++ b/sys/contrib/dev/acpica/include/acpixf.h @@ -120,7 +120,7 @@ /* Current ACPICA subsystem version in YYYYMMDD format */ -#define ACPI_CA_VERSION 0x20100331 +#define ACPI_CA_VERSION 0x20100428 #include #include @@ -283,7 +283,7 @@ AcpiGetDevices ( ACPI_STATUS AcpiGetName ( - ACPI_HANDLE Handle, + ACPI_HANDLE Object, UINT32 NameType, ACPI_BUFFER *RetPathPtr); @@ -295,18 +295,18 @@ AcpiGetHandle ( ACPI_STATUS AcpiAttachData ( - ACPI_HANDLE ObjHandle, + ACPI_HANDLE Object, ACPI_OBJECT_HANDLER Handler, void *Data); ACPI_STATUS AcpiDetachData ( - ACPI_HANDLE ObjHandle, + ACPI_HANDLE Object, ACPI_OBJECT_HANDLER Handler); ACPI_STATUS AcpiGetData ( - ACPI_HANDLE ObjHandle, + ACPI_HANDLE Object, ACPI_OBJECT_HANDLER Handler, void **Data); @@ -338,7 +338,7 @@ AcpiEvaluateObjectTyped ( ACPI_STATUS 
AcpiGetObjectInfo ( - ACPI_HANDLE Handle, + ACPI_HANDLE Object, ACPI_DEVICE_INFO **ReturnBuffer); ACPI_STATUS @@ -527,36 +527,36 @@ ACPI_STATUS (*ACPI_WALK_RESOURCE_CALLBACK) ( ACPI_STATUS AcpiGetVendorResource ( - ACPI_HANDLE DeviceHandle, + ACPI_HANDLE Device, char *Name, ACPI_VENDOR_UUID *Uuid, ACPI_BUFFER *RetBuffer); ACPI_STATUS -AcpiGetCurrentResources( - ACPI_HANDLE DeviceHandle, +AcpiGetCurrentResources ( + ACPI_HANDLE Device, ACPI_BUFFER *RetBuffer); ACPI_STATUS -AcpiGetPossibleResources( - ACPI_HANDLE DeviceHandle, +AcpiGetPossibleResources ( + ACPI_HANDLE Device, ACPI_BUFFER *RetBuffer); ACPI_STATUS AcpiWalkResources ( - ACPI_HANDLE DeviceHandle, + ACPI_HANDLE Device, char *Name, ACPI_WALK_RESOURCE_CALLBACK UserFunction, void *Context); ACPI_STATUS AcpiSetCurrentResources ( - ACPI_HANDLE DeviceHandle, + ACPI_HANDLE Device, ACPI_BUFFER *InBuffer); ACPI_STATUS -AcpiGetIrqRoutingTable ( - ACPI_HANDLE BusDeviceHandle, +AcpiGetIrqRoutingTable ( + ACPI_HANDLE Device, ACPI_BUFFER *RetBuffer); ACPI_STATUS diff --git a/sys/contrib/dev/acpica/include/actypes.h b/sys/contrib/dev/acpica/include/actypes.h index 3c0626ab1e3..601e87e908d 100644 --- a/sys/contrib/dev/acpica/include/actypes.h +++ b/sys/contrib/dev/acpica/include/actypes.h @@ -1099,7 +1099,7 @@ ACPI_STATUS (*ACPI_ADR_SPACE_SETUP) ( typedef ACPI_STATUS (*ACPI_WALK_CALLBACK) ( - ACPI_HANDLE ObjHandle, + ACPI_HANDLE Object, UINT32 NestingLevel, void *Context, void **ReturnValue); diff --git a/sys/contrib/dev/acpica/include/amlresrc.h b/sys/contrib/dev/acpica/include/amlresrc.h index 9084e44540a..4cd2617e22b 100644 --- a/sys/contrib/dev/acpica/include/amlresrc.h +++ b/sys/contrib/dev/acpica/include/amlresrc.h @@ -300,6 +300,12 @@ typedef struct aml_resource_large_header } AML_RESOURCE_LARGE_HEADER; +/* General Flags for address space resource descriptors */ + +#define ACPI_RESOURCE_FLAG_DEC 2 +#define ACPI_RESOURCE_FLAG_MIF 4 +#define ACPI_RESOURCE_FLAG_MAF 8 + typedef struct aml_resource_memory24 { 
AML_RESOURCE_LARGE_HEADER_COMMON diff --git a/sys/contrib/dev/acpica/osunixxf.c b/sys/contrib/dev/acpica/osunixxf.c index a0e38e46c0a..456fd19e78a 100644 --- a/sys/contrib/dev/acpica/osunixxf.c +++ b/sys/contrib/dev/acpica/osunixxf.c @@ -986,8 +986,8 @@ AcpiOsWritePciConfiguration ( /* TEMPORARY STUB FUNCTION */ void AcpiOsDerivePciId( - ACPI_HANDLE rhandle, - ACPI_HANDLE chandle, + ACPI_HANDLE Device, + ACPI_HANDLE Region, ACPI_PCI_ID **PciId) { diff --git a/sys/contrib/dev/acpica/tables/tbfind.c b/sys/contrib/dev/acpica/tables/tbfind.c index d6cee630e13..f9b28bafa04 100644 --- a/sys/contrib/dev/acpica/tables/tbfind.c +++ b/sys/contrib/dev/acpica/tables/tbfind.c @@ -164,7 +164,7 @@ AcpiTbFindTable ( /* Search for the table */ - for (i = 0; i < AcpiGbl_RootTableList.Count; ++i) + for (i = 0; i < AcpiGbl_RootTableList.CurrentTableCount; ++i) { if (ACPI_MEMCMP (&(AcpiGbl_RootTableList.Tables[i].Signature), Header.Signature, ACPI_NAME_SIZE)) diff --git a/sys/contrib/dev/acpica/tables/tbinstal.c b/sys/contrib/dev/acpica/tables/tbinstal.c index be5561249e9..d573eb98a3e 100644 --- a/sys/contrib/dev/acpica/tables/tbinstal.c +++ b/sys/contrib/dev/acpica/tables/tbinstal.c @@ -227,7 +227,7 @@ AcpiTbAddTable ( /* Check if table is already registered */ - for (i = 0; i < AcpiGbl_RootTableList.Count; ++i) + for (i = 0; i < AcpiGbl_RootTableList.CurrentTableCount; ++i) { if (!AcpiGbl_RootTableList.Tables[i].Pointer) { @@ -370,7 +370,7 @@ AcpiTbResizeRootTableList ( /* Increase the Table Array size */ Tables = ACPI_ALLOCATE_ZEROED ( - ((ACPI_SIZE) AcpiGbl_RootTableList.Size + + ((ACPI_SIZE) AcpiGbl_RootTableList.MaxTableCount + ACPI_ROOT_TABLE_SIZE_INCREMENT) * sizeof (ACPI_TABLE_DESC)); if (!Tables) @@ -384,7 +384,7 @@ AcpiTbResizeRootTableList ( if (AcpiGbl_RootTableList.Tables) { ACPI_MEMCPY (Tables, AcpiGbl_RootTableList.Tables, - (ACPI_SIZE) AcpiGbl_RootTableList.Size * sizeof (ACPI_TABLE_DESC)); + (ACPI_SIZE) AcpiGbl_RootTableList.MaxTableCount * sizeof (ACPI_TABLE_DESC)); 
if (AcpiGbl_RootTableList.Flags & ACPI_ROOT_ORIGIN_ALLOCATED) { @@ -393,7 +393,7 @@ AcpiTbResizeRootTableList ( } AcpiGbl_RootTableList.Tables = Tables; - AcpiGbl_RootTableList.Size += ACPI_ROOT_TABLE_SIZE_INCREMENT; + AcpiGbl_RootTableList.MaxTableCount += ACPI_ROOT_TABLE_SIZE_INCREMENT; AcpiGbl_RootTableList.Flags |= (UINT8) ACPI_ROOT_ORIGIN_ALLOCATED; return_ACPI_STATUS (AE_OK); @@ -423,12 +423,14 @@ AcpiTbStoreTable ( UINT8 Flags, UINT32 *TableIndex) { - ACPI_STATUS Status = AE_OK; + ACPI_STATUS Status; + ACPI_TABLE_DESC *NewTable; /* Ensure that there is room for the table in the Root Table List */ - if (AcpiGbl_RootTableList.Count >= AcpiGbl_RootTableList.Size) + if (AcpiGbl_RootTableList.CurrentTableCount >= + AcpiGbl_RootTableList.MaxTableCount) { Status = AcpiTbResizeRootTableList(); if (ACPI_FAILURE (Status)) @@ -437,21 +439,21 @@ AcpiTbStoreTable ( } } + NewTable = &AcpiGbl_RootTableList.Tables[AcpiGbl_RootTableList.CurrentTableCount]; + /* Initialize added table */ - AcpiGbl_RootTableList.Tables[AcpiGbl_RootTableList.Count].Address = Address; - AcpiGbl_RootTableList.Tables[AcpiGbl_RootTableList.Count].Pointer = Table; - AcpiGbl_RootTableList.Tables[AcpiGbl_RootTableList.Count].Length = Length; - AcpiGbl_RootTableList.Tables[AcpiGbl_RootTableList.Count].OwnerId = 0; - AcpiGbl_RootTableList.Tables[AcpiGbl_RootTableList.Count].Flags = Flags; + NewTable->Address = Address; + NewTable->Pointer = Table; + NewTable->Length = Length; + NewTable->OwnerId = 0; + NewTable->Flags = Flags; - ACPI_MOVE_32_TO_32 ( - &(AcpiGbl_RootTableList.Tables[AcpiGbl_RootTableList.Count].Signature), - Table->Signature); + ACPI_MOVE_32_TO_32 (&NewTable->Signature, Table->Signature); - *TableIndex = AcpiGbl_RootTableList.Count; - AcpiGbl_RootTableList.Count++; - return (Status); + *TableIndex = AcpiGbl_RootTableList.CurrentTableCount; + AcpiGbl_RootTableList.CurrentTableCount++; + return (AE_OK); } @@ -523,7 +525,7 @@ AcpiTbTerminate ( /* Delete the individual tables */ - for (i = 
0; i < AcpiGbl_RootTableList.Count; i++) + for (i = 0; i < AcpiGbl_RootTableList.CurrentTableCount; i++) { AcpiTbDeleteTable (&AcpiGbl_RootTableList.Tables[i]); } @@ -539,7 +541,7 @@ AcpiTbTerminate ( AcpiGbl_RootTableList.Tables = NULL; AcpiGbl_RootTableList.Flags = 0; - AcpiGbl_RootTableList.Count = 0; + AcpiGbl_RootTableList.CurrentTableCount = 0; ACPI_DEBUG_PRINT ((ACPI_DB_INFO, "ACPI Tables freed\n")); (void) AcpiUtReleaseMutex (ACPI_MTX_TABLES); @@ -575,7 +577,7 @@ AcpiTbDeleteNamespaceByOwner ( return_ACPI_STATUS (Status); } - if (TableIndex >= AcpiGbl_RootTableList.Count) + if (TableIndex >= AcpiGbl_RootTableList.CurrentTableCount) { /* The table index does not exist */ @@ -634,7 +636,7 @@ AcpiTbAllocateOwnerId ( (void) AcpiUtAcquireMutex (ACPI_MTX_TABLES); - if (TableIndex < AcpiGbl_RootTableList.Count) + if (TableIndex < AcpiGbl_RootTableList.CurrentTableCount) { Status = AcpiUtAllocateOwnerId (&(AcpiGbl_RootTableList.Tables[TableIndex].OwnerId)); @@ -668,7 +670,7 @@ AcpiTbReleaseOwnerId ( (void) AcpiUtAcquireMutex (ACPI_MTX_TABLES); - if (TableIndex < AcpiGbl_RootTableList.Count) + if (TableIndex < AcpiGbl_RootTableList.CurrentTableCount) { AcpiUtReleaseOwnerId ( &(AcpiGbl_RootTableList.Tables[TableIndex].OwnerId)); @@ -705,7 +707,7 @@ AcpiTbGetOwnerId ( (void) AcpiUtAcquireMutex (ACPI_MTX_TABLES); - if (TableIndex < AcpiGbl_RootTableList.Count) + if (TableIndex < AcpiGbl_RootTableList.CurrentTableCount) { *OwnerId = AcpiGbl_RootTableList.Tables[TableIndex].OwnerId; Status = AE_OK; @@ -734,7 +736,7 @@ AcpiTbIsTableLoaded ( (void) AcpiUtAcquireMutex (ACPI_MTX_TABLES); - if (TableIndex < AcpiGbl_RootTableList.Count) + if (TableIndex < AcpiGbl_RootTableList.CurrentTableCount) { IsLoaded = (BOOLEAN) (AcpiGbl_RootTableList.Tables[TableIndex].Flags & @@ -766,7 +768,7 @@ AcpiTbSetTableLoadedFlag ( { (void) AcpiUtAcquireMutex (ACPI_MTX_TABLES); - if (TableIndex < AcpiGbl_RootTableList.Count) + if (TableIndex < AcpiGbl_RootTableList.CurrentTableCount) { if 
(IsLoaded) { diff --git a/sys/contrib/dev/acpica/tables/tbutils.c b/sys/contrib/dev/acpica/tables/tbutils.c index 916c04e0241..f6acc3cfb8a 100644 --- a/sys/contrib/dev/acpica/tables/tbutils.c +++ b/sys/contrib/dev/acpica/tables/tbutils.c @@ -184,7 +184,7 @@ AcpiTbTablesLoaded ( void) { - if (AcpiGbl_RootTableList.Count >= 3) + if (AcpiGbl_RootTableList.CurrentTableCount >= 3) { return (TRUE); } @@ -780,14 +780,15 @@ AcpiTbParseRootTable ( * come from the FADT */ TableEntry = ACPI_CAST_PTR (UINT8, Table) + sizeof (ACPI_TABLE_HEADER); - AcpiGbl_RootTableList.Count = 2; + AcpiGbl_RootTableList.CurrentTableCount = 2; /* * Initialize the root table array from the RSDT/XSDT */ for (i = 0; i < TableCount; i++) { - if (AcpiGbl_RootTableList.Count >= AcpiGbl_RootTableList.Size) + if (AcpiGbl_RootTableList.CurrentTableCount >= + AcpiGbl_RootTableList.MaxTableCount) { /* There is no more room in the root table array, attempt resize */ @@ -796,18 +797,18 @@ AcpiTbParseRootTable ( { ACPI_WARNING ((AE_INFO, "Truncating %u table entries!", (unsigned) (TableCount - - (AcpiGbl_RootTableList.Count - 2)))); + (AcpiGbl_RootTableList.CurrentTableCount - 2)))); break; } } /* Get the table physical address (32-bit for RSDT, 64-bit for XSDT) */ - AcpiGbl_RootTableList.Tables[AcpiGbl_RootTableList.Count].Address = + AcpiGbl_RootTableList.Tables[AcpiGbl_RootTableList.CurrentTableCount].Address = AcpiTbGetRootTableEntry (TableEntry, TableEntrySize); TableEntry += TableEntrySize; - AcpiGbl_RootTableList.Count++; + AcpiGbl_RootTableList.CurrentTableCount++; } /* @@ -820,7 +821,7 @@ AcpiTbParseRootTable ( * Complete the initialization of the root table array by examining * the header of each table */ - for (i = 2; i < AcpiGbl_RootTableList.Count; i++) + for (i = 2; i < AcpiGbl_RootTableList.CurrentTableCount; i++) { AcpiTbInstallTable (AcpiGbl_RootTableList.Tables[i].Address, NULL, i); diff --git a/sys/contrib/dev/acpica/tables/tbxface.c b/sys/contrib/dev/acpica/tables/tbxface.c index 
e2eeb2e5aed..366a5254142 100644 --- a/sys/contrib/dev/acpica/tables/tbxface.c +++ b/sys/contrib/dev/acpica/tables/tbxface.c @@ -150,7 +150,7 @@ AcpiAllocateRootTable ( UINT32 InitialTableCount) { - AcpiGbl_RootTableList.Size = InitialTableCount; + AcpiGbl_RootTableList.MaxTableCount = InitialTableCount; AcpiGbl_RootTableList.Flags = ACPI_ROOT_ALLOW_RESIZE; return (AcpiTbResizeRootTableList ()); @@ -216,7 +216,7 @@ AcpiInitializeTables ( (ACPI_SIZE) InitialTableCount * sizeof (ACPI_TABLE_DESC)); AcpiGbl_RootTableList.Tables = InitialTableArray; - AcpiGbl_RootTableList.Size = InitialTableCount; + AcpiGbl_RootTableList.MaxTableCount = InitialTableCount; AcpiGbl_RootTableList.Flags = ACPI_ROOT_ORIGIN_UNKNOWN; if (AllowResize) { @@ -285,7 +285,7 @@ AcpiReallocateRootTable ( * increment to create the new table size. */ CurrentSize = (ACPI_SIZE) - AcpiGbl_RootTableList.Count * sizeof (ACPI_TABLE_DESC); + AcpiGbl_RootTableList.CurrentTableCount * sizeof (ACPI_TABLE_DESC); NewSize = CurrentSize + (ACPI_ROOT_TABLE_SIZE_INCREMENT * sizeof (ACPI_TABLE_DESC)); @@ -306,8 +306,8 @@ AcpiReallocateRootTable ( * size of the original table list. 
*/ AcpiGbl_RootTableList.Tables = Tables; - AcpiGbl_RootTableList.Size = - AcpiGbl_RootTableList.Count + ACPI_ROOT_TABLE_SIZE_INCREMENT; + AcpiGbl_RootTableList.MaxTableCount = + AcpiGbl_RootTableList.CurrentTableCount + ACPI_ROOT_TABLE_SIZE_INCREMENT; AcpiGbl_RootTableList.Flags = ACPI_ROOT_ORIGIN_ALLOCATED | ACPI_ROOT_ALLOW_RESIZE; @@ -354,7 +354,7 @@ AcpiGetTableHeader ( /* Walk the root table list */ - for (i = 0, j = 0; i < AcpiGbl_RootTableList.Count; i++) + for (i = 0, j = 0; i < AcpiGbl_RootTableList.CurrentTableCount; i++) { if (!ACPI_COMPARE_NAME (&(AcpiGbl_RootTableList.Tables[i].Signature), Signature)) @@ -439,7 +439,7 @@ AcpiGetTable ( /* Walk the root table list */ - for (i = 0, j = 0; i < AcpiGbl_RootTableList.Count; i++) + for (i = 0, j = 0; i < AcpiGbl_RootTableList.CurrentTableCount; i++) { if (!ACPI_COMPARE_NAME (&(AcpiGbl_RootTableList.Tables[i].Signature), Signature)) @@ -502,7 +502,7 @@ AcpiGetTableByIndex ( /* Validate index */ - if (TableIndex >= AcpiGbl_RootTableList.Count) + if (TableIndex >= AcpiGbl_RootTableList.CurrentTableCount) { (void) AcpiUtReleaseMutex (ACPI_MTX_TABLES); return_ACPI_STATUS (AE_BAD_PARAMETER); @@ -559,7 +559,7 @@ AcpiTbLoadNamespace ( * Load the namespace. The DSDT is required, but any SSDT and * PSDT tables are optional. Verify the DSDT. */ - if (!AcpiGbl_RootTableList.Count || + if (!AcpiGbl_RootTableList.CurrentTableCount || !ACPI_COMPARE_NAME ( &(AcpiGbl_RootTableList.Tables[ACPI_TABLE_INDEX_DSDT].Signature), ACPI_SIG_DSDT) || @@ -613,7 +613,7 @@ AcpiTbLoadNamespace ( /* Load any SSDT or PSDT tables. 
Note: Loop leaves tables locked */ (void) AcpiUtAcquireMutex (ACPI_MTX_TABLES); - for (i = 2; i < AcpiGbl_RootTableList.Count; ++i) + for (i = 2; i < AcpiGbl_RootTableList.CurrentTableCount; ++i) { if ((!ACPI_COMPARE_NAME (&(AcpiGbl_RootTableList.Tables[i].Signature), ACPI_SIG_SSDT) && diff --git a/sys/contrib/dev/acpica/utilities/utcopy.c b/sys/contrib/dev/acpica/utilities/utcopy.c index b16e14c1439..f6bfba04ded 100644 --- a/sys/contrib/dev/acpica/utilities/utcopy.c +++ b/sys/contrib/dev/acpica/utilities/utcopy.c @@ -797,6 +797,7 @@ AcpiUtCopySimpleObject ( UINT16 ReferenceCount; ACPI_OPERAND_OBJECT *NextObject; ACPI_STATUS Status; + ACPI_SIZE CopySize; /* Save fields from destination that we don't want to overwrite */ @@ -804,10 +805,18 @@ AcpiUtCopySimpleObject ( ReferenceCount = DestDesc->Common.ReferenceCount; NextObject = DestDesc->Common.NextObject; - /* Copy the entire source object over the destination object*/ + /* + * Copy the entire source object over the destination object. + * Note: Source can be either an operand object or namespace node. 
+ */ + CopySize = sizeof (ACPI_OPERAND_OBJECT); + if (ACPI_GET_DESCRIPTOR_TYPE (SourceDesc) == ACPI_DESC_TYPE_NAMED) + { + CopySize = sizeof (ACPI_NAMESPACE_NODE); + } - ACPI_MEMCPY ((char *) DestDesc, (char *) SourceDesc, - sizeof (ACPI_OPERAND_OBJECT)); + ACPI_MEMCPY (ACPI_CAST_PTR (char, DestDesc), + ACPI_CAST_PTR (char, SourceDesc), CopySize); /* Restore the saved fields */ @@ -841,8 +850,7 @@ AcpiUtCopySimpleObject ( /* Copy the actual buffer data */ ACPI_MEMCPY (DestDesc->Buffer.Pointer, - SourceDesc->Buffer.Pointer, - SourceDesc->Buffer.Length); + SourceDesc->Buffer.Pointer, SourceDesc->Buffer.Length); } break; @@ -864,7 +872,7 @@ AcpiUtCopySimpleObject ( /* Copy the actual string data */ ACPI_MEMCPY (DestDesc->String.Pointer, SourceDesc->String.Pointer, - (ACPI_SIZE) SourceDesc->String.Length + 1); + (ACPI_SIZE) SourceDesc->String.Length + 1); } break; diff --git a/sys/contrib/dev/acpica/utilities/utglobal.c b/sys/contrib/dev/acpica/utilities/utglobal.c index 85a93fe77a6..0483be9ff09 100644 --- a/sys/contrib/dev/acpica/utilities/utglobal.c +++ b/sys/contrib/dev/acpica/utilities/utglobal.c @@ -625,7 +625,7 @@ AcpiUtGetNodeName ( static const char *AcpiGbl_DescTypeNames[] = { - /* 00 */ "Invalid", + /* 00 */ "Not a Descriptor", /* 01 */ "Cached", /* 02 */ "State-Generic", /* 03 */ "State-Update", @@ -656,7 +656,7 @@ AcpiUtGetDescriptorName ( if (ACPI_GET_DESCRIPTOR_TYPE (Object) > ACPI_DESC_TYPE_MAX) { - return (ACPI_CAST_PTR (char, AcpiGbl_BadType)); + return ("Not a Descriptor"); } return (ACPI_CAST_PTR (char, diff --git a/sys/contrib/dev/acpica/utilities/uttrack.c b/sys/contrib/dev/acpica/utilities/uttrack.c index ddb35ef891c..fc5a47231b0 100644 --- a/sys/contrib/dev/acpica/utilities/uttrack.c +++ b/sys/contrib/dev/acpica/utilities/uttrack.c @@ -644,6 +644,7 @@ AcpiUtDumpAllocations ( ACPI_DEBUG_MEM_BLOCK *Element; ACPI_DESCRIPTOR *Descriptor; UINT32 NumOutstanding = 0; + UINT8 DescriptorType; ACPI_FUNCTION_TRACE (UtDumpAllocations); @@ -663,43 +664,86 @@ 
AcpiUtDumpAllocations ( if ((Element->Component & Component) && ((Module == NULL) || (0 == ACPI_STRCMP (Module, Element->Module)))) { - /* Ignore allocated objects that are in a cache */ - Descriptor = ACPI_CAST_PTR (ACPI_DESCRIPTOR, &Element->UserSpace); - if (ACPI_GET_DESCRIPTOR_TYPE (Descriptor) != ACPI_DESC_TYPE_CACHED) + + if (Element->Size < sizeof (ACPI_COMMON_DESCRIPTOR)) { - AcpiOsPrintf ("%p Len %04X %9.9s-%d [%s] ", + AcpiOsPrintf ("%p Length 0x%04X %9.9s-%d " + "[Not a Descriptor - too small]\n", Descriptor, Element->Size, Element->Module, - Element->Line, AcpiUtGetDescriptorName (Descriptor)); - - /* Most of the elements will be Operand objects. */ - - switch (ACPI_GET_DESCRIPTOR_TYPE (Descriptor)) - { - case ACPI_DESC_TYPE_OPERAND: - AcpiOsPrintf ("%12.12s R%hd", - AcpiUtGetTypeName (Descriptor->Object.Common.Type), - Descriptor->Object.Common.ReferenceCount); - break; - - case ACPI_DESC_TYPE_PARSER: - AcpiOsPrintf ("AmlOpcode %04hX", - Descriptor->Op.Asl.AmlOpcode); - break; - - case ACPI_DESC_TYPE_NAMED: - AcpiOsPrintf ("%4.4s", - AcpiUtGetNodeName (&Descriptor->Node)); - break; - - default: - break; - } - - AcpiOsPrintf ( "\n"); - NumOutstanding++; + Element->Line); } + else + { + /* Ignore allocated objects that are in a cache */ + + if (ACPI_GET_DESCRIPTOR_TYPE (Descriptor) != ACPI_DESC_TYPE_CACHED) + { + AcpiOsPrintf ("%p Length 0x%04X %9.9s-%d [%s] ", + Descriptor, Element->Size, Element->Module, + Element->Line, AcpiUtGetDescriptorName (Descriptor)); + + /* Validate the descriptor type using Type field and object size */ + + DescriptorType = 0; /* Not a valid descriptor type */ + + switch (ACPI_GET_DESCRIPTOR_TYPE (Descriptor)) + { + case ACPI_DESC_TYPE_OPERAND: + if (Element->Size == sizeof (ACPI_OPERAND_OBJECT)) + { + DescriptorType = ACPI_DESC_TYPE_OPERAND; + } + break; + + case ACPI_DESC_TYPE_PARSER: + if (Element->Size == sizeof (ACPI_PARSE_OBJECT)) + { + DescriptorType = ACPI_DESC_TYPE_PARSER; + } + break; + + case
ACPI_DESC_TYPE_NAMED: + if (Element->Size == sizeof (ACPI_NAMESPACE_NODE)) + { + DescriptorType = ACPI_DESC_TYPE_NAMED; + } + break; + + default: + break; + } + + /* Display additional info for the major descriptor types */ + + switch (DescriptorType) + { + case ACPI_DESC_TYPE_OPERAND: + AcpiOsPrintf ("%12.12s RefCount 0x%04X\n", + AcpiUtGetTypeName (Descriptor->Object.Common.Type), + Descriptor->Object.Common.ReferenceCount); + break; + + case ACPI_DESC_TYPE_PARSER: + AcpiOsPrintf ("AmlOpcode 0x%04hX\n", + Descriptor->Op.Asl.AmlOpcode); + break; + + case ACPI_DESC_TYPE_NAMED: + AcpiOsPrintf ("%4.4s\n", + AcpiUtGetNodeName (&Descriptor->Node)); + break; + + default: + AcpiOsPrintf ( "\n"); + break; + } + } + } + + NumOutstanding++; } + Element = Element->Next; } @@ -709,13 +753,11 @@ AcpiUtDumpAllocations ( if (!NumOutstanding) { - ACPI_INFO ((AE_INFO, - "No outstanding allocations")); + ACPI_INFO ((AE_INFO, "No outstanding allocations")); } else { - ACPI_ERROR ((AE_INFO, - "%d(0x%X) Outstanding allocations", + ACPI_ERROR ((AE_INFO, "%d(0x%X) Outstanding allocations", NumOutstanding, NumOutstanding)); } diff --git a/sys/contrib/ipfilter/netinet/ip_auth.c b/sys/contrib/ipfilter/netinet/ip_auth.c index 19a12f47525..42d29d4f2a2 100644 --- a/sys/contrib/ipfilter/netinet/ip_auth.c +++ b/sys/contrib/ipfilter/netinet/ip_auth.c @@ -70,11 +70,6 @@ struct file; #include #include #include -#if !defined(_KERNEL) && defined(__FreeBSD_version) && \ - __FreeBSD_version >= 800049 -# define V_ip_do_randomid ip_do_randomid -# define V_ip_id ip_id -#endif #if !defined(_KERNEL) && !defined(__osf__) && !defined(__sgi) # define KERNEL # define _KERNEL diff --git a/sys/contrib/ipfilter/netinet/ip_compat.h b/sys/contrib/ipfilter/netinet/ip_compat.h index 31e5b114e00..35c9f68ffc0 100644 --- a/sys/contrib/ipfilter/netinet/ip_compat.h +++ b/sys/contrib/ipfilter/netinet/ip_compat.h @@ -975,7 +975,6 @@ typedef u_int32_t u_32_t; # define SPL_NET(x) ; # define SPL_IMP(x) ; # define SPL_SCHED(x)
; -extern int in_cksum __P((struct mbuf *, int)); # else # define SPL_SCHED(x) x = splhigh() # endif /* __FreeBSD_version >= 500043 */ diff --git a/sys/ddb/db_sym.c b/sys/ddb/db_sym.c index 99209a89c83..04af1eba535 100644 --- a/sys/ddb/db_sym.c +++ b/sys/ddb/db_sym.c @@ -64,12 +64,6 @@ static boolean_t db_line_at_pc(c_db_sym_t, char **, int *, db_expr_t); static int db_cpu = -1; #ifdef VIMAGE -extern uintptr_t *__start_set_vnet; -extern uintptr_t *__stop_set_vnet; - -#define VNET_START (uintptr_t)&__start_set_vnet -#define VNET_STOP (uintptr_t)&__stop_set_vnet - static void *db_vnet = NULL; #endif diff --git a/sys/dev/acpica/Osd/OsdHardware.c b/sys/dev/acpica/Osd/OsdHardware.c index 822bf78545d..06388027c60 100644 --- a/sys/dev/acpica/Osd/OsdHardware.c +++ b/sys/dev/acpica/Osd/OsdHardware.c @@ -216,12 +216,12 @@ acpi_bus_number(ACPI_HANDLE root, ACPI_HANDLE curr, ACPI_PCI_ID *PciId) /* * Find the bus number for a device * - * rhandle: handle for the root bus - * chandle: handle for the device + * Device: handle for the PCI root bridge device + * Region: handle for the PCI configuration space operation region * PciId: pointer to device slot and function, we fill out bus */ void -AcpiOsDerivePciId(ACPI_HANDLE rhandle, ACPI_HANDLE chandle, ACPI_PCI_ID **PciId) +AcpiOsDerivePciId(ACPI_HANDLE Device, ACPI_HANDLE Region, ACPI_PCI_ID **PciId) { ACPI_HANDLE parent; ACPI_STATUS status; @@ -230,26 +230,21 @@ AcpiOsDerivePciId(ACPI_HANDLE rhandle, ACPI_HANDLE chandle, ACPI_PCI_ID **PciId) if (pci_cfgregopen() == 0) panic("AcpiOsDerivePciId unable to initialize pci bus"); - /* Try to read _BBN for bus number if we're at the root */ + /* Try to read _BBN for bus number if we're at the root. 
*/ bus = 0; - if (rhandle == chandle) { - status = acpi_GetInteger(rhandle, "_BBN", &bus); + if (Device == Region) { + status = acpi_GetInteger(Device, "_BBN", &bus); if (ACPI_FAILURE(status) && bootverbose) printf("AcpiOsDerivePciId: root bus has no _BBN, assuming 0\n"); } - /* - * Get the parent handle and call the recursive case. It is not - * clear why we seem to be getting a chandle that points to a child - * of the desired slot/function but passing in the parent handle - * here works. - */ - if (ACPI_SUCCESS(AcpiGetParent(chandle, &parent))) - bus = acpi_bus_number(rhandle, parent, *PciId); + /* Get the parent handle and call the recursive case. */ + if (ACPI_SUCCESS(AcpiGetParent(Region, &parent))) + bus = acpi_bus_number(Device, parent, *PciId); (*PciId)->Bus = bus; if (bootverbose) { printf("AcpiOsDerivePciId: %s -> bus %d dev %d func %d\n", - acpi_name(chandle), (*PciId)->Bus, (*PciId)->Device, + acpi_name(Region), (*PciId)->Bus, (*PciId)->Device, (*PciId)->Function); } } diff --git a/sys/dev/ae/if_ae.c b/sys/dev/ae/if_ae.c index 68181f4b6d4..d2812a4990b 100644 --- a/sys/dev/ae/if_ae.c +++ b/sys/dev/ae/if_ae.c @@ -378,7 +378,7 @@ ae_attach(device_t dev) ifp->if_init = ae_init; ifp->if_capabilities = IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING; ifp->if_hwassist = 0; - ifp->if_snd.ifq_drv_maxlen = IFQ_MAXLEN; + ifp->if_snd.ifq_drv_maxlen = ifqmaxlen; IFQ_SET_MAXLEN(&ifp->if_snd, ifp->if_snd.ifq_drv_maxlen); IFQ_SET_READY(&ifp->if_snd); if (pci_find_extcap(dev, PCIY_PMG, &pmc) == 0) { diff --git a/sys/dev/age/if_age.c b/sys/dev/age/if_age.c index 3c5a1079774..15ce0eff851 100644 --- a/sys/dev/age/if_age.c +++ b/sys/dev/age/if_age.c @@ -1629,22 +1629,8 @@ age_encap(struct age_softc *sc, struct mbuf **m_head) } m = *m_head; - /* Configure Tx IP/TCP/UDP checksum offload. 
*/ - if ((m->m_pkthdr.csum_flags & AGE_CSUM_FEATURES) != 0) { - cflags |= AGE_TD_CSUM; - if ((m->m_pkthdr.csum_flags & CSUM_TCP) != 0) - cflags |= AGE_TD_TCPCSUM; - if ((m->m_pkthdr.csum_flags & CSUM_UDP) != 0) - cflags |= AGE_TD_UDPCSUM; - /* Set checksum start offset. */ - cflags |= (poff << AGE_TD_CSUM_PLOADOFFSET_SHIFT); - /* Set checksum insertion position of TCP/UDP. */ - cflags |= ((poff + m->m_pkthdr.csum_data) << - AGE_TD_CSUM_XSUMOFFSET_SHIFT); - } - - /* Configure TSO. */ if ((m->m_pkthdr.csum_flags & CSUM_TSO) != 0) { + /* Configure TSO. */ if (poff + (tcp->th_off << 2) == m->m_pkthdr.len) { /* Not TSO but IP/TCP checksum offload. */ cflags |= AGE_TD_IPCSUM | AGE_TD_TCPCSUM; @@ -1660,6 +1646,18 @@ age_encap(struct age_softc *sc, struct mbuf **m_head) /* Set IP/TCP header size. */ cflags |= ip->ip_hl << AGE_TD_IPHDR_LEN_SHIFT; cflags |= tcp->th_off << AGE_TD_TSO_TCPHDR_LEN_SHIFT; + } else if ((m->m_pkthdr.csum_flags & AGE_CSUM_FEATURES) != 0) { + /* Configure Tx IP/TCP/UDP checksum offload. */ + cflags |= AGE_TD_CSUM; + if ((m->m_pkthdr.csum_flags & CSUM_TCP) != 0) + cflags |= AGE_TD_TCPCSUM; + if ((m->m_pkthdr.csum_flags & CSUM_UDP) != 0) + cflags |= AGE_TD_UDPCSUM; + /* Set checksum start offset. */ + cflags |= (poff << AGE_TD_CSUM_PLOADOFFSET_SHIFT); + /* Set checksum insertion position of TCP/UDP. */ + cflags |= ((poff + m->m_pkthdr.csum_data) << + AGE_TD_CSUM_XSUMOFFSET_SHIFT); } /* Configure VLAN hardware tag insertion. 
*/ diff --git a/sys/dev/agp/agp.c b/sys/dev/agp/agp.c index c992c931846..577a7e9ec95 100644 --- a/sys/dev/agp/agp.c +++ b/sys/dev/agp/agp.c @@ -623,9 +623,9 @@ bad: m = vm_page_lookup(mem->am_obj, OFF_TO_IDX(k)); if (k >= i) vm_page_wakeup(m); - vm_page_lock_queues(); + vm_page_lock(m); vm_page_unwire(m, 0); - vm_page_unlock_queues(); + vm_page_unlock(m); } VM_OBJECT_UNLOCK(mem->am_obj); @@ -657,9 +657,9 @@ agp_generic_unbind_memory(device_t dev, struct agp_memory *mem) VM_OBJECT_LOCK(mem->am_obj); for (i = 0; i < mem->am_size; i += PAGE_SIZE) { m = vm_page_lookup(mem->am_obj, atop(i)); - vm_page_lock_queues(); + vm_page_lock(m); vm_page_unwire(m, 0); - vm_page_unlock_queues(); + vm_page_unlock(m); } VM_OBJECT_UNLOCK(mem->am_obj); diff --git a/sys/dev/agp/agp_i810.c b/sys/dev/agp/agp_i810.c index 95a9cc40c9f..ff3ad1c614a 100644 --- a/sys/dev/agp/agp_i810.c +++ b/sys/dev/agp/agp_i810.c @@ -1010,10 +1010,10 @@ agp_i810_free_memory(device_t dev, struct agp_memory *mem) VM_OBJECT_LOCK(mem->am_obj); m = vm_page_lookup(mem->am_obj, 0); - VM_OBJECT_UNLOCK(mem->am_obj); - vm_page_lock_queues(); + vm_page_lock(m); vm_page_unwire(m, 0); - vm_page_unlock_queues(); + vm_page_unlock(m); + VM_OBJECT_UNLOCK(mem->am_obj); } else { contigfree(sc->argb_cursor, mem->am_size, M_AGP); sc->argb_cursor = NULL; diff --git a/sys/dev/ahci/ahci.c b/sys/dev/ahci/ahci.c index 5d399683945..1aa719e044b 100644 --- a/sys/dev/ahci/ahci.c +++ b/sys/dev/ahci/ahci.c @@ -111,6 +111,7 @@ static struct { #define AHCI_Q_EDGEIS 64 #define AHCI_Q_SATA2 128 #define AHCI_Q_NOBSYRES 256 +#define AHCI_Q_NOAA 512 } ahci_ids[] = { {0x43801002, 0x00, "ATI IXP600", 0}, {0x43901002, 0x00, "ATI IXP700", 0}, @@ -167,75 +168,75 @@ static struct { {0x614511ab, 0x00, "Marvell 88SX6145", AHCI_Q_NOFORCE|AHCI_Q_4CH|AHCI_Q_EDGEIS}, {0x91231b4b, 0x11, "Marvell 88SE912x", AHCI_Q_NOBSYRES}, {0x91231b4b, 0x00, "Marvell 88SE912x", AHCI_Q_EDGEIS|AHCI_Q_SATA2|AHCI_Q_NOBSYRES}, - {0x044c10de, 0x00, "NVIDIA MCP65", 0}, - {0x044d10de, 
0x00, "NVIDIA MCP65", 0}, - {0x044e10de, 0x00, "NVIDIA MCP65", 0}, - {0x044f10de, 0x00, "NVIDIA MCP65", 0}, - {0x045c10de, 0x00, "NVIDIA MCP65", 0}, - {0x045d10de, 0x00, "NVIDIA MCP65", 0}, - {0x045e10de, 0x00, "NVIDIA MCP65", 0}, - {0x045f10de, 0x00, "NVIDIA MCP65", 0}, - {0x055010de, 0x00, "NVIDIA MCP67", 0}, - {0x055110de, 0x00, "NVIDIA MCP67", 0}, - {0x055210de, 0x00, "NVIDIA MCP67", 0}, - {0x055310de, 0x00, "NVIDIA MCP67", 0}, - {0x055410de, 0x00, "NVIDIA MCP67", 0}, - {0x055510de, 0x00, "NVIDIA MCP67", 0}, - {0x055610de, 0x00, "NVIDIA MCP67", 0}, - {0x055710de, 0x00, "NVIDIA MCP67", 0}, - {0x055810de, 0x00, "NVIDIA MCP67", 0}, - {0x055910de, 0x00, "NVIDIA MCP67", 0}, - {0x055A10de, 0x00, "NVIDIA MCP67", 0}, - {0x055B10de, 0x00, "NVIDIA MCP67", 0}, - {0x058410de, 0x00, "NVIDIA MCP67", 0}, - {0x07f010de, 0x00, "NVIDIA MCP73", 0}, - {0x07f110de, 0x00, "NVIDIA MCP73", 0}, - {0x07f210de, 0x00, "NVIDIA MCP73", 0}, - {0x07f310de, 0x00, "NVIDIA MCP73", 0}, - {0x07f410de, 0x00, "NVIDIA MCP73", 0}, - {0x07f510de, 0x00, "NVIDIA MCP73", 0}, - {0x07f610de, 0x00, "NVIDIA MCP73", 0}, - {0x07f710de, 0x00, "NVIDIA MCP73", 0}, - {0x07f810de, 0x00, "NVIDIA MCP73", 0}, - {0x07f910de, 0x00, "NVIDIA MCP73", 0}, - {0x07fa10de, 0x00, "NVIDIA MCP73", 0}, - {0x07fb10de, 0x00, "NVIDIA MCP73", 0}, - {0x0ad010de, 0x00, "NVIDIA MCP77", 0}, - {0x0ad110de, 0x00, "NVIDIA MCP77", 0}, - {0x0ad210de, 0x00, "NVIDIA MCP77", 0}, - {0x0ad310de, 0x00, "NVIDIA MCP77", 0}, - {0x0ad410de, 0x00, "NVIDIA MCP77", 0}, - {0x0ad510de, 0x00, "NVIDIA MCP77", 0}, - {0x0ad610de, 0x00, "NVIDIA MCP77", 0}, - {0x0ad710de, 0x00, "NVIDIA MCP77", 0}, - {0x0ad810de, 0x00, "NVIDIA MCP77", 0}, - {0x0ad910de, 0x00, "NVIDIA MCP77", 0}, - {0x0ada10de, 0x00, "NVIDIA MCP77", 0}, - {0x0adb10de, 0x00, "NVIDIA MCP77", 0}, - {0x0ab410de, 0x00, "NVIDIA MCP79", 0}, - {0x0ab510de, 0x00, "NVIDIA MCP79", 0}, - {0x0ab610de, 0x00, "NVIDIA MCP79", 0}, - {0x0ab710de, 0x00, "NVIDIA MCP79", 0}, - {0x0ab810de, 0x00, "NVIDIA MCP79", 0}, - 
{0x0ab910de, 0x00, "NVIDIA MCP79", 0}, - {0x0aba10de, 0x00, "NVIDIA MCP79", 0}, - {0x0abb10de, 0x00, "NVIDIA MCP79", 0}, - {0x0abc10de, 0x00, "NVIDIA MCP79", 0}, - {0x0abd10de, 0x00, "NVIDIA MCP79", 0}, - {0x0abe10de, 0x00, "NVIDIA MCP79", 0}, - {0x0abf10de, 0x00, "NVIDIA MCP79", 0}, - {0x0d8410de, 0x00, "NVIDIA MCP89", 0}, - {0x0d8510de, 0x00, "NVIDIA MCP89", 0}, - {0x0d8610de, 0x00, "NVIDIA MCP89", 0}, - {0x0d8710de, 0x00, "NVIDIA MCP89", 0}, - {0x0d8810de, 0x00, "NVIDIA MCP89", 0}, - {0x0d8910de, 0x00, "NVIDIA MCP89", 0}, - {0x0d8a10de, 0x00, "NVIDIA MCP89", 0}, - {0x0d8b10de, 0x00, "NVIDIA MCP89", 0}, - {0x0d8c10de, 0x00, "NVIDIA MCP89", 0}, - {0x0d8d10de, 0x00, "NVIDIA MCP89", 0}, - {0x0d8e10de, 0x00, "NVIDIA MCP89", 0}, - {0x0d8f10de, 0x00, "NVIDIA MCP89", 0}, + {0x044c10de, 0x00, "NVIDIA MCP65", AHCI_Q_NOAA}, + {0x044d10de, 0x00, "NVIDIA MCP65", AHCI_Q_NOAA}, + {0x044e10de, 0x00, "NVIDIA MCP65", AHCI_Q_NOAA}, + {0x044f10de, 0x00, "NVIDIA MCP65", AHCI_Q_NOAA}, + {0x045c10de, 0x00, "NVIDIA MCP65", AHCI_Q_NOAA}, + {0x045d10de, 0x00, "NVIDIA MCP65", AHCI_Q_NOAA}, + {0x045e10de, 0x00, "NVIDIA MCP65", AHCI_Q_NOAA}, + {0x045f10de, 0x00, "NVIDIA MCP65", AHCI_Q_NOAA}, + {0x055010de, 0x00, "NVIDIA MCP67", AHCI_Q_NOAA}, + {0x055110de, 0x00, "NVIDIA MCP67", AHCI_Q_NOAA}, + {0x055210de, 0x00, "NVIDIA MCP67", AHCI_Q_NOAA}, + {0x055310de, 0x00, "NVIDIA MCP67", AHCI_Q_NOAA}, + {0x055410de, 0x00, "NVIDIA MCP67", AHCI_Q_NOAA}, + {0x055510de, 0x00, "NVIDIA MCP67", AHCI_Q_NOAA}, + {0x055610de, 0x00, "NVIDIA MCP67", AHCI_Q_NOAA}, + {0x055710de, 0x00, "NVIDIA MCP67", AHCI_Q_NOAA}, + {0x055810de, 0x00, "NVIDIA MCP67", AHCI_Q_NOAA}, + {0x055910de, 0x00, "NVIDIA MCP67", AHCI_Q_NOAA}, + {0x055A10de, 0x00, "NVIDIA MCP67", AHCI_Q_NOAA}, + {0x055B10de, 0x00, "NVIDIA MCP67", AHCI_Q_NOAA}, + {0x058410de, 0x00, "NVIDIA MCP67", AHCI_Q_NOAA}, + {0x07f010de, 0x00, "NVIDIA MCP73", AHCI_Q_NOAA}, + {0x07f110de, 0x00, "NVIDIA MCP73", AHCI_Q_NOAA}, + {0x07f210de, 0x00, "NVIDIA MCP73", 
AHCI_Q_NOAA}, + {0x07f310de, 0x00, "NVIDIA MCP73", AHCI_Q_NOAA}, + {0x07f410de, 0x00, "NVIDIA MCP73", AHCI_Q_NOAA}, + {0x07f510de, 0x00, "NVIDIA MCP73", AHCI_Q_NOAA}, + {0x07f610de, 0x00, "NVIDIA MCP73", AHCI_Q_NOAA}, + {0x07f710de, 0x00, "NVIDIA MCP73", AHCI_Q_NOAA}, + {0x07f810de, 0x00, "NVIDIA MCP73", AHCI_Q_NOAA}, + {0x07f910de, 0x00, "NVIDIA MCP73", AHCI_Q_NOAA}, + {0x07fa10de, 0x00, "NVIDIA MCP73", AHCI_Q_NOAA}, + {0x07fb10de, 0x00, "NVIDIA MCP73", AHCI_Q_NOAA}, + {0x0ad010de, 0x00, "NVIDIA MCP77", AHCI_Q_NOAA}, + {0x0ad110de, 0x00, "NVIDIA MCP77", AHCI_Q_NOAA}, + {0x0ad210de, 0x00, "NVIDIA MCP77", AHCI_Q_NOAA}, + {0x0ad310de, 0x00, "NVIDIA MCP77", AHCI_Q_NOAA}, + {0x0ad410de, 0x00, "NVIDIA MCP77", AHCI_Q_NOAA}, + {0x0ad510de, 0x00, "NVIDIA MCP77", AHCI_Q_NOAA}, + {0x0ad610de, 0x00, "NVIDIA MCP77", AHCI_Q_NOAA}, + {0x0ad710de, 0x00, "NVIDIA MCP77", AHCI_Q_NOAA}, + {0x0ad810de, 0x00, "NVIDIA MCP77", AHCI_Q_NOAA}, + {0x0ad910de, 0x00, "NVIDIA MCP77", AHCI_Q_NOAA}, + {0x0ada10de, 0x00, "NVIDIA MCP77", AHCI_Q_NOAA}, + {0x0adb10de, 0x00, "NVIDIA MCP77", AHCI_Q_NOAA}, + {0x0ab410de, 0x00, "NVIDIA MCP79", AHCI_Q_NOAA}, + {0x0ab510de, 0x00, "NVIDIA MCP79", AHCI_Q_NOAA}, + {0x0ab610de, 0x00, "NVIDIA MCP79", AHCI_Q_NOAA}, + {0x0ab710de, 0x00, "NVIDIA MCP79", AHCI_Q_NOAA}, + {0x0ab810de, 0x00, "NVIDIA MCP79", AHCI_Q_NOAA}, + {0x0ab910de, 0x00, "NVIDIA MCP79", AHCI_Q_NOAA}, + {0x0aba10de, 0x00, "NVIDIA MCP79", AHCI_Q_NOAA}, + {0x0abb10de, 0x00, "NVIDIA MCP79", AHCI_Q_NOAA}, + {0x0abc10de, 0x00, "NVIDIA MCP79", AHCI_Q_NOAA}, + {0x0abd10de, 0x00, "NVIDIA MCP79", AHCI_Q_NOAA}, + {0x0abe10de, 0x00, "NVIDIA MCP79", AHCI_Q_NOAA}, + {0x0abf10de, 0x00, "NVIDIA MCP79", AHCI_Q_NOAA}, + {0x0d8410de, 0x00, "NVIDIA MCP89", AHCI_Q_NOAA}, + {0x0d8510de, 0x00, "NVIDIA MCP89", AHCI_Q_NOAA}, + {0x0d8610de, 0x00, "NVIDIA MCP89", AHCI_Q_NOAA}, + {0x0d8710de, 0x00, "NVIDIA MCP89", AHCI_Q_NOAA}, + {0x0d8810de, 0x00, "NVIDIA MCP89", AHCI_Q_NOAA}, + {0x0d8910de, 0x00, "NVIDIA MCP89", 
AHCI_Q_NOAA}, + {0x0d8a10de, 0x00, "NVIDIA MCP89", AHCI_Q_NOAA}, + {0x0d8b10de, 0x00, "NVIDIA MCP89", AHCI_Q_NOAA}, + {0x0d8c10de, 0x00, "NVIDIA MCP89", AHCI_Q_NOAA}, + {0x0d8d10de, 0x00, "NVIDIA MCP89", AHCI_Q_NOAA}, + {0x0d8e10de, 0x00, "NVIDIA MCP89", AHCI_Q_NOAA}, + {0x0d8f10de, 0x00, "NVIDIA MCP89", AHCI_Q_NOAA}, {0x33491106, 0x00, "VIA VT8251", 0}, {0x62871106, 0x00, "VIA VT8251", 0}, {0x11841039, 0x00, "SiS 966", 0}, @@ -339,6 +340,7 @@ ahci_attach(device_t dev) rman_fini(&ctlr->sc_iomem); return (error); } + pci_enable_busmaster(dev); /* Reset controller */ if ((error = ahci_ctlr_reset(dev)) != 0) { bus_release_resource(dev, SYS_RES_MEMORY, ctlr->r_rid, ctlr->r_mem); @@ -860,7 +862,14 @@ ahci_ch_attach(device_t dev) ch->user[i].mode = 0; ch->user[i].bytecount = 8192; ch->user[i].tags = ch->numslots; + ch->user[i].caps = 0; ch->curr[i] = ch->user[i]; + if (ch->pm_level) { + ch->user[i].caps = CTS_SATA_CAPS_H_PMREQ | + CTS_SATA_CAPS_H_APST | + CTS_SATA_CAPS_D_PMREQ | CTS_SATA_CAPS_D_APST; + } + ch->user[i].caps |= CTS_SATA_CAPS_H_DMAAA; } rid = ch->unit; if (!(ch->r_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, @@ -1960,7 +1969,8 @@ ahci_end_transaction(struct ahci_slot *slot, enum ahci_err_type et) et != AHCI_ERR_TIMEOUT) ahci_rearm_timeout(dev); /* Start PM timer. */ - if (ch->numrslots == 0 && ch->pm_level > 3) { + if (ch->numrslots == 0 && ch->pm_level > 3 && + (ch->curr[ch->pm_present ? 15 : 0].caps & CTS_SATA_CAPS_D_PMREQ)) { callout_schedule(&ch->pm_timer, (ch->pm_level == 4) ? hz / 1000 : hz / 8); } @@ -2083,6 +2093,7 @@ ahci_start(device_t dev, int fbs) } /* Start operations on this channel */ cmd = ATA_INL(ch->r_mem, AHCI_P_CMD); + cmd &= ~AHCI_P_CMD_PMA; ATA_OUTL(ch->r_mem, AHCI_P_CMD, cmd | AHCI_P_CMD_ST | (ch->pm_present ? 
AHCI_P_CMD_PMA : 0)); } @@ -2381,6 +2392,24 @@ ahci_sata_phy_reset(device_t dev) return (1); } +static int +ahci_check_ids(device_t dev, union ccb *ccb) +{ + struct ahci_channel *ch = device_get_softc(dev); + + if (ccb->ccb_h.target_id > ((ch->caps & AHCI_CAP_SPM) ? 15 : 0)) { + ccb->ccb_h.status = CAM_TID_INVALID; + xpt_done(ccb); + return (-1); + } + if (ccb->ccb_h.target_lun != 0) { + ccb->ccb_h.status = CAM_LUN_INVALID; + xpt_done(ccb); + return (-1); + } + return (0); +} + static void ahciaction(struct cam_sim *sim, union ccb *ccb) { @@ -2396,9 +2425,12 @@ ahciaction(struct cam_sim *sim, union ccb *ccb) /* Common cases first */ case XPT_ATA_IO: /* Execute the requested I/O operation */ case XPT_SCSI_IO: - if (ch->devices == 0) { + if (ahci_check_ids(dev, ccb)) + return; + if (ch->devices == 0 || + (ch->pm_present == 0 && + ccb->ccb_h.target_id > 0 && ccb->ccb_h.target_id < 15)) { ccb->ccb_h.status = CAM_SEL_TIMEOUT; - xpt_done(ccb); break; } /* Check for command collision. */ @@ -2410,7 +2442,7 @@ ahciaction(struct cam_sim *sim, union ccb *ccb) return; } ahci_begin_transaction(dev, ccb); - break; + return; case XPT_EN_LUN: /* Enable LUN as a target */ case XPT_TARGET_IO: /* Execute target I/O request */ case XPT_ACCEPT_TARGET_IO: /* Accept Host Target Mode CDB */ @@ -2418,13 +2450,14 @@ ahciaction(struct cam_sim *sim, union ccb *ccb) case XPT_ABORT: /* Abort the specified CCB */ /* XXX Implement */ ccb->ccb_h.status = CAM_REQ_INVALID; - xpt_done(ccb); break; case XPT_SET_TRAN_SETTINGS: { struct ccb_trans_settings *cts = &ccb->cts; struct ahci_device *d; + if (ahci_check_ids(dev, ccb)) + return; if (cts->type == CTS_TYPE_CURRENT_SETTINGS) d = &ch->curr[ccb->ccb_h.target_id]; else @@ -2441,8 +2474,9 @@ ahciaction(struct cam_sim *sim, union ccb *ccb) ch->pm_present = cts->xport_specific.sata.pm_present; if (cts->xport_specific.sata.valid & CTS_SATA_VALID_ATAPI) d->atapi = cts->xport_specific.sata.atapi; + if (cts->xport_specific.sata.valid & CTS_SATA_VALID_CAPS) 
+ d->caps = cts->xport_specific.sata.caps; ccb->ccb_h.status = CAM_REQ_CMP; - xpt_done(ccb); break; } case XPT_GET_TRAN_SETTINGS: @@ -2452,6 +2486,8 @@ ahciaction(struct cam_sim *sim, union ccb *ccb) struct ahci_device *d; uint32_t status; + if (ahci_check_ids(dev, ccb)) + return; if (cts->type == CTS_TYPE_CURRENT_SETTINGS) d = &ch->curr[ccb->ccb_h.target_id]; else @@ -2472,9 +2508,24 @@ ahciaction(struct cam_sim *sim, union ccb *ccb) cts->xport_specific.sata.valid |= CTS_SATA_VALID_REVISION; } + cts->xport_specific.sata.caps = d->caps & CTS_SATA_CAPS_D; + if (ch->pm_level) { + if (ch->caps & (AHCI_CAP_PSC | AHCI_CAP_SSC)) + cts->xport_specific.sata.caps |= CTS_SATA_CAPS_H_PMREQ; + if (ch->caps2 & AHCI_CAP2_APST) + cts->xport_specific.sata.caps |= CTS_SATA_CAPS_H_APST; + } + if ((ch->caps & AHCI_CAP_SNCQ) && + (ch->quirks & AHCI_Q_NOAA) == 0) + cts->xport_specific.sata.caps |= CTS_SATA_CAPS_H_DMAAA; + cts->xport_specific.sata.caps &= + ch->user[ccb->ccb_h.target_id].caps; + cts->xport_specific.sata.valid |= CTS_SATA_VALID_CAPS; } else { cts->xport_specific.sata.revision = d->revision; cts->xport_specific.sata.valid |= CTS_SATA_VALID_REVISION; + cts->xport_specific.sata.caps = d->caps; + cts->xport_specific.sata.valid |= CTS_SATA_VALID_CAPS; } cts->xport_specific.sata.mode = d->mode; cts->xport_specific.sata.valid |= CTS_SATA_VALID_MODE; @@ -2487,48 +2538,16 @@ ahciaction(struct cam_sim *sim, union ccb *ccb) cts->xport_specific.sata.atapi = d->atapi; cts->xport_specific.sata.valid |= CTS_SATA_VALID_ATAPI; ccb->ccb_h.status = CAM_REQ_CMP; - xpt_done(ccb); break; } -#if 0 - case XPT_CALC_GEOMETRY: - { - struct ccb_calc_geometry *ccg; - uint32_t size_mb; - uint32_t secs_per_cylinder; - - ccg = &ccb->ccg; - size_mb = ccg->volume_size - / ((1024L * 1024L) / ccg->block_size); - if (size_mb >= 1024 && (aha->extended_trans != 0)) { - if (size_mb >= 2048) { - ccg->heads = 255; - ccg->secs_per_track = 63; - } else { - ccg->heads = 128; - ccg->secs_per_track = 32; - } - } else 
{ - ccg->heads = 64; - ccg->secs_per_track = 32; - } - secs_per_cylinder = ccg->heads * ccg->secs_per_track; - ccg->cylinders = ccg->volume_size / secs_per_cylinder; - ccb->ccb_h.status = CAM_REQ_CMP; - xpt_done(ccb); - break; - } -#endif case XPT_RESET_BUS: /* Reset the specified SCSI bus */ case XPT_RESET_DEV: /* Bus Device Reset the specified SCSI device */ ahci_reset(dev); ccb->ccb_h.status = CAM_REQ_CMP; - xpt_done(ccb); break; case XPT_TERM_IO: /* Terminate the I/O process */ /* XXX Implement */ ccb->ccb_h.status = CAM_REQ_INVALID; - xpt_done(ccb); break; case XPT_PATH_INQ: /* Path routing inquiry */ { @@ -2564,14 +2583,13 @@ ahciaction(struct cam_sim *sim, union ccb *ccb) if (pci_get_devid(device_get_parent(dev)) == 0x43801002) cpi->maxio = min(cpi->maxio, 128 * 512); cpi->ccb_h.status = CAM_REQ_CMP; - xpt_done(ccb); break; } default: ccb->ccb_h.status = CAM_REQ_INVALID; - xpt_done(ccb); break; } + xpt_done(ccb); } static void diff --git a/sys/dev/ahci/ahci.h b/sys/dev/ahci/ahci.h index d4c73c9cfe5..1473962eb5b 100644 --- a/sys/dev/ahci/ahci.h +++ b/sys/dev/ahci/ahci.h @@ -372,6 +372,7 @@ struct ahci_device { u_int bytecount; u_int atapi; u_int tags; + u_int caps; }; /* structure describing an ATA channel */ diff --git a/sys/dev/alc/if_alc.c b/sys/dev/alc/if_alc.c index e95b04453cd..c685b84e863 100644 --- a/sys/dev/alc/if_alc.c +++ b/sys/dev/alc/if_alc.c @@ -1908,28 +1908,7 @@ alc_encap(struct alc_softc *sc, struct mbuf **m_head) vtag = (vtag << TD_VLAN_SHIFT) & TD_VLAN_MASK; cflags |= TD_INS_VLAN_TAG; } - /* Configure Tx checksum offload. */ - if ((m->m_pkthdr.csum_flags & ALC_CSUM_FEATURES) != 0) { -#ifdef ALC_USE_CUSTOM_CSUM - cflags |= TD_CUSTOM_CSUM; - /* Set checksum start offset. */ - cflags |= ((poff >> 1) << TD_PLOAD_OFFSET_SHIFT) & - TD_PLOAD_OFFSET_MASK; - /* Set checksum insertion position of TCP/UDP. 
*/ - cflags |= (((poff + m->m_pkthdr.csum_data) >> 1) << - TD_CUSTOM_CSUM_OFFSET_SHIFT) & TD_CUSTOM_CSUM_OFFSET_MASK; -#else - if ((m->m_pkthdr.csum_flags & CSUM_IP) != 0) - cflags |= TD_IPCSUM; - if ((m->m_pkthdr.csum_flags & CSUM_TCP) != 0) - cflags |= TD_TCPCSUM; - if ((m->m_pkthdr.csum_flags & CSUM_UDP) != 0) - cflags |= TD_UDPCSUM; - /* Set TCP/UDP header offset. */ - cflags |= (poff << TD_L4HDR_OFFSET_SHIFT) & - TD_L4HDR_OFFSET_MASK; -#endif - } else if ((m->m_pkthdr.csum_flags & CSUM_TSO) != 0) { + if ((m->m_pkthdr.csum_flags & CSUM_TSO) != 0) { /* Request TSO and set MSS. */ cflags |= TD_TSO | TD_TSO_DESCV1; cflags |= ((uint32_t)m->m_pkthdr.tso_segsz << TD_MSS_SHIFT) & @@ -1961,6 +1940,27 @@ alc_encap(struct alc_softc *sc, struct mbuf **m_head) } /* Handle remaining fragments. */ idx = 1; + } else if ((m->m_pkthdr.csum_flags & ALC_CSUM_FEATURES) != 0) { + /* Configure Tx checksum offload. */ +#ifdef ALC_USE_CUSTOM_CSUM + cflags |= TD_CUSTOM_CSUM; + /* Set checksum start offset. */ + cflags |= ((poff >> 1) << TD_PLOAD_OFFSET_SHIFT) & + TD_PLOAD_OFFSET_MASK; + /* Set checksum insertion position of TCP/UDP. */ + cflags |= (((poff + m->m_pkthdr.csum_data) >> 1) << + TD_CUSTOM_CSUM_OFFSET_SHIFT) & TD_CUSTOM_CSUM_OFFSET_MASK; +#else + if ((m->m_pkthdr.csum_flags & CSUM_IP) != 0) + cflags |= TD_IPCSUM; + if ((m->m_pkthdr.csum_flags & CSUM_TCP) != 0) + cflags |= TD_TCPCSUM; + if ((m->m_pkthdr.csum_flags & CSUM_UDP) != 0) + cflags |= TD_UDPCSUM; + /* Set TCP/UDP header offset. 
*/ + cflags |= (poff << TD_L4HDR_OFFSET_SHIFT) & + TD_L4HDR_OFFSET_MASK; +#endif } for (; idx < nsegs; idx++) { desc = &sc->alc_rdata.alc_tx_ring[prod]; diff --git a/sys/dev/ale/if_ale.c b/sys/dev/ale/if_ale.c index 76f1b74f2af..ea6b53b6ff9 100644 --- a/sys/dev/ale/if_ale.c +++ b/sys/dev/ale/if_ale.c @@ -1585,7 +1585,7 @@ ale_encap(struct ale_softc *sc, struct mbuf **m_head) struct tcphdr *tcp; bus_dma_segment_t txsegs[ALE_MAXTXSEGS]; bus_dmamap_t map; - uint32_t cflags, ip_off, poff, vtag; + uint32_t cflags, hdrlen, ip_off, poff, vtag; int error, i, nsegs, prod, si; ALE_LOCK_ASSERT(sc); @@ -1678,6 +1678,11 @@ ale_encap(struct ale_softc *sc, struct mbuf **m_head) return (ENOBUFS); } tcp = (struct tcphdr *)(mtod(m, char *) + poff); + m = m_pullup(m, poff + (tcp->th_off << 2)); + if (m == NULL) { + *m_head = NULL; + return (ENOBUFS); + } /* * AR81xx requires IP/TCP header size and offset as * well as TCP pseudo checksum which complicates @@ -1730,15 +1735,21 @@ ale_encap(struct ale_softc *sc, struct mbuf **m_head) } /* Check descriptor overrun. */ - if (sc->ale_cdata.ale_tx_cnt + nsegs >= ALE_TX_RING_CNT - 2) { + if (sc->ale_cdata.ale_tx_cnt + nsegs >= ALE_TX_RING_CNT - 3) { bus_dmamap_unload(sc->ale_cdata.ale_tx_tag, map); return (ENOBUFS); } bus_dmamap_sync(sc->ale_cdata.ale_tx_tag, map, BUS_DMASYNC_PREWRITE); m = *m_head; - /* Configure Tx checksum offload. */ - if ((m->m_pkthdr.csum_flags & ALE_CSUM_FEATURES) != 0) { + if ((m->m_pkthdr.csum_flags & CSUM_TSO) != 0) { + /* Request TSO and set MSS. */ + cflags |= ALE_TD_TSO; + cflags |= ((uint32_t)m->m_pkthdr.tso_segsz << ALE_TD_MSS_SHIFT); + /* Set IP/TCP header size. */ + cflags |= ip->ip_hl << ALE_TD_IPHDR_LEN_SHIFT; + cflags |= tcp->th_off << ALE_TD_TCPHDR_LEN_SHIFT; + } else if ((m->m_pkthdr.csum_flags & ALE_CSUM_FEATURES) != 0) { /* * AR81xx supports Tx custom checksum offload feature * that offloads single 16bit checksum computation. 
@@ -1769,15 +1780,6 @@ ale_encap(struct ale_softc *sc, struct mbuf **m_head) ALE_TD_CSUM_XSUMOFFSET_SHIFT); } - if ((m->m_pkthdr.csum_flags & CSUM_TSO) != 0) { - /* Request TSO and set MSS. */ - cflags |= ALE_TD_TSO; - cflags |= ((uint32_t)m->m_pkthdr.tso_segsz << ALE_TD_MSS_SHIFT); - /* Set IP/TCP header size. */ - cflags |= ip->ip_hl << ALE_TD_IPHDR_LEN_SHIFT; - cflags |= tcp->th_off << ALE_TD_TCPHDR_LEN_SHIFT; - } - /* Configure VLAN hardware tag insertion. */ if ((m->m_flags & M_VLANTAG) != 0) { vtag = ALE_TX_VLAN_TAG(m->m_pkthdr.ether_vtag); @@ -1785,8 +1787,32 @@ ale_encap(struct ale_softc *sc, struct mbuf **m_head) cflags |= ALE_TD_INSERT_VLAN_TAG; } - desc = NULL; - for (i = 0; i < nsegs; i++) { + i = 0; + if ((m->m_pkthdr.csum_flags & CSUM_TSO) != 0) { + /* + * Make sure the first fragment contains + * only ethernet and IP/TCP header with options. + */ + hdrlen = poff + (tcp->th_off << 2); + desc = &sc->ale_cdata.ale_tx_ring[prod]; + desc->addr = htole64(txsegs[i].ds_addr); + desc->len = htole32(ALE_TX_BYTES(hdrlen) | vtag); + desc->flags = htole32(cflags); + sc->ale_cdata.ale_tx_cnt++; + ALE_DESC_INC(prod, ALE_TX_RING_CNT); + if (m->m_len - hdrlen > 0) { + /* Handle remaining payload of the first fragment. 
*/ + desc = &sc->ale_cdata.ale_tx_ring[prod]; + desc->addr = htole64(txsegs[i].ds_addr + hdrlen); + desc->len = htole32(ALE_TX_BYTES(m->m_len - hdrlen) | + vtag); + desc->flags = htole32(cflags); + sc->ale_cdata.ale_tx_cnt++; + ALE_DESC_INC(prod, ALE_TX_RING_CNT); + } + i = 1; + } + for (; i < nsegs; i++) { desc = &sc->ale_cdata.ale_tx_ring[prod]; desc->addr = htole64(txsegs[i].ds_addr); desc->len = htole32(ALE_TX_BYTES(txsegs[i].ds_len) | vtag); diff --git a/sys/dev/an/if_an.c b/sys/dev/an/if_an.c index 6d9ef96831b..645fe419ebe 100644 --- a/sys/dev/an/if_an.c +++ b/sys/dev/an/if_an.c @@ -767,8 +767,8 @@ an_attach(struct an_softc *sc, int flags) ifp->if_start = an_start; ifp->if_init = an_init; ifp->if_baudrate = 10000000; - IFQ_SET_MAXLEN(&ifp->if_snd, IFQ_MAXLEN); - ifp->if_snd.ifq_drv_maxlen = IFQ_MAXLEN; + IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen); + ifp->if_snd.ifq_drv_maxlen = ifqmaxlen; IFQ_SET_READY(&ifp->if_snd); bzero(sc->an_config.an_nodename, sizeof(sc->an_config.an_nodename)); diff --git a/sys/dev/ata/ata-all.c b/sys/dev/ata/ata-all.c index 73dfa94fd83..5c6f0db714f 100644 --- a/sys/dev/ata/ata-all.c +++ b/sys/dev/ata/ata-all.c @@ -1430,6 +1430,24 @@ ata_cam_end_transaction(device_t dev, struct ata_request *request) ata_reinit(dev); } +static int +ata_check_ids(device_t dev, union ccb *ccb) +{ + struct ata_channel *ch = device_get_softc(dev); + + if (ccb->ccb_h.target_id > ((ch->flags & ATA_NO_SLAVE) ? 
0 : 1)) { + ccb->ccb_h.status = CAM_TID_INVALID; + xpt_done(ccb); + return (-1); + } + if (ccb->ccb_h.target_lun != 0) { + ccb->ccb_h.status = CAM_LUN_INVALID; + xpt_done(ccb); + return (-1); + } + return (0); +} + static void ataaction(struct cam_sim *sim, union ccb *ccb) { @@ -1445,10 +1463,11 @@ ataaction(struct cam_sim *sim, union ccb *ccb) /* Common cases first */ case XPT_ATA_IO: /* Execute the requested I/O operation */ case XPT_SCSI_IO: + if (ata_check_ids(dev, ccb)) + return; if ((ch->devices & ((ATA_ATA_MASTER | ATA_ATAPI_MASTER) << ccb->ccb_h.target_id)) == 0) { ccb->ccb_h.status = CAM_SEL_TIMEOUT; - xpt_done(ccb); break; } if (ch->running) @@ -1467,11 +1486,10 @@ ataaction(struct cam_sim *sim, union ccb *ccb) res->lba_mid = 0x14; } ccb->ccb_h.status = CAM_REQ_CMP; - xpt_done(ccb); break; } ata_cam_begin_transaction(dev, ccb); - break; + return; case XPT_EN_LUN: /* Enable LUN as a target */ case XPT_TARGET_IO: /* Execute target I/O request */ case XPT_ACCEPT_TARGET_IO: /* Accept Host Target Mode CDB */ @@ -1479,13 +1497,14 @@ ataaction(struct cam_sim *sim, union ccb *ccb) case XPT_ABORT: /* Abort the specified CCB */ /* XXX Implement */ ccb->ccb_h.status = CAM_REQ_INVALID; - xpt_done(ccb); break; case XPT_SET_TRAN_SETTINGS: { struct ccb_trans_settings *cts = &ccb->cts; struct ata_cam_device *d; + if (ata_check_ids(dev, ccb)) + return; if (cts->type == CTS_TYPE_CURRENT_SETTINGS) d = &ch->curr[ccb->ccb_h.target_id]; else @@ -1520,7 +1539,6 @@ ataaction(struct cam_sim *sim, union ccb *ccb) d->atapi = cts->xport_specific.ata.atapi; } ccb->ccb_h.status = CAM_REQ_CMP; - xpt_done(ccb); break; } case XPT_GET_TRAN_SETTINGS: @@ -1528,6 +1546,8 @@ ataaction(struct cam_sim *sim, union ccb *ccb) struct ccb_trans_settings *cts = &ccb->cts; struct ata_cam_device *d; + if (ata_check_ids(dev, ccb)) + return; if (cts->type == CTS_TYPE_CURRENT_SETTINGS) d = &ch->curr[ccb->ccb_h.target_id]; else @@ -1567,48 +1587,16 @@ ataaction(struct cam_sim *sim, union ccb *ccb) 
cts->xport_specific.ata.valid |= CTS_ATA_VALID_ATAPI; } ccb->ccb_h.status = CAM_REQ_CMP; - xpt_done(ccb); break; } -#if 0 - case XPT_CALC_GEOMETRY: - { - struct ccb_calc_geometry *ccg; - uint32_t size_mb; - uint32_t secs_per_cylinder; - - ccg = &ccb->ccg; - size_mb = ccg->volume_size - / ((1024L * 1024L) / ccg->block_size); - if (size_mb >= 1024 && (aha->extended_trans != 0)) { - if (size_mb >= 2048) { - ccg->heads = 255; - ccg->secs_per_track = 63; - } else { - ccg->heads = 128; - ccg->secs_per_track = 32; - } - } else { - ccg->heads = 64; - ccg->secs_per_track = 32; - } - secs_per_cylinder = ccg->heads * ccg->secs_per_track; - ccg->cylinders = ccg->volume_size / secs_per_cylinder; - ccb->ccb_h.status = CAM_REQ_CMP; - xpt_done(ccb); - break; - } -#endif case XPT_RESET_BUS: /* Reset the specified SCSI bus */ case XPT_RESET_DEV: /* Bus Device Reset the specified SCSI device */ ata_reinit(dev); ccb->ccb_h.status = CAM_REQ_CMP; - xpt_done(ccb); break; case XPT_TERM_IO: /* Terminate the I/O process */ /* XXX Implement */ ccb->ccb_h.status = CAM_REQ_INVALID; - xpt_done(ccb); break; case XPT_PATH_INQ: /* Path routing inquiry */ { @@ -1643,14 +1631,13 @@ ataaction(struct cam_sim *sim, union ccb *ccb) cpi->protocol_version = PROTO_VERSION_UNSPECIFIED; cpi->maxio = ch->dma.max_iosize ? 
ch->dma.max_iosize : DFLTPHYS; cpi->ccb_h.status = CAM_REQ_CMP; - xpt_done(ccb); break; } default: ccb->ccb_h.status = CAM_REQ_INVALID; - xpt_done(ccb); break; } + xpt_done(ccb); } static void diff --git a/sys/dev/ata/ata-queue.c b/sys/dev/ata/ata-queue.c index a3b1c4e4435..4aef47190cb 100644 --- a/sys/dev/ata/ata-queue.c +++ b/sys/dev/ata/ata-queue.c @@ -513,9 +513,9 @@ ata_timeout(struct ata_request *request) request->flags |= ATA_R_TIMEOUT; if (ch->dma.unload) ch->dma.unload(request); -#ifdef ATA_CAM ch->running = NULL; ch->state = ATA_IDLE; +#ifdef ATA_CAM ata_cam_end_transaction(ch->dev, request); #endif mtx_unlock(&ch->state_mtx); diff --git a/sys/dev/ata/chipsets/ata-acerlabs.c b/sys/dev/ata/chipsets/ata-acerlabs.c index b7f11472da2..fee9692ebd5 100644 --- a/sys/dev/ata/chipsets/ata-acerlabs.c +++ b/sys/dev/ata/chipsets/ata-acerlabs.c @@ -184,8 +184,11 @@ ata_ali_ch_attach(device_t dev) if (ctlr->chip->cfg2 & ALI_NEW && ctlr->chip->chiprev < 0xc7) ch->flags |= ATA_CHECKS_CABLE; /* older chips can't do 48bit DMA transfers */ - if (ctlr->chip->chiprev <= 0xc4) + if (ctlr->chip->chiprev <= 0xc4) { ch->flags |= ATA_NO_48BIT_DMA; + if (ch->dma.max_iosize > 256 * 512) + ch->dma.max_iosize = 256 * 512; + } return 0; } diff --git a/sys/dev/ath/ath_hal/ar5212/ar5212_reset.c b/sys/dev/ath/ath_hal/ar5212/ar5212_reset.c index fc937ea149c..8e6341a04c7 100644 --- a/sys/dev/ath/ath_hal/ar5212/ar5212_reset.c +++ b/sys/dev/ath/ath_hal/ar5212/ar5212_reset.c @@ -283,6 +283,14 @@ ar5212Reset(struct ath_hal *ah, HAL_OPMODE opmode, regWrites = ath_hal_ini_write(ah, &ahp->ah_ini_modes, modesIndex, 0); regWrites = write_common(ah, &ahp->ah_ini_common, bChannelChange, regWrites); +#ifdef AH_RXCFG_SDMAMW_4BYTES + /* + * Nala doesn't work with 128 byte bursts on pb42(hydra) (ar71xx), + * use 4 instead. Enabling it on all platforms would hurt performance, + * so we only enable it on the ones that are affected by it. 
+ */ + OS_REG_WRITE(ah, AR_RXCFG, 0); +#endif ahp->ah_rfHal->writeRegs(ah, modesIndex, freqIndex, regWrites); OS_MARK(ah, AH_MARK_RESET_LINE, __LINE__); diff --git a/sys/dev/ath/if_ath.c b/sys/dev/ath/if_ath.c index 58474890e72..15765f2b5b0 100644 --- a/sys/dev/ath/if_ath.c +++ b/sys/dev/ath/if_ath.c @@ -562,8 +562,8 @@ ath_attach(u_int16_t devid, struct ath_softc *sc) ifp->if_start = ath_start; ifp->if_ioctl = ath_ioctl; ifp->if_init = ath_init; - IFQ_SET_MAXLEN(&ifp->if_snd, IFQ_MAXLEN); - ifp->if_snd.ifq_drv_maxlen = IFQ_MAXLEN; + IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen); + ifp->if_snd.ifq_drv_maxlen = ifqmaxlen; IFQ_SET_READY(&ifp->if_snd); ic->ic_ifp = ifp; diff --git a/sys/dev/atkbdc/atkbdc.c b/sys/dev/atkbdc/atkbdc.c index df1f28e497d..f8e856a0efd 100644 --- a/sys/dev/atkbdc/atkbdc.c +++ b/sys/dev/atkbdc/atkbdc.c @@ -44,6 +44,10 @@ __FBSDID("$FreeBSD$"); #include #include +#if defined(__amd64__) +#include +#endif + #include #ifdef __sparc64__ @@ -153,7 +157,7 @@ atkbdc_configure(void) bus_space_tag_t tag; bus_space_handle_t h0; bus_space_handle_t h1; -#if defined(__i386__) +#if defined(__i386__) || defined(__amd64__) volatile int i; register_t flags; #endif @@ -222,7 +226,7 @@ atkbdc_configure(void) #endif #endif -#if defined(__i386__) +#if defined(__i386__) || defined(__amd64__) /* * Check if we really have AT keyboard controller. Poll status * register until we get "all clear" indication. 
If no such @@ -248,6 +252,11 @@ static int atkbdc_setup(atkbdc_softc_t *sc, bus_space_tag_t tag, bus_space_handle_t h0, bus_space_handle_t h1) { +#if defined(__amd64__) + u_int64_t tscval[3], read_delay; + register_t flags; +#endif + if (sc->ioh0 == 0) { /* XXX */ sc->command_byte = -1; sc->command_mask = 0; @@ -264,6 +273,33 @@ atkbdc_setup(atkbdc_softc_t *sc, bus_space_tag_t tag, bus_space_handle_t h0, sc->iot = tag; sc->ioh0 = h0; sc->ioh1 = h1; + +#if defined(__amd64__) + /* + * On certain chipsets AT keyboard controller isn't present and is + * emulated by BIOS using SMI interrupt. On those chipsets reading + * from the status port may be a thousand times slower than usual. + * Sometimes this emulation is not working properly, resulting in + * commands timing out, and since we assume that inb() operation + * takes very little time to complete we need to adjust number of + * retries to keep waiting time within the designed limit (100ms). + * Measure time it takes to make read_status() call and adjust + * number of retries accordingly.
+ */ + flags = intr_disable(); + tscval[0] = rdtsc(); + read_status(sc); + tscval[1] = rdtsc(); + DELAY(1000); + tscval[2] = rdtsc(); + intr_restore(flags); + read_delay = tscval[1] - tscval[0]; + read_delay /= (tscval[2] - tscval[1]) / 1000; + sc->retry = 100000 / ((KBDD_DELAYTIME * 2) + read_delay); +#else + sc->retry = 5000; +#endif + return 0; } @@ -380,10 +416,12 @@ removeq(kqueue *q) static int wait_while_controller_busy(struct atkbdc_softc *kbdc) { - /* CPU will stay inside the loop for 100msec at most */ - int retry = 5000; + int retry; int f; + /* CPU will stay inside the loop for 100msec at most */ + retry = kbdc->retry; + while ((f = read_status(kbdc)) & KBDS_INPUT_BUFFER_FULL) { if ((f & KBDS_BUFFER_FULL) == KBDS_KBD_BUFFER_FULL) { DELAY(KBDD_DELAYTIME); @@ -406,10 +444,12 @@ wait_while_controller_busy(struct atkbdc_softc *kbdc) static int wait_for_data(struct atkbdc_softc *kbdc) { - /* CPU will stay inside the loop for 200msec at most */ - int retry = 10000; + int retry; int f; + /* CPU will stay inside the loop for 200msec at most */ + retry = kbdc->retry * 2; + while ((f = read_status(kbdc) & KBDS_ANY_BUFFER_FULL) == 0) { DELAY(KBDC_DELAYTIME); if (--retry < 0) @@ -423,10 +463,12 @@ wait_for_data(struct atkbdc_softc *kbdc) static int wait_for_kbd_data(struct atkbdc_softc *kbdc) { - /* CPU will stay inside the loop for 200msec at most */ - int retry = 10000; + int retry; int f; + /* CPU will stay inside the loop for 200msec at most */ + retry = kbdc->retry * 2; + while ((f = read_status(kbdc) & KBDS_BUFFER_FULL) != KBDS_KBD_BUFFER_FULL) { if (f == KBDS_AUX_BUFFER_FULL) { @@ -448,11 +490,13 @@ wait_for_kbd_data(struct atkbdc_softc *kbdc) static int wait_for_kbd_ack(struct atkbdc_softc *kbdc) { - /* CPU will stay inside the loop for 200msec at most */ - int retry = 10000; + int retry; int f; int b; + /* CPU will stay inside the loop for 200msec at most */ + retry = kbdc->retry * 2; + while (retry-- > 0) { if ((f = read_status(kbdc)) & 
KBDS_ANY_BUFFER_FULL) { DELAY(KBDD_DELAYTIME); @@ -475,10 +519,12 @@ wait_for_kbd_ack(struct atkbdc_softc *kbdc) static int wait_for_aux_data(struct atkbdc_softc *kbdc) { - /* CPU will stay inside the loop for 200msec at most */ - int retry = 10000; + int retry; int f; + /* CPU will stay inside the loop for 200msec at most */ + retry = kbdc->retry * 2; + while ((f = read_status(kbdc) & KBDS_BUFFER_FULL) != KBDS_AUX_BUFFER_FULL) { if (f == KBDS_KBD_BUFFER_FULL) { @@ -500,11 +546,13 @@ wait_for_aux_data(struct atkbdc_softc *kbdc) static int wait_for_aux_ack(struct atkbdc_softc *kbdc) { - /* CPU will stay inside the loop for 200msec at most */ - int retry = 10000; + int retry; int f; int b; + /* CPU will stay inside the loop for 200msec at most */ + retry = kbdc->retry * 2; + while (retry-- > 0) { if ((f = read_status(kbdc)) & KBDS_ANY_BUFFER_FULL) { DELAY(KBDD_DELAYTIME); diff --git a/sys/dev/atkbdc/atkbdc_ebus.c b/sys/dev/atkbdc/atkbdc_ebus.c index 639203d548b..0dcb0a71e79 100644 --- a/sys/dev/atkbdc/atkbdc_ebus.c +++ b/sys/dev/atkbdc/atkbdc_ebus.c @@ -202,6 +202,7 @@ atkbdc_ebus_attach(device_t dev) "cannot determine command/data port resource\n"); return (ENXIO); } + sc->retry = 5000; sc->port0 = bus_alloc_resource(dev, SYS_RES_MEMORY, &rid, start, start, 1, RF_ACTIVE); if (sc->port0 == NULL) { diff --git a/sys/dev/atkbdc/atkbdc_isa.c b/sys/dev/atkbdc/atkbdc_isa.c index 975d2993571..10e172f4552 100644 --- a/sys/dev/atkbdc/atkbdc_isa.c +++ b/sys/dev/atkbdc/atkbdc_isa.c @@ -94,7 +94,7 @@ atkbdc_isa_probe(device_t dev) u_long count; int error; int rid; -#if defined(__i386__) +#if defined(__i386__) || defined(__amd64__) bus_space_tag_t tag; bus_space_handle_t ioh1; volatile int i; @@ -141,7 +141,7 @@ atkbdc_isa_probe(device_t dev) return ENXIO; } -#if defined(__i386__) +#if defined(__i386__) || defined(__amd64__) /* * Check if we really have AT keyboard controller. Poll status * register until we get "all clear" indication. 
If no such @@ -161,6 +161,8 @@ atkbdc_isa_probe(device_t dev) if (i == 65535) { bus_release_resource(dev, SYS_RES_IOPORT, 0, port0); bus_release_resource(dev, SYS_RES_IOPORT, 1, port1); + if (bootverbose) + device_printf(dev, "AT keyboard controller not found\n"); return ENXIO; } #endif @@ -201,6 +203,7 @@ atkbdc_isa_attach(device_t dev) } rid = 0; + sc->retry = 5000; sc->port0 = bus_alloc_resource_any(dev, SYS_RES_IOPORT, &rid, RF_ACTIVE); if (sc->port0 == NULL) diff --git a/sys/dev/atkbdc/atkbdcreg.h b/sys/dev/atkbdc/atkbdcreg.h index 0715d9ece63..7ea26a66dd8 100644 --- a/sys/dev/atkbdc/atkbdcreg.h +++ b/sys/dev/atkbdc/atkbdcreg.h @@ -200,6 +200,7 @@ typedef struct atkbdc_softc { int lock; /* FIXME: XXX not quite a semaphore... */ kqueue kbd; /* keyboard data queue */ kqueue aux; /* auxiliary data queue */ + int retry; } atkbdc_softc_t; enum kbdc_device_ivar { diff --git a/sys/dev/bce/if_bce.c b/sys/dev/bce/if_bce.c index 2ad5ab2d37f..ad7209e7fd5 100644 --- a/sys/dev/bce/if_bce.c +++ b/sys/dev/bce/if_bce.c @@ -303,7 +303,7 @@ static void bce_dump_txbd (struct bce_softc *, static void bce_dump_rxbd (struct bce_softc *, int, struct rx_bd *); #ifdef BCE_JUMBO_HDRSPLIT -static void bce_dump_pgbd (struct bce_softc *, +static void bce_dump_pgbd (struct bce_softc *, int, struct rx_bd *); #endif static void bce_dump_l2fhdr (struct bce_softc *, @@ -368,7 +368,7 @@ static int bce_nvram_write (struct bce_softc *, u32, u8 *, int); /****************************************************************************/ static void bce_get_media (struct bce_softc *); static void bce_init_media (struct bce_softc *); -static void bce_dma_map_addr (void *, +static void bce_dma_map_addr (void *, bus_dma_segment_t *, int, int); static int bce_dma_alloc (device_t); static void bce_dma_free (struct bce_softc *); @@ -379,7 +379,7 @@ static void bce_release_resources (struct bce_softc *); /****************************************************************************/ static int bce_fw_sync 
(struct bce_softc *, u32); static void bce_load_rv2p_fw (struct bce_softc *, u32 *, u32, u32); -static void bce_load_cpu_fw (struct bce_softc *, +static void bce_load_cpu_fw (struct bce_softc *, struct cpu_reg *, struct fw_info *); static void bce_start_cpu (struct bce_softc *, struct cpu_reg *); static void bce_halt_cpu (struct bce_softc *, struct cpu_reg *); @@ -401,21 +401,21 @@ static int bce_blockinit (struct bce_softc *); static int bce_init_tx_chain (struct bce_softc *); static void bce_free_tx_chain (struct bce_softc *); -static int bce_get_rx_buf (struct bce_softc *, +static int bce_get_rx_buf (struct bce_softc *, struct mbuf *, u16 *, u16 *, u32 *); static int bce_init_rx_chain (struct bce_softc *); static void bce_fill_rx_chain (struct bce_softc *); static void bce_free_rx_chain (struct bce_softc *); #ifdef BCE_JUMBO_HDRSPLIT -static int bce_get_pg_buf (struct bce_softc *, +static int bce_get_pg_buf (struct bce_softc *, struct mbuf *, u16 *, u16 *); static int bce_init_pg_chain (struct bce_softc *); static void bce_fill_pg_chain (struct bce_softc *); static void bce_free_pg_chain (struct bce_softc *); #endif -static struct mbuf *bce_tso_setup (struct bce_softc *, +static struct mbuf *bce_tso_setup (struct bce_softc *, struct mbuf **, u16 *); static int bce_tx_encap (struct bce_softc *, struct mbuf **); static void bce_start_locked (struct ifnet *); @@ -566,7 +566,7 @@ bce_probe(device_t dev) /* Print out the device identity. 
*/ snprintf(descbuf, BCE_DEVDESC_MAX, "%s (%c%d)", - t->bce_name, (((pci_read_config(dev, + t->bce_name, (((pci_read_config(dev, PCIR_REVID, 4) & 0xf0) >> 4) + 'A'), (pci_read_config(dev, PCIR_REVID, 4) & 0xf)); @@ -593,57 +593,60 @@ bce_probe(device_t dev) static void bce_print_adapter_info(struct bce_softc *sc) { - int i = 0; + int i = 0; DBENTER(BCE_VERBOSE_LOAD); - BCE_PRINTF("ASIC (0x%08X); ", sc->bce_chipid); - printf("Rev (%c%d); ", ((BCE_CHIP_ID(sc) & 0xf000) >> 12) + 'A', - ((BCE_CHIP_ID(sc) & 0x0ff0) >> 4)); + if (bootverbose) { + BCE_PRINTF("ASIC (0x%08X); ", sc->bce_chipid); + printf("Rev (%c%d); ", ((BCE_CHIP_ID(sc) & 0xf000) >> + 12) + 'A', ((BCE_CHIP_ID(sc) & 0x0ff0) >> 4)); - /* Bus info. */ - if (sc->bce_flags & BCE_PCIE_FLAG) { - printf("Bus (PCIe x%d, ", sc->link_width); - switch (sc->link_speed) { - case 1: printf("2.5Gbps); "); break; - case 2: printf("5Gbps); "); break; - default: printf("Unknown link speed); "); + + /* Bus info. */ + if (sc->bce_flags & BCE_PCIE_FLAG) { + printf("Bus (PCIe x%d, ", sc->link_width); + switch (sc->link_speed) { + case 1: printf("2.5Gbps); "); break; + case 2: printf("5Gbps); "); break; + default: printf("Unknown link speed); "); + } + } else { + printf("Bus (PCI%s, %s, %dMHz); ", + ((sc->bce_flags & BCE_PCIX_FLAG) ? "-X" : ""), + ((sc->bce_flags & BCE_PCI_32BIT_FLAG) ? + "32-bit" : "64-bit"), sc->bus_speed_mhz); } - } else { - printf("Bus (PCI%s, %s, %dMHz); ", - ((sc->bce_flags & BCE_PCIX_FLAG) ? "-X" : ""), - ((sc->bce_flags & BCE_PCI_32BIT_FLAG) ? - "32-bit" : "64-bit"), sc->bus_speed_mhz); - } - /* Firmware version and device features. */ - printf("B/C (%s); Flags (", sc->bce_bc_ver); + /* Firmware version and device features. 
*/ + printf("B/C (%s); Flags (", sc->bce_bc_ver); -#ifdef BCE_JUMBO_HDRSPLIT - printf("SPLT"); - i++; -#endif + #ifdef BCE_JUMBO_HDRSPLIT + printf("SPLT"); + i++; + #endif - if (sc->bce_flags & BCE_USING_MSI_FLAG) { - if (i > 0) printf("|"); - printf("MSI"); i++; - } + if (sc->bce_flags & BCE_USING_MSI_FLAG) { + if (i > 0) printf("|"); + printf("MSI"); i++; + } - if (sc->bce_flags & BCE_USING_MSIX_FLAG) { - if (i > 0) printf("|"); - printf("MSI-X"); i++; - } + if (sc->bce_flags & BCE_USING_MSIX_FLAG) { + if (i > 0) printf("|"); + printf("MSI-X"); i++; + } - if (sc->bce_phy_flags & BCE_PHY_2_5G_CAPABLE_FLAG) { - if (i > 0) printf("|"); - printf("2.5G"); i++; - } + if (sc->bce_phy_flags & BCE_PHY_2_5G_CAPABLE_FLAG) { + if (i > 0) printf("|"); + printf("2.5G"); i++; + } - if (sc->bce_flags & BCE_MFW_ENABLE_FLAG) { - if (i > 0) printf("|"); - printf("MFW); MFW (%s)\n", sc->bce_mfw_ver); - } else { - printf(")\n"); + if (sc->bce_flags & BCE_MFW_ENABLE_FLAG) { + if (i > 0) printf("|"); + printf("MFW); MFW (%s)\n", sc->bce_mfw_ver); + } else { + printf(")\n"); + } } DBEXIT(BCE_VERBOSE_LOAD); @@ -785,13 +788,13 @@ bce_attach(device_t dev) (bce_msi_enable >= 1) && (sc->bce_msi_count == 0)) { sc->bce_msi_count = 1; if ((error = pci_alloc_msi(dev, &sc->bce_msi_count)) != 0) { - BCE_PRINTF("%s(%d): MSI allocation failed! error = %d\n", - __FILE__, __LINE__, error); + BCE_PRINTF("%s(%d): MSI allocation failed! 
" + "error = %d\n", __FILE__, __LINE__, error); sc->bce_msi_count = 0; pci_release_msi(dev); } else { - DBPRINT(sc, BCE_INFO_LOAD, "%s(): Using MSI interrupt.\n", - __FUNCTION__); + DBPRINT(sc, BCE_INFO_LOAD, "%s(): Using MSI " + "interrupt.\n", __FUNCTION__); sc->bce_flags |= BCE_USING_MSI_FLAG; if ((BCE_CHIP_NUM(sc) == BCE_CHIP_NUM_5709) || (BCE_CHIP_NUM(sc) == BCE_CHIP_NUM_5716)) @@ -848,10 +851,11 @@ bce_attach(device_t dev) case BCE_CHIP_ID_5709_B0: case BCE_CHIP_ID_5709_B1: case BCE_CHIP_ID_5709_B2: - BCE_PRINTF("%s(%d): Unsupported controller revision (%c%d)!\n", - __FILE__, __LINE__, - (((pci_read_config(dev, PCIR_REVID, 4) & 0xf0) >> 4) + 'A'), - (pci_read_config(dev, PCIR_REVID, 4) & 0xf)); + BCE_PRINTF("%s(%d): Unsupported controller " + "revision (%c%d)!\n", __FILE__, __LINE__, + (((pci_read_config(dev, PCIR_REVID, 4) & + 0xf0) >> 4) + 'A'), (pci_read_config(dev, + PCIR_REVID, 4) & 0xf)); rc = ENODEV; goto bce_attach_fail; } @@ -1072,19 +1076,19 @@ bce_attach(device_t dev) ifp = sc->bce_ifp = if_alloc(IFT_ETHER); if (ifp == NULL) { BCE_PRINTF("%s(%d): Interface allocation failed!\n", - __FILE__, __LINE__); + __FILE__, __LINE__); rc = ENXIO; goto bce_attach_fail; } /* Initialize the ifnet interface. */ - ifp->if_softc = sc; + ifp->if_softc = sc; if_initname(ifp, device_get_name(dev), device_get_unit(dev)); - ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; - ifp->if_ioctl = bce_ioctl; - ifp->if_start = bce_start; - ifp->if_init = bce_init; - ifp->if_mtu = ETHERMTU; + ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; + ifp->if_ioctl = bce_ioctl; + ifp->if_start = bce_start; + ifp->if_init = bce_init; + ifp->if_mtu = ETHERMTU; if (bce_tso_enable) { ifp->if_hwassist = BCE_IF_HWASSIST | CSUM_TSO; @@ -1095,7 +1099,7 @@ bce_attach(device_t dev) ifp->if_capabilities = BCE_IF_CAPABILITIES; } - ifp->if_capenable = ifp->if_capabilities; + ifp->if_capenable = ifp->if_capabilities; /* * Assume standard mbuf sizes for buffer allocation. 
@@ -1105,16 +1109,17 @@ bce_attach(device_t dev) #ifdef BCE_JUMBO_HDRSPLIT sc->rx_bd_mbuf_alloc_size = MHLEN; /* Make sure offset is 16 byte aligned for hardware. */ - sc->rx_bd_mbuf_align_pad = roundup2((MSIZE - MHLEN), 16) - - (MSIZE - MHLEN); - sc->rx_bd_mbuf_data_len = sc->rx_bd_mbuf_alloc_size - - sc->rx_bd_mbuf_align_pad; + sc->rx_bd_mbuf_align_pad = + roundup2((MSIZE - MHLEN), 16) - (MSIZE - MHLEN); + sc->rx_bd_mbuf_data_len = sc->rx_bd_mbuf_alloc_size - + sc->rx_bd_mbuf_align_pad; sc->pg_bd_mbuf_alloc_size = MCLBYTES; #else sc->rx_bd_mbuf_alloc_size = MCLBYTES; - sc->rx_bd_mbuf_align_pad = roundup2(MCLBYTES, 16) - MCLBYTES; - sc->rx_bd_mbuf_data_len = sc->rx_bd_mbuf_alloc_size - - sc->rx_bd_mbuf_align_pad; + sc->rx_bd_mbuf_align_pad = + roundup2(MCLBYTES, 16) - MCLBYTES; + sc->rx_bd_mbuf_data_len = sc->rx_bd_mbuf_alloc_size - + sc->rx_bd_mbuf_align_pad; #endif ifp->if_snd.ifq_drv_maxlen = USABLE_TX_BD; @@ -1126,14 +1131,14 @@ bce_attach(device_t dev) else ifp->if_baudrate = IF_Mbps(1000); - /* Handle any special PHY initialization for SerDes PHYs. */ - bce_init_media(sc); + /* Handle any special PHY initialization for SerDes PHYs. */ + bce_init_media(sc); /* MII child bus by probing the PHY. 
*/ if (mii_phy_probe(dev, &sc->bce_miibus, bce_ifmedia_upd, bce_ifmedia_sts)) { BCE_PRINTF("%s(%d): No PHY found on child MII bus!\n", - __FILE__, __LINE__); + __FILE__, __LINE__); rc = ENXIO; goto bce_attach_fail; } @@ -1155,7 +1160,7 @@ bce_attach(device_t dev) if (rc) { BCE_PRINTF("%s(%d): Failed to setup IRQ!\n", - __FILE__, __LINE__); + __FILE__, __LINE__); bce_detach(dev); goto bce_attach_exit; } @@ -1396,6 +1401,9 @@ bce_reg_wr_ind(struct bce_softc *sc, u32 offset, u32 val) static void bce_shmem_wr(struct bce_softc *sc, u32 offset, u32 val) { + DBPRINT(sc, BCE_VERBOSE_FIRMWARE, "%s(): Writing 0x%08X to " + "0x%08X\n", __FUNCTION__, val, offset); + bce_reg_wr_ind(sc, sc->bce_shmem_base + offset, val); } @@ -1411,7 +1419,12 @@ bce_shmem_wr(struct bce_softc *sc, u32 offset, u32 val) static u32 bce_shmem_rd(struct bce_softc *sc, u32 offset) { - return (bce_reg_rd_ind(sc, sc->bce_shmem_base + offset)); + u32 val = bce_reg_rd_ind(sc, sc->bce_shmem_base + offset); + + DBPRINT(sc, BCE_VERBOSE_FIRMWARE, "%s(): Reading 0x%08X from " + "0x%08X\n", __FUNCTION__, val, offset); + + return val; } @@ -1430,9 +1443,9 @@ bce_ctx_rd(struct bce_softc *sc, u32 cid_addr, u32 ctx_offset) { u32 idx, offset, retry_cnt = 5, val; - DBRUNIF((cid_addr > MAX_CID_ADDR || ctx_offset & 0x3 || cid_addr & CTX_MASK), - BCE_PRINTF("%s(): Invalid CID address: 0x%08X.\n", - __FUNCTION__, cid_addr)); + DBRUNIF((cid_addr > MAX_CID_ADDR || ctx_offset & 0x3 || + cid_addr & CTX_MASK), BCE_PRINTF("%s(): Invalid CID " + "address: 0x%08X.\n", __FUNCTION__, cid_addr)); offset = ctx_offset + cid_addr; @@ -1450,8 +1463,8 @@ bce_ctx_rd(struct bce_softc *sc, u32 cid_addr, u32 ctx_offset) if (val & BCE_CTX_CTX_CTRL_READ_REQ) BCE_PRINTF("%s(%d); Unable to read CTX memory: " - "cid_addr = 0x%08X, offset = 0x%08X!\n", - __FILE__, __LINE__, cid_addr, ctx_offset); + "cid_addr = 0x%08X, offset = 0x%08X!\n", + __FILE__, __LINE__, cid_addr, ctx_offset); val = REG_RD(sc, BCE_CTX_CTX_DATA); } else { @@ -1487,7 +1500,7 
@@ bce_ctx_wr(struct bce_softc *sc, u32 cid_addr, u32 ctx_offset, u32 ctx_val) DBRUNIF((cid_addr > MAX_CID_ADDR || ctx_offset & 0x3 || cid_addr & CTX_MASK), BCE_PRINTF("%s(): Invalid CID address: 0x%08X.\n", - __FUNCTION__, cid_addr)); + __FUNCTION__, cid_addr)); if ((BCE_CHIP_NUM(sc) == BCE_CHIP_NUM_5709) || (BCE_CHIP_NUM(sc) == BCE_CHIP_NUM_5716)) { @@ -1504,8 +1517,8 @@ bce_ctx_wr(struct bce_softc *sc, u32 cid_addr, u32 ctx_offset, u32 ctx_val) if (val & BCE_CTX_CTX_CTRL_WRITE_REQ) BCE_PRINTF("%s(%d); Unable to write CTX memory: " - "cid_addr = 0x%08X, offset = 0x%08X!\n", - __FILE__, __LINE__, cid_addr, ctx_offset); + "cid_addr = 0x%08X, offset = 0x%08X!\n", + __FILE__, __LINE__, cid_addr, ctx_offset); } else { REG_WR(sc, BCE_CTX_DATA_ADR, offset); @@ -1706,54 +1719,73 @@ bce_miibus_statchg(device_t dev) val = REG_RD(sc, BCE_EMAC_MODE); val &= ~(BCE_EMAC_MODE_PORT | BCE_EMAC_MODE_HALF_DUPLEX | - BCE_EMAC_MODE_MAC_LOOP | BCE_EMAC_MODE_FORCE_LINK | - BCE_EMAC_MODE_25G); + BCE_EMAC_MODE_MAC_LOOP | BCE_EMAC_MODE_FORCE_LINK | + BCE_EMAC_MODE_25G); - /* Set MII or GMII interface based on the speed negotiated by the PHY. */ + /* Set MII or GMII interface based on the PHY speed. 
*/ switch (IFM_SUBTYPE(mii->mii_media_active)) { case IFM_10_T: if (BCE_CHIP_NUM(sc) != BCE_CHIP_NUM_5706) { - DBPRINT(sc, BCE_INFO, "Enabling 10Mb interface.\n"); + DBPRINT(sc, BCE_INFO_PHY, + "Enabling 10Mb interface.\n"); val |= BCE_EMAC_MODE_PORT_MII_10; break; } /* fall-through */ case IFM_100_TX: - DBPRINT(sc, BCE_INFO, "Enabling MII interface.\n"); + DBPRINT(sc, BCE_INFO_PHY, "Enabling MII interface.\n"); val |= BCE_EMAC_MODE_PORT_MII; break; case IFM_2500_SX: - DBPRINT(sc, BCE_INFO, "Enabling 2.5G MAC mode.\n"); + DBPRINT(sc, BCE_INFO_PHY, "Enabling 2.5G MAC mode.\n"); val |= BCE_EMAC_MODE_25G; /* fall-through */ case IFM_1000_T: case IFM_1000_SX: - DBPRINT(sc, BCE_INFO, "Enabling GMII interface.\n"); + DBPRINT(sc, BCE_INFO_PHY, "Enabling GMII interface.\n"); val |= BCE_EMAC_MODE_PORT_GMII; break; default: - DBPRINT(sc, BCE_INFO, "Unknown speed, enabling default GMII " - "interface.\n"); + DBPRINT(sc, BCE_INFO_PHY, "Unknown link speed, enabling " + "default GMII interface.\n"); val |= BCE_EMAC_MODE_PORT_GMII; } - /* Set half or full duplex based on the duplicity negotiated by the PHY. */ + /* Set half or full duplex based on PHY settings. */ if ((mii->mii_media_active & IFM_GMASK) == IFM_HDX) { - DBPRINT(sc, BCE_INFO, "Setting Half-Duplex interface.\n"); + DBPRINT(sc, BCE_INFO_PHY, + "Setting Half-Duplex interface.\n"); val |= BCE_EMAC_MODE_HALF_DUPLEX; } else - DBPRINT(sc, BCE_INFO, "Setting Full-Duplex interface.\n"); + DBPRINT(sc, BCE_INFO_PHY, + "Setting Full-Duplex interface.\n"); REG_WR(sc, BCE_EMAC_MODE, val); -#if 0 - /* ToDo: Enable flow control support in brgphy and bge. 
*/ /* FLAG0 is set if RX is enabled and FLAG1 if TX is enabled */ - if (mii->mii_media_active & IFM_FLAG0) + if (mii->mii_media_active & IFM_FLAG0) { + DBPRINT(sc, BCE_INFO_PHY, + "%s(): Enabling RX flow control.\n", __FUNCTION__); BCE_SETBIT(sc, BCE_EMAC_RX_MODE, BCE_EMAC_RX_MODE_FLOW_EN); - if (mii->mii_media_active & IFM_FLAG1) - BCE_SETBIT(sc, BCE_EMAC_RX_MODE, BCE_EMAC_TX_MODE_FLOW_EN); -#endif + } else { + DBPRINT(sc, BCE_INFO_PHY, + "%s(): Disabling RX flow control.\n", __FUNCTION__); + BCE_CLRBIT(sc, BCE_EMAC_RX_MODE, BCE_EMAC_RX_MODE_FLOW_EN); + } + + if (mii->mii_media_active & IFM_FLAG1) { + DBPRINT(sc, BCE_INFO_PHY, + "%s(): Enabling TX flow control.\n", __FUNCTION__); + BCE_SETBIT(sc, BCE_EMAC_TX_MODE, BCE_EMAC_TX_MODE_FLOW_EN); + sc->bce_flags |= BCE_USING_TX_FLOW_CONTROL; + } else { + DBPRINT(sc, BCE_INFO_PHY, + "%s(): Disabling TX flow control.\n", __FUNCTION__); + BCE_CLRBIT(sc, BCE_EMAC_TX_MODE, BCE_EMAC_TX_MODE_FLOW_EN); + sc->bce_flags &= ~BCE_USING_TX_FLOW_CONTROL; + } + + /* ToDo: Update watermarks in bce_init_rx_context(). */ DBEXIT(BCE_VERBOSE_PHY); } @@ -1926,8 +1958,8 @@ bce_enable_nvram_access(struct bce_softc *sc) val = REG_RD(sc, BCE_NVM_ACCESS_ENABLE); /* Enable both bits, even on read. */ - REG_WR(sc, BCE_NVM_ACCESS_ENABLE, - val | BCE_NVM_ACCESS_ENABLE_EN | BCE_NVM_ACCESS_ENABLE_WR_EN); + REG_WR(sc, BCE_NVM_ACCESS_ENABLE, val | + BCE_NVM_ACCESS_ENABLE_EN | BCE_NVM_ACCESS_ENABLE_WR_EN); DBEXIT(BCE_VERBOSE_NVRAM); } @@ -1951,9 +1983,8 @@ bce_disable_nvram_access(struct bce_softc *sc) val = REG_RD(sc, BCE_NVM_ACCESS_ENABLE); /* Disable both bits, even after read. */ - REG_WR(sc, BCE_NVM_ACCESS_ENABLE, - val & ~(BCE_NVM_ACCESS_ENABLE_EN | - BCE_NVM_ACCESS_ENABLE_WR_EN)); + REG_WR(sc, BCE_NVM_ACCESS_ENABLE, val & + ~(BCE_NVM_ACCESS_ENABLE_EN | BCE_NVM_ACCESS_ENABLE_WR_EN)); DBEXIT(BCE_VERBOSE_NVRAM); } @@ -1983,7 +2014,7 @@ bce_nvram_erase_page(struct bce_softc *sc, u32 offset) /* Build an erase command. 
*/ cmd = BCE_NVM_COMMAND_ERASE | BCE_NVM_COMMAND_WR | - BCE_NVM_COMMAND_DOIT; + BCE_NVM_COMMAND_DOIT; /* * Clear the DONE bit separately, set the NVRAM adress to erase, @@ -2026,8 +2057,8 @@ bce_nvram_erase_page_exit: /* 0 on success and the 32 bit value read, positive value on failure. */ /****************************************************************************/ static int -bce_nvram_read_dword(struct bce_softc *sc, u32 offset, u8 *ret_val, - u32 cmd_flags) +bce_nvram_read_dword(struct bce_softc *sc, + u32 offset, u8 *ret_val, u32 cmd_flags) { u32 cmd; int i, rc = 0; @@ -2040,8 +2071,8 @@ bce_nvram_read_dword(struct bce_softc *sc, u32 offset, u8 *ret_val, /* Calculate the offset for buffered flash if translation is used. */ if (sc->bce_flash_info->flags & BCE_NV_TRANSLATE) { offset = ((offset / sc->bce_flash_info->page_size) << - sc->bce_flash_info->page_bits) + - (offset % sc->bce_flash_info->page_size); + sc->bce_flash_info->page_bits) + + (offset % sc->bce_flash_info->page_size); } /* @@ -2070,8 +2101,8 @@ bce_nvram_read_dword(struct bce_softc *sc, u32 offset, u8 *ret_val, /* Check for errors. */ if (i >= NVRAM_TIMEOUT_COUNT) { - BCE_PRINTF("%s(%d): Timeout error reading NVRAM at offset 0x%08X!\n", - __FILE__, __LINE__, offset); + BCE_PRINTF("%s(%d): Timeout error reading NVRAM at " + "offset 0x%08X!\n", __FILE__, __LINE__, offset); rc = EBUSY; } @@ -2106,8 +2137,8 @@ bce_nvram_write_dword(struct bce_softc *sc, u32 offset, u8 *val, /* Calculate the offset for buffered flash if translation is used. 
*/ if (sc->bce_flash_info->flags & BCE_NV_TRANSLATE) { offset = ((offset / sc->bce_flash_info->page_size) << - sc->bce_flash_info->page_bits) + - (offset % sc->bce_flash_info->page_size); + sc->bce_flash_info->page_bits) + + (offset % sc->bce_flash_info->page_size); } /* @@ -2129,8 +2160,8 @@ bce_nvram_write_dword(struct bce_softc *sc, u32 offset, u8 *val, break; } if (j >= NVRAM_TIMEOUT_COUNT) { - BCE_PRINTF("%s(%d): Timeout error writing NVRAM at offset 0x%08X\n", - __FILE__, __LINE__, offset); + BCE_PRINTF("%s(%d): Timeout error writing NVRAM at " + "offset 0x%08X\n", __FILE__, __LINE__, offset); rc = EBUSY; } @@ -2232,7 +2263,7 @@ bce_init_nvram(struct bce_softc *sc) if (j == entry_count) { sc->bce_flash_info = NULL; BCE_PRINTF("%s(%d): Unknown Flash NVRAM found!\n", - __FILE__, __LINE__); + __FILE__, __LINE__); rc = ENODEV; } @@ -2246,8 +2277,8 @@ bce_init_nvram_get_flash_size: sc->bce_flash_size = sc->bce_flash_info->total_size; DBPRINT(sc, BCE_INFO_LOAD, "%s(): Found %s, size = 0x%08X\n", - __FUNCTION__, sc->bce_flash_info->name, - sc->bce_flash_info->total_size); + __FUNCTION__, sc->bce_flash_info->name, + sc->bce_flash_info->total_size); DBEXIT(BCE_VERBOSE_NVRAM); return rc; @@ -2604,7 +2635,8 @@ bce_nvram_test(struct bce_softc *sc) * the magic value at offset 0. */ if ((rc = bce_nvram_read(sc, 0, data, 4)) != 0) { - BCE_PRINTF("%s(%d): Unable to read NVRAM!\n", __FILE__, __LINE__); + BCE_PRINTF("%s(%d): Unable to read NVRAM!\n", + __FILE__, __LINE__); goto bce_nvram_test_exit; } @@ -2615,9 +2647,9 @@ bce_nvram_test(struct bce_softc *sc) magic = bce_be32toh(buf[0]); if (magic != BCE_NVRAM_MAGIC) { rc = ENODEV; - BCE_PRINTF("%s(%d): Invalid NVRAM magic value! Expected: 0x%08X, " - "Found: 0x%08X\n", - __FILE__, __LINE__, BCE_NVRAM_MAGIC, magic); + BCE_PRINTF("%s(%d): Invalid NVRAM magic value! 
" + "Expected: 0x%08X, Found: 0x%08X\n", + __FILE__, __LINE__, BCE_NVRAM_MAGIC, magic); goto bce_nvram_test_exit; } @@ -2626,26 +2658,27 @@ bce_nvram_test(struct bce_softc *sc) * configuration data. */ if ((rc = bce_nvram_read(sc, 0x100, data, BCE_NVRAM_SIZE)) != 0) { - BCE_PRINTF("%s(%d): Unable to read Manufacturing Information from " - "NVRAM!\n", __FILE__, __LINE__); + BCE_PRINTF("%s(%d): Unable to read manufacturing " + "Information from NVRAM!\n", __FILE__, __LINE__); goto bce_nvram_test_exit; } csum = ether_crc32_le(data, 0x100); if (csum != BCE_CRC32_RESIDUAL) { rc = ENODEV; - BCE_PRINTF("%s(%d): Invalid Manufacturing Information NVRAM CRC! " - "Expected: 0x%08X, Found: 0x%08X\n", - __FILE__, __LINE__, BCE_CRC32_RESIDUAL, csum); + BCE_PRINTF("%s(%d): Invalid manufacturing information " + "NVRAM CRC! Expected: 0x%08X, Found: 0x%08X\n", + __FILE__, __LINE__, BCE_CRC32_RESIDUAL, csum); goto bce_nvram_test_exit; } csum = ether_crc32_le(data + 0x100, 0x100); if (csum != BCE_CRC32_RESIDUAL) { rc = ENODEV; - BCE_PRINTF("%s(%d): Invalid Feature Configuration Information " - "NVRAM CRC! Expected: 0x%08X, Found: 08%08X\n", - __FILE__, __LINE__, BCE_CRC32_RESIDUAL, csum); + BCE_PRINTF("%s(%d): Invalid feature configuration " + "information NVRAM CRC! Expected: 0x%08X, " + "Found: 0x%08X\n", __FILE__, __LINE__, + BCE_CRC32_RESIDUAL, csum); } bce_nvram_test_exit: @@ -2666,7 +2699,7 @@ bce_get_media(struct bce_softc *sc) { u32 val; - DBENTER(BCE_VERBOSE); + DBENTER(BCE_VERBOSE_PHY); /* Assume PHY address for copper controllers.
*/ sc->bce_phy_addr = 1; @@ -2692,10 +2725,10 @@ bce_get_media(struct bce_softc *sc) } if (val & BCE_MISC_DUAL_MEDIA_CTRL_STRAP_OVERRIDE) - strap = (val & + strap = (val & BCE_MISC_DUAL_MEDIA_CTRL_PHY_CTRL) >> 21; else - strap = (val & + strap = (val & BCE_MISC_DUAL_MEDIA_CTRL_PHY_CTRL_STRAP) >> 8; if (pci_get_function(sc->bce_dev) == 0) { @@ -2744,7 +2777,7 @@ bce_get_media(struct bce_softc *sc) val = bce_shmem_rd(sc, BCE_SHARED_HW_CFG_CONFIG); if (val & BCE_SHARED_HW_CFG_PHY_2_5G) { - sc->bce_phy_flags |= + sc->bce_phy_flags |= BCE_PHY_2_5G_CAPABLE_FLAG; DBPRINT(sc, BCE_INFO_LOAD, "Found 2.5Gb " "capable adapter\n"); @@ -2758,7 +2791,7 @@ bce_get_media_exit: DBPRINT(sc, (BCE_INFO_LOAD | BCE_INFO_PHY), "Using PHY address %d.\n", sc->bce_phy_addr); - DBEXIT(BCE_VERBOSE); + DBEXIT(BCE_VERBOSE_PHY); } @@ -3056,7 +3089,9 @@ bce_dma_map_addr(void *arg, bus_dma_segment_t *segs, int nseg, int error) /* Simulate a mapping failure. */ DBRUNIF(DB_RANDOMTRUE(dma_map_addr_failed_sim_control), - error = ENOMEM); + error = ENOMEM); + + /* ToDo: How to increment debug sim_count variable here? */ /* Check for an error and signal the caller that an error occurred. */ if (error) { @@ -3154,7 +3189,7 @@ bce_dma_alloc(device_t dev) goto bce_dma_alloc_exit; } - DBPRINT(sc, BCE_INFO, "%s(): status_block_paddr = 0x%jX\n", + DBPRINT(sc, BCE_INFO_LOAD, "%s(): status_block_paddr = 0x%jX\n", __FUNCTION__, (uintmax_t) sc->status_block_paddr); /* @@ -3193,7 +3228,7 @@ bce_dma_alloc(device_t dev) goto bce_dma_alloc_exit; } - DBPRINT(sc, BCE_INFO, "%s(): stats_block_paddr = 0x%jX\n", + DBPRINT(sc, BCE_INFO_LOAD, "%s(): stats_block_paddr = 0x%jX\n", __FUNCTION__, (uintmax_t) sc->stats_block_paddr); /* BCM5709 uses host memory as cache for context memory. 
*/ @@ -3217,8 +3252,8 @@ bce_dma_alloc(device_t dev) BCE_DMA_BOUNDARY, sc->max_bus_addr, BUS_SPACE_MAXADDR, NULL, NULL, BCM_PAGE_SIZE, 1, BCM_PAGE_SIZE, 0, NULL, NULL, &sc->ctx_tag)) { - BCE_PRINTF("%s(%d): Could not allocate CTX DMA tag!\n", - __FILE__, __LINE__); + BCE_PRINTF("%s(%d): Could not allocate CTX " + "DMA tag!\n", __FILE__, __LINE__); rc = ENOMEM; goto bce_dma_alloc_exit; } @@ -3248,8 +3283,9 @@ bce_dma_alloc(device_t dev) goto bce_dma_alloc_exit; } - DBPRINT(sc, BCE_INFO, "%s(): ctx_paddr[%d] = 0x%jX\n", - __FUNCTION__, i, (uintmax_t) sc->ctx_paddr[i]); + DBPRINT(sc, BCE_INFO_LOAD, "%s(): ctx_paddr[%d] " + "= 0x%jX\n", __FUNCTION__, i, + (uintmax_t) sc->ctx_paddr[i]); } } @@ -3262,15 +3298,15 @@ bce_dma_alloc(device_t dev) sc->max_bus_addr, BUS_SPACE_MAXADDR, NULL, NULL, BCE_TX_CHAIN_PAGE_SZ, 1, BCE_TX_CHAIN_PAGE_SZ, 0, NULL, NULL, &sc->tx_bd_chain_tag)) { - BCE_PRINTF("%s(%d): Could not allocate TX descriptor chain " - "DMA tag!\n", __FILE__, __LINE__); + BCE_PRINTF("%s(%d): Could not allocate TX descriptor " + "chain DMA tag!\n", __FILE__, __LINE__); rc = ENOMEM; goto bce_dma_alloc_exit; } for (i = 0; i < TX_PAGES; i++) { - if(bus_dmamem_alloc(sc->tx_bd_chain_tag, + if(bus_dmamem_alloc(sc->tx_bd_chain_tag, (void **)&sc->tx_bd_chain[i], BUS_DMA_NOWAIT, &sc->tx_bd_chain_map[i])) { BCE_PRINTF("%s(%d): Could not allocate TX descriptor " @@ -3291,8 +3327,9 @@ bce_dma_alloc(device_t dev) goto bce_dma_alloc_exit; } - DBPRINT(sc, BCE_INFO, "%s(): tx_bd_chain_paddr[%d] = 0x%jX\n", - __FUNCTION__, i, (uintmax_t) sc->tx_bd_chain_paddr[i]); + DBPRINT(sc, BCE_INFO_LOAD, "%s(): tx_bd_chain_paddr[%d] = " + "0x%jX\n", __FUNCTION__, i, + (uintmax_t) sc->tx_bd_chain_paddr[i]); } /* Check the required size before mapping to conserve resources. 
*/ @@ -3368,8 +3405,9 @@ bce_dma_alloc(device_t dev) goto bce_dma_alloc_exit; } - DBPRINT(sc, BCE_INFO, "%s(): rx_bd_chain_paddr[%d] = 0x%jX\n", - __FUNCTION__, i, (uintmax_t) sc->rx_bd_chain_paddr[i]); + DBPRINT(sc, BCE_INFO_LOAD, "%s(): rx_bd_chain_paddr[%d] = " + "0x%jX\n", __FUNCTION__, i, + (uintmax_t) sc->rx_bd_chain_paddr[i]); } /* @@ -3383,9 +3421,10 @@ bce_dma_alloc(device_t dev) #endif max_segments = 1; - DBPRINT(sc, BCE_INFO, "%s(): Creating rx_mbuf_tag (max size = 0x%jX " - "max segments = %d, max segment size = 0x%jX)\n", __FUNCTION__, - (uintmax_t) max_size, max_segments, (uintmax_t) max_seg_size); + DBPRINT(sc, BCE_INFO_LOAD, "%s(): Creating rx_mbuf_tag " + "(max size = 0x%jX max segments = %d, max segment " + "size = 0x%jX)\n", __FUNCTION__, (uintmax_t) max_size, + max_segments, (uintmax_t) max_seg_size); if (bus_dma_tag_create(sc->parent_tag, 1, BCE_DMA_BOUNDARY, sc->max_bus_addr, BUS_SPACE_MAXADDR, NULL, NULL, max_size, @@ -3429,7 +3468,7 @@ bce_dma_alloc(device_t dev) (void **)&sc->pg_bd_chain[i], BUS_DMA_NOWAIT, &sc->pg_bd_chain_map[i])) { BCE_PRINTF("%s(%d): Could not allocate page " - "descriptor chain DMA memory!\n", + "descriptor chain DMA memory!\n", __FILE__, __LINE__); rc = ENOMEM; goto bce_dma_alloc_exit; @@ -3437,7 +3476,7 @@ bce_dma_alloc(device_t dev) bzero((char *)sc->pg_bd_chain[i], BCE_PG_CHAIN_PAGE_SZ); - error = bus_dmamap_load(sc->pg_bd_chain_tag, + error = bus_dmamap_load(sc->pg_bd_chain_tag, sc->pg_bd_chain_map[i], sc->pg_bd_chain[i], BCE_PG_CHAIN_PAGE_SZ, bce_dma_map_addr, &sc->pg_bd_chain_paddr[i], BUS_DMA_NOWAIT); @@ -3449,8 +3488,9 @@ bce_dma_alloc(device_t dev) goto bce_dma_alloc_exit; } - DBPRINT(sc, BCE_INFO, "%s(): pg_bd_chain_paddr[%d] = 0x%jX\n", - __FUNCTION__, i, (uintmax_t) sc->pg_bd_chain_paddr[i]); + DBPRINT(sc, BCE_INFO_LOAD, "%s(): pg_bd_chain_paddr[%d] = " + "0x%jX\n", __FUNCTION__, i, + (uintmax_t) sc->pg_bd_chain_paddr[i]); } /* @@ -3524,7 +3564,7 @@ bce_release_resources(struct bce_softc *sc) if 
(sc->bce_res_mem != NULL) { DBPRINT(sc, BCE_INFO_RESET, "Releasing PCI memory.\n"); - bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BAR(0), + bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BAR(0), sc->bce_res_mem); } @@ -3582,7 +3622,7 @@ bce_fw_sync(struct bce_softc *sc, u32 msg_data) DELAY(1000); } - /* If we've timed out, tell the bootcode that we've stopped waiting. */ + /* If we've timed out, tell bootcode that we've stopped waiting. */ if (((val & BCE_FW_MSG_ACK) != (msg_data & BCE_DRV_MSG_SEQ)) && ((msg_data & BCE_DRV_MSG_DATA) != BCE_DRV_MSG_DATA_WAIT0)) { @@ -4319,22 +4359,22 @@ bce_init_cpus(struct bce_softc *sc) (BCE_CHIP_NUM(sc) == BCE_CHIP_NUM_5716)) { if ((BCE_CHIP_REV(sc) == BCE_CHIP_REV_Ax)) { - bce_load_rv2p_fw(sc, bce_xi90_rv2p_proc1, - sizeof(bce_xi90_rv2p_proc1), RV2P_PROC1); - bce_load_rv2p_fw(sc, bce_xi90_rv2p_proc2, - sizeof(bce_xi90_rv2p_proc2), RV2P_PROC2); + bce_load_rv2p_fw(sc, bce_xi90_rv2p_proc1, + sizeof(bce_xi90_rv2p_proc1), RV2P_PROC1); + bce_load_rv2p_fw(sc, bce_xi90_rv2p_proc2, + sizeof(bce_xi90_rv2p_proc2), RV2P_PROC2); } else { - bce_load_rv2p_fw(sc, bce_xi_rv2p_proc1, - sizeof(bce_xi_rv2p_proc1), RV2P_PROC1); - bce_load_rv2p_fw(sc, bce_xi_rv2p_proc2, - sizeof(bce_xi_rv2p_proc2), RV2P_PROC2); + bce_load_rv2p_fw(sc, bce_xi_rv2p_proc1, + sizeof(bce_xi_rv2p_proc1), RV2P_PROC1); + bce_load_rv2p_fw(sc, bce_xi_rv2p_proc2, + sizeof(bce_xi_rv2p_proc2), RV2P_PROC2); } } else { - bce_load_rv2p_fw(sc, bce_rv2p_proc1, - sizeof(bce_rv2p_proc1), RV2P_PROC1); + bce_load_rv2p_fw(sc, bce_rv2p_proc1, + sizeof(bce_rv2p_proc1), RV2P_PROC1); bce_load_rv2p_fw(sc, bce_rv2p_proc2, - sizeof(bce_rv2p_proc2), RV2P_PROC2); + sizeof(bce_rv2p_proc2), RV2P_PROC2); } bce_init_rxp_cpu(sc); @@ -4373,7 +4413,7 @@ bce_init_ctx(struct bce_softc *sc) * in host memory so prepare the host memory * for access. 
*/ - val = BCE_CTX_COMMAND_ENABLED | + val = BCE_CTX_COMMAND_ENABLED | BCE_CTX_COMMAND_MEM_INIT | (1 << 12); val |= (BCM_PAGE_BITS - 8) << 16; REG_WR(sc, BCE_CTX_COMMAND, val); @@ -4406,7 +4446,7 @@ bce_init_ctx(struct bce_softc *sc) /* Verify the context memory write was successful. */ for (j = 0; j < retry_cnt; j++) { val = REG_RD(sc, BCE_CTX_HOST_PAGE_TBL_CTRL); - if ((val & + if ((val & BCE_CTX_HOST_PAGE_TBL_CTRL_WRITE_REQ) == 0) break; DELAY(5); @@ -4461,6 +4501,7 @@ bce_get_mac_addr(struct bce_softc *sc) u32 mac_lo = 0, mac_hi = 0; DBENTER(BCE_VERBOSE_RESET); + /* * The NetXtreme II bootcode populates various NIC * power-on and runtime configuration items in a @@ -4475,7 +4516,7 @@ bce_get_mac_addr(struct bce_softc *sc) if ((mac_lo == 0) && (mac_hi == 0)) { BCE_PRINTF("%s(%d): Invalid Ethernet address!\n", - __FILE__, __LINE__); + __FILE__, __LINE__); } else { sc->eaddr[0] = (u_char)(mac_hi >> 8); sc->eaddr[1] = (u_char)(mac_hi >> 0); @@ -4485,7 +4526,8 @@ bce_get_mac_addr(struct bce_softc *sc) sc->eaddr[5] = (u_char)(mac_lo >> 0); } - DBPRINT(sc, BCE_INFO_MISC, "Permanent Ethernet address = %6D\n", sc->eaddr, ":"); + DBPRINT(sc, BCE_INFO_MISC, "Permanent Ethernet " + "address = %6D\n", sc->eaddr, ":"); DBEXIT(BCE_VERBOSE_RESET); } @@ -4505,14 +4547,15 @@ bce_set_mac_addr(struct bce_softc *sc) /* ToDo: Add support for setting multiple MAC addresses. 
*/ DBENTER(BCE_VERBOSE_RESET); - DBPRINT(sc, BCE_INFO_MISC, "Setting Ethernet address = %6D\n", sc->eaddr, ":"); + DBPRINT(sc, BCE_INFO_MISC, "Setting Ethernet address = " + "%6D\n", sc->eaddr, ":"); val = (mac_addr[0] << 8) | mac_addr[1]; REG_WR(sc, BCE_EMAC_MAC_MATCH0, val); val = (mac_addr[2] << 24) | (mac_addr[3] << 16) | - (mac_addr[4] << 8) | mac_addr[5]; + (mac_addr[4] << 8) | mac_addr[5]; REG_WR(sc, BCE_EMAC_MAC_MATCH1, val); @@ -4598,20 +4641,20 @@ bce_reset(struct bce_softc *sc, u32 reset_code) DBENTER(BCE_VERBOSE_RESET); DBPRINT(sc, BCE_VERBOSE_RESET, "%s(): reset_code = 0x%08X\n", - __FUNCTION__, reset_code); + __FUNCTION__, reset_code); /* Wait for pending PCI transactions to complete. */ REG_WR(sc, BCE_MISC_ENABLE_CLR_BITS, - BCE_MISC_ENABLE_CLR_BITS_TX_DMA_ENABLE | - BCE_MISC_ENABLE_CLR_BITS_DMA_ENGINE_ENABLE | - BCE_MISC_ENABLE_CLR_BITS_RX_DMA_ENABLE | - BCE_MISC_ENABLE_CLR_BITS_HOST_COALESCE_ENABLE); + BCE_MISC_ENABLE_CLR_BITS_TX_DMA_ENABLE | + BCE_MISC_ENABLE_CLR_BITS_DMA_ENGINE_ENABLE | + BCE_MISC_ENABLE_CLR_BITS_RX_DMA_ENABLE | + BCE_MISC_ENABLE_CLR_BITS_HOST_COALESCE_ENABLE); val = REG_RD(sc, BCE_MISC_ENABLE_CLR_BITS); DELAY(5); /* Disable DMA */ if ((BCE_CHIP_NUM(sc) == BCE_CHIP_NUM_5709) || - (BCE_CHIP_NUM(sc) == BCE_CHIP_NUM_5716)) { + (BCE_CHIP_NUM(sc) == BCE_CHIP_NUM_5716)) { val = REG_RD(sc, BCE_MISC_NEW_CORE_CTL); val &= ~BCE_MISC_NEW_CORE_CTL_DMA_ENABLE; REG_WR(sc, BCE_MISC_NEW_CORE_CTL, val); @@ -4634,26 +4677,26 @@ bce_reset(struct bce_softc *sc, u32 reset_code) /* Chip reset. 
*/ if ((BCE_CHIP_NUM(sc) == BCE_CHIP_NUM_5709) || - (BCE_CHIP_NUM(sc) == BCE_CHIP_NUM_5716)) { + (BCE_CHIP_NUM(sc) == BCE_CHIP_NUM_5716)) { REG_WR(sc, BCE_MISC_COMMAND, BCE_MISC_COMMAND_SW_RESET); REG_RD(sc, BCE_MISC_COMMAND); DELAY(5); val = BCE_PCICFG_MISC_CONFIG_REG_WINDOW_ENA | - BCE_PCICFG_MISC_CONFIG_TARGET_MB_WORD_SWAP; + BCE_PCICFG_MISC_CONFIG_TARGET_MB_WORD_SWAP; pci_write_config(sc->bce_dev, BCE_PCICFG_MISC_CONFIG, val, 4); } else { val = BCE_PCICFG_MISC_CONFIG_CORE_RST_REQ | - BCE_PCICFG_MISC_CONFIG_REG_WINDOW_ENA | - BCE_PCICFG_MISC_CONFIG_TARGET_MB_WORD_SWAP; + BCE_PCICFG_MISC_CONFIG_REG_WINDOW_ENA | + BCE_PCICFG_MISC_CONFIG_TARGET_MB_WORD_SWAP; REG_WR(sc, BCE_PCICFG_MISC_CONFIG, val); /* Allow up to 30us for reset to complete. */ for (i = 0; i < 10; i++) { val = REG_RD(sc, BCE_PCICFG_MISC_CONFIG); if ((val & (BCE_PCICFG_MISC_CONFIG_CORE_RST_REQ | - BCE_PCICFG_MISC_CONFIG_CORE_RST_BSY)) == 0) { + BCE_PCICFG_MISC_CONFIG_CORE_RST_BSY)) == 0) { break; } DELAY(10); @@ -4661,9 +4704,9 @@ bce_reset(struct bce_softc *sc, u32 reset_code) /* Check that reset completed successfully. */ if (val & (BCE_PCICFG_MISC_CONFIG_CORE_RST_REQ | - BCE_PCICFG_MISC_CONFIG_CORE_RST_BSY)) { + BCE_PCICFG_MISC_CONFIG_CORE_RST_BSY)) { BCE_PRINTF("%s(%d): Reset failed!\n", - __FILE__, __LINE__); + __FILE__, __LINE__); rc = EBUSY; goto bce_reset_exit; } @@ -4673,7 +4716,7 @@ bce_reset(struct bce_softc *sc, u32 reset_code) val = REG_RD(sc, BCE_PCI_SWAP_DIAG0); if (val != 0x01020304) { BCE_PRINTF("%s(%d): Byte swap is incorrect!\n", - __FILE__, __LINE__); + __FILE__, __LINE__); rc = ENODEV; goto bce_reset_exit; } @@ -4685,8 +4728,8 @@ bce_reset(struct bce_softc *sc, u32 reset_code) /* Wait for the firmware to finish its initialization. 
*/ rc = bce_fw_sync(sc, BCE_DRV_MSG_DATA_WAIT1 | reset_code); if (rc) - BCE_PRINTF("%s(%d): Firmware did not complete initialization!\n", - __FILE__, __LINE__); + BCE_PRINTF("%s(%d): Firmware did not complete " + "initialization!\n", __FILE__, __LINE__); bce_reset_exit: DBEXIT(BCE_VERBOSE_RESET); @@ -4709,13 +4752,13 @@ bce_chipinit(struct bce_softc *sc) * channels and PCI clock compensation delay. */ val = BCE_DMA_CONFIG_DATA_BYTE_SWAP | - BCE_DMA_CONFIG_DATA_WORD_SWAP | + BCE_DMA_CONFIG_DATA_WORD_SWAP | #if BYTE_ORDER == BIG_ENDIAN - BCE_DMA_CONFIG_CNTL_BYTE_SWAP | + BCE_DMA_CONFIG_CNTL_BYTE_SWAP | #endif - BCE_DMA_CONFIG_CNTL_WORD_SWAP | - DMA_READ_CHANS << 12 | - DMA_WRITE_CHANS << 16; + BCE_DMA_CONFIG_CNTL_WORD_SWAP | + DMA_READ_CHANS << 12 | + DMA_WRITE_CHANS << 16; val |= (0x2 << 20) | BCE_DMA_CONFIG_CNTL_PCI_COMP_DLY; @@ -4765,7 +4808,7 @@ bce_chipinit(struct bce_softc *sc) /* Enable bins used on the 5709. */ if ((BCE_CHIP_NUM(sc) == BCE_CHIP_NUM_5709) || - (BCE_CHIP_NUM(sc) == BCE_CHIP_NUM_5716)) { + (BCE_CHIP_NUM(sc) == BCE_CHIP_NUM_5716)) { val |= BCE_MQ_CONFIG_BIN_MQ_MODE; if (BCE_CHIP_ID(sc) == BCE_CHIP_ID_5709_A1) val |= BCE_MQ_CONFIG_HALT_DIS; @@ -4927,7 +4970,7 @@ bce_blockinit(struct bce_softc *sc) } /* Allow bootcode to apply additional fixes before enabling MAC. */ - rc = bce_fw_sync(sc, BCE_DRV_MSG_DATA_WAIT2 | + rc = bce_fw_sync(sc, BCE_DRV_MSG_DATA_WAIT2 | BCE_DRV_MSG_CODE_RESET); /* Enable link state change interrupt generation. */ @@ -4938,7 +4981,7 @@ bce_blockinit(struct bce_softc *sc) /* Disable management frames (NC-SI) from flowing to the MCP. */ if (sc->bce_flags & BCE_MFW_ENABLE_FLAG) { - val = REG_RD(sc, BCE_RPM_MGMT_PKT_CTRL) & + val = REG_RD(sc, BCE_RPM_MGMT_PKT_CTRL) & ~BCE_RPM_MGMT_PKT_CTRL_MGMT_EN; REG_WR(sc, BCE_RPM_MGMT_PKT_CTRL, val); } @@ -4946,10 +4989,10 @@ bce_blockinit(struct bce_softc *sc) /* Enable all remaining blocks in the MAC. 
*/ if ((BCE_CHIP_NUM(sc) == BCE_CHIP_NUM_5709) || (BCE_CHIP_NUM(sc) == BCE_CHIP_NUM_5716)) - REG_WR(sc, BCE_MISC_ENABLE_SET_BITS, + REG_WR(sc, BCE_MISC_ENABLE_SET_BITS, BCE_MISC_ENABLE_DEFAULT_XI); else - REG_WR(sc, BCE_MISC_ENABLE_SET_BITS, + REG_WR(sc, BCE_MISC_ENABLE_SET_BITS, BCE_MISC_ENABLE_DEFAULT); REG_RD(sc, BCE_MISC_ENABLE_SET_BITS); @@ -4988,8 +5031,9 @@ bce_get_rx_buf(struct bce_softc *sc, struct mbuf *m, u16 *prod, /* Make sure the inputs are valid. */ DBRUNIF((*chain_prod > MAX_RX_BD), - BCE_PRINTF("%s(%d): RX producer out of range: 0x%04X > 0x%04X\n", - __FILE__, __LINE__, *chain_prod, (u16) MAX_RX_BD)); + BCE_PRINTF("%s(%d): RX producer out of range: " + "0x%04X > 0x%04X\n", __FILE__, __LINE__, + *chain_prod, (u16) MAX_RX_BD)); DBPRINT(sc, BCE_EXTREME_RECV, "%s(enter): prod = 0x%04X, " "chain_prod = 0x%04X, prod_bseq = 0x%08X\n", __FUNCTION__, @@ -4997,8 +5041,9 @@ bce_get_rx_buf(struct bce_softc *sc, struct mbuf *m, u16 *prod, /* Update some debug statistic counters */ DBRUNIF((sc->free_rx_bd < sc->rx_low_watermark), - sc->rx_low_watermark = sc->free_rx_bd); - DBRUNIF((sc->free_rx_bd == sc->max_rx_bd), sc->rx_empty_count++); + sc->rx_low_watermark = sc->free_rx_bd); + DBRUNIF((sc->free_rx_bd == sc->max_rx_bd), + sc->rx_empty_count++); /* Check whether this is a new mbuf allocation. */ if (m == NULL) { @@ -5014,11 +5059,8 @@ bce_get_rx_buf(struct bce_softc *sc, struct mbuf *m, u16 *prod, #ifdef BCE_JUMBO_HDRSPLIT MGETHDR(m_new, M_DONTWAIT, MT_DATA); #else - if (sc->rx_bd_mbuf_alloc_size <= MCLBYTES) - m_new = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR); - else - m_new = m_getjcl(M_DONTWAIT, MT_DATA, M_PKTHDR, - sc->rx_bd_mbuf_alloc_size); + m_new = m_getjcl(M_DONTWAIT, MT_DATA, M_PKTHDR, + sc->rx_bd_mbuf_alloc_size); #endif if (m_new == NULL) { @@ -5049,8 +5091,8 @@ bce_get_rx_buf(struct bce_softc *sc, struct mbuf *m, u16 *prod, /* Handle any mapping errors. 
*/ if (error) { - BCE_PRINTF("%s(%d): Error mapping mbuf into RX chain (%d)!\n", - __FILE__, __LINE__, error); + BCE_PRINTF("%s(%d): Error mapping mbuf into RX " + "chain (%d)!\n", __FILE__, __LINE__, error); sc->dma_map_addr_rx_failed_count++; m_freem(m_new); @@ -5078,11 +5120,11 @@ bce_get_rx_buf(struct bce_softc *sc, struct mbuf *m, u16 *prod, sc->rx_mbuf_ptr[*chain_prod] = m_new; sc->free_rx_bd -= nsegs; - DBRUNMSG(BCE_INSANE_RECV, + DBRUNMSG(BCE_INSANE_RECV, bce_dump_rx_mbuf_chain(sc, debug_chain_prod, nsegs)); DBPRINT(sc, BCE_EXTREME_RECV, "%s(exit): prod = 0x%04X, " - "chain_prod = 0x%04X, prod_bseq = 0x%08X\n", + "chain_prod = 0x%04X, prod_bseq = 0x%08X\n", __FUNCTION__, *prod, *chain_prod, *prod_bseq); bce_get_rx_buf_exit: @@ -5116,8 +5158,9 @@ bce_get_pg_buf(struct bce_softc *sc, struct mbuf *m, u16 *prod, /* Make sure the inputs are valid. */ DBRUNIF((*prod_idx > MAX_PG_BD), - BCE_PRINTF("%s(%d): page producer out of range: 0x%04X > 0x%04X\n", - __FILE__, __LINE__, *prod_idx, (u16) MAX_PG_BD)); + BCE_PRINTF("%s(%d): page producer out of range: " + "0x%04X > 0x%04X\n", __FILE__, __LINE__, + *prod_idx, (u16) MAX_PG_BD)); DBPRINT(sc, BCE_EXTREME_RECV, "%s(enter): prod = 0x%04X, " "chain_prod = 0x%04X\n", __FUNCTION__, *prod, *prod_idx); @@ -5159,7 +5202,7 @@ bce_get_pg_buf(struct bce_softc *sc, struct mbuf *m, u16 *prod, /* Map the mbuf cluster into device memory. */ map = sc->pg_mbuf_map[*prod_idx]; error = bus_dmamap_load(sc->pg_mbuf_tag, map, mtod(m_new, void *), - sc->pg_bd_mbuf_alloc_size, bce_dma_map_addr, + sc->pg_bd_mbuf_alloc_size, bce_dma_map_addr, &busaddr, BUS_DMA_NOWAIT); /* Handle any mapping errors. 
*/ @@ -5191,7 +5234,7 @@ bce_get_pg_buf(struct bce_softc *sc, struct mbuf *m, u16 *prod, sc->pg_mbuf_ptr[*prod_idx] = m_new; sc->free_pg_bd--; - DBRUNMSG(BCE_INSANE_RECV, + DBRUNMSG(BCE_INSANE_RECV, bce_dump_pg_mbuf_chain(sc, debug_prod_idx, 1)); DBPRINT(sc, BCE_EXTREME_RECV, "%s(exit): prod = 0x%04X, " @@ -5222,19 +5265,19 @@ bce_init_tx_context(struct bce_softc *sc) if ((BCE_CHIP_NUM(sc) == BCE_CHIP_NUM_5709) || (BCE_CHIP_NUM(sc) == BCE_CHIP_NUM_5716)) { /* Set the CID type to support an L2 connection. */ - val = BCE_L2CTX_TX_TYPE_TYPE_L2_XI | + val = BCE_L2CTX_TX_TYPE_TYPE_L2_XI | BCE_L2CTX_TX_TYPE_SIZE_L2_XI; CTX_WR(sc, GET_CID_ADDR(TX_CID), BCE_L2CTX_TX_TYPE_XI, val); val = BCE_L2CTX_TX_CMD_TYPE_TYPE_L2_XI | (8 << 16); - CTX_WR(sc, GET_CID_ADDR(TX_CID), + CTX_WR(sc, GET_CID_ADDR(TX_CID), BCE_L2CTX_TX_CMD_TYPE_XI, val); /* Point the hardware to the first page in the chain. */ val = BCE_ADDR_HI(sc->tx_bd_chain_paddr[0]); - CTX_WR(sc, GET_CID_ADDR(TX_CID), + CTX_WR(sc, GET_CID_ADDR(TX_CID), BCE_L2CTX_TX_TBDR_BHADDR_HI_XI, val); val = BCE_ADDR_LO(sc->tx_bd_chain_paddr[0]); - CTX_WR(sc, GET_CID_ADDR(TX_CID), + CTX_WR(sc, GET_CID_ADDR(TX_CID), BCE_L2CTX_TX_TBDR_BHADDR_LO_XI, val); } else { /* Set the CID type to support an L2 connection. */ @@ -5245,10 +5288,10 @@ bce_init_tx_context(struct bce_softc *sc) /* Point the hardware to the first page in the chain. 
*/ val = BCE_ADDR_HI(sc->tx_bd_chain_paddr[0]); - CTX_WR(sc, GET_CID_ADDR(TX_CID), + CTX_WR(sc, GET_CID_ADDR(TX_CID), BCE_L2CTX_TX_TBDR_BHADDR_HI, val); val = BCE_ADDR_LO(sc->tx_bd_chain_paddr[0]); - CTX_WR(sc, GET_CID_ADDR(TX_CID), + CTX_WR(sc, GET_CID_ADDR(TX_CID), BCE_L2CTX_TX_TBDR_BHADDR_LO, val); } @@ -5331,7 +5374,7 @@ bce_free_tx_chain(struct bce_softc *sc) for (i = 0; i < TOTAL_TX_BD; i++) { if (sc->tx_mbuf_ptr[i] != NULL) { if (sc->tx_mbuf_map[i] != NULL) - bus_dmamap_sync(sc->tx_mbuf_tag, + bus_dmamap_sync(sc->tx_mbuf_tag, sc->tx_mbuf_map[i], BUS_DMASYNC_POSTWRITE); m_freem(sc->tx_mbuf_ptr[i]); @@ -5349,7 +5392,7 @@ bce_free_tx_chain(struct bce_softc *sc) /* Check if we lost any mbufs in the process. */ DBRUNIF((sc->debug_tx_mbuf_alloc), BCE_PRINTF("%s(%d): Memory leak! Lost %d mbufs " - "from tx chain!\n", __FILE__, __LINE__, + "from tx chain!\n", __FILE__, __LINE__, sc->debug_tx_mbuf_alloc)); DBEXIT(BCE_VERBOSE_RESET | BCE_VERBOSE_SEND | BCE_VERBOSE_UNLOAD); @@ -5385,9 +5428,22 @@ bce_init_rx_context(struct bce_softc *sc) (BCE_CHIP_NUM(sc) == BCE_CHIP_NUM_5716)) { u32 lo_water, hi_water; - lo_water = BCE_L2CTX_RX_LO_WATER_MARK_DEFAULT; + if (sc->bce_flags & BCE_USING_TX_FLOW_CONTROL) { + lo_water = BCE_L2CTX_RX_LO_WATER_MARK_DEFAULT; + } else { + lo_water = 0; + } + + if (lo_water >= USABLE_RX_BD) { + lo_water = 0; + } + hi_water = USABLE_RX_BD / 4; + if (hi_water <= lo_water) { + lo_water = 0; + } + lo_water /= BCE_L2CTX_RX_LO_WATER_MARK_SCALE; hi_water /= BCE_L2CTX_RX_HI_WATER_MARK_SCALE; @@ -5395,11 +5451,12 @@ bce_init_rx_context(struct bce_softc *sc) hi_water = 0xf; else if (hi_water == 0) lo_water = 0; + val |= (lo_water << BCE_L2CTX_RX_LO_WATER_MARK_SHIFT) | - (hi_water << BCE_L2CTX_RX_HI_WATER_MARK_SHIFT); + (hi_water << BCE_L2CTX_RX_HI_WATER_MARK_SHIFT); } - CTX_WR(sc, GET_CID_ADDR(RX_CID), BCE_L2CTX_RX_CTX_TYPE, val); + CTX_WR(sc, GET_CID_ADDR(RX_CID), BCE_L2CTX_RX_CTX_TYPE, val); /* Setup the MQ BIN mapping for l2_ctx_host_bseq.
*/ if ((BCE_CHIP_NUM(sc) == BCE_CHIP_NUM_5709) || @@ -5453,9 +5510,9 @@ bce_init_rx_chain(struct bce_softc *sc) j = i + 1; /* Setup the chain page pointers. */ - rxbd->rx_bd_haddr_hi = + rxbd->rx_bd_haddr_hi = htole32(BCE_ADDR_HI(sc->rx_bd_chain_paddr[j])); - rxbd->rx_bd_haddr_lo = + rxbd->rx_bd_haddr_lo = htole32(BCE_ADDR_LO(sc->rx_bd_chain_paddr[j])); } @@ -5517,14 +5574,14 @@ bce_fill_rx_chain(struct bce_softc *sc) /* We should never end up pointing to a next page pointer. */ DBRUNIF(((prod & USABLE_RX_BD_PER_PAGE) == USABLE_RX_BD_PER_PAGE), - BCE_PRINTF("%s(): Invalid rx_prod value: 0x%04X\n", - __FUNCTION__, sc->rx_prod)); + BCE_PRINTF("%s(): Invalid rx_prod value: 0x%04X\n", + __FUNCTION__, sc->rx_prod)); /* Write the mailbox and tell the chip about the waiting rx_bd's. */ - REG_WR16(sc, MB_GET_CID_ADDR(RX_CID) + BCE_L2MQ_RX_HOST_BDIDX, - sc->rx_prod); - REG_WR(sc, MB_GET_CID_ADDR(RX_CID) + BCE_L2MQ_RX_HOST_BSEQ, - sc->rx_prod_bseq); + REG_WR16(sc, MB_GET_CID_ADDR(RX_CID) + + BCE_L2MQ_RX_HOST_BDIDX, sc->rx_prod); + REG_WR(sc, MB_GET_CID_ADDR(RX_CID) + + BCE_L2MQ_RX_HOST_BSEQ, sc->rx_prod_bseq); DBEXIT(BCE_VERBOSE_RESET | BCE_EXTREME_RECV | BCE_VERBOSE_LOAD | BCE_VERBOSE_CTX); @@ -5548,7 +5605,7 @@ bce_free_rx_chain(struct bce_softc *sc) for (i = 0; i < TOTAL_RX_BD; i++) { if (sc->rx_mbuf_ptr[i] != NULL) { if (sc->rx_mbuf_map[i] != NULL) - bus_dmamap_sync(sc->rx_mbuf_tag, + bus_dmamap_sync(sc->rx_mbuf_tag, sc->rx_mbuf_map[i], BUS_DMASYNC_POSTREAD); m_freem(sc->rx_mbuf_ptr[i]); @@ -5560,7 +5617,7 @@ bce_free_rx_chain(struct bce_softc *sc) /* Clear each RX chain page. */ for (i = 0; i < RX_PAGES; i++) if (sc->rx_bd_chain[i] != NULL) { - bzero((char *)sc->rx_bd_chain[i], + bzero((char *)sc->rx_bd_chain[i], BCE_RX_CHAIN_PAGE_SZ); } @@ -5667,7 +5724,7 @@ bce_fill_pg_chain(struct bce_softc *sc) u16 prod, prod_idx; DBENTER(BCE_VERBOSE_RESET | BCE_EXTREME_RECV | BCE_VERBOSE_LOAD | - BCE_VERBOSE_CTX); + BCE_VERBOSE_CTX); /* Get the page chain prodcuer index. 
*/ prod = sc->pg_prod; @@ -5686,18 +5743,18 @@ bce_fill_pg_chain(struct bce_softc *sc) sc->pg_prod = prod; DBRUNIF(((prod & USABLE_RX_BD_PER_PAGE) == USABLE_RX_BD_PER_PAGE), - BCE_PRINTF("%s(): Invalid pg_prod value: 0x%04X\n", - __FUNCTION__, sc->pg_prod)); + BCE_PRINTF("%s(): Invalid pg_prod value: 0x%04X\n", + __FUNCTION__, sc->pg_prod)); /* * Write the mailbox and tell the chip about * the new rx_bd's in the page chain. */ - REG_WR16(sc, MB_GET_CID_ADDR(RX_CID) + BCE_L2MQ_RX_HOST_PG_BDIDX, - sc->pg_prod); + REG_WR16(sc, MB_GET_CID_ADDR(RX_CID) + + BCE_L2MQ_RX_HOST_PG_BDIDX, sc->pg_prod); DBEXIT(BCE_VERBOSE_RESET | BCE_EXTREME_RECV | BCE_VERBOSE_LOAD | - BCE_VERBOSE_CTX); + BCE_VERBOSE_CTX); } @@ -5718,8 +5775,9 @@ bce_free_pg_chain(struct bce_softc *sc) for (i = 0; i < TOTAL_PG_BD; i++) { if (sc->pg_mbuf_ptr[i] != NULL) { if (sc->pg_mbuf_map[i] != NULL) - bus_dmamap_sync(sc->pg_mbuf_tag, sc->pg_mbuf_map[i], - BUS_DMASYNC_POSTREAD); + bus_dmamap_sync(sc->pg_mbuf_tag, + sc->pg_mbuf_map[i], + BUS_DMASYNC_POSTREAD); m_freem(sc->pg_mbuf_ptr[i]); sc->pg_mbuf_ptr[i] = NULL; DBRUN(sc->debug_pg_mbuf_alloc--); @@ -5734,8 +5792,8 @@ bce_free_pg_chain(struct bce_softc *sc) /* Check if we lost any mbufs in the process. */ DBRUNIF((sc->debug_pg_mbuf_alloc), - BCE_PRINTF("%s(): Memory leak! Lost %d mbufs from page chain!\n", - __FUNCTION__, sc->debug_pg_mbuf_alloc)); + BCE_PRINTF("%s(): Memory leak! 
Lost %d mbufs from page chain!\n", + __FUNCTION__, sc->debug_pg_mbuf_alloc)); DBEXIT(BCE_VERBOSE_RESET | BCE_VERBOSE_RECV | BCE_VERBOSE_UNLOAD); } @@ -5776,7 +5834,7 @@ bce_ifmedia_upd_locked(struct ifnet *ifp) struct bce_softc *sc = ifp->if_softc; struct mii_data *mii; - DBENTER(BCE_VERBOSE); + DBENTER(BCE_VERBOSE_PHY); BCE_LOCK_ASSERT(sc); @@ -5794,7 +5852,7 @@ bce_ifmedia_upd_locked(struct ifnet *ifp) mii_mediachg(mii); } - DBEXIT(BCE_VERBOSE); + DBEXIT(BCE_VERBOSE_PHY); } @@ -5810,7 +5868,7 @@ bce_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr) struct bce_softc *sc = ifp->if_softc; struct mii_data *mii; - DBENTER(BCE_VERBOSE); + DBENTER(BCE_VERBOSE_PHY); BCE_LOCK(sc); @@ -5822,7 +5880,7 @@ bce_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr) BCE_UNLOCK(sc); - DBEXIT(BCE_VERBOSE); + DBEXIT(BCE_VERBOSE_PHY); } @@ -5921,7 +5979,7 @@ bce_rx_intr(struct bce_softc *sc) #endif DBENTER(BCE_VERBOSE_RECV | BCE_VERBOSE_INTR); - DBRUN(sc->rx_interrupts++); + DBRUN(sc->interrupts_rx++); DBPRINT(sc, BCE_EXTREME_RECV, "%s(enter): rx_prod = 0x%04X, " "rx_cons = 0x%04X, rx_prod_bseq = 0x%08X\n", __FUNCTION__, sc->rx_prod, sc->rx_cons, sc->rx_prod_bseq); @@ -5943,14 +6001,16 @@ bce_rx_intr(struct bce_softc *sc) /* Get working copies of the driver's view of the consumer indices. */ sw_rx_cons = sc->rx_cons; + #ifdef BCE_JUMBO_HDRSPLIT sw_pg_cons = sc->pg_cons; #endif /* Update some debug statistics counters */ DBRUNIF((sc->free_rx_bd < sc->rx_low_watermark), - sc->rx_low_watermark = sc->free_rx_bd); - DBRUNIF((sc->free_rx_bd == sc->max_rx_bd), sc->rx_empty_count++); + sc->rx_low_watermark = sc->free_rx_bd); + DBRUNIF((sc->free_rx_bd == sc->max_rx_bd), + sc->rx_empty_count++); /* Scan through the receive chain as long as there is work to do */ /* ToDo: Consider setting a limit on the number of packets processed. */ @@ -5962,7 +6022,7 @@ bce_rx_intr(struct bce_softc *sc) sw_rx_cons_idx = RX_CHAIN_IDX(sw_rx_cons); /* Unmap the mbuf from DMA space. 
*/ - bus_dmamap_sync(sc->rx_mbuf_tag, + bus_dmamap_sync(sc->rx_mbuf_tag, sc->rx_mbuf_map[sw_rx_cons_idx], BUS_DMASYNC_POSTREAD); bus_dmamap_unload(sc->rx_mbuf_tag, @@ -5975,7 +6035,7 @@ bce_rx_intr(struct bce_softc *sc) sc->free_rx_bd++; if(m0 == NULL) { - DBPRINT(sc, BCE_EXTREME_RECV, + DBPRINT(sc, BCE_EXTREME_RECV, "%s(): Oops! Empty mbuf pointer " "found in sc->rx_mbuf_ptr[0x%04X]!\n", __FUNCTION__, sw_rx_cons_idx); @@ -5983,20 +6043,23 @@ bce_rx_intr(struct bce_softc *sc) } /* - * Frames received on the NetXteme II are prepended with an - * l2_fhdr structure which provides status information about - * the received frame (including VLAN tags and checksum info). - * The frames are also automatically adjusted to align the IP - * header (i.e. two null bytes are inserted before the Ethernet - * header). As a result the data DMA'd by the controller into - * the mbuf is as follows: - * + * Frames received on the NetXteme II are prepended + * with an l2_fhdr structure which provides status + * information about the received frame (including + * VLAN tags and checksum info). The frames are + * also automatically adjusted to align the IP + * header (i.e. two null bytes are inserted before + * the Ethernet header). As a result the data + * DMA'd by the controller into the mbuf looks + * like this: + * * +---------+-----+---------------------+-----+ * | l2_fhdr | pad | packet data | FCS | * +---------+-----+---------------------+-----+ - * - * The l2_fhdr needs to be checked and skipped and the FCS needs - * to be stripped before sending the packet up the stack. + * + * The l2_fhdr needs to be checked and skipped and + * the FCS needs to be stripped before sending the + * packet up the stack. */ l2fhdr = mtod(m0, struct l2_fhdr *); @@ -6100,7 +6163,7 @@ bce_rx_intr(struct bce_softc *sc) m0->m_pkthdr.len = m0->m_len = pkt_len; } #else - /* Set the total packet length. */ + /* Set the total packet length. 
*/ m0->m_pkthdr.len = m0->m_len = pkt_len; #endif @@ -6115,12 +6178,11 @@ bce_rx_intr(struct bce_softc *sc) m_print(m0, 128)); DBRUNIF(DB_RANDOMTRUE(l2fhdr_error_sim_control), - BCE_PRINTF("Simulating l2_fhdr status error.\n"); sc->l2fhdr_error_sim_count++; status = status | L2_FHDR_ERRORS_PHY_DECODE); /* Check the received frame for errors. */ - if (status & (L2_FHDR_ERRORS_BAD_CRC | + if (status & (L2_FHDR_ERRORS_BAD_CRC | L2_FHDR_ERRORS_PHY_DECODE | L2_FHDR_ERRORS_ALIGNMENT | L2_FHDR_ERRORS_TOO_SHORT | L2_FHDR_ERRORS_GIANT_FRAME)) { @@ -6144,12 +6206,12 @@ bce_rx_intr(struct bce_softc *sc) /* Check for an IP datagram. */ if (!(status & L2_FHDR_STATUS_SPLIT) && - (status & L2_FHDR_STATUS_IP_DATAGRAM)) { + (status & L2_FHDR_STATUS_IP_DATAGRAM)) { m0->m_pkthdr.csum_flags |= CSUM_IP_CHECKED; - + DBRUN(sc->csum_offload_ip++); /* Check if the IP checksum is valid. */ if ((l2fhdr->l2_fhdr_ip_xsum ^ 0xffff) == 0) - m0->m_pkthdr.csum_flags |= + m0->m_pkthdr.csum_flags |= CSUM_IP_VALID; } @@ -6160,9 +6222,10 @@ bce_rx_intr(struct bce_softc *sc) /* Check for a good TCP/UDP checksum. */ if ((status & (L2_FHDR_ERRORS_TCP_XSUM | L2_FHDR_ERRORS_UDP_XSUM)) == 0) { + DBRUN(sc->csum_offload_tcp_udp++); m0->m_pkthdr.csum_data = l2fhdr->l2_fhdr_tcp_udp_xsum; - m0->m_pkthdr.csum_flags |= + m0->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); } @@ -6234,12 +6297,13 @@ bce_rx_int_next_rx: hw_rx_cons = sc->hw_rx_cons = bce_get_hw_rx_cons(sc); } - /* No new packets to process. Refill the RX and page chains and exit. */ #ifdef BCE_JUMBO_HDRSPLIT + /* No new packets. Refill the page chain. */ sc->pg_cons = sw_pg_cons; bce_fill_pg_chain(sc); #endif + /* No new packets. Refill the RX chain. 
*/ sc->rx_cons = sw_rx_cons; bce_fill_rx_chain(sc); @@ -6295,7 +6359,7 @@ bce_tx_intr(struct bce_softc *sc) u16 hw_tx_cons, sw_tx_cons, sw_tx_chain_cons; DBENTER(BCE_VERBOSE_SEND | BCE_VERBOSE_INTR); - DBRUN(sc->tx_interrupts++); + DBRUN(sc->interrupts_tx++); DBPRINT(sc, BCE_EXTREME_SEND, "%s(enter): tx_prod = 0x%04X, " "tx_cons = 0x%04X, tx_prod_bseq = 0x%08X\n", __FUNCTION__, sc->tx_prod, sc->tx_cons, sc->tx_prod_bseq); @@ -6306,7 +6370,7 @@ bce_tx_intr(struct bce_softc *sc) hw_tx_cons = sc->hw_tx_cons = bce_get_hw_tx_cons(sc); sw_tx_cons = sc->tx_cons; - /* Prevent speculative reads from getting ahead of the status block. */ + /* Prevent speculative reads of the status block. */ bus_space_barrier(sc->bce_btag, sc->bce_bhandle, 0, 0, BUS_SPACE_BARRIER_READ); @@ -6354,7 +6418,7 @@ bce_tx_intr(struct bce_softc *sc) DBRUNMSG(BCE_INFO_SEND, BCE_PRINTF("%s(): Unloading map/freeing mbuf " - "from tx_bd[0x%04X]\n", __FUNCTION__, + "from tx_bd[0x%04X]\n", __FUNCTION__, sw_tx_chain_cons)); /* Unmap the mbuf. */ @@ -6496,9 +6560,9 @@ bce_init_locked(struct bce_softc *sc) * size. Be generous on the receive if we have room. */ #ifdef BCE_JUMBO_HDRSPLIT - if (ifp->if_mtu <= (sc->rx_bd_mbuf_data_len + + if (ifp->if_mtu <= (sc->rx_bd_mbuf_data_len + sc->pg_bd_mbuf_alloc_size)) - ether_mtu = sc->rx_bd_mbuf_data_len + + ether_mtu = sc->rx_bd_mbuf_data_len + sc->pg_bd_mbuf_alloc_size; #else if (ifp->if_mtu <= sc->rx_bd_mbuf_data_len) @@ -6509,7 +6573,7 @@ bce_init_locked(struct bce_softc *sc) ether_mtu += ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + ETHER_CRC_LEN; - DBPRINT(sc, BCE_INFO_MISC, "%s(): setting h/w mtu = %d\n", + DBPRINT(sc, BCE_INFO_MISC, "%s(): setting h/w mtu = %d\n", __FUNCTION__, ether_mtu); /* Program the mtu, enabling jumbo frame support if necessary. 
*/ @@ -6522,7 +6586,7 @@ bce_init_locked(struct bce_softc *sc) DBPRINT(sc, BCE_INFO_LOAD, "%s(): rx_bd_mbuf_alloc_size = %d, rx_bce_mbuf_data_len = %d, " - "rx_bd_mbuf_align_pad = %d\n", __FUNCTION__, + "rx_bd_mbuf_align_pad = %d\n", __FUNCTION__, sc->rx_bd_mbuf_alloc_size, sc->rx_bd_mbuf_data_len, sc->rx_bd_mbuf_align_pad); @@ -6617,6 +6681,12 @@ bce_init(void *xsc) } +/****************************************************************************/ +/* Modifies an mbuf for TSO on the hardware. */ +/* */ +/* Returns: */ +/* Pointer to a modified mbuf. */ +/****************************************************************************/ static struct mbuf * bce_tso_setup(struct bce_softc *sc, struct mbuf **m_head, u16 *flags) { @@ -6627,8 +6697,9 @@ bce_tso_setup(struct bce_softc *sc, struct mbuf **m_head, u16 *flags) u16 etype; int hdr_len, ip_hlen = 0, tcp_hlen = 0, ip_len = 0; - DBRUN(sc->requested_tso_frames++); - /* Controller requires to monify mbuf chains. */ + DBRUN(sc->tso_frames_requested++); + + /* Controller may modify mbuf chains. */ if (M_WRITABLE(*m_head) == 0) { m = m_dup(*m_head, M_DONTWAIT); m_freem(*m_head); @@ -6639,6 +6710,7 @@ bce_tso_setup(struct bce_softc *sc, struct mbuf **m_head, u16 *flags) } *m_head = m; } + /* * For TSO the controller needs two pieces of info, * the MSS and the IP+TCP options length. 
@@ -6714,9 +6786,12 @@ bce_tso_setup(struct bce_softc *sc, struct mbuf **m_head, u16 *flags) /* Set the LSO flag in the TX BD */ *flags |= TX_BD_FLAGS_SW_LSO; + /* Set the length of IP + TCP options (in 32 bit words) */ *flags |= (((ip_hlen + tcp_hlen - sizeof(struct ip) - sizeof(struct tcphdr)) >> 2) << 8); + + DBRUN(sc->tso_frames_completed++); return (*m_head); } @@ -6743,22 +6818,24 @@ bce_tx_encap(struct bce_softc *sc, struct mbuf **m_head) #ifdef BCE_DEBUG u16 debug_prod; #endif + int i, error, nsegs, rc = 0; DBENTER(BCE_VERBOSE_SEND); - DBPRINT(sc, BCE_INFO_SEND, - "%s(enter): tx_prod = 0x%04X, tx_chain_prod = %04X, " - "tx_prod_bseq = 0x%08X\n", - __FUNCTION__, sc->tx_prod, (u16) TX_CHAIN_IDX(sc->tx_prod), - sc->tx_prod_bseq); + + /* Make sure we have room in the TX chain. */ + if (sc->used_tx_bd >= sc->max_tx_bd) + goto bce_tx_encap_exit; /* Transfer any checksum offload flags to the bd. */ m0 = *m_head; if (m0->m_pkthdr.csum_flags) { if (m0->m_pkthdr.csum_flags & CSUM_TSO) { m0 = bce_tso_setup(sc, m_head, &flags); - if (m0 == NULL) + if (m0 == NULL) { + DBRUN(sc->tso_frames_failed++); goto bce_tx_encap_exit; + } mss = htole16(m0->m_pkthdr.tso_segsz); } else { if (m0->m_pkthdr.csum_flags & CSUM_IP) @@ -6785,8 +6862,7 @@ bce_tx_encap(struct bce_softc *sc, struct mbuf **m_head) /* Check if the DMA mapping was successful */ if (error == EFBIG) { - - sc->fragmented_mbuf_count++; + sc->mbuf_frag_count++; /* Try to defrag the mbuf. */ m0 = m_collapse(*m_head, M_DONTWAIT, BCE_MAX_SEGMENTS); @@ -6801,8 +6877,8 @@ bce_tx_encap(struct bce_softc *sc, struct mbuf **m_head) /* Defrag was successful, try mapping again */ *m_head = m0; - error = bus_dmamap_load_mbuf_sg(sc->tx_mbuf_tag, map, m0, - segs, &nsegs, BUS_DMA_NOWAIT); + error = bus_dmamap_load_mbuf_sg(sc->tx_mbuf_tag, + map, m0, segs, &nsegs, BUS_DMA_NOWAIT); /* Still getting an error after a defrag. 
*/ if (error == ENOMEM) { @@ -6864,9 +6940,11 @@ bce_tx_encap(struct bce_softc *sc, struct mbuf **m_head) txbd= &sc->tx_bd_chain[TX_PAGE(chain_prod)] [TX_IDX(chain_prod)]; - txbd->tx_bd_haddr_lo = htole32(BCE_ADDR_LO(segs[i].ds_addr)); - txbd->tx_bd_haddr_hi = htole32(BCE_ADDR_HI(segs[i].ds_addr)); - txbd->tx_bd_mss_nbytes = htole32(mss << 16) | + txbd->tx_bd_haddr_lo = + htole32(BCE_ADDR_LO(segs[i].ds_addr)); + txbd->tx_bd_haddr_hi = + htole32(BCE_ADDR_HI(segs[i].ds_addr)); + txbd->tx_bd_mss_nbytes = htole32(mss << 16) | htole16(segs[i].ds_len); txbd->tx_bd_vlan_tag = htole16(vlan_tag); txbd->tx_bd_flags = htole16(flags); @@ -6879,12 +6957,8 @@ bce_tx_encap(struct bce_softc *sc, struct mbuf **m_head) /* Set the END flag on the last TX buffer descriptor. */ txbd->tx_bd_flags |= htole16(TX_BD_FLAGS_END); - DBRUNMSG(BCE_EXTREME_SEND, bce_dump_tx_chain(sc, debug_prod, nsegs)); - - DBPRINT(sc, BCE_INFO_SEND, - "%s( end ): prod = 0x%04X, chain_prod = 0x%04X, " - "prod_bseq = 0x%08X\n", - __FUNCTION__, prod, chain_prod, prod_bseq); + DBRUNMSG(BCE_EXTREME_SEND, + bce_dump_tx_chain(sc, debug_prod, nsegs)); /* * Ensure that the mbuf pointer for this transmission @@ -6910,11 +6984,11 @@ bce_tx_encap(struct bce_softc *sc, struct mbuf **m_head) sc->tx_prod = prod; sc->tx_prod_bseq = prod_bseq; - DBPRINT(sc, BCE_INFO_SEND, - "%s(exit): prod = 0x%04X, chain_prod = %04X, " - "prod_bseq = 0x%08X\n", __FUNCTION__, - sc->tx_prod, (u16) TX_CHAIN_IDX(sc->tx_prod), - sc->tx_prod_bseq); + /* Tell the chip about the waiting TX frames. 
*/ + REG_WR16(sc, MB_GET_CID_ADDR(TX_CID) + + BCE_L2MQ_TX_HOST_BIDX, sc->tx_prod); + REG_WR(sc, MB_GET_CID_ADDR(TX_CID) + + BCE_L2MQ_TX_HOST_BSEQ, sc->tx_prod_bseq); bce_tx_encap_exit: DBEXIT(BCE_VERBOSE_SEND); @@ -7006,23 +7080,6 @@ bce_start_locked(struct ifnet *ifp) DBPRINT(sc, BCE_VERBOSE_SEND, "%s(): Inserted %d frames into " "send queue.\n", __FUNCTION__, count); - REG_WR(sc, BCE_MQ_COMMAND, REG_RD(sc, BCE_MQ_COMMAND) | - BCE_MQ_COMMAND_NO_MAP_ERROR); - - /* Write the mailbox and tell the chip about the waiting tx_bd's. */ - DBPRINT(sc, BCE_VERBOSE_SEND, "%s(): MB_GET_CID_ADDR(TX_CID) = " - "0x%08X; BCE_L2MQ_TX_HOST_BIDX = 0x%08X, sc->tx_prod = 0x%04X\n", - __FUNCTION__, MB_GET_CID_ADDR(TX_CID), - BCE_L2MQ_TX_HOST_BIDX, sc->tx_prod); - REG_WR16(sc, MB_GET_CID_ADDR(TX_CID) + - BCE_L2MQ_TX_HOST_BIDX, sc->tx_prod); - - DBPRINT(sc, BCE_VERBOSE_SEND, "%s(): MB_GET_CID_ADDR(TX_CID) = " - "0x%08X; BCE_L2MQ_TX_HOST_BSEQ = 0x%08X, sc->tx_prod_bseq = " - "0x%04X\n", __FUNCTION__, MB_GET_CID_ADDR(TX_CID), - BCE_L2MQ_TX_HOST_BSEQ, sc->tx_prod_bseq); - REG_WR(sc, MB_GET_CID_ADDR(TX_CID) + BCE_L2MQ_TX_HOST_BSEQ, sc->tx_prod_bseq); - /* Set the tx timeout. */ sc->watchdog_timer = BCE_TX_TIMEOUT; @@ -7103,19 +7160,19 @@ bce_ioctl(struct ifnet *ifp, u_long command, caddr_t data) /* No buffer allocation size changes are necessary. */ #else /* Recalculate our buffer allocation sizes. 
*/ - if ((ifp->if_mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + + if ((ifp->if_mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + ETHER_CRC_LEN) > MCLBYTES) { sc->rx_bd_mbuf_alloc_size = MJUM9BYTES; - sc->rx_bd_mbuf_align_pad = + sc->rx_bd_mbuf_align_pad = roundup2(MJUM9BYTES, 16) - MJUM9BYTES; - sc->rx_bd_mbuf_data_len = + sc->rx_bd_mbuf_data_len = sc->rx_bd_mbuf_alloc_size - sc->rx_bd_mbuf_align_pad; } else { sc->rx_bd_mbuf_alloc_size = MCLBYTES; - sc->rx_bd_mbuf_align_pad = + sc->rx_bd_mbuf_align_pad = roundup2(MCLBYTES, 16) - MCLBYTES; - sc->rx_bd_mbuf_data_len = + sc->rx_bd_mbuf_data_len = sc->rx_bd_mbuf_alloc_size - sc->rx_bd_mbuf_align_pad; } @@ -7161,7 +7218,7 @@ bce_ioctl(struct ifnet *ifp, u_long command, caddr_t data) /* Add/Delete multicast address */ case SIOCADDMULTI: case SIOCDELMULTI: - DBPRINT(sc, BCE_VERBOSE_MISC, + DBPRINT(sc, BCE_VERBOSE_MISC, "Received SIOCADDMULTI/SIOCDELMULTI\n"); BCE_LOCK(sc); @@ -7174,7 +7231,7 @@ bce_ioctl(struct ifnet *ifp, u_long command, caddr_t data) /* Set/Get Interface media */ case SIOCSIFMEDIA: case SIOCGIFMEDIA: - DBPRINT(sc, BCE_VERBOSE_MISC, + DBPRINT(sc, BCE_VERBOSE_MISC, "Received SIOCSIFMEDIA/SIOCGIFMEDIA\n"); mii = device_get_softc(sc->bce_miibus); @@ -7185,7 +7242,7 @@ bce_ioctl(struct ifnet *ifp, u_long command, caddr_t data) /* Set interface capability */ case SIOCSIFCAP: mask = ifr->ifr_reqcap ^ ifp->if_capenable; - DBPRINT(sc, BCE_INFO_MISC, + DBPRINT(sc, BCE_INFO_MISC, "Received SIOCSIFCAP = 0x%08X\n", (u32) mask); /* Toggle the TX checksum capabilities enable flag. */ @@ -7270,7 +7327,7 @@ bce_watchdog(struct bce_softc *sc) goto bce_watchdog_exit; BCE_PRINTF("%s(%d): Watchdog timeout occurred, resetting!\n", - __FILE__, __LINE__); + __FILE__, __LINE__); DBRUNMSG(BCE_INFO, bce_dump_driver_state(sc); @@ -7336,7 +7393,7 @@ bce_intr(void *xsc) * interrupt then there's nothing to do. 
*/ if ((sc->status_block->status_idx == sc->last_status_idx) && - (REG_RD(sc, BCE_PCICFG_MISC_STATUS) & + (REG_RD(sc, BCE_PCICFG_MISC_STATUS) & BCE_PCICFG_MISC_STATUS_INTA_VALUE)) { DBPRINT(sc, BCE_VERBOSE_INTR, "%s(): Spurious interrupt.\n", __FUNCTION__); @@ -7361,17 +7418,17 @@ bce_intr(void *xsc) BCE_PRINTF("Simulating unexpected status attention " "bit set."); sc->unexpected_attention_sim_count++; - status_attn_bits = status_attn_bits | + status_attn_bits = status_attn_bits | STATUS_ATTN_BITS_PARITY_ERROR); /* Was it a link change interrupt? */ if ((status_attn_bits & STATUS_ATTN_BITS_LINK_STATE) != - (sc->status_block->status_attn_bits_ack & + (sc->status_block->status_attn_bits_ack & STATUS_ATTN_BITS_LINK_STATE)) { bce_phy_intr(sc); /* Clear transient updates during link state change. */ - REG_WR(sc, BCE_HC_COMMAND, sc->hc_command | + REG_WR(sc, BCE_HC_COMMAND, sc->hc_command | BCE_HC_COMMAND_COAL_NOW_WO_INT); REG_RD(sc, BCE_HC_COMMAND); } @@ -7384,7 +7441,7 @@ bce_intr(void *xsc) sc->unexpected_attention_count++; BCE_PRINTF("%s(%d): Fatal attention detected: " - "0x%08X\n", __FILE__, __LINE__, + "0x%08X\n", __FILE__, __LINE__, sc->status_block->status_attn_bits); DBRUNMSG(BCE_FATAL, @@ -7420,7 +7477,7 @@ bce_intr(void *xsc) hw_rx_cons = bce_get_hw_rx_cons(sc); hw_tx_cons = bce_get_hw_tx_cons(sc); - if ((hw_rx_cons == sc->hw_rx_cons) && + if ((hw_rx_cons == sc->hw_rx_cons) && (hw_tx_cons == sc->hw_tx_cons)) break; @@ -7433,7 +7490,7 @@ bce_intr(void *xsc) bce_enable_intr(sc, 0); /* Handle any frames that arrived while handling the interrupt. 
*/ - if (ifp->if_drv_flags & IFF_DRV_RUNNING && + if (ifp->if_drv_flags & IFF_DRV_RUNNING && !IFQ_DRV_IS_EMPTY(&ifp->if_snd)) bce_start_locked(ifp); @@ -7560,7 +7617,7 @@ bce_stats_update(struct bce_softc *sc) */ if (!(BCE_CHIP_NUM(sc) == BCE_CHIP_NUM_5706) && !(BCE_CHIP_ID(sc) == BCE_CHIP_ID_5708_A0)) - ifp->if_oerrors += + ifp->if_oerrors += (u_long) stats->stat_Dot3StatsCarrierSenseErrors; /* @@ -7799,25 +7856,28 @@ bce_pulse(void *xsc) sc->bc_state = bce_shmem_rd(sc, BCE_BC_STATE_CONDITION); /* Report whether the bootcode still knows the driver is running. */ - if (sc->bce_drv_cardiac_arrest == FALSE) { - if (!(sc->bc_state & BCE_CONDITION_DRV_PRESENT)) { - sc->bce_drv_cardiac_arrest = TRUE; - BCE_PRINTF("%s(): Bootcode lost the driver pulse! " - "(bc_state = 0x%08X)\n", __FUNCTION__, - sc->bc_state); - } - } else { - /* - * Not supported by all bootcode versions. - * (v5.0.11+ and v5.2.1+) Older bootcode - * will require the driver to reset the - * controller to clear this condition. - */ - if (sc->bc_state & BCE_CONDITION_DRV_PRESENT) { - sc->bce_drv_cardiac_arrest = FALSE; - BCE_PRINTF("%s(): Bootcode found the driver pulse! " - "(bc_state = 0x%08X)\n", __FUNCTION__, - sc->bc_state); + if (bootverbose) { + if (sc->bce_drv_cardiac_arrest == FALSE) { + if (!(sc->bc_state & BCE_CONDITION_DRV_PRESENT)) { + sc->bce_drv_cardiac_arrest = TRUE; + BCE_PRINTF("%s(): Warning: bootcode " + "thinks driver is absent! " + "(bc_state = 0x%08X)\n", + __FUNCTION__, sc->bc_state); + } + } else { + /* + * Not supported by all bootcode versions. + * (v5.0.11+ and v5.2.1+) Older bootcode + * will require the driver to reset the + * controller to clear this condition. + */ + if (sc->bc_state & BCE_CONDITION_DRV_PRESENT) { + sc->bce_drv_cardiac_arrest = FALSE; + BCE_PRINTF("%s(): Bootcode found the " + "driver pulse! (bc_state = 0x%08X)\n", + __FUNCTION__, sc->bc_state); + } } } @@ -7874,7 +7934,7 @@ bce_tick(void *xsc) /* Check if the link has come up. 
*/ if ((mii->mii_media_status & IFM_ACTIVE) && (IFM_SUBTYPE(mii->mii_media_active) != IFM_NONE)) { - DBPRINT(sc, BCE_VERBOSE_MISC, + DBPRINT(sc, BCE_VERBOSE_MISC, "%s(): Link up!\n", __FUNCTION__); sc->bce_link_up = TRUE; if ((IFM_SUBTYPE(mii->mii_media_active) == IFM_1000_T || @@ -8010,6 +8070,62 @@ bce_sysctl_stats_block(SYSCTL_HANDLER_ARGS) } +/****************************************************************************/ +/* Allows the stat counters to be cleared without unloading/reloading the */ +/* driver. */ +/* */ +/* Returns: */ +/* 0 for success, positive value for failure. */ +/****************************************************************************/ +static int +bce_sysctl_stats_clear(SYSCTL_HANDLER_ARGS) +{ + int error; + int result; + struct bce_softc *sc; + + result = -1; + error = sysctl_handle_int(oidp, &result, 0, req); + + if (error || !req->newptr) + return (error); + + if (result == 1) { + sc = (struct bce_softc *)arg1; + + /* Clear the internal H/W statistics counters. */ + REG_WR(sc, BCE_HC_COMMAND, BCE_HC_COMMAND_CLR_STAT_NOW); + + /* Reset the driver maintained statistics. */ + sc->interrupts_rx = + sc->interrupts_tx = 0; + sc->tso_frames_requested = + sc->tso_frames_completed = + sc->tso_frames_failed = 0; + sc->rx_empty_count = + sc->tx_full_count = 0; + sc->rx_low_watermark = USABLE_RX_BD; + sc->tx_hi_watermark = 0; + sc->l2fhdr_error_count = + sc->l2fhdr_error_sim_count = 0; + sc->mbuf_alloc_failed_count = + sc->mbuf_alloc_failed_sim_count = 0; + sc->dma_map_addr_rx_failed_count = + sc->dma_map_addr_tx_failed_count = 0; + sc->mbuf_frag_count = 0; + sc->csum_offload_tcp_udp = + sc->csum_offload_ip = 0; + sc->vlan_tagged_frames_rcvd = + sc->vlan_tagged_frames_stripped = 0; + + /* Clear firmware maintained statistics. */ + REG_WR_IND(sc, 0x120084, 0); + } + + return error; +} + + /****************************************************************************/ /* Allows the bootcode state to be dumped through the sysctl interface. 
*/ /* */ @@ -8252,8 +8368,7 @@ static int bce_sysctl_dump_ctx(SYSCTL_HANDLER_ARGS) { struct bce_softc *sc; - int error; - u16 result; + int error, result; result = -1; error = sysctl_handle_int(oidp, &result, 0, req); @@ -8352,8 +8467,8 @@ bce_add_sysctls(struct bce_softc *sc) 0, "Number of mbuf allocation failures"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, - "fragmented_mbuf_count", - CTLFLAG_RD, &sc->fragmented_mbuf_count, + "mbuf_frag_count", + CTLFLAG_RD, &sc->mbuf_frag_count, 0, "Number of fragmented mbufs"); #ifdef BCE_DEBUG @@ -8367,7 +8482,7 @@ bce_add_sysctls(struct bce_softc *sc) "dma_map_addr_failed_sim_count", CTLFLAG_RD, &sc->dma_map_addr_failed_sim_count, 0, "Number of simulated DMA mapping failures"); - + #endif SYSCTL_ADD_INT(ctx, children, OID_AUTO, @@ -8424,18 +8539,48 @@ bce_add_sysctls(struct bce_softc *sc) 0, "Number of times the TX chain was full"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, - "requested_tso_frames", - CTLFLAG_RD, &sc->requested_tso_frames, - 0, "Number of TSO frames received"); + "tso_frames_requested", + CTLFLAG_RD, &sc->tso_frames_requested, + 0, "Number of TSO frames requested"); + + SYSCTL_ADD_INT(ctx, children, OID_AUTO, + "tso_frames_completed", + CTLFLAG_RD, &sc->tso_frames_completed, + 0, "Number of TSO frames completed"); + + SYSCTL_ADD_INT(ctx, children, OID_AUTO, + "tso_frames_failed", + CTLFLAG_RD, &sc->tso_frames_failed, + 0, "Number of TSO frames failed"); + + SYSCTL_ADD_INT(ctx, children, OID_AUTO, + "csum_offload_ip", + CTLFLAG_RD, &sc->csum_offload_ip, + 0, "Number of IP checksum offload frames"); + + SYSCTL_ADD_INT(ctx, children, OID_AUTO, + "csum_offload_tcp_udp", + CTLFLAG_RD, &sc->csum_offload_tcp_udp, + 0, "Number of TCP/UDP checksum offload frames"); + + SYSCTL_ADD_INT(ctx, children, OID_AUTO, + "vlan_tagged_frames_rcvd", + CTLFLAG_RD, &sc->vlan_tagged_frames_rcvd, + 0, "Number of VLAN tagged frames received"); + + SYSCTL_ADD_INT(ctx, children, OID_AUTO, + "vlan_tagged_frames_stripped", + CTLFLAG_RD, 
&sc->vlan_tagged_frames_stripped, + 0, "Number of VLAN tagged frames stripped"); SYSCTL_ADD_UINT(ctx, children, OID_AUTO, - "rx_interrupts", - CTLFLAG_RD, &sc->rx_interrupts, + "interrupts_rx", + CTLFLAG_RD, &sc->interrupts_rx, 0, "Number of RX interrupts"); SYSCTL_ADD_UINT(ctx, children, OID_AUTO, - "tx_interrupts", - CTLFLAG_RD, &sc->tx_interrupts, + "interrupts_tx", + CTLFLAG_RD, &sc->interrupts_tx, 0, "Number of TX interrupts"); #endif @@ -8730,12 +8875,17 @@ bce_add_sysctls(struct bce_softc *sc) SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "status_block", CTLTYPE_INT | CTLFLAG_RW, (void *)sc, 0, - bce_sysctl_status_block, "I", "Status block"); + bce_sysctl_status_block, "I", "Dump status block"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "stats_block", CTLTYPE_INT | CTLFLAG_RW, (void *)sc, 0, - bce_sysctl_stats_block, "I", "Stats block"); + bce_sysctl_stats_block, "I", "Dump statistics block"); + + SYSCTL_ADD_PROC(ctx, children, OID_AUTO, + "stats_clear", CTLTYPE_INT | CTLFLAG_RW, + (void *)sc, 0, + bce_sysctl_stats_clear, "I", "Clear statistics block"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "bc_state", CTLTYPE_INT | CTLFLAG_RW, @@ -8882,18 +9032,18 @@ bce_dump_enet(struct bce_softc *sc, struct mbuf *m) switch (ip->ip_p) { case IPPROTO_TCP: th = (struct tcphdr *)((caddr_t)ip + (ip->ip_hl << 2)); - BCE_PRINTF("-tcp: dest = %d, src = %d, hlen = %d bytes, " - "flags = 0x%b, csum = 0x%04X\n", - ntohs(th->th_dport), ntohs(th->th_sport), - (th->th_off << 2), th->th_flags, + BCE_PRINTF("-tcp: dest = %d, src = %d, hlen = " + "%d bytes, flags = 0x%b, csum = 0x%04X\n", + ntohs(th->th_dport), ntohs(th->th_sport), + (th->th_off << 2), th->th_flags, "\20\10CWR\07ECE\06URG\05ACK\04PSH\03RST" "\02SYN\01FIN", ntohs(th->th_sum)); break; case IPPROTO_UDP: uh = (struct udphdr *)((caddr_t)ip + (ip->ip_hl << 2)); BCE_PRINTF("-udp: dest = %d, src = %d, len = %d " - "bytes, csum = 0x%04X\n", ntohs(uh->uh_dport), - ntohs(uh->uh_sport), ntohs(uh->uh_ulen), + "bytes, csum = 0x%04X\n", 
ntohs(uh->uh_dport), + ntohs(uh->uh_sport), ntohs(uh->uh_ulen), ntohs(uh->uh_sum)); break; case IPPROTO_ICMP: @@ -8960,10 +9110,10 @@ bce_dump_mbuf(struct bce_softc *sc, struct mbuf *m) if (mp->m_flags & M_PKTHDR) { BCE_PRINTF("- m_pkthdr: len = %d, flags = 0x%b, " - "csum_flags = %b\n", mp->m_pkthdr.len, + "csum_flags = %b\n", mp->m_pkthdr.len, mp->m_flags, "\20\12M_BCAST\13M_MCAST\14M_FRAG" "\15M_FIRSTFRAG\16M_LASTFRAG\21M_VLANTAG" - "\22M_PROMISC\23M_NOFREE", + "\22M_PROMISC\23M_NOFREE", mp->m_pkthdr.csum_flags, "\20\1CSUM_IP\2CSUM_TCP\3CSUM_UDP\4CSUM_IP_FRAGS" "\5CSUM_FRAGMENT\6CSUM_TSO\11CSUM_IP_CHECKED" @@ -8995,7 +9145,7 @@ bce_dump_mbuf(struct bce_softc *sc, struct mbuf *m) printf("EXT_DISPOSABLE\n"); break; case EXT_EXTREF: printf("EXT_EXTREF\n"); break; - default: + default: printf("UNKNOWN\n"); } } @@ -9116,96 +9266,96 @@ bce_dump_txbd(struct bce_softc *sc, int idx, struct tx_bd *txbd) else if ((idx & USABLE_TX_BD_PER_PAGE) == USABLE_TX_BD_PER_PAGE) /* TX Chain page pointer. */ BCE_PRINTF("tx_bd[0x%04X]: haddr = 0x%08X:%08X, chain page " - "pointer\n", idx, txbd->tx_bd_haddr_hi, + "pointer\n", idx, txbd->tx_bd_haddr_hi, txbd->tx_bd_haddr_lo); else { /* Normal tx_bd entry. 
*/ BCE_PRINTF("tx_bd[0x%04X]: haddr = 0x%08X:%08X, " "mss_nbytes = 0x%08X, vlan tag = 0x%04X, flags = " - "0x%04X (", idx, txbd->tx_bd_haddr_hi, - txbd->tx_bd_haddr_lo, txbd->tx_bd_mss_nbytes, + "0x%04X (", idx, txbd->tx_bd_haddr_hi, + txbd->tx_bd_haddr_lo, txbd->tx_bd_mss_nbytes, txbd->tx_bd_vlan_tag, txbd->tx_bd_flags); if (txbd->tx_bd_flags & TX_BD_FLAGS_CONN_FAULT) { - if (i>0) - printf("|"); - printf("CONN_FAULT"); + if (i>0) + printf("|"); + printf("CONN_FAULT"); i++; } if (txbd->tx_bd_flags & TX_BD_FLAGS_TCP_UDP_CKSUM) { - if (i>0) - printf("|"); - printf("TCP_UDP_CKSUM"); + if (i>0) + printf("|"); + printf("TCP_UDP_CKSUM"); i++; } if (txbd->tx_bd_flags & TX_BD_FLAGS_IP_CKSUM) { - if (i>0) - printf("|"); - printf("IP_CKSUM"); + if (i>0) + printf("|"); + printf("IP_CKSUM"); i++; } if (txbd->tx_bd_flags & TX_BD_FLAGS_VLAN_TAG) { - if (i>0) - printf("|"); - printf("VLAN"); + if (i>0) + printf("|"); + printf("VLAN"); i++; } if (txbd->tx_bd_flags & TX_BD_FLAGS_COAL_NOW) { - if (i>0) - printf("|"); - printf("COAL_NOW"); + if (i>0) + printf("|"); + printf("COAL_NOW"); i++; } if (txbd->tx_bd_flags & TX_BD_FLAGS_DONT_GEN_CRC) { - if (i>0) - printf("|"); - printf("DONT_GEN_CRC"); + if (i>0) + printf("|"); + printf("DONT_GEN_CRC"); i++; } if (txbd->tx_bd_flags & TX_BD_FLAGS_START) { - if (i>0) - printf("|"); - printf("START"); + if (i>0) + printf("|"); + printf("START"); i++; } if (txbd->tx_bd_flags & TX_BD_FLAGS_END) { - if (i>0) - printf("|"); - printf("END"); + if (i>0) + printf("|"); + printf("END"); i++; } if (txbd->tx_bd_flags & TX_BD_FLAGS_SW_LSO) { - if (i>0) - printf("|"); - printf("LSO"); + if (i>0) + printf("|"); + printf("LSO"); i++; } if (txbd->tx_bd_flags & TX_BD_FLAGS_SW_OPTION_WORD) { - if (i>0) - printf("|"); - printf("SW_OPTION=%d", ((txbd->tx_bd_flags & + if (i>0) + printf("|"); + printf("SW_OPTION=%d", ((txbd->tx_bd_flags & TX_BD_FLAGS_SW_OPTION_WORD) >> 8)); i++; } if (txbd->tx_bd_flags & TX_BD_FLAGS_SW_FLAGS) { - if (i>0) - printf("|"); - 
printf("SW_FLAGS"); + if (i>0) + printf("|"); + printf("SW_FLAGS"); i++; } if (txbd->tx_bd_flags & TX_BD_FLAGS_SW_SNAP) { - if (i>0) - printf("|"); + if (i>0) + printf("|"); printf("SNAP)"); } else { printf(")\n"); @@ -9229,13 +9379,13 @@ bce_dump_rxbd(struct bce_softc *sc, int idx, struct rx_bd *rxbd) else if ((idx & USABLE_RX_BD_PER_PAGE) == USABLE_RX_BD_PER_PAGE) /* RX Chain page pointer. */ BCE_PRINTF("rx_bd[0x%04X]: haddr = 0x%08X:%08X, chain page " - "pointer\n", idx, rxbd->rx_bd_haddr_hi, + "pointer\n", idx, rxbd->rx_bd_haddr_hi, rxbd->rx_bd_haddr_lo); else /* Normal rx_bd entry. */ BCE_PRINTF("rx_bd[0x%04X]: haddr = 0x%08X:%08X, nbytes = " - "0x%08X, flags = 0x%08X\n", idx, rxbd->rx_bd_haddr_hi, - rxbd->rx_bd_haddr_lo, rxbd->rx_bd_len, + "0x%08X, flags = 0x%08X\n", idx, rxbd->rx_bd_haddr_hi, + rxbd->rx_bd_haddr_lo, rxbd->rx_bd_len, rxbd->rx_bd_flags); } @@ -9325,57 +9475,57 @@ bce_dump_ctx(struct bce_softc *sc, u16 cid) "index\n", CTX_RD(sc, GET_CID_ADDR(cid), BCE_L2CTX_RX_NX_BDIDX)); BCE_PRINTF(" 0x%08X - (L2CTX_RX_HOST_PG_BDIDX) host page " - "producer index\n", CTX_RD(sc, GET_CID_ADDR(cid), + "producer index\n", CTX_RD(sc, GET_CID_ADDR(cid), BCE_L2CTX_RX_HOST_PG_BDIDX)); BCE_PRINTF(" 0x%08X - (L2CTX_RX_PG_BUF_SIZE) host rx_bd/page " - "buffer size\n", CTX_RD(sc, GET_CID_ADDR(cid), + "buffer size\n", CTX_RD(sc, GET_CID_ADDR(cid), BCE_L2CTX_RX_PG_BUF_SIZE)); BCE_PRINTF(" 0x%08X - (L2CTX_RX_NX_PG_BDHADDR_HI) h/w page " - "chain address\n", CTX_RD(sc, GET_CID_ADDR(cid), + "chain address\n", CTX_RD(sc, GET_CID_ADDR(cid), BCE_L2CTX_RX_NX_PG_BDHADDR_HI)); BCE_PRINTF(" 0x%08X - (L2CTX_RX_NX_PG_BDHADDR_LO) h/w page " - "chain address\n", CTX_RD(sc, GET_CID_ADDR(cid), + "chain address\n", CTX_RD(sc, GET_CID_ADDR(cid), BCE_L2CTX_RX_NX_PG_BDHADDR_LO)); BCE_PRINTF(" 0x%08X - (L2CTX_RX_NX_PG_BDIDX) h/w page " - "consumer index\n", CTX_RD(sc, GET_CID_ADDR(cid), + "consumer index\n", CTX_RD(sc, GET_CID_ADDR(cid), BCE_L2CTX_RX_NX_PG_BDIDX)); } else if (cid == TX_CID) { 
if ((BCE_CHIP_NUM(sc) == BCE_CHIP_NUM_5709) || (BCE_CHIP_NUM(sc) == BCE_CHIP_NUM_5716)) { BCE_PRINTF(" 0x%08X - (L2CTX_TX_TYPE_XI) ctx type\n", - CTX_RD(sc, GET_CID_ADDR(cid), + CTX_RD(sc, GET_CID_ADDR(cid), BCE_L2CTX_TX_TYPE_XI)); BCE_PRINTF(" 0x%08X - (L2CTX_CMD_TX_TYPE_XI) ctx " - "cmd\n", CTX_RD(sc, GET_CID_ADDR(cid), + "cmd\n", CTX_RD(sc, GET_CID_ADDR(cid), BCE_L2CTX_TX_CMD_TYPE_XI)); BCE_PRINTF(" 0x%08X - (L2CTX_TX_TBDR_BDHADDR_HI_XI) " - "h/w buffer descriptor address\n", - CTX_RD(sc, GET_CID_ADDR(cid), + "h/w buffer descriptor address\n", + CTX_RD(sc, GET_CID_ADDR(cid), BCE_L2CTX_TX_TBDR_BHADDR_HI_XI)); BCE_PRINTF(" 0x%08X - (L2CTX_TX_TBDR_BHADDR_LO_XI) " - "h/w buffer descriptor address\n", + "h/w buffer descriptor address\n", CTX_RD(sc, GET_CID_ADDR(cid), BCE_L2CTX_TX_TBDR_BHADDR_LO_XI)); BCE_PRINTF(" 0x%08X - (L2CTX_TX_HOST_BIDX_XI) " - "host producer index\n", + "host producer index\n", CTX_RD(sc, GET_CID_ADDR(cid), BCE_L2CTX_TX_HOST_BIDX_XI)); BCE_PRINTF(" 0x%08X - (L2CTX_TX_HOST_BSEQ_XI) " - "host byte sequence\n", + "host byte sequence\n", CTX_RD(sc, GET_CID_ADDR(cid), BCE_L2CTX_TX_HOST_BSEQ_XI)); } else { BCE_PRINTF(" 0x%08X - (L2CTX_TX_TYPE) ctx type\n", CTX_RD(sc, GET_CID_ADDR(cid), BCE_L2CTX_TX_TYPE)); BCE_PRINTF(" 0x%08X - (L2CTX_TX_CMD_TYPE) ctx cmd\n", - CTX_RD(sc, GET_CID_ADDR(cid), + CTX_RD(sc, GET_CID_ADDR(cid), BCE_L2CTX_TX_CMD_TYPE)); BCE_PRINTF(" 0x%08X - (L2CTX_TX_TBDR_BDHADDR_HI) " - "h/w buffer descriptor address\n", + "h/w buffer descriptor address\n", CTX_RD(sc, GET_CID_ADDR(cid), BCE_L2CTX_TX_TBDR_BHADDR_HI)); BCE_PRINTF(" 0x%08X - (L2CTX_TX_TBDR_BHADDR_LO) " - "h/w buffer descriptor address\n", + "h/w buffer descriptor address\n", CTX_RD(sc, GET_CID_ADDR(cid), BCE_L2CTX_TX_TBDR_BHADDR_LO)); BCE_PRINTF(" 0x%08X - (L2CTX_TX_HOST_BIDX) host " @@ -9605,8 +9755,8 @@ bce_dump_ftqs(struct bce_softc *sc) if ((BCE_CHIP_NUM(sc) == BCE_CHIP_NUM_5709) || (BCE_CHIP_NUM(sc) == BCE_CHIP_NUM_5716)) - val = val | - 
(BCE_HC_STAT_GEN_SEL_0_GEN_SEL_0_RV2PCSQ_VALID_CNT_XI << + val = val | + (BCE_HC_STAT_GEN_SEL_0_GEN_SEL_0_RV2PCSQ_VALID_CNT_XI << 24); REG_WR(sc, BCE_HC_STAT_GEN_SEL_0, val); @@ -10020,11 +10170,11 @@ bce_dump_driver_state(struct bce_softc *sc) BCE_PRINTF(" 0x%08X - (sc->interrupts_generated) " "h/w intrs\n", sc->interrupts_generated); - BCE_PRINTF(" 0x%08X - (sc->rx_interrupts) " - "rx interrupts handled\n", sc->rx_interrupts); + BCE_PRINTF(" 0x%08X - (sc->interrupts_rx) " + "rx interrupts handled\n", sc->interrupts_rx); - BCE_PRINTF(" 0x%08X - (sc->tx_interrupts) " - "tx interrupts handled\n", sc->tx_interrupts); + BCE_PRINTF(" 0x%08X - (sc->interrupts_tx) " + "tx interrupts handled\n", sc->interrupts_tx); BCE_PRINTF(" 0x%08X - (sc->phy_interrupts) " "phy interrupts handled\n", sc->phy_interrupts); @@ -10122,15 +10272,15 @@ bce_dump_hw_state(struct bce_softc *sc) val, BCE_MISC_ENABLE_STATUS_BITS); val = REG_RD(sc, BCE_DMA_STATUS); - BCE_PRINTF("0x%08X - (0x%06X) dma_status\n", + BCE_PRINTF("0x%08X - (0x%06X) dma_status\n", val, BCE_DMA_STATUS); val = REG_RD(sc, BCE_CTX_STATUS); - BCE_PRINTF("0x%08X - (0x%06X) ctx_status\n", + BCE_PRINTF("0x%08X - (0x%06X) ctx_status\n", val, BCE_CTX_STATUS); val = REG_RD(sc, BCE_EMAC_STATUS); - BCE_PRINTF("0x%08X - (0x%06X) emac_status\n", + BCE_PRINTF("0x%08X - (0x%06X) emac_status\n", val, BCE_EMAC_STATUS); val = REG_RD(sc, BCE_RPM_STATUS); @@ -10139,16 +10289,16 @@ bce_dump_hw_state(struct bce_softc *sc) /* ToDo: Create a #define for this constant. */ val = REG_RD(sc, 0x2004); - BCE_PRINTF("0x%08X - (0x%06X) rlup_status\n", + BCE_PRINTF("0x%08X - (0x%06X) rlup_status\n", val, 0x2004); val = REG_RD(sc, BCE_RV2P_STATUS); - BCE_PRINTF("0x%08X - (0x%06X) rv2p_status\n", + BCE_PRINTF("0x%08X - (0x%06X) rv2p_status\n", val, BCE_RV2P_STATUS); /* ToDo: Create a #define for this constant. 
*/ val = REG_RD(sc, 0x2c04); - BCE_PRINTF("0x%08X - (0x%06X) rdma_status\n", + BCE_PRINTF("0x%08X - (0x%06X) rdma_status\n", val, 0x2c04); val = REG_RD(sc, BCE_TBDR_STATUS); @@ -10156,7 +10306,7 @@ bce_dump_hw_state(struct bce_softc *sc) val, BCE_TBDR_STATUS); val = REG_RD(sc, BCE_TDMA_STATUS); - BCE_PRINTF("0x%08X - (0x%06X) tdma_status\n", + BCE_PRINTF("0x%08X - (0x%06X) tdma_status\n", val, BCE_TDMA_STATUS); val = REG_RD(sc, BCE_HC_STATUS); @@ -10164,27 +10314,27 @@ bce_dump_hw_state(struct bce_softc *sc) val, BCE_HC_STATUS); val = REG_RD_IND(sc, BCE_TXP_CPU_STATE); - BCE_PRINTF("0x%08X - (0x%06X) txp_cpu_state\n", + BCE_PRINTF("0x%08X - (0x%06X) txp_cpu_state\n", val, BCE_TXP_CPU_STATE); val = REG_RD_IND(sc, BCE_TPAT_CPU_STATE); - BCE_PRINTF("0x%08X - (0x%06X) tpat_cpu_state\n", + BCE_PRINTF("0x%08X - (0x%06X) tpat_cpu_state\n", val, BCE_TPAT_CPU_STATE); val = REG_RD_IND(sc, BCE_RXP_CPU_STATE); - BCE_PRINTF("0x%08X - (0x%06X) rxp_cpu_state\n", + BCE_PRINTF("0x%08X - (0x%06X) rxp_cpu_state\n", val, BCE_RXP_CPU_STATE); val = REG_RD_IND(sc, BCE_COM_CPU_STATE); - BCE_PRINTF("0x%08X - (0x%06X) com_cpu_state\n", + BCE_PRINTF("0x%08X - (0x%06X) com_cpu_state\n", val, BCE_COM_CPU_STATE); val = REG_RD_IND(sc, BCE_MCP_CPU_STATE); - BCE_PRINTF("0x%08X - (0x%06X) mcp_cpu_state\n", + BCE_PRINTF("0x%08X - (0x%06X) mcp_cpu_state\n", val, BCE_MCP_CPU_STATE); val = REG_RD_IND(sc, BCE_CP_CPU_STATE); - BCE_PRINTF("0x%08X - (0x%06X) cp_cpu_state\n", + BCE_PRINTF("0x%08X - (0x%06X) cp_cpu_state\n", val, BCE_CP_CPU_STATE); BCE_PRINTF( @@ -10306,15 +10456,15 @@ bce_dump_txp_state(struct bce_softc *sc, int regs) BCE_PRINTF("Firmware version - %s\n", (char *) fw_version); val = REG_RD_IND(sc, BCE_TXP_CPU_MODE); - BCE_PRINTF("0x%08X - (0x%06X) txp_cpu_mode\n", + BCE_PRINTF("0x%08X - (0x%06X) txp_cpu_mode\n", val, BCE_TXP_CPU_MODE); val = REG_RD_IND(sc, BCE_TXP_CPU_STATE); - BCE_PRINTF("0x%08X - (0x%06X) txp_cpu_state\n", + BCE_PRINTF("0x%08X - (0x%06X) txp_cpu_state\n", val, 
BCE_TXP_CPU_STATE); val = REG_RD_IND(sc, BCE_TXP_CPU_EVENT_MASK); - BCE_PRINTF("0x%08X - (0x%06X) txp_cpu_event_mask\n", + BCE_PRINTF("0x%08X - (0x%06X) txp_cpu_event_mask\n", val, BCE_TXP_CPU_EVENT_MASK); if (regs) { @@ -10327,8 +10477,8 @@ bce_dump_txp_state(struct bce_softc *sc, int regs) /* Skip the big blank spaces */ if (i < 0x454000 && i > 0x5ffff) BCE_PRINTF("0x%04X: 0x%08X 0x%08X " - "0x%08X 0x%08X\n", i, - REG_RD_IND(sc, i), + "0x%08X 0x%08X\n", i, + REG_RD_IND(sc, i), REG_RD_IND(sc, i + 0x4), REG_RD_IND(sc, i + 0x8), REG_RD_IND(sc, i + 0xC)); @@ -10366,15 +10516,15 @@ bce_dump_rxp_state(struct bce_softc *sc, int regs) BCE_PRINTF("Firmware version - %s\n", (char *) fw_version); val = REG_RD_IND(sc, BCE_RXP_CPU_MODE); - BCE_PRINTF("0x%08X - (0x%06X) rxp_cpu_mode\n", + BCE_PRINTF("0x%08X - (0x%06X) rxp_cpu_mode\n", val, BCE_RXP_CPU_MODE); val = REG_RD_IND(sc, BCE_RXP_CPU_STATE); - BCE_PRINTF("0x%08X - (0x%06X) rxp_cpu_state\n", + BCE_PRINTF("0x%08X - (0x%06X) rxp_cpu_state\n", val, BCE_RXP_CPU_STATE); val = REG_RD_IND(sc, BCE_RXP_CPU_EVENT_MASK); - BCE_PRINTF("0x%08X - (0x%06X) rxp_cpu_event_mask\n", + BCE_PRINTF("0x%08X - (0x%06X) rxp_cpu_event_mask\n", val, BCE_RXP_CPU_EVENT_MASK); if (regs) { @@ -10387,10 +10537,10 @@ bce_dump_rxp_state(struct bce_softc *sc, int regs) /* Skip the big blank sapces */ if (i < 0xc5400 && i > 0xdffff) BCE_PRINTF("0x%04X: 0x%08X 0x%08X " - "0x%08X 0x%08X\n", i, - REG_RD_IND(sc, i), + "0x%08X 0x%08X\n", i, + REG_RD_IND(sc, i), REG_RD_IND(sc, i + 0x4), - REG_RD_IND(sc, i + 0x8), + REG_RD_IND(sc, i + 0x8), REG_RD_IND(sc, i + 0xC)); } } @@ -10426,15 +10576,15 @@ bce_dump_tpat_state(struct bce_softc *sc, int regs) BCE_PRINTF("Firmware version - %s\n", (char *) fw_version); val = REG_RD_IND(sc, BCE_TPAT_CPU_MODE); - BCE_PRINTF("0x%08X - (0x%06X) tpat_cpu_mode\n", + BCE_PRINTF("0x%08X - (0x%06X) tpat_cpu_mode\n", val, BCE_TPAT_CPU_MODE); val = REG_RD_IND(sc, BCE_TPAT_CPU_STATE); - BCE_PRINTF("0x%08X - (0x%06X) tpat_cpu_state\n", + 
BCE_PRINTF("0x%08X - (0x%06X) tpat_cpu_state\n", val, BCE_TPAT_CPU_STATE); val = REG_RD_IND(sc, BCE_TPAT_CPU_EVENT_MASK); - BCE_PRINTF("0x%08X - (0x%06X) tpat_cpu_event_mask\n", + BCE_PRINTF("0x%08X - (0x%06X) tpat_cpu_event_mask\n", val, BCE_TPAT_CPU_EVENT_MASK); if (regs) { @@ -10448,9 +10598,9 @@ bce_dump_tpat_state(struct bce_softc *sc, int regs) if (i < 0x854000 && i > 0x9ffff) BCE_PRINTF("0x%04X: 0x%08X 0x%08X " "0x%08X 0x%08X\n", i, - REG_RD_IND(sc, i), + REG_RD_IND(sc, i), REG_RD_IND(sc, i + 0x4), - REG_RD_IND(sc, i + 0x8), + REG_RD_IND(sc, i + 0x8), REG_RD_IND(sc, i + 0xC)); } } @@ -10486,11 +10636,11 @@ bce_dump_cp_state(struct bce_softc *sc, int regs) BCE_PRINTF("Firmware version - %s\n", (char *) fw_version); val = REG_RD_IND(sc, BCE_CP_CPU_MODE); - BCE_PRINTF("0x%08X - (0x%06X) cp_cpu_mode\n", + BCE_PRINTF("0x%08X - (0x%06X) cp_cpu_mode\n", val, BCE_CP_CPU_MODE); val = REG_RD_IND(sc, BCE_CP_CPU_STATE); - BCE_PRINTF("0x%08X - (0x%06X) cp_cpu_state\n", + BCE_PRINTF("0x%08X - (0x%06X) cp_cpu_state\n", val, BCE_CP_CPU_STATE); val = REG_RD_IND(sc, BCE_CP_CPU_EVENT_MASK); @@ -10507,10 +10657,10 @@ bce_dump_cp_state(struct bce_softc *sc, int regs) /* Skip the big blank spaces */ if (i < 0x185400 && i > 0x19ffff) BCE_PRINTF("0x%04X: 0x%08X 0x%08X " - "0x%08X 0x%08X\n", i, - REG_RD_IND(sc, i), + "0x%08X 0x%08X\n", i, + REG_RD_IND(sc, i), REG_RD_IND(sc, i + 0x4), - REG_RD_IND(sc, i + 0x8), + REG_RD_IND(sc, i + 0x8), REG_RD_IND(sc, i + 0xC)); } } @@ -10546,11 +10696,11 @@ bce_dump_com_state(struct bce_softc *sc, int regs) BCE_PRINTF("Firmware version - %s\n", (char *) fw_version); val = REG_RD_IND(sc, BCE_COM_CPU_MODE); - BCE_PRINTF("0x%08X - (0x%06X) com_cpu_mode\n", + BCE_PRINTF("0x%08X - (0x%06X) com_cpu_mode\n", val, BCE_COM_CPU_MODE); val = REG_RD_IND(sc, BCE_COM_CPU_STATE); - BCE_PRINTF("0x%08X - (0x%06X) com_cpu_state\n", + BCE_PRINTF("0x%08X - (0x%06X) com_cpu_state\n", val, BCE_COM_CPU_STATE); val = REG_RD_IND(sc, BCE_COM_CPU_EVENT_MASK); @@ -10565,8 
+10715,8 @@ bce_dump_com_state(struct bce_softc *sc, int regs) for (int i = BCE_COM_CPU_MODE; i < 0x1053e8; i += 0x10) { BCE_PRINTF("0x%04X: 0x%08X 0x%08X " - "0x%08X 0x%08X\n", i, - REG_RD_IND(sc, i), + "0x%08X 0x%08X\n", i, + REG_RD_IND(sc, i), REG_RD_IND(sc, i + 0x4), REG_RD_IND(sc, i + 0x8), REG_RD_IND(sc, i + 0xC)); @@ -10605,17 +10755,17 @@ bce_dump_rv2p_state(struct bce_softc *sc) val = 0x00000001; REG_WR_IND(sc, BCE_RV2P_PROC1_ADDR_CMD, val); fw_ver_low = REG_RD_IND(sc, BCE_RV2P_INSTR_LOW); - fw_ver_high = REG_RD_IND(sc, BCE_RV2P_INSTR_HIGH) & + fw_ver_high = REG_RD_IND(sc, BCE_RV2P_INSTR_HIGH) & BCE_RV2P_INSTR_HIGH_HIGH; - BCE_PRINTF("RV2P1 Firmware version - 0x%08X:0x%08X\n", + BCE_PRINTF("RV2P1 Firmware version - 0x%08X:0x%08X\n", fw_ver_high, fw_ver_low); val = 0x00000001; REG_WR_IND(sc, BCE_RV2P_PROC2_ADDR_CMD, val); fw_ver_low = REG_RD_IND(sc, BCE_RV2P_INSTR_LOW); - fw_ver_high = REG_RD_IND(sc, BCE_RV2P_INSTR_HIGH) & + fw_ver_high = REG_RD_IND(sc, BCE_RV2P_INSTR_HIGH) & BCE_RV2P_INSTR_HIGH_HIGH; - BCE_PRINTF("RV2P2 Firmware version - 0x%08X:0x%08X\n", + BCE_PRINTF("RV2P2 Firmware version - 0x%08X:0x%08X\n", fw_ver_high, fw_ver_low); /* Resume the RV2P processors. */ diff --git a/sys/dev/bce/if_bcereg.h b/sys/dev/bce/if_bcereg.h index 49bba6377f8..d6e7c242d63 100644 --- a/sys/dev/bce/if_bcereg.h +++ b/sys/dev/bce/if_bcereg.h @@ -284,8 +284,8 @@ "\02EnaPauseRcv" \ "\01EnaPausXmit" -/* - * Remove before release: +/* + * Remove before release: * * #define BCE_DEBUG * #define BCE_NVRAM_WRITE_SUPPORT @@ -439,7 +439,7 @@ } /* Runs a particular command based on the code path. */ -#define DBRUNCP(cp, args...) \ +#define DBRUNCP(cp, args...) 
\ if (BCE_CODE_PATH(cp)) { \ args; \ } @@ -4116,7 +4116,7 @@ struct l2_fhdr { #define BCE_RDMA_FTQ_CTL_CUR_DEPTH (0x3ffL<<22) - + /* * timer_reg definition * offset: 0x4400 @@ -4476,107 +4476,107 @@ struct l2_fhdr { * csch_reg definition * offset: 0x4000 */ -#define BCE_CSCH_COMMAND 0x00004000 -#define BCE_CSCH_CH_FTQ_CMD 0x000043f8 -#define BCE_CSCH_CH_FTQ_CTL 0x000043fc -#define BCE_CSCH_CH_FTQ_CTL_MAX_DEPTH (0x3ffL<<12) -#define BCE_CSCH_CH_FTQ_CTL_CUR_DEPTH (0x3ffL<<22) +#define BCE_CSCH_COMMAND 0x00004000 +#define BCE_CSCH_CH_FTQ_CMD 0x000043f8 +#define BCE_CSCH_CH_FTQ_CTL 0x000043fc +#define BCE_CSCH_CH_FTQ_CTL_MAX_DEPTH (0x3ffL<<12) +#define BCE_CSCH_CH_FTQ_CTL_CUR_DEPTH (0x3ffL<<22) /* * tbdr_reg definition * offset: 0x5000 */ -#define BCE_TBDR_COMMAND 0x00005000 -#define BCE_TBDR_COMMAND_ENABLE (1L<<0) -#define BCE_TBDR_COMMAND_SOFT_RST (1L<<1) -#define BCE_TBDR_COMMAND_MSTR_ABORT (1L<<4) +#define BCE_TBDR_COMMAND 0x00005000 +#define BCE_TBDR_COMMAND_ENABLE (1L<<0) +#define BCE_TBDR_COMMAND_SOFT_RST (1L<<1) +#define BCE_TBDR_COMMAND_MSTR_ABORT (1L<<4) -#define BCE_TBDR_STATUS 0x00005004 -#define BCE_TBDR_STATUS_DMA_WAIT (1L<<0) -#define BCE_TBDR_STATUS_FTQ_WAIT (1L<<1) -#define BCE_TBDR_STATUS_FIFO_OVERFLOW (1L<<2) -#define BCE_TBDR_STATUS_FIFO_UNDERFLOW (1L<<3) -#define BCE_TBDR_STATUS_SEARCHMISS_ERROR (1L<<4) -#define BCE_TBDR_STATUS_FTQ_ENTRY_CNT (1L<<5) -#define BCE_TBDR_STATUS_BURST_CNT (1L<<6) +#define BCE_TBDR_STATUS 0x00005004 +#define BCE_TBDR_STATUS_DMA_WAIT (1L<<0) +#define BCE_TBDR_STATUS_FTQ_WAIT (1L<<1) +#define BCE_TBDR_STATUS_FIFO_OVERFLOW (1L<<2) +#define BCE_TBDR_STATUS_FIFO_UNDERFLOW (1L<<3) +#define BCE_TBDR_STATUS_SEARCHMISS_ERROR (1L<<4) +#define BCE_TBDR_STATUS_FTQ_ENTRY_CNT (1L<<5) +#define BCE_TBDR_STATUS_BURST_CNT (1L<<6) -#define BCE_TBDR_CONFIG 0x00005008 -#define BCE_TBDR_CONFIG_MAX_BDS (0xffL<<0) -#define BCE_TBDR_CONFIG_SWAP_MODE (1L<<8) -#define BCE_TBDR_CONFIG_PRIORITY (1L<<9) +#define BCE_TBDR_CONFIG 0x00005008 +#define 
BCE_TBDR_CONFIG_MAX_BDS (0xffL<<0) +#define BCE_TBDR_CONFIG_SWAP_MODE (1L<<8) +#define BCE_TBDR_CONFIG_PRIORITY (1L<<9) #define BCE_TBDR_CONFIG_CACHE_NEXT_PAGE_PTRS (1L<<10) -#define BCE_TBDR_CONFIG_PAGE_SIZE (0xfL<<24) -#define BCE_TBDR_CONFIG_PAGE_SIZE_256 (0L<<24) -#define BCE_TBDR_CONFIG_PAGE_SIZE_512 (1L<<24) -#define BCE_TBDR_CONFIG_PAGE_SIZE_1K (2L<<24) -#define BCE_TBDR_CONFIG_PAGE_SIZE_2K (3L<<24) -#define BCE_TBDR_CONFIG_PAGE_SIZE_4K (4L<<24) -#define BCE_TBDR_CONFIG_PAGE_SIZE_8K (5L<<24) -#define BCE_TBDR_CONFIG_PAGE_SIZE_16K (6L<<24) -#define BCE_TBDR_CONFIG_PAGE_SIZE_32K (7L<<24) -#define BCE_TBDR_CONFIG_PAGE_SIZE_64K (8L<<24) -#define BCE_TBDR_CONFIG_PAGE_SIZE_128K (9L<<24) -#define BCE_TBDR_CONFIG_PAGE_SIZE_256K (10L<<24) -#define BCE_TBDR_CONFIG_PAGE_SIZE_512K (11L<<24) -#define BCE_TBDR_CONFIG_PAGE_SIZE_1M (12L<<24) +#define BCE_TBDR_CONFIG_PAGE_SIZE (0xfL<<24) +#define BCE_TBDR_CONFIG_PAGE_SIZE_256 (0L<<24) +#define BCE_TBDR_CONFIG_PAGE_SIZE_512 (1L<<24) +#define BCE_TBDR_CONFIG_PAGE_SIZE_1K (2L<<24) +#define BCE_TBDR_CONFIG_PAGE_SIZE_2K (3L<<24) +#define BCE_TBDR_CONFIG_PAGE_SIZE_4K (4L<<24) +#define BCE_TBDR_CONFIG_PAGE_SIZE_8K (5L<<24) +#define BCE_TBDR_CONFIG_PAGE_SIZE_16K (6L<<24) +#define BCE_TBDR_CONFIG_PAGE_SIZE_32K (7L<<24) +#define BCE_TBDR_CONFIG_PAGE_SIZE_64K (8L<<24) +#define BCE_TBDR_CONFIG_PAGE_SIZE_128K (9L<<24) +#define BCE_TBDR_CONFIG_PAGE_SIZE_256K (10L<<24) +#define BCE_TBDR_CONFIG_PAGE_SIZE_512K (11L<<24) +#define BCE_TBDR_CONFIG_PAGE_SIZE_1M (12L<<24) -#define BCE_TBDR_DEBUG_VECT_PEEK 0x0000500c -#define BCE_TBDR_DEBUG_VECT_PEEK_1_VALUE (0x7ffL<<0) -#define BCE_TBDR_DEBUG_VECT_PEEK_1_PEEK_EN (1L<<11) -#define BCE_TBDR_DEBUG_VECT_PEEK_1_SEL (0xfL<<12) -#define BCE_TBDR_DEBUG_VECT_PEEK_2_VALUE (0x7ffL<<16) -#define BCE_TBDR_DEBUG_VECT_PEEK_2_PEEK_EN (1L<<27) -#define BCE_TBDR_DEBUG_VECT_PEEK_2_SEL (0xfL<<28) +#define BCE_TBDR_DEBUG_VECT_PEEK 0x0000500c +#define BCE_TBDR_DEBUG_VECT_PEEK_1_VALUE (0x7ffL<<0) +#define 
BCE_TBDR_DEBUG_VECT_PEEK_1_PEEK_EN (1L<<11) +#define BCE_TBDR_DEBUG_VECT_PEEK_1_SEL (0xfL<<12) +#define BCE_TBDR_DEBUG_VECT_PEEK_2_VALUE (0x7ffL<<16) +#define BCE_TBDR_DEBUG_VECT_PEEK_2_PEEK_EN (1L<<27) +#define BCE_TBDR_DEBUG_VECT_PEEK_2_SEL (0xfL<<28) -#define BCE_TBDR_FTQ_DATA 0x000053c0 -#define BCE_TBDR_FTQ_CMD 0x000053f8 -#define BCE_TBDR_FTQ_CMD_OFFSET (0x3ffL<<0) -#define BCE_TBDR_FTQ_CMD_WR_TOP (1L<<10) -#define BCE_TBDR_FTQ_CMD_WR_TOP_0 (0L<<10) -#define BCE_TBDR_FTQ_CMD_WR_TOP_1 (1L<<10) -#define BCE_TBDR_FTQ_CMD_SFT_RESET (1L<<25) -#define BCE_TBDR_FTQ_CMD_RD_DATA (1L<<26) -#define BCE_TBDR_FTQ_CMD_ADD_INTERVEN (1L<<27) -#define BCE_TBDR_FTQ_CMD_ADD_DATA (1L<<28) -#define BCE_TBDR_FTQ_CMD_INTERVENE_CLR (1L<<29) -#define BCE_TBDR_FTQ_CMD_POP (1L<<30) -#define BCE_TBDR_FTQ_CMD_BUSY (1L<<31) +#define BCE_TBDR_FTQ_DATA 0x000053c0 +#define BCE_TBDR_FTQ_CMD 0x000053f8 +#define BCE_TBDR_FTQ_CMD_OFFSET (0x3ffL<<0) +#define BCE_TBDR_FTQ_CMD_WR_TOP (1L<<10) +#define BCE_TBDR_FTQ_CMD_WR_TOP_0 (0L<<10) +#define BCE_TBDR_FTQ_CMD_WR_TOP_1 (1L<<10) +#define BCE_TBDR_FTQ_CMD_SFT_RESET (1L<<25) +#define BCE_TBDR_FTQ_CMD_RD_DATA (1L<<26) +#define BCE_TBDR_FTQ_CMD_ADD_INTERVEN (1L<<27) +#define BCE_TBDR_FTQ_CMD_ADD_DATA (1L<<28) +#define BCE_TBDR_FTQ_CMD_INTERVENE_CLR (1L<<29) +#define BCE_TBDR_FTQ_CMD_POP (1L<<30) +#define BCE_TBDR_FTQ_CMD_BUSY (1L<<31) -#define BCE_TBDR_FTQ_CTL 0x000053fc -#define BCE_TBDR_FTQ_CTL_INTERVENE (1L<<0) -#define BCE_TBDR_FTQ_CTL_OVERFLOW (1L<<1) -#define BCE_TBDR_FTQ_CTL_FORCE_INTERVENE (1L<<2) -#define BCE_TBDR_FTQ_CTL_MAX_DEPTH (0x3ffL<<12) -#define BCE_TBDR_FTQ_CTL_CUR_DEPTH (0x3ffL<<22) +#define BCE_TBDR_FTQ_CTL 0x000053fc +#define BCE_TBDR_FTQ_CTL_INTERVENE (1L<<0) +#define BCE_TBDR_FTQ_CTL_OVERFLOW (1L<<1) +#define BCE_TBDR_FTQ_CTL_FORCE_INTERVENE (1L<<2) +#define BCE_TBDR_FTQ_CTL_MAX_DEPTH (0x3ffL<<12) +#define BCE_TBDR_FTQ_CTL_CUR_DEPTH (0x3ffL<<22) /* * tdma_reg definition * offset: 0x5c00 */ -#define BCE_TDMA_COMMAND 0x00005c00 
-#define BCE_TDMA_COMMAND_ENABLED (1L<<0) -#define BCE_TDMA_COMMAND_MASTER_ABORT (1L<<4) -#define BCE_TDMA_COMMAND_BAD_L2_LENGTH_ABORT (1L<<7) +#define BCE_TDMA_COMMAND 0x00005c00 +#define BCE_TDMA_COMMAND_ENABLED (1L<<0) +#define BCE_TDMA_COMMAND_MASTER_ABORT (1L<<4) +#define BCE_TDMA_COMMAND_BAD_L2_LENGTH_ABORT (1L<<7) -#define BCE_TDMA_STATUS 0x00005c04 -#define BCE_TDMA_STATUS_DMA_WAIT (1L<<0) -#define BCE_TDMA_STATUS_PAYLOAD_WAIT (1L<<1) -#define BCE_TDMA_STATUS_PATCH_FTQ_WAIT (1L<<2) +#define BCE_TDMA_STATUS 0x00005c04 +#define BCE_TDMA_STATUS_DMA_WAIT (1L<<0) +#define BCE_TDMA_STATUS_PAYLOAD_WAIT (1L<<1) +#define BCE_TDMA_STATUS_PATCH_FTQ_WAIT (1L<<2) #define BCE_TDMA_STATUS_LOCK_WAIT (1L<<3) #define BCE_TDMA_STATUS_FTQ_ENTRY_CNT (1L<<16) #define BCE_TDMA_STATUS_BURST_CNT (1L<<17) -#define BCE_TDMA_CONFIG 0x00005c08 -#define BCE_TDMA_CONFIG_ONE_DMA (1L<<0) +#define BCE_TDMA_CONFIG 0x00005c08 +#define BCE_TDMA_CONFIG_ONE_DMA (1L<<0) #define BCE_TDMA_CONFIG_ONE_RECORD (1L<<1) #define BCE_TDMA_CONFIG_LIMIT_SZ (0xfL<<4) #define BCE_TDMA_CONFIG_LIMIT_SZ_64 (0L<<4) #define BCE_TDMA_CONFIG_LIMIT_SZ_128 (0x4L<<4) #define BCE_TDMA_CONFIG_LIMIT_SZ_256 (0x6L<<4) #define BCE_TDMA_CONFIG_LIMIT_SZ_512 (0x8L<<4) -#define BCE_TDMA_CONFIG_LINE_SZ (0xfL<<8) +#define BCE_TDMA_CONFIG_LINE_SZ (0xfL<<8) #define BCE_TDMA_CONFIG_LINE_SZ_64 (0L<<8) #define BCE_TDMA_CONFIG_LINE_SZ_128 (4L<<8) #define BCE_TDMA_CONFIG_LINE_SZ_256 (6L<<8) @@ -4612,7 +4612,7 @@ struct l2_fhdr { #define BCE_TDMA_DR_INTF_FSM_DR_BUF (0x7L<<12) #define BCE_TDMA_DR_INTF_FSM_DMAD (0x7L<<16) -#define BCE_TDMA_DR_INTF_STATUS 0x00005c8c +#define BCE_TDMA_DR_INTF_STATUS 0x00005c8c #define BCE_TDMA_DR_INTF_STATUS_HOLE_PHASE (0x7L<<0) #define BCE_TDMA_DR_INTF_STATUS_DATA_AVAIL (0x3L<<4) #define BCE_TDMA_DR_INTF_STATUS_SHIFT_ADDR (0x7L<<8) @@ -4621,8 +4621,8 @@ struct l2_fhdr { #define BCE_TDMA_FTQ_DATA 0x00005fc0 #define BCE_TDMA_FTQ_CMD 0x00005ff8 -#define BCE_TDMA_FTQ_CMD_OFFSET (0x3ffL<<0) -#define 
BCE_TDMA_FTQ_CMD_WR_TOP (1L<<10) +#define BCE_TDMA_FTQ_CMD_OFFSET (0x3ffL<<0) +#define BCE_TDMA_FTQ_CMD_WR_TOP (1L<<10) #define BCE_TDMA_FTQ_CMD_WR_TOP_0 (0L<<10) #define BCE_TDMA_FTQ_CMD_WR_TOP_1 (1L<<10) #define BCE_TDMA_FTQ_CMD_SFT_RESET (1L<<25) @@ -4645,7 +4645,7 @@ struct l2_fhdr { * nvm_reg definition * offset: 0x6400 */ -#define BCE_NVM_COMMAND 0x00006400 +#define BCE_NVM_COMMAND 0x00006400 #define BCE_NVM_COMMAND_RST (1L<<0) #define BCE_NVM_COMMAND_DONE (1L<<3) #define BCE_NVM_COMMAND_DOIT (1L<<4) @@ -4694,23 +4694,23 @@ struct l2_fhdr { #define BCE_NVM_READ_NVM_READ_VALUE_SI (32L<<0) #define BCE_NVM_CFG1 0x00006414 -#define BCE_NVM_CFG1_FLASH_MODE (1L<<0) +#define BCE_NVM_CFG1_FLASH_MODE (1L<<0) #define BCE_NVM_CFG1_BUFFER_MODE (1L<<1) #define BCE_NVM_CFG1_PASS_MODE (1L<<2) #define BCE_NVM_CFG1_BITBANG_MODE (1L<<3) -#define BCE_NVM_CFG1_STATUS_BIT (0x7L<<4) +#define BCE_NVM_CFG1_STATUS_BIT (0x7L<<4) #define BCE_NVM_CFG1_STATUS_BIT_FLASH_RDY (0L<<4) #define BCE_NVM_CFG1_STATUS_BIT_BUFFER_RDY (7L<<4) #define BCE_NVM_CFG1_SPI_CLK_DIV (0xfL<<7) #define BCE_NVM_CFG1_SEE_CLK_DIV (0x7ffL<<11) #define BCE_NVM_CFG1_PROTECT_MODE (1L<<24) -#define BCE_NVM_CFG1_FLASH_SIZE (1L<<25) +#define BCE_NVM_CFG1_FLASH_SIZE (1L<<25) #define BCE_NVM_CFG1_COMPAT_BYPASSS (1L<<31) #define BCE_NVM_CFG2 0x00006418 #define BCE_NVM_CFG2_ERASE_CMD (0xffL<<0) #define BCE_NVM_CFG2_DUMMY (0xffL<<8) -#define BCE_NVM_CFG2_STATUS_CMD (0xffL<<16) +#define BCE_NVM_CFG2_STATUS_CMD (0xffL<<16) #define BCE_NVM_CFG3 0x0000641c #define BCE_NVM_CFG3_BUFFER_RD_CMD (0xffL<<0) @@ -4727,10 +4727,10 @@ struct l2_fhdr { #define BCE_NVM_SW_ARB_ARB_REQ_CLR1 (1L<<5) #define BCE_NVM_SW_ARB_ARB_REQ_CLR2 (1L<<6) #define BCE_NVM_SW_ARB_ARB_REQ_CLR3 (1L<<7) -#define BCE_NVM_SW_ARB_ARB_ARB0 (1L<<8) -#define BCE_NVM_SW_ARB_ARB_ARB1 (1L<<9) -#define BCE_NVM_SW_ARB_ARB_ARB2 (1L<<10) -#define BCE_NVM_SW_ARB_ARB_ARB3 (1L<<11) +#define BCE_NVM_SW_ARB_ARB_ARB0 (1L<<8) +#define BCE_NVM_SW_ARB_ARB_ARB1 (1L<<9) +#define 
BCE_NVM_SW_ARB_ARB_ARB2 (1L<<10) +#define BCE_NVM_SW_ARB_ARB_ARB3 (1L<<11) #define BCE_NVM_SW_ARB_REQ0 (1L<<12) #define BCE_NVM_SW_ARB_REQ1 (1L<<13) #define BCE_NVM_SW_ARB_REQ2 (1L<<14) @@ -4741,8 +4741,8 @@ struct l2_fhdr { #define BCE_NVM_ACCESS_ENABLE_WR_EN (1L<<1) #define BCE_NVM_WRITE1 0x00006428 -#define BCE_NVM_WRITE1_WREN_CMD (0xffL<<0) -#define BCE_NVM_WRITE1_WRDI_CMD (0xffL<<8) +#define BCE_NVM_WRITE1_WREN_CMD (0xffL<<0) +#define BCE_NVM_WRITE1_WRDI_CMD (0xffL<<8) #define BCE_NVM_WRITE1_SR_DATA (0xffL<<16) @@ -4753,7 +4753,7 @@ struct l2_fhdr { #define BCE_HC_COMMAND 0x00006800 #define BCE_HC_COMMAND_ENABLE (1L<<0) #define BCE_HC_COMMAND_SKIP_ABORT (1L<<4) -#define BCE_HC_COMMAND_COAL_NOW (1L<<16) +#define BCE_HC_COMMAND_COAL_NOW (1L<<16) #define BCE_HC_COMMAND_COAL_NOW_WO_INT (1L<<17) #define BCE_HC_COMMAND_STATS_NOW (1L<<18) #define BCE_HC_COMMAND_FORCE_INT (0x3L<<19) @@ -4769,7 +4769,7 @@ struct l2_fhdr { #define BCE_HC_STATUS_MASTER_ABORT (1L<<0) #define BCE_HC_STATUS_PARITY_ERROR_STATE (1L<<1) #define BCE_HC_STATUS_PCI_CLK_CNT_STAT (1L<<16) -#define BCE_HC_STATUS_CORE_CLK_CNT_STAT (1L<<17) +#define BCE_HC_STATUS_CORE_CLK_CNT_STAT (1L<<17) #define BCE_HC_STATUS_NUM_STATUS_BLOCKS_STAT (1L<<18) #define BCE_HC_STATUS_NUM_INT_GEN_STAT (1L<<19) #define BCE_HC_STATUS_NUM_INT_MBOX_WR_STAT (1L<<20) @@ -4790,27 +4790,27 @@ struct l2_fhdr { #define BCE_HC_CONFIG_ONE_SHOT (1L<<17) #define BCE_HC_CONFIG_USE_INT_PARAM (1L<<18) #define BCE_HC_CONFIG_SET_MASK_AT_RD (1L<<19) -#define BCE_HC_CONFIG_PER_COLLECT_LIMIT (0xfL<<20) +#define BCE_HC_CONFIG_PER_COLLECT_LIMIT (0xfL<<20) #define BCE_HC_CONFIG_SB_ADDR_INC (0x7L<<24) #define BCE_HC_CONFIG_SB_ADDR_INC_64B (0L<<24) #define BCE_HC_CONFIG_SB_ADDR_INC_128B (1L<<24) #define BCE_HC_CONFIG_SB_ADDR_INC_256B (2L<<24) #define BCE_HC_CONFIG_SB_ADDR_INC_512B (3L<<24) -#define BCE_HC_CONFIG_SB_ADDR_INC_1024B (4L<<24) -#define BCE_HC_CONFIG_SB_ADDR_INC_2048B (5L<<24) -#define BCE_HC_CONFIG_SB_ADDR_INC_4096B (6L<<24) -#define 
BCE_HC_CONFIG_SB_ADDR_INC_8192B (7L<<24) -#define BCE_HC_CONFIG_GEN_STAT_AVG_INTR (1L<<29) +#define BCE_HC_CONFIG_SB_ADDR_INC_1024B (4L<<24) +#define BCE_HC_CONFIG_SB_ADDR_INC_2048B (5L<<24) +#define BCE_HC_CONFIG_SB_ADDR_INC_4096B (6L<<24) +#define BCE_HC_CONFIG_SB_ADDR_INC_8192B (7L<<24) +#define BCE_HC_CONFIG_GEN_STAT_AVG_INTR (1L<<29) #define BCE_HC_CONFIG_UNMASK_ALL (1L<<30) #define BCE_HC_CONFIG_TX_SEL (1L<<31) -#define BCE_HC_ATTN_BITS_ENABLE 0x0000680c +#define BCE_HC_ATTN_BITS_ENABLE 0x0000680c #define BCE_HC_STATUS_ADDR_L 0x00006810 #define BCE_HC_STATUS_ADDR_H 0x00006814 #define BCE_HC_STATISTICS_ADDR_L 0x00006818 #define BCE_HC_STATISTICS_ADDR_H 0x0000681c #define BCE_HC_TX_QUICK_CONS_TRIP 0x00006820 -#define BCE_HC_TX_QUICK_CONS_TRIP_VALUE (0xffL<<0) +#define BCE_HC_TX_QUICK_CONS_TRIP_VALUE (0xffL<<0) #define BCE_HC_TX_QUICK_CONS_TRIP_INT (0xffL<<16) #define BCE_HC_COMP_PROD_TRIP 0x00006824 @@ -4818,14 +4818,14 @@ struct l2_fhdr { #define BCE_HC_COMP_PROD_TRIP_INT (0xffL<<16) #define BCE_HC_RX_QUICK_CONS_TRIP 0x00006828 -#define BCE_HC_RX_QUICK_CONS_TRIP_VALUE (0xffL<<0) +#define BCE_HC_RX_QUICK_CONS_TRIP_VALUE (0xffL<<0) #define BCE_HC_RX_QUICK_CONS_TRIP_INT (0xffL<<16) -#define BCE_HC_RX_TICKS 0x0000682c +#define BCE_HC_RX_TICKS 0x0000682c #define BCE_HC_RX_TICKS_VALUE (0x3ffL<<0) #define BCE_HC_RX_TICKS_INT (0x3ffL<<16) -#define BCE_HC_TX_TICKS 0x00006830 +#define BCE_HC_TX_TICKS 0x00006830 #define BCE_HC_TX_TICKS_VALUE (0x3ffL<<0) #define BCE_HC_TX_TICKS_INT (0x3ffL<<16) @@ -4838,7 +4838,7 @@ struct l2_fhdr { #define BCE_HC_CMD_TICKS_INT (0x3ffL<<16) #define BCE_HC_PERIODIC_TICKS 0x0000683c -#define BCE_HC_PERIODIC_TICKS_HC_PERIODIC_TICKS (0xffffL<<0) +#define BCE_HC_PERIODIC_TICKS_HC_PERIODIC_TICKS (0xffffL<<0) #define BCE_HC_PERIODIC_TICKS_HC_INT_PERIODIC_TICKS (0xffffL<<16) #define BCE_HC_STAT_COLLECT_TICKS 0x00006840 @@ -4848,12 +4848,12 @@ struct l2_fhdr { #define BCE_HC_STATS_TICKS_HC_STAT_TICKS (0xffffL<<8) #define 
BCE_HC_STATS_INTERRUPT_STATUS 0x00006848 -#define BCE_HC_STATS_INTERRUPT_STATUS_SB_STATUS (0x1ffL<<0) +#define BCE_HC_STATS_INTERRUPT_STATUS_SB_STATUS (0x1ffL<<0) #define BCE_HC_STATS_INTERRUPT_STATUS_INT_STATUS (0x1ffL<<16) #define BCE_HC_STAT_MEM_DATA 0x0000684c #define BCE_HC_STAT_GEN_SEL_0 0x00006850 -#define BCE_HC_STAT_GEN_SEL_0_GEN_SEL_0 (0x7fL<<0) +#define BCE_HC_STAT_GEN_SEL_0_GEN_SEL_0 (0x7fL<<0) #define BCE_HC_STAT_GEN_SEL_0_GEN_SEL_0_RXP_STAT0 (0L<<0) #define BCE_HC_STAT_GEN_SEL_0_GEN_SEL_0_RXP_STAT1 (1L<<0) #define BCE_HC_STAT_GEN_SEL_0_GEN_SEL_0_RXP_STAT2 (2L<<0) @@ -4921,9 +4921,9 @@ struct l2_fhdr { #define BCE_HC_STAT_GEN_SEL_0_GEN_SEL_0_RLUPQ_VALID_CNT (66L<<0) #define BCE_HC_STAT_GEN_SEL_0_GEN_SEL_0_RXPQ_VALID_CNT (67L<<0) #define BCE_HC_STAT_GEN_SEL_0_GEN_SEL_0_RXPCQ_VALID_CNT (68L<<0) -#define BCE_HC_STAT_GEN_SEL_0_GEN_SEL_0_RV2PPQ_VALID_CNT (69L<<0) -#define BCE_HC_STAT_GEN_SEL_0_GEN_SEL_0_RV2PMQ_VALID_CNT (70L<<0) -#define BCE_HC_STAT_GEN_SEL_0_GEN_SEL_0_RV2PTQ_VALID_CNT (71L<<0) +#define BCE_HC_STAT_GEN_SEL_0_GEN_SEL_0_RV2PPQ_VALID_CNT (69L<<0) +#define BCE_HC_STAT_GEN_SEL_0_GEN_SEL_0_RV2PMQ_VALID_CNT (70L<<0) +#define BCE_HC_STAT_GEN_SEL_0_GEN_SEL_0_RV2PTQ_VALID_CNT (71L<<0) #define BCE_HC_STAT_GEN_SEL_0_GEN_SEL_0_RDMAQ_VALID_CNT (72L<<0) #define BCE_HC_STAT_GEN_SEL_0_GEN_SEL_0_TSCHQ_VALID_CNT (73L<<0) #define BCE_HC_STAT_GEN_SEL_0_GEN_SEL_0_TBDRQ_VALID_CNT (74L<<0) @@ -6006,7 +6006,7 @@ struct l2_fhdr { #define BCE_TAS_FTQ_CTL_MAX_DEPTH (0x3ffL<<12) #define BCE_TAS_FTQ_CTL_CUR_DEPTH (0x3ffL<<22) - + /* * mcp_reg definition * offset: 0x140000 @@ -6372,7 +6372,7 @@ struct fw_info { struct bce_softc { - /* Interface info */ + /* Interface info. Must be first!! */ struct ifnet *bce_ifp; /* Parent device handle */ @@ -6403,167 +6403,168 @@ struct bce_softc /* Interrupt handler. */ driver_intr_t *bce_intr; - void *bce_intrhand; - int bce_irq_rid; - int bce_msi_count; + void *bce_intrhand; + int bce_irq_rid; + int bce_msi_count; /* ASIC Chip ID. 
*/ - u32 bce_chipid; + u32 bce_chipid; /* General controller flags. */ - u32 bce_flags; -#define BCE_PCIX_FLAG 0x00000001 -#define BCE_PCI_32BIT_FLAG 0x00000002 -#define BCE_RESERVED_FLAG 0x00000004 -#define BCE_NO_WOL_FLAG 0x00000008 -#define BCE_USING_DAC_FLAG 0x00000010 -#define BCE_USING_MSI_FLAG 0x00000020 -#define BCE_MFW_ENABLE_FLAG 0x00000040 -#define BCE_ONE_SHOT_MSI_FLAG 0x00000080 -#define BCE_USING_MSIX_FLAG 0x00000100 -#define BCE_PCIE_FLAG 0x00000200 + u32 bce_flags; +#define BCE_PCIX_FLAG 0x00000001 +#define BCE_PCI_32BIT_FLAG 0x00000002 +#define BCE_RESERVED_FLAG 0x00000004 +#define BCE_NO_WOL_FLAG 0x00000008 +#define BCE_USING_DAC_FLAG 0x00000010 +#define BCE_USING_MSI_FLAG 0x00000020 +#define BCE_MFW_ENABLE_FLAG 0x00000040 +#define BCE_ONE_SHOT_MSI_FLAG 0x00000080 +#define BCE_USING_MSIX_FLAG 0x00000100 +#define BCE_PCIE_FLAG 0x00000200 +#define BCE_USING_TX_FLOW_CONTROL 0x00000400 /* Controller capability flags. */ - u32 bce_cap_flags; -#define BCE_MSI_CAPABLE_FLAG 0x00000001 -#define BCE_MSIX_CAPABLE_FLAG 0x00000002 -#define BCE_PCIE_CAPABLE_FLAG 0x00000004 -#define BCE_PCIX_CAPABLE_FLAG 0x00000008 + u32 bce_cap_flags; +#define BCE_MSI_CAPABLE_FLAG 0x00000001 +#define BCE_MSIX_CAPABLE_FLAG 0x00000002 +#define BCE_PCIE_CAPABLE_FLAG 0x00000004 +#define BCE_PCIX_CAPABLE_FLAG 0x00000008 /* PHY specific flags. 
*/ - u32 bce_phy_flags; -#define BCE_PHY_SERDES_FLAG 0x00000001 -#define BCE_PHY_CRC_FIX_FLAG 0x00000002 -#define BCE_PHY_PARALLEL_DETECT_FLAG 0x00000004 -#define BCE_PHY_2_5G_CAPABLE_FLAG 0x00000008 -#define BCE_PHY_INT_MODE_MASK_FLAG 0x00000300 -#define BCE_PHY_INT_MODE_AUTO_POLLING_FLAG 0x00000100 -#define BCE_PHY_INT_MODE_LINK_READY_FLAG 0x00000200 -#define BCE_PHY_IEEE_CLAUSE_45_FLAG 0x00000400 + u32 bce_phy_flags; +#define BCE_PHY_SERDES_FLAG 0x00000001 +#define BCE_PHY_CRC_FIX_FLAG 0x00000002 +#define BCE_PHY_PARALLEL_DETECT_FLAG 0x00000004 +#define BCE_PHY_2_5G_CAPABLE_FLAG 0x00000008 +#define BCE_PHY_INT_MODE_MASK_FLAG 0x00000300 +#define BCE_PHY_INT_MODE_AUTO_POLLING_FLAG 0x00000100 +#define BCE_PHY_INT_MODE_LINK_READY_FLAG 0x00000200 +#define BCE_PHY_IEEE_CLAUSE_45_FLAG 0x00000400 /* Values that need to be shared with the PHY driver. */ - u32 bce_shared_hw_cfg; - u32 bce_port_hw_cfg; + u32 bce_shared_hw_cfg; + u32 bce_port_hw_cfg; - bus_addr_t max_bus_addr; + bus_addr_t max_bus_addr; /* PCI bus speed */ - u16 bus_speed_mhz; + u16 bus_speed_mhz; /* PCIe link width */ - u16 link_width; + u16 link_width; /* PCIe link speed */ - u16 link_speed; + u16 link_speed; /* Flash NVRAM settings */ - struct flash_spec *bce_flash_info; + struct flash_spec *bce_flash_info; /* Flash NVRAM size */ - u32 bce_flash_size; + u32 bce_flash_size; /* Shared Memory base address */ - u32 bce_shmem_base; + u32 bce_shmem_base; /* Name string */ - char * bce_name; + char *bce_name; /* Tracks the version of bootcode firmware. */ - char bce_bc_ver[32]; + char bce_bc_ver[32]; /* Tracks the version of management firmware. */ - char bce_mfw_ver[32]; + char bce_mfw_ver[32]; - /* + /* * Tracks the state of the firmware. 0 = Running while any * other value indicates that the firmware is not responding. */ - u16 bce_fw_timed_out; + u16 bce_fw_timed_out; - /* + /* * An incrementing sequence used to coordinate messages passed * from the driver to the firmware. 
- */ - u16 bce_fw_wr_seq; + */ + u16 bce_fw_wr_seq; - /* + /* * An incrementing sequence used to let the firmware know that * the driver is still operating. Without the pulse, management * firmware such as IPMI or UMP will operate in OS absent state. - */ - u16 bce_fw_drv_pulse_wr_seq; + */ + u16 bce_fw_drv_pulse_wr_seq; /* Tracks whether firmware has lost the driver's pulse. */ - u16 bce_drv_cardiac_arrest; + u16 bce_drv_cardiac_arrest; /* Ethernet MAC address. */ - u_char eaddr[6]; + u_char eaddr[6]; - /* + /* * These setting are used by the host coalescing (HC) block to * to control how often the status block, statistics block and * interrupts are generated. - */ - u16 bce_tx_quick_cons_trip_int; - u16 bce_tx_quick_cons_trip; - u16 bce_rx_quick_cons_trip_int; - u16 bce_rx_quick_cons_trip; - u16 bce_tx_ticks_int; - u16 bce_tx_ticks; - u16 bce_rx_ticks_int; - u16 bce_rx_ticks; - u32 bce_stats_ticks; + */ + u16 bce_tx_quick_cons_trip_int; + u16 bce_tx_quick_cons_trip; + u16 bce_rx_quick_cons_trip_int; + u16 bce_rx_quick_cons_trip; + u16 bce_tx_ticks_int; + u16 bce_tx_ticks; + u16 bce_rx_ticks_int; + u16 bce_rx_ticks; + u32 bce_stats_ticks; /* ToDo: Can these be removed? */ - u16 bce_comp_prod_trip_int; - u16 bce_comp_prod_trip; - u16 bce_com_ticks_int; - u16 bce_com_ticks; - u16 bce_cmd_ticks_int; - u16 bce_cmd_ticks; + u16 bce_comp_prod_trip_int; + u16 bce_comp_prod_trip; + u16 bce_com_ticks_int; + u16 bce_com_ticks; + u16 bce_cmd_ticks_int; + u16 bce_cmd_ticks; /* The address of the integrated PHY on the MII bus. */ - int bce_phy_addr; + int bce_phy_addr; /* The device handle for the MII bus child device. */ - device_t bce_miibus; + device_t bce_miibus; /* Driver maintained TX chain pointers and byte counter. */ - u16 rx_prod; - u16 rx_cons; + u16 rx_prod; + u16 rx_cons; /* Counts the bytes used in the RX chain. */ - u32 rx_prod_bseq; - u16 tx_prod; - u16 tx_cons; + u32 rx_prod_bseq; + u16 tx_prod; + u16 tx_cons; /* Counts the bytes used in the TX chain. 
*/ - u32 tx_prod_bseq; + u32 tx_prod_bseq; #ifdef BCE_JUMBO_HDRSPLIT - u16 pg_prod; - u16 pg_cons; + u16 pg_prod; + u16 pg_cons; #endif - int bce_link_up; - struct callout bce_tick_callout; - struct callout bce_pulse_callout; + int bce_link_up; + struct callout bce_tick_callout; + struct callout bce_pulse_callout; /* Ticks until chip reset */ - int watchdog_timer; + int watchdog_timer; /* Frame size and mbuf allocation size for RX frames. */ - u32 max_frame_size; - int rx_bd_mbuf_alloc_size; - int rx_bd_mbuf_data_len; - int rx_bd_mbuf_align_pad; + u32 max_frame_size; + int rx_bd_mbuf_alloc_size; + int rx_bd_mbuf_data_len; + int rx_bd_mbuf_align_pad; #ifdef BCE_JUMBO_HDRSPLIT - int pg_bd_mbuf_alloc_size; + int pg_bd_mbuf_alloc_size; #endif /* Receive mode settings (i.e promiscuous, multicast, etc.). */ - u32 rx_mode; + u32 rx_mode; /* Bus tag for the bce controller. */ bus_dma_tag_t parent_tag; @@ -6595,9 +6596,9 @@ struct bce_softc bus_addr_t status_block_paddr; /* Driver maintained status block values. */ - u16 last_status_idx; - u16 hw_rx_cons; - u16 hw_tx_cons; + u16 last_status_idx; + u16 hw_rx_cons; + u16 hw_tx_cons; /* H/W maintained statistics block. */ bus_dma_tag_t stats_tag; @@ -6606,7 +6607,7 @@ struct bce_softc bus_addr_t stats_block_paddr; /* H/W maintained context block. */ - int ctx_pages; + int ctx_pages; bus_dma_tag_t ctx_tag; /* BCM5709/16 use host memory for context. */ @@ -6637,139 +6638,156 @@ struct bce_softc #endif /* Track the number of buffer descriptors in use. */ - u16 free_rx_bd; - u16 max_rx_bd; - u16 used_tx_bd; - u16 max_tx_bd; + u16 free_rx_bd; + u16 max_rx_bd; + u16 used_tx_bd; + u16 max_tx_bd; #ifdef BCE_JUMBO_HDRSPLIT - u16 free_pg_bd; - u16 max_pg_bd; + u16 free_pg_bd; + u16 max_pg_bd; #endif /* Provides access to hardware statistics through sysctl. 
*/ - u64 stat_IfHCInOctets; - u64 stat_IfHCInBadOctets; - u64 stat_IfHCOutOctets; - u64 stat_IfHCOutBadOctets; - u64 stat_IfHCInUcastPkts; - u64 stat_IfHCInMulticastPkts; - u64 stat_IfHCInBroadcastPkts; - u64 stat_IfHCOutUcastPkts; - u64 stat_IfHCOutMulticastPkts; - u64 stat_IfHCOutBroadcastPkts; + u64 stat_IfHCInOctets; + u64 stat_IfHCInBadOctets; + u64 stat_IfHCOutOctets; + u64 stat_IfHCOutBadOctets; + u64 stat_IfHCInUcastPkts; + u64 stat_IfHCInMulticastPkts; + u64 stat_IfHCInBroadcastPkts; + u64 stat_IfHCOutUcastPkts; + u64 stat_IfHCOutMulticastPkts; + u64 stat_IfHCOutBroadcastPkts; - u32 stat_emac_tx_stat_dot3statsinternalmactransmiterrors; - u32 stat_Dot3StatsCarrierSenseErrors; - u32 stat_Dot3StatsFCSErrors; - u32 stat_Dot3StatsAlignmentErrors; - u32 stat_Dot3StatsSingleCollisionFrames; - u32 stat_Dot3StatsMultipleCollisionFrames; - u32 stat_Dot3StatsDeferredTransmissions; - u32 stat_Dot3StatsExcessiveCollisions; - u32 stat_Dot3StatsLateCollisions; - u32 stat_EtherStatsCollisions; - u32 stat_EtherStatsFragments; - u32 stat_EtherStatsJabbers; - u32 stat_EtherStatsUndersizePkts; - u32 stat_EtherStatsOversizePkts; - u32 stat_EtherStatsPktsRx64Octets; - u32 stat_EtherStatsPktsRx65Octetsto127Octets; - u32 stat_EtherStatsPktsRx128Octetsto255Octets; - u32 stat_EtherStatsPktsRx256Octetsto511Octets; - u32 stat_EtherStatsPktsRx512Octetsto1023Octets; - u32 stat_EtherStatsPktsRx1024Octetsto1522Octets; - u32 stat_EtherStatsPktsRx1523Octetsto9022Octets; - u32 stat_EtherStatsPktsTx64Octets; - u32 stat_EtherStatsPktsTx65Octetsto127Octets; - u32 stat_EtherStatsPktsTx128Octetsto255Octets; - u32 stat_EtherStatsPktsTx256Octetsto511Octets; - u32 stat_EtherStatsPktsTx512Octetsto1023Octets; - u32 stat_EtherStatsPktsTx1024Octetsto1522Octets; - u32 stat_EtherStatsPktsTx1523Octetsto9022Octets; - u32 stat_XonPauseFramesReceived; - u32 stat_XoffPauseFramesReceived; - u32 stat_OutXonSent; - u32 stat_OutXoffSent; - u32 stat_FlowControlDone; - u32 stat_MacControlFramesReceived; - u32 
stat_XoffStateEntered; - u32 stat_IfInFramesL2FilterDiscards; - u32 stat_IfInRuleCheckerDiscards; - u32 stat_IfInFTQDiscards; - u32 stat_IfInMBUFDiscards; - u32 stat_IfInRuleCheckerP4Hit; - u32 stat_CatchupInRuleCheckerDiscards; - u32 stat_CatchupInFTQDiscards; - u32 stat_CatchupInMBUFDiscards; - u32 stat_CatchupInRuleCheckerP4Hit; + u32 stat_emac_tx_stat_dot3statsinternalmactransmiterrors; + u32 stat_Dot3StatsCarrierSenseErrors; + u32 stat_Dot3StatsFCSErrors; + u32 stat_Dot3StatsAlignmentErrors; + u32 stat_Dot3StatsSingleCollisionFrames; + u32 stat_Dot3StatsMultipleCollisionFrames; + u32 stat_Dot3StatsDeferredTransmissions; + u32 stat_Dot3StatsExcessiveCollisions; + u32 stat_Dot3StatsLateCollisions; + u32 stat_EtherStatsCollisions; + u32 stat_EtherStatsFragments; + u32 stat_EtherStatsJabbers; + u32 stat_EtherStatsUndersizePkts; + u32 stat_EtherStatsOversizePkts; + u32 stat_EtherStatsPktsRx64Octets; + u32 stat_EtherStatsPktsRx65Octetsto127Octets; + u32 stat_EtherStatsPktsRx128Octetsto255Octets; + u32 stat_EtherStatsPktsRx256Octetsto511Octets; + u32 stat_EtherStatsPktsRx512Octetsto1023Octets; + u32 stat_EtherStatsPktsRx1024Octetsto1522Octets; + u32 stat_EtherStatsPktsRx1523Octetsto9022Octets; + u32 stat_EtherStatsPktsTx64Octets; + u32 stat_EtherStatsPktsTx65Octetsto127Octets; + u32 stat_EtherStatsPktsTx128Octetsto255Octets; + u32 stat_EtherStatsPktsTx256Octetsto511Octets; + u32 stat_EtherStatsPktsTx512Octetsto1023Octets; + u32 stat_EtherStatsPktsTx1024Octetsto1522Octets; + u32 stat_EtherStatsPktsTx1523Octetsto9022Octets; + u32 stat_XonPauseFramesReceived; + u32 stat_XoffPauseFramesReceived; + u32 stat_OutXonSent; + u32 stat_OutXoffSent; + u32 stat_FlowControlDone; + u32 stat_MacControlFramesReceived; + u32 stat_XoffStateEntered; + u32 stat_IfInFramesL2FilterDiscards; + u32 stat_IfInRuleCheckerDiscards; + u32 stat_IfInFTQDiscards; + u32 stat_IfInMBUFDiscards; + u32 stat_IfInRuleCheckerP4Hit; + u32 stat_CatchupInRuleCheckerDiscards; + u32 stat_CatchupInFTQDiscards; + 
u32 stat_CatchupInMBUFDiscards; + u32 stat_CatchupInRuleCheckerP4Hit; /* Provides access to certain firmware statistics. */ - u32 com_no_buffers; + u32 com_no_buffers; /* Recoverable failure counters. */ - u32 mbuf_alloc_failed_count; - u32 fragmented_mbuf_count; - u32 unexpected_attention_count; - u32 l2fhdr_error_count; - u32 dma_map_addr_tx_failed_count; - u32 dma_map_addr_rx_failed_count; + u32 mbuf_alloc_failed_count; + u32 mbuf_frag_count; + u32 unexpected_attention_count; + u32 l2fhdr_error_count; + u32 dma_map_addr_tx_failed_count; + u32 dma_map_addr_rx_failed_count; /* Host coalescing block command register */ - u32 hc_command; + u32 hc_command; /* Bootcode state */ - u32 bc_state; + u32 bc_state; #ifdef BCE_DEBUG /* Simulated recoverable failure counters. */ - u32 mbuf_alloc_failed_sim_count; - u32 unexpected_attention_sim_count; - u32 l2fhdr_error_sim_count; - u32 dma_map_addr_failed_sim_count; + u32 mbuf_alloc_failed_sim_count; + u32 unexpected_attention_sim_count; + u32 l2fhdr_error_sim_count; + u32 dma_map_addr_failed_sim_count; /* Track the number of enqueued mbufs. */ - int debug_tx_mbuf_alloc; - int debug_rx_mbuf_alloc; + int debug_tx_mbuf_alloc; + int debug_rx_mbuf_alloc; #ifdef BCE_JUMBO_HDRSPLIT - int debug_pg_mbuf_alloc; + int debug_pg_mbuf_alloc; #endif /* Track how many and what type of interrupts are generated. */ - u32 interrupts_generated; - u32 interrupts_handled; - u32 rx_interrupts; - u32 tx_interrupts; - u32 phy_interrupts; + u32 interrupts_generated; + u32 interrupts_handled; + u32 interrupts_rx; + u32 interrupts_tx; + u32 phy_interrupts; /* Track interrupt time (25MHz clock). */ - u64 rx_intr_time; - u64 tx_intr_time; + u64 rx_intr_time; + u64 tx_intr_time; /* Lowest number of rx_bd's free. */ - u32 rx_low_watermark; + u32 rx_low_watermark; /* Number of times the RX chain was empty. */ - u32 rx_empty_count; + u32 rx_empty_count; #ifdef BCE_JUMBO_HDRSPLIT - /* Lowest number of pages free. 
*/ - u32 pg_low_watermark; + u32 pg_low_watermark; /* Number of times the page chain was empty. */ - u32 pg_empty_count; + u32 pg_empty_count; #endif /* Greatest number of tx_bd's used. */ - u32 tx_hi_watermark; + u32 tx_hi_watermark; /* Number of times the TX chain was full. */ - u32 tx_full_count; + u32 tx_full_count; - /* Number of TSO frames enqueued. */ - u32 requested_tso_frames; + /* Number of TSO frames requested. */ + u32 tso_frames_requested; + + /* Number of TSO frames completed. */ + u32 tso_frames_completed; + + /* Number of TSO frames failed. */ + u32 tso_frames_failed; + + /* Number of IP checksum offload frames.*/ + u32 csum_offload_ip; + + /* Number of TCP/UDP checksum offload frames.*/ + u32 csum_offload_tcp_udp; + + /* Number of VLAN tagged frames received. */ + u32 vlan_tagged_frames_rcvd; + + /* Number of VLAN tagged frames stripped. */ + u32 vlan_tagged_frames_stripped; #endif }; diff --git a/sys/dev/bge/if_bge.c b/sys/dev/bge/if_bge.c index 4333f2044b7..5a51d7988ea 100644 --- a/sys/dev/bge/if_bge.c +++ b/sys/dev/bge/if_bge.c @@ -519,7 +519,7 @@ bge_has_eaddr(struct bge_softc *sc) */ if (OF_getprop(ofw_bus_get_node(dev), SPARC64_OFW_SUBVENDOR, &subvendor, sizeof(subvendor)) == sizeof(subvendor) && - subvendor == SUN_VENDORID) + (subvendor == FJTSU_VENDORID || subvendor == SUN_VENDORID)) return (0); memset(buf, 0, sizeof(buf)); if (OF_package_to_path(ofw_bus_get_node(dev), buf, sizeof(buf)) > 0) { diff --git a/sys/dev/bwi/if_bwi.c b/sys/dev/bwi/if_bwi.c index 96c5cc6d3e5..724778d192c 100644 --- a/sys/dev/bwi/if_bwi.c +++ b/sys/dev/bwi/if_bwi.c @@ -461,8 +461,8 @@ bwi_attach(struct bwi_softc *sc) ifp->if_init = bwi_init; ifp->if_ioctl = bwi_ioctl; ifp->if_start = bwi_start; - IFQ_SET_MAXLEN(&ifp->if_snd, IFQ_MAXLEN); - ifp->if_snd.ifq_drv_maxlen = IFQ_MAXLEN; + IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen); + ifp->if_snd.ifq_drv_maxlen = ifqmaxlen; IFQ_SET_READY(&ifp->if_snd); callout_init_mtx(&sc->sc_watchdog_timer, &sc->sc_mtx, 0); diff --git 
a/sys/dev/bwn/if_bwn.c b/sys/dev/bwn/if_bwn.c index 983f9972d60..fd364035d3c 100644 --- a/sys/dev/bwn/if_bwn.c +++ b/sys/dev/bwn/if_bwn.c @@ -1193,8 +1193,8 @@ bwn_attach_pre(struct bwn_softc *sc) ifp->if_init = bwn_init; ifp->if_ioctl = bwn_ioctl; ifp->if_start = bwn_start; - IFQ_SET_MAXLEN(&ifp->if_snd, IFQ_MAXLEN); - ifp->if_snd.ifq_drv_maxlen = IFQ_MAXLEN; + IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen); + ifp->if_snd.ifq_drv_maxlen = ifqmaxlen; IFQ_SET_READY(&ifp->if_snd); return (0); @@ -9368,6 +9368,8 @@ bwn_rxeof(struct bwn_mac *mac, struct mbuf *m, const void *_rxhdr) rssi = rxhdr->phy.abg.rssi; /* XXX incorrect RSSI calculation? */ noise = mac->mac_stats.link_noise; + ifp->if_ipackets++; + BWN_UNLOCK(sc); ni = ieee80211_find_rxnode(ic, wh); diff --git a/sys/dev/cas/if_cas.c b/sys/dev/cas/if_cas.c index edcfec47cdd..3522f7ca3b7 100644 --- a/sys/dev/cas/if_cas.c +++ b/sys/dev/cas/if_cas.c @@ -76,6 +76,7 @@ __FBSDID("$FreeBSD$"); #include #if defined(__powerpc__) || defined(__sparc64__) +#include #include #include #endif @@ -321,55 +322,82 @@ cas_attach(struct cas_softc *sc) } } - CAS_WRITE_4(sc, CAS_PCS_DATAPATH, CAS_PCS_DATAPATH_MII); - - cas_mifinit(sc); - - /* - * Look for an external PHY. - */ - error = ENXIO; - v = CAS_READ_4(sc, CAS_MIF_CONF); - if ((v & CAS_MIF_CONF_MDI1) != 0) { - v |= CAS_MIF_CONF_PHY_SELECT; - CAS_WRITE_4(sc, CAS_MIF_CONF, v); - switch (sc->sc_variant) { - default: - sc->sc_phyad = -1; - break; + if ((sc->sc_flags & CAS_SERDES) == 0) { + CAS_WRITE_4(sc, CAS_PCS_DATAPATH, CAS_PCS_DATAPATH_MII); + CAS_BARRIER(sc, CAS_PCS_DATAPATH, 4, + BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); + cas_mifinit(sc); + /* + * Look for an external PHY. + */ + error = ENXIO; + v = CAS_READ_4(sc, CAS_MIF_CONF); + if ((v & CAS_MIF_CONF_MDI1) != 0) { + v |= CAS_MIF_CONF_PHY_SELECT; + CAS_WRITE_4(sc, CAS_MIF_CONF, v); + CAS_BARRIER(sc, CAS_MIF_CONF, 4, + BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); + /* Enable/unfreeze the GMII pins of Saturn. 
*/ + if (sc->sc_variant == CAS_SATURN) { + CAS_WRITE_4(sc, CAS_SATURN_PCFG, 0); + CAS_BARRIER(sc, CAS_SATURN_PCFG, 4, + BUS_SPACE_BARRIER_READ | + BUS_SPACE_BARRIER_WRITE); + } + switch (sc->sc_variant) { + default: + sc->sc_phyad = -1; + break; + } + error = mii_phy_probe(sc->sc_dev, &sc->sc_miibus, + cas_mediachange, cas_mediastatus); } - error = mii_phy_probe(sc->sc_dev, &sc->sc_miibus, - cas_mediachange, cas_mediastatus); - } - - /* - * Fall back on an internal PHY if no external PHY was found. - */ - if (error != 0 && (v & CAS_MIF_CONF_MDI0) != 0) { - v &= ~CAS_MIF_CONF_PHY_SELECT; - CAS_WRITE_4(sc, CAS_MIF_CONF, v); - switch (sc->sc_variant) { - default: - sc->sc_phyad = -1; - break; + /* + * Fall back on an internal PHY if no external PHY was found. + */ + if (error != 0 && (v & CAS_MIF_CONF_MDI0) != 0) { + v &= ~CAS_MIF_CONF_PHY_SELECT; + CAS_WRITE_4(sc, CAS_MIF_CONF, v); + CAS_BARRIER(sc, CAS_MIF_CONF, 4, + BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); + /* Freeze the GMII pins of Saturn for saving power. */ + if (sc->sc_variant == CAS_SATURN) { + CAS_WRITE_4(sc, CAS_SATURN_PCFG, + CAS_SATURN_PCFG_FSI); + CAS_BARRIER(sc, CAS_SATURN_PCFG, 4, + BUS_SPACE_BARRIER_READ | + BUS_SPACE_BARRIER_WRITE); + } + switch (sc->sc_variant) { + default: + sc->sc_phyad = -1; + break; + } + error = mii_phy_probe(sc->sc_dev, &sc->sc_miibus, + cas_mediachange, cas_mediastatus); } - error = mii_phy_probe(sc->sc_dev, &sc->sc_miibus, - cas_mediachange, cas_mediastatus); - } - - /* - * Try the external PCS SERDES if we didn't find any PHYs. - */ - if (error != 0) { + } else { + /* + * Use the external PCS SERDES. + */ CAS_WRITE_4(sc, CAS_PCS_DATAPATH, CAS_PCS_DATAPATH_SERDES); + CAS_BARRIER(sc, CAS_PCS_DATAPATH, 4, BUS_SPACE_BARRIER_WRITE); + /* Enable/unfreeze the SERDES pins of Saturn. 
*/ + if (sc->sc_variant == CAS_SATURN) { + CAS_WRITE_4(sc, CAS_SATURN_PCFG, 0); + CAS_BARRIER(sc, CAS_SATURN_PCFG, 4, + BUS_SPACE_BARRIER_WRITE); + } CAS_WRITE_4(sc, CAS_PCS_SERDES_CTRL, CAS_PCS_SERDES_CTRL_ESD); - CAS_WRITE_4(sc, CAS_PCS_CONF_EN, CAS_PCS_CONF_EN); - sc->sc_flags |= CAS_SERDES; + CAS_BARRIER(sc, CAS_PCS_SERDES_CTRL, 4, + BUS_SPACE_BARRIER_WRITE); + CAS_WRITE_4(sc, CAS_PCS_CONF, CAS_PCS_CONF_EN); + CAS_BARRIER(sc, CAS_PCS_CONF, 4, + BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); sc->sc_phyad = CAS_PHYAD_EXTERNAL; error = mii_phy_probe(sc->sc_dev, &sc->sc_miibus, cas_mediachange, cas_mediastatus); } - if (error != 0) { device_printf(sc->sc_dev, "PHY probe failed: %d\n", error); goto fail_rxmap; @@ -956,8 +984,9 @@ cas_init_locked(struct cas_softc *sc) __func__); #endif - /* Re-initialize the MIF. */ - cas_mifinit(sc); + if ((sc->sc_flags & CAS_SERDES) == 0) + /* Re-initialize the MIF. */ + cas_mifinit(sc); /* step 3. Setup data structures in host memory. */ cas_meminit(sc); @@ -2105,6 +2134,8 @@ cas_mifinit(struct cas_softc *sc) /* Configure the MIF in frame mode. 
*/ CAS_WRITE_4(sc, CAS_MIF_CONF, CAS_READ_4(sc, CAS_MIF_CONF) & ~CAS_MIF_CONF_BB_MODE); + CAS_BARRIER(sc, CAS_MIF_CONF, 4, + BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); } /* @@ -2219,10 +2250,16 @@ cas_mii_writereg(device_t dev, int phy, int reg, int val) CAS_BARRIER(sc, CAS_PCS_CONF, 4, BUS_SPACE_BARRIER_WRITE); CAS_WRITE_4(sc, CAS_PCS_ANAR, val); + CAS_BARRIER(sc, CAS_PCS_ANAR, 4, + BUS_SPACE_BARRIER_WRITE); CAS_WRITE_4(sc, CAS_PCS_SERDES_CTRL, CAS_PCS_SERDES_CTRL_ESD); + CAS_BARRIER(sc, CAS_PCS_CONF, 4, + BUS_SPACE_BARRIER_WRITE); CAS_WRITE_4(sc, CAS_PCS_CONF, CAS_PCS_CONF_EN); + CAS_BARRIER(sc, CAS_PCS_CONF, 4, + BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); return (0); case MII_ANLPAR: reg = CAS_PCS_ANLPAR; @@ -2233,6 +2270,8 @@ cas_mii_writereg(device_t dev, int phy, int reg, int val) return (0); } CAS_WRITE_4(sc, reg, val); + CAS_BARRIER(sc, reg, 4, + BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); return (0); } @@ -2630,15 +2669,20 @@ static struct resource_spec cas_pci_res_spec[] = { { -1, 0 } }; +#define CAS_LOCAL_MAC_ADDRESS "local-mac-address" +#define CAS_PHY_INTERFACE "phy-interface" +#define CAS_PHY_TYPE "phy-type" +#define CAS_PHY_TYPE_PCS "pcs" + static int cas_pci_attach(device_t dev) { + char buf[sizeof(CAS_LOCAL_MAC_ADDRESS)]; struct cas_softc *sc; int i; #if !(defined(__powerpc__) || defined(__sparc64__)) u_char enaddr[4][ETHER_ADDR_LEN]; - char lma[sizeof("local-mac-address")]; - int found, j; + u_int j, k, lma, pcs[4], phy; #endif sc = device_get_softc(dev); @@ -2679,13 +2723,20 @@ cas_pci_attach(device_t dev) #if defined(__powerpc__) || defined(__sparc64__) OF_getetheraddr(dev, sc->sc_enaddr); + if (OF_getprop(ofw_bus_get_node(dev), CAS_PHY_INTERFACE, buf, + sizeof(buf)) > 0 || OF_getprop(ofw_bus_get_node(dev), + CAS_PHY_TYPE, buf, sizeof(buf)) > 0) { + buf[sizeof(buf) - 1] = '\0'; + if (strcmp(buf, CAS_PHY_TYPE_PCS) == 0) + sc->sc_flags |= CAS_SERDES; + } #else /* - * Dig out VPD (vital product data) and read the MAX address. 
- * The VPD resides in the PCI Expansion ROM (PCI FCode) and - * can't be accessed via the PCI capability pointer. - * SUNW,pci-ce and SUNW,pci-qge use the Enhanced VPD format - * described in US Patent 7149820. + * Dig out VPD (vital product data) and read the MAC address as well + * as the PHY type. The VPD resides in the PCI Expansion ROM (PCI + * FCode) and can't be accessed via the PCI capability pointer. + * SUNW,pci-ce and SUNW,pci-qge use the Enhanced VPD format described + * in the free US Patent 7149820. */ #define PCI_ROMHDR_SIZE 0x1c @@ -2719,7 +2770,10 @@ cas_pci_attach(device_t dev) #define CAS_ROM_READ_4(sc, offs) \ CAS_READ_4((sc), CAS_PCI_ROM_OFFSET + (offs)) - found = 0; + lma = phy = 0; + memset(enaddr, 0, sizeof(enaddr)); + memset(pcs, 0, sizeof(pcs)); + /* Enable PCI Expansion ROM access. */ CAS_WRITE_4(sc, CAS_BIM_LDEV_OEN, CAS_BIM_LDEV_OEN_PAD | CAS_BIM_LDEV_OEN_PROM); @@ -2768,23 +2822,51 @@ cas_pci_attach(device_t dev) if (CAS_ROM_READ_1(sc, j + PCI_VPD_SIZE) != 'I') /* no instance property */ continue; - if (CAS_ROM_READ_1(sc, j + PCI_VPD_SIZE + 3) != 'B') - /* no byte array */ - continue; - if (CAS_ROM_READ_1(sc, j + PCI_VPD_SIZE + 4) != - ETHER_ADDR_LEN) - continue; - bus_read_region_1(sc->sc_res[CAS_RES_MEM], - CAS_PCI_ROM_OFFSET + j + PCI_VPD_SIZE + 5, - lma, sizeof(lma)); - if (strcmp(lma, "local-mac-address") != 0) - continue; - bus_read_region_1(sc->sc_res[CAS_RES_MEM], - CAS_PCI_ROM_OFFSET + j + PCI_VPD_SIZE + 5 + - sizeof(lma), enaddr[found], - sizeof(enaddr[found])); - if (found++ == 4) - break; + if (CAS_ROM_READ_1(sc, j + PCI_VPD_SIZE + 3) == 'B') { + /* byte array */ + if (CAS_ROM_READ_1(sc, + j + PCI_VPD_SIZE + 4) != ETHER_ADDR_LEN) + continue; + bus_read_region_1(sc->sc_res[CAS_RES_MEM], + CAS_PCI_ROM_OFFSET + j + PCI_VPD_SIZE + 5, + buf, sizeof(buf)); + buf[sizeof(buf) - 1] = '\0'; + if (strcmp(buf, CAS_LOCAL_MAC_ADDRESS) != 0) + continue; + bus_read_region_1(sc->sc_res[CAS_RES_MEM], + CAS_PCI_ROM_OFFSET + j + PCI_VPD_SIZE 
+ + 5 + sizeof(CAS_LOCAL_MAC_ADDRESS), + enaddr[lma], sizeof(enaddr[lma])); + lma++; + if (lma == 4 && phy == 4) + break; + } else if (CAS_ROM_READ_1(sc, j + PCI_VPD_SIZE + 3) == + 'S') { + /* string */ + if (CAS_ROM_READ_1(sc, + j + PCI_VPD_SIZE + 4) != + sizeof(CAS_PHY_TYPE_PCS)) + continue; + bus_read_region_1(sc->sc_res[CAS_RES_MEM], + CAS_PCI_ROM_OFFSET + j + PCI_VPD_SIZE + 5, + buf, sizeof(buf)); + buf[sizeof(buf) - 1] = '\0'; + if (strcmp(buf, CAS_PHY_INTERFACE) == 0) + k = sizeof(CAS_PHY_INTERFACE); + else if (strcmp(buf, CAS_PHY_TYPE) == 0) + k = sizeof(CAS_PHY_TYPE); + else + continue; + bus_read_region_1(sc->sc_res[CAS_RES_MEM], + CAS_PCI_ROM_OFFSET + j + PCI_VPD_SIZE + + 5 + k, buf, sizeof(buf)); + buf[sizeof(buf) - 1] = '\0'; + if (strcmp(buf, CAS_PHY_TYPE_PCS) == 0) + pcs[phy] = 1; + phy++; + if (lma == 4 && phy == 4) + break; + } } break; default: @@ -2795,14 +2877,24 @@ cas_pci_attach(device_t dev) fail_prom: CAS_WRITE_4(sc, CAS_BIM_LDEV_OEN, 0); - if (found == 0) { + if (lma == 0) { device_printf(dev, "could not determine Ethernet address\n"); goto fail; } i = 0; - if (found > 1 && pci_get_slot(dev) < sizeof(enaddr) / sizeof(*enaddr)) + if (lma > 1 && pci_get_slot(dev) < sizeof(enaddr) / sizeof(*enaddr)) i = pci_get_slot(dev); memcpy(sc->sc_enaddr, enaddr[i], ETHER_ADDR_LEN); + + if (phy == 0) { + device_printf(dev, "could not determine PHY type\n"); + goto fail; + } + i = 0; + if (phy > 1 && pci_get_slot(dev) < sizeof(pcs) / sizeof(*pcs)) + i = pci_get_slot(dev); + if (pcs[i] != 0) + sc->sc_flags |= CAS_SERDES; #endif if (cas_attach(sc) != 0) { diff --git a/sys/dev/cas/if_casreg.h b/sys/dev/cas/if_casreg.h index 97250e02791..0d1c1d822b9 100644 --- a/sys/dev/cas/if_casreg.h +++ b/sys/dev/cas/if_casreg.h @@ -68,6 +68,7 @@ #define CAS_STATUS4 0x105c /* interrupt status 4 for INTD */ #define CAS_CLEAR_ALIAS4 0x1060 /* clear mask alias 4 for INTD */ #define CAS_STATUS_ALIAS4 0x1064 /* interrupt status alias 4 for INTD */ +#define CAS_SATURN_PCFG 0x106c 
/* internal MACPHY pin configuration */ #define CAS_CAW_RX_WGHT_MASK 0x00000003 /* RX DMA factor for... */ #define CAS_CAW_RX_WGHT_SHFT 0 /* ...weighted round robin */ @@ -171,6 +172,17 @@ /* INTn enable bit for CAS_INTMASK[2-4] */ #define CAS_INTMASKN_EN 0x00000080 /* INT[B-D] enable */ +#define CAS_SATURN_PCFG_TLA 0x00000001 /* PHY activity LED */ +#define CAS_SATURN_PCFG_FLA 0x00000002 /* PHY 10MBit/sec LED */ +#define CAS_SATURN_PCFG_CLA 0x00000004 /* PHY 100MBit/sec LED */ +#define CAS_SATURN_PCFG_LLA 0x00000008 /* PHY 1000MBit/sec LED */ +#define CAS_SATURN_PCFG_RLA 0x00000010 /* PHY full-duplex LED */ +#define CAS_SATURN_PCFG_PDS 0x00000020 /* PHY debug mode */ +#define CAS_SATURN_PCFG_MTP 0x00000080 /* test point select */ +#define CAS_SATURN_PCFG_GMO 0x00000100 /* GMII observe */ +#define CAS_SATURN_PCFG_FSI 0x00000200 /* freeze GMII/SERDES */ +#define CAS_SATURN_PCFG_LAD 0x00000800 /* MAC LED control active low */ + /* TX DMA registers */ #define CAS_TX_CONF 0x2004 /* TX configuration */ #define CAS_TX_FIFO_WR 0x2014 /* FIFO write pointer */ diff --git a/sys/dev/ce/if_ce.c b/sys/dev/ce/if_ce.c index f04fd0916e6..d151b4243a8 100644 --- a/sys/dev/ce/if_ce.c +++ b/sys/dev/ce/if_ce.c @@ -701,9 +701,9 @@ static int ce_attach (device_t dev) #endif continue; } - d->queue.ifq_maxlen = IFQ_MAXLEN; - d->hi_queue.ifq_maxlen = IFQ_MAXLEN; - d->rqueue.ifq_maxlen = IFQ_MAXLEN; + d->queue.ifq_maxlen = ifqmaxlen; + d->hi_queue.ifq_maxlen = ifqmaxlen; + d->rqueue.ifq_maxlen = ifqmaxlen; #if __FreeBSD_version >= 500000 mtx_init (&d->queue.ifq_mtx, "ce_queue", NULL, MTX_DEF); mtx_init (&d->hi_queue.ifq_mtx, "ce_queue_hi", NULL, MTX_DEF); @@ -732,7 +732,7 @@ static int ce_attach (device_t dev) d->ifp->if_ioctl = ce_sioctl; d->ifp->if_start = ce_ifstart; d->ifp->if_init = ce_initialize; - d->rqueue.ifq_maxlen = IFQ_MAXLEN; + d->rqueue.ifq_maxlen = ifqmaxlen; #if __FreeBSD_version >= 500000 mtx_init (&d->rqueue.ifq_mtx, "ce_rqueue", NULL, MTX_DEF); #endif diff --git 
a/sys/dev/ciss/ciss.c b/sys/dev/ciss/ciss.c index 7293bb1682d..2a4fb27ae55 100644 --- a/sys/dev/ciss/ciss.c +++ b/sys/dev/ciss/ciss.c @@ -417,6 +417,7 @@ ciss_attach(device_t dev) sc = device_get_softc(dev); sc->ciss_dev = dev; mtx_init(&sc->ciss_mtx, "cissmtx", NULL, MTX_DEF); + callout_init_mtx(&sc->ciss_periodic, &sc->ciss_mtx, 0); /* * Do PCI-specific init. @@ -429,7 +430,6 @@ ciss_attach(device_t dev) */ ciss_initq_free(sc); ciss_initq_notify(sc); - callout_init_mtx(&sc->ciss_periodic, &sc->ciss_mtx, 0); /* * Initalize device sysctls. diff --git a/sys/dev/cm/smc90cx6.c b/sys/dev/cm/smc90cx6.c index 0e9015ea17c..719e46868a1 100644 --- a/sys/dev/cm/smc90cx6.c +++ b/sys/dev/cm/smc90cx6.c @@ -189,7 +189,7 @@ cm_attach(dev) ifp->if_ioctl = cm_ioctl; ifp->if_init = cm_init; /* XXX IFQ_SET_READY(&ifp->if_snd); */ - ifp->if_snd.ifq_maxlen = IFQ_MAXLEN; + ifp->if_snd.ifq_maxlen = ifqmaxlen; ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX; arc_ifattach(ifp, linkaddress); diff --git a/sys/dev/cp/if_cp.c b/sys/dev/cp/if_cp.c index f6eeed5058e..e6402309846 100644 --- a/sys/dev/cp/if_cp.c +++ b/sys/dev/cp/if_cp.c @@ -505,8 +505,8 @@ static int cp_attach (device_t dev) NG_NODE_UNREF (d->node); continue; } - d->queue.ifq_maxlen = IFQ_MAXLEN; - d->hi_queue.ifq_maxlen = IFQ_MAXLEN; + d->queue.ifq_maxlen = ifqmaxlen; + d->hi_queue.ifq_maxlen = ifqmaxlen; mtx_init (&d->queue.ifq_mtx, "cp_queue", NULL, MTX_DEF); mtx_init (&d->hi_queue.ifq_mtx, "cp_queue_hi", NULL, MTX_DEF); #else /*NETGRAPH*/ diff --git a/sys/dev/cs/if_cs.c b/sys/dev/cs/if_cs.c index b32a03d57cf..476b9ddbb96 100644 --- a/sys/dev/cs/if_cs.c +++ b/sys/dev/cs/if_cs.c @@ -500,7 +500,7 @@ cs_attach(device_t dev) ifp->if_start=cs_start; ifp->if_ioctl=cs_ioctl; ifp->if_init=cs_init; - IFQ_SET_MAXLEN(&ifp->if_snd, IFQ_MAXLEN); + IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen); ifp->if_flags=(IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST); diff --git a/sys/dev/ctau/if_ct.c b/sys/dev/ctau/if_ct.c index 125dfa5ed80..dbdca97411d 100644 --- 
a/sys/dev/ctau/if_ct.c +++ b/sys/dev/ctau/if_ct.c @@ -722,8 +722,8 @@ static int ct_attach (device_t dev) ct_bus_dma_mem_free (&d->dmamem); continue; } - d->queue.ifq_maxlen = IFQ_MAXLEN; - d->hi_queue.ifq_maxlen = IFQ_MAXLEN; + d->queue.ifq_maxlen = ifqmaxlen; + d->hi_queue.ifq_maxlen = ifqmaxlen; mtx_init (&d->queue.ifq_mtx, "ct_queue", NULL, MTX_DEF); mtx_init (&d->hi_queue.ifq_mtx, "ct_queue_hi", NULL, MTX_DEF); #else /*NETGRAPH*/ diff --git a/sys/dev/cx/if_cx.c b/sys/dev/cx/if_cx.c index 4e9750e1483..c8f53a00bec 100644 --- a/sys/dev/cx/if_cx.c +++ b/sys/dev/cx/if_cx.c @@ -831,8 +831,8 @@ static int cx_attach (device_t dev) cx_bus_dma_mem_free (&d->dmamem); continue; } - d->lo_queue.ifq_maxlen = IFQ_MAXLEN; - d->hi_queue.ifq_maxlen = IFQ_MAXLEN; + d->lo_queue.ifq_maxlen = ifqmaxlen; + d->hi_queue.ifq_maxlen = ifqmaxlen; mtx_init (&d->lo_queue.ifq_mtx, "cx_queue_lo", NULL, MTX_DEF); mtx_init (&d->hi_queue.ifq_mtx, "cx_queue_hi", NULL, MTX_DEF); #else /*NETGRAPH*/ diff --git a/sys/dev/cxgb/cxgb_adapter.h b/sys/dev/cxgb/cxgb_adapter.h index d1f5ef64c50..1430ca15e81 100644 --- a/sys/dev/cxgb/cxgb_adapter.h +++ b/sys/dev/cxgb/cxgb_adapter.h @@ -204,6 +204,7 @@ struct sge_fl { uint32_t cidx; uint32_t pidx; uint32_t gen; + uint32_t db_pending; bus_addr_t phys_addr; uint32_t cntxt_id; uint32_t empty; @@ -232,6 +233,7 @@ struct sge_txq { uint32_t pidx; uint32_t gen; uint32_t unacked; + uint32_t db_pending; struct tx_desc *desc; struct tx_sw_desc *sdesc; uint32_t token; diff --git a/sys/dev/cxgb/cxgb_ioctl.h b/sys/dev/cxgb/cxgb_ioctl.h index e4b487505a0..3f3ac5a1866 100644 --- a/sys/dev/cxgb/cxgb_ioctl.h +++ b/sys/dev/cxgb/cxgb_ioctl.h @@ -59,6 +59,9 @@ enum { CH_CLEAR_STATS, CH_GET_UP_LA, CH_GET_UP_IOQS, + CH_SET_FILTER, + CH_DEL_FILTER, + CH_GET_FILTER, }; /* statistics categories */ @@ -215,6 +218,29 @@ struct ch_up_ioqs { struct t3_ioq_entry *data; }; +struct ch_filter_tuple { + uint32_t sip; + uint32_t dip; + uint16_t sport; + uint16_t dport; + uint16_t vlan:12; + 
uint16_t vlan_prio:3; +}; + +struct ch_filter { + uint32_t filter_id; + struct ch_filter_tuple val; + struct ch_filter_tuple mask; + uint16_t mac_addr_idx; + uint8_t mac_hit:1; + uint8_t proto:2; + + uint8_t want_filter_id:1; + uint8_t pass:1; + uint8_t rss:1; + uint8_t qset; +}; + #define CHELSIO_SETREG _IOW('f', CH_SETREG, struct ch_reg) #define CHELSIO_GETREG _IOWR('f', CH_GETREG, struct ch_reg) #define CHELSIO_GETMTUTAB _IOR('f', CH_GETMTUTAB, struct ch_mtus) @@ -239,4 +265,7 @@ struct ch_up_ioqs { #define CHELSIO_GET_EEPROM _IOWR('f', CH_GET_EEPROM, struct ch_eeprom) #define CHELSIO_GET_UP_LA _IOWR('f', CH_GET_UP_LA, struct ch_up_la) #define CHELSIO_GET_UP_IOQS _IOWR('f', CH_GET_UP_IOQS, struct ch_up_ioqs) +#define CHELSIO_SET_FILTER _IOW('f', CH_SET_FILTER, struct ch_filter) +#define CHELSIO_DEL_FILTER _IOW('f', CH_DEL_FILTER, struct ch_filter) +#define CHELSIO_GET_FILTER _IOWR('f', CH_GET_FILTER, struct ch_filter) #endif diff --git a/sys/dev/cxgb/cxgb_main.c b/sys/dev/cxgb/cxgb_main.c index a47284fe9ae..13fed449fa7 100644 --- a/sys/dev/cxgb/cxgb_main.c +++ b/sys/dev/cxgb/cxgb_main.c @@ -99,6 +99,13 @@ static void cxgb_ext_intr_handler(void *, int); static void cxgb_tick_handler(void *, int); static void cxgb_tick(void *); static void setup_rss(adapter_t *sc); +static int alloc_filters(struct adapter *); +static int setup_hw_filters(struct adapter *); +static int set_filter(struct adapter *, int, const struct filter_info *); +static inline void mk_set_tcb_field(struct cpl_set_tcb_field *, unsigned int, + unsigned int, u64, u64); +static inline void set_tcb_field_ulp(struct cpl_set_tcb_field *, unsigned int, + unsigned int, u64, u64); /* Attachment glue for the PCI controller end of the device. Each port of * the device is attached separately, as defined later. 
@@ -981,7 +988,7 @@ cxgb_makedev(struct port_info *pi) #define CXGB_CAP (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | \ IFCAP_VLAN_HWCSUM | IFCAP_TSO | IFCAP_JUMBO_MTU | IFCAP_LRO | \ - IFCAP_VLAN_HWTSO) + IFCAP_VLAN_HWTSO | IFCAP_LINKSTATE) #define CXGB_CAP_ENABLE (CXGB_CAP & ~IFCAP_TSO6) static int @@ -1012,7 +1019,7 @@ cxgb_port_attach(device_t dev) ifp->if_ioctl = cxgb_ioctl; ifp->if_start = cxgb_start; - ifp->if_snd.ifq_drv_maxlen = cxgb_snd_queue_len; + ifp->if_snd.ifq_drv_maxlen = max(cxgb_snd_queue_len, ifqmaxlen); IFQ_SET_MAXLEN(&ifp->if_snd, ifp->if_snd.ifq_drv_maxlen); IFQ_SET_READY(&ifp->if_snd); @@ -1661,6 +1668,13 @@ cxgb_up(struct adapter *sc) if ((err = update_tpsram(sc))) goto out; + if (is_offload(sc)) { + sc->params.mc5.nservers = 0; + sc->params.mc5.nroutes = 0; + sc->params.mc5.nfilters = t3_mc5_size(&sc->mc5) - + MC5_MIN_TIDS; + } + err = t3_init_hw(sc, 0); if (err) goto out; @@ -1672,6 +1686,7 @@ cxgb_up(struct adapter *sc) if (err) goto out; + alloc_filters(sc); setup_rss(sc); t3_intr_clear(sc); @@ -1698,6 +1713,7 @@ cxgb_up(struct adapter *sc) if (!(sc->flags & QUEUES_BOUND)) { bind_qsets(sc); + setup_hw_filters(sc); sc->flags |= QUEUES_BOUND; } @@ -3076,6 +3092,139 @@ cxgb_extension_ioctl(struct cdev *dev, unsigned long cmd, caddr_t data, free(buf, M_DEVBUF); break; } + case CHELSIO_SET_FILTER: { + struct ch_filter *f = (struct ch_filter *)data;; + struct filter_info *p; + unsigned int nfilters = sc->params.mc5.nfilters; + + if (!is_offload(sc)) + return (EOPNOTSUPP); /* No TCAM */ + if (!(sc->flags & FULL_INIT_DONE)) + return (EAGAIN); /* mc5 not setup yet */ + if (nfilters == 0) + return (EBUSY); /* TOE will use TCAM */ + + /* sanity checks */ + if (f->filter_id >= nfilters || + (f->val.dip && f->mask.dip != 0xffffffff) || + (f->val.sport && f->mask.sport != 0xffff) || + (f->val.dport && f->mask.dport != 0xffff) || + (f->val.vlan && f->mask.vlan != 0xfff) || + (f->val.vlan_prio && + f->mask.vlan_prio != FILTER_NO_VLAN_PRI) || + 
(f->mac_addr_idx != 0xffff && f->mac_addr_idx > 15) || + f->qset >= SGE_QSETS || + sc->rrss_map[f->qset] >= RSS_TABLE_SIZE) + return (EINVAL); + + /* Was allocated with M_WAITOK */ + KASSERT(sc->filters, ("filter table NULL\n")); + + p = &sc->filters[f->filter_id]; + if (p->locked) + return (EPERM); + + bzero(p, sizeof(*p)); + p->sip = f->val.sip; + p->sip_mask = f->mask.sip; + p->dip = f->val.dip; + p->sport = f->val.sport; + p->dport = f->val.dport; + p->vlan = f->mask.vlan ? f->val.vlan : 0xfff; + p->vlan_prio = f->mask.vlan_prio ? (f->val.vlan_prio & 6) : + FILTER_NO_VLAN_PRI; + p->mac_hit = f->mac_hit; + p->mac_vld = f->mac_addr_idx != 0xffff; + p->mac_idx = f->mac_addr_idx; + p->pkt_type = f->proto; + p->report_filter_id = f->want_filter_id; + p->pass = f->pass; + p->rss = f->rss; + p->qset = f->qset; + + error = set_filter(sc, f->filter_id, p); + if (error == 0) + p->valid = 1; + break; + } + case CHELSIO_DEL_FILTER: { + struct ch_filter *f = (struct ch_filter *)data; + struct filter_info *p; + unsigned int nfilters = sc->params.mc5.nfilters; + + if (!is_offload(sc)) + return (EOPNOTSUPP); + if (!(sc->flags & FULL_INIT_DONE)) + return (EAGAIN); + if (nfilters == 0 || sc->filters == NULL) + return (EINVAL); + if (f->filter_id >= nfilters) + return (EINVAL); + + p = &sc->filters[f->filter_id]; + if (p->locked) + return (EPERM); + if (!p->valid) + return (EFAULT); /* Read "Bad address" as "Bad index" */ + + bzero(p, sizeof(*p)); + p->sip = p->sip_mask = 0xffffffff; + p->vlan = 0xfff; + p->vlan_prio = FILTER_NO_VLAN_PRI; + p->pkt_type = 1; + error = set_filter(sc, f->filter_id, p); + break; + } + case CHELSIO_GET_FILTER: { + struct ch_filter *f = (struct ch_filter *)data; + struct filter_info *p; + unsigned int i, nfilters = sc->params.mc5.nfilters; + + if (!is_offload(sc)) + return (EOPNOTSUPP); + if (!(sc->flags & FULL_INIT_DONE)) + return (EAGAIN); + if (nfilters == 0 || sc->filters == NULL) + return (EINVAL); + + i = f->filter_id == 0xffffffff ? 
0 : f->filter_id + 1; + for (; i < nfilters; i++) { + p = &sc->filters[i]; + if (!p->valid) + continue; + + bzero(f, sizeof(*f)); + + f->filter_id = i; + f->val.sip = p->sip; + f->mask.sip = p->sip_mask; + f->val.dip = p->dip; + f->mask.dip = p->dip ? 0xffffffff : 0; + f->val.sport = p->sport; + f->mask.sport = p->sport ? 0xffff : 0; + f->val.dport = p->dport; + f->mask.dport = p->dport ? 0xffff : 0; + f->val.vlan = p->vlan == 0xfff ? 0 : p->vlan; + f->mask.vlan = p->vlan == 0xfff ? 0 : 0xfff; + f->val.vlan_prio = p->vlan_prio == FILTER_NO_VLAN_PRI ? + 0 : p->vlan_prio; + f->mask.vlan_prio = p->vlan_prio == FILTER_NO_VLAN_PRI ? + 0 : FILTER_NO_VLAN_PRI; + f->mac_hit = p->mac_hit; + f->mac_addr_idx = p->mac_vld ? p->mac_idx : 0xffff; + f->proto = p->pkt_type; + f->want_filter_id = p->report_filter_id; + f->pass = p->pass; + f->rss = p->rss; + f->qset = p->qset; + + break; + } + + if (i == nfilters) + f->filter_id = 0xffffffff; + break; + } default: return (EOPNOTSUPP); break; @@ -3130,5 +3279,127 @@ cxgb_get_regs(adapter_t *sc, struct ch_ifconf_regs *regs, uint8_t *buf) XGM_REG(A_XGM_RX_SPI4_SOP_EOP_CNT, 1)); } +static int +alloc_filters(struct adapter *sc) +{ + struct filter_info *p; + unsigned int nfilters = sc->params.mc5.nfilters; -MODULE_DEPEND(if_cxgb, cxgb_t3fw, 1, 1, 1); + if (nfilters == 0) + return (0); + + p = malloc(sizeof(*p) * nfilters, M_DEVBUF, M_WAITOK | M_ZERO); + sc->filters = p; + + p = &sc->filters[nfilters - 1]; + p->vlan = 0xfff; + p->vlan_prio = FILTER_NO_VLAN_PRI; + p->pass = p->rss = p->valid = p->locked = 1; + + return (0); +} + +static int +setup_hw_filters(struct adapter *sc) +{ + int i, rc; + unsigned int nfilters = sc->params.mc5.nfilters; + + if (!sc->filters) + return (0); + + t3_enable_filters(sc); + + for (i = rc = 0; i < nfilters && !rc; i++) { + if (sc->filters[i].locked) + rc = set_filter(sc, i, &sc->filters[i]); + } + + return (rc); +} + +static int +set_filter(struct adapter *sc, int id, const struct filter_info *f) +{ + int 
len; + struct mbuf *m; + struct ulp_txpkt *txpkt; + struct work_request_hdr *wr; + struct cpl_pass_open_req *oreq; + struct cpl_set_tcb_field *sreq; + + len = sizeof(*wr) + sizeof(*oreq) + 2 * sizeof(*sreq); + KASSERT(len <= MHLEN, ("filter request too big for an mbuf")); + + id += t3_mc5_size(&sc->mc5) - sc->params.mc5.nroutes - + sc->params.mc5.nfilters; + + m = m_gethdr(M_WAITOK, MT_DATA); + m->m_len = m->m_pkthdr.len = len; + bzero(mtod(m, char *), len); + + wr = mtod(m, struct work_request_hdr *); + wr->wrh_hi = htonl(V_WR_OP(FW_WROPCODE_BYPASS) | F_WR_ATOMIC); + + oreq = (struct cpl_pass_open_req *)(wr + 1); + txpkt = (struct ulp_txpkt *)oreq; + txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT)); + txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*oreq) / 8)); + OPCODE_TID(oreq) = htonl(MK_OPCODE_TID(CPL_PASS_OPEN_REQ, id)); + oreq->local_port = htons(f->dport); + oreq->peer_port = htons(f->sport); + oreq->local_ip = htonl(f->dip); + oreq->peer_ip = htonl(f->sip); + oreq->peer_netmask = htonl(f->sip_mask); + oreq->opt0h = 0; + oreq->opt0l = htonl(F_NO_OFFLOAD); + oreq->opt1 = htonl(V_MAC_MATCH_VALID(f->mac_vld) | + V_CONN_POLICY(CPL_CONN_POLICY_FILTER) | + V_VLAN_PRI(f->vlan_prio >> 1) | + V_VLAN_PRI_VALID(f->vlan_prio != FILTER_NO_VLAN_PRI) | + V_PKT_TYPE(f->pkt_type) | V_OPT1_VLAN(f->vlan) | + V_MAC_MATCH(f->mac_idx | (f->mac_hit << 4))); + + sreq = (struct cpl_set_tcb_field *)(oreq + 1); + set_tcb_field_ulp(sreq, id, 1, 0x1800808000ULL, + (f->report_filter_id << 15) | (1 << 23) | + ((u64)f->pass << 35) | ((u64)!f->rss << 36)); + set_tcb_field_ulp(sreq + 1, id, 0, 0xffffffff, (2 << 19) | 1); + t3_mgmt_tx(sc, m); + + if (f->pass && !f->rss) { + len = sizeof(*sreq); + m = m_gethdr(M_WAITOK, MT_DATA); + m->m_len = m->m_pkthdr.len = len; + bzero(mtod(m, char *), len); + sreq = mtod(m, struct cpl_set_tcb_field *); + sreq->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); + mk_set_tcb_field(sreq, id, 25, 0x3f80000, + (u64)sc->rrss_map[f->qset] << 19); + t3_mgmt_tx(sc, m); + 
} + return 0; +} + +static inline void +mk_set_tcb_field(struct cpl_set_tcb_field *req, unsigned int tid, + unsigned int word, u64 mask, u64 val) +{ + OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, tid)); + req->reply = V_NO_REPLY(1); + req->cpu_idx = 0; + req->word = htons(word); + req->mask = htobe64(mask); + req->val = htobe64(val); +} + +static inline void +set_tcb_field_ulp(struct cpl_set_tcb_field *req, unsigned int tid, + unsigned int word, u64 mask, u64 val) +{ + struct ulp_txpkt *txpkt = (struct ulp_txpkt *)req; + + txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT)); + txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*req) / 8)); + mk_set_tcb_field(req, tid, word, mask, val); +} diff --git a/sys/dev/cxgb/cxgb_sge.c b/sys/dev/cxgb/cxgb_sge.c index 9bc36c94fa7..5fdae6be7e7 100644 --- a/sys/dev/cxgb/cxgb_sge.c +++ b/sys/dev/cxgb/cxgb_sge.c @@ -696,7 +696,7 @@ refill_fl(adapter_t *sc, struct sge_fl *q, int n) struct refill_fl_cb_arg cb_arg; struct mbuf *m; caddr_t cl; - int err, count = 0; + int err; cb_arg.error = 0; while (n--) { @@ -754,12 +754,14 @@ refill_fl(adapter_t *sc, struct sge_fl *q, int n) d = q->desc; } q->credits++; - count++; + q->db_pending++; } done: - if (count) + if (q->db_pending >= 32) { + q->db_pending = 0; t3_write_reg(sc, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id)); + } } @@ -810,8 +812,10 @@ __refill_fl(adapter_t *adap, struct sge_fl *fl) static __inline void __refill_fl_lt(adapter_t *adap, struct sge_fl *fl, int max) { - if ((fl->size - fl->credits) < max) - refill_fl(adap, fl, min(max, fl->size - fl->credits)); + uint32_t reclaimable = fl->size - fl->credits; + + if (reclaimable > 0) + refill_fl(adap, fl, min(max, reclaimable)); } /** @@ -1261,7 +1265,7 @@ make_sgl(struct sg_ent *sgp, bus_dma_segment_t *segs, int nsegs) * When GTS is disabled we unconditionally ring the doorbell. 
*/ static __inline void -check_ring_tx_db(adapter_t *adap, struct sge_txq *q) +check_ring_tx_db(adapter_t *adap, struct sge_txq *q, int mustring) { #if USE_GTS clear_bit(TXQ_LAST_PKT_DB, &q->flags); @@ -1275,9 +1279,12 @@ check_ring_tx_db(adapter_t *adap, struct sge_txq *q) F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); } #else - wmb(); /* write descriptors before telling HW */ - t3_write_reg(adap, A_SG_KDOORBELL, - F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); + if (mustring || ++q->db_pending >= 32) { + wmb(); /* write descriptors before telling HW */ + t3_write_reg(adap, A_SG_KDOORBELL, + F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); + q->db_pending = 0; + } #endif } @@ -1480,7 +1487,7 @@ t3_encap(struct sge_qset *qs, struct mbuf **m) wmb(); ETHER_BPF_MTAP(pi->ifp, m0); wr_gen2(txd, txqs.gen); - check_ring_tx_db(sc, txq); + check_ring_tx_db(sc, txq, 0); return (0); } else if (tso_info) { int eth_type; @@ -1543,7 +1550,7 @@ t3_encap(struct sge_qset *qs, struct mbuf **m) wmb(); ETHER_BPF_MTAP(pi->ifp, m0); wr_gen2(txd, txqs.gen); - check_ring_tx_db(sc, txq); + check_ring_tx_db(sc, txq, 0); m_freem(m0); return (0); } @@ -1574,7 +1581,7 @@ t3_encap(struct sge_qset *qs, struct mbuf **m) wmb(); ETHER_BPF_MTAP(pi->ifp, m0); wr_gen2(txd, txqs.gen); - check_ring_tx_db(sc, txq); + check_ring_tx_db(sc, txq, 0); m_freem(m0); return (0); } @@ -1593,7 +1600,7 @@ t3_encap(struct sge_qset *qs, struct mbuf **m) wr_lo = htonl(V_WR_TID(txq->token)); write_wr_hdr_sgl(ndesc, txd, &txqs, txq, sgl, flits, sgl_flits, wr_hi, wr_lo); - check_ring_tx_db(sc, txq); + check_ring_tx_db(sc, txq, 0); return (0); } @@ -1643,7 +1650,6 @@ cxgb_start_locked(struct sge_qset *qs) { struct mbuf *m_head = NULL; struct sge_txq *txq = &qs->txq[TXQ_ETH]; - int in_use_init = txq->in_use; struct port_info *pi = qs->port; struct ifnet *ifp = pi->ifp; @@ -1655,8 +1661,7 @@ cxgb_start_locked(struct sge_qset *qs) return; } TXQ_LOCK_ASSERT(qs); - while ((txq->in_use - in_use_init < TX_START_MAX_DESC) && - !TXQ_RING_EMPTY(qs) && 
(ifp->if_drv_flags & IFF_DRV_RUNNING) && + while (!TXQ_RING_EMPTY(qs) && (ifp->if_drv_flags & IFF_DRV_RUNNING) && pi->link_config.link_ok) { reclaim_completed_tx(qs, cxgb_tx_reclaim_threshold, TXQ_ETH); @@ -1674,6 +1679,10 @@ cxgb_start_locked(struct sge_qset *qs) m_head = NULL; } + + if (txq->db_pending) + check_ring_tx_db(pi->adapter, txq, 1); + if (!TXQ_RING_EMPTY(qs) && callout_pending(&txq->txq_timer) == 0 && pi->link_config.link_ok) callout_reset_on(&txq->txq_timer, 1, cxgb_tx_timeout, @@ -1707,6 +1716,9 @@ cxgb_transmit_locked(struct ifnet *ifp, struct sge_qset *qs, struct mbuf *m) (error = drbr_enqueue(ifp, br, m)) != 0) return (error); } else { + if (txq->db_pending) + check_ring_tx_db(pi->adapter, txq, 1); + /* * We've bypassed the buf ring so we need to update * the stats directly @@ -2354,7 +2366,7 @@ again: reclaim_completed_tx(qs, 16, TXQ_OFLD); TXQ_UNLOCK(qs); write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs); - check_ring_tx_db(adap, q); + check_ring_tx_db(adap, q, 1); return (0); } @@ -3033,7 +3045,7 @@ process_responses(adapter_t *adap, struct sge_qset *qs, int budget) r = rspq->desc; } - if (++rspq->credits >= (rspq->size / 4)) { + if (++rspq->credits >= 64) { refill_rspq(adap, rspq, rspq->credits); rspq->credits = 0; } diff --git a/sys/dev/cxgb/sys/mvec.h b/sys/dev/cxgb/sys/mvec.h index ed38dca3444..9db27ddf75d 100644 --- a/sys/dev/cxgb/sys/mvec.h +++ b/sys/dev/cxgb/sys/mvec.h @@ -1,33 +1,31 @@ -/************************************************************************** - * - * Copyright (c) 2007,2009 Kip Macy kmacy@freebsd.org +/*- + * Copyright (c) 2007, 2009 Kip Macy * All rights reserved. * * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. The name of Kip Macy nor the names of other - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. * * $FreeBSD$ * - ***************************************************************************/ + */ #ifndef _MVEC_H_ #define _MVEC_H_ diff --git a/sys/dev/cxgb/sys/uipc_mvec.c b/sys/dev/cxgb/sys/uipc_mvec.c index 4e494ebfc15..f52daa681b1 100644 --- a/sys/dev/cxgb/sys/uipc_mvec.c +++ b/sys/dev/cxgb/sys/uipc_mvec.c @@ -1,32 +1,28 @@ -/************************************************************************** - * - * Copyright (c) 2007-2008, Kip Macy kmacy@freebsd.org +/*- + * Copyright (c) 2007-2008 Kip Macy * All rights reserved. * * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. The name of Kip Macy nor the names of other - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * - * - ***************************************************************************/ + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ #include __FBSDID("$FreeBSD$"); diff --git a/sys/dev/drm/ati_pcigart.c b/sys/dev/drm/ati_pcigart.c index e3fecd1071b..063288ed64d 100644 --- a/sys/dev/drm/ati_pcigart.c +++ b/sys/dev/drm/ati_pcigart.c @@ -39,8 +39,9 @@ __FBSDID("$FreeBSD$"); #define ATI_PCIGART_PAGE_SIZE 4096 /* PCI GART page size */ #define ATI_PCIGART_PAGE_MASK (~(ATI_PCIGART_PAGE_SIZE-1)) -#define ATI_PCIE_WRITE 0x4 -#define ATI_PCIE_READ 0x8 +#define ATI_GART_NOSNOOP 0x1 +#define ATI_GART_WRITE 0x4 +#define ATI_GART_READ 0x8 static void drm_ati_alloc_pcigart_table_cb(void *arg, bus_dma_segment_t *segs, @@ -196,13 +197,15 @@ drm_ati_pcigart_init(struct drm_device *dev, case DRM_ATI_GART_IGP: page_base |= (upper_32_bits(entry_addr) & 0xff) << 4; - page_base |= 0xc; + page_base |= ATI_GART_READ | ATI_GART_WRITE; + page_base |= ATI_GART_NOSNOOP; break; case DRM_ATI_GART_PCIE: page_base >>= 8; page_base |= (upper_32_bits(entry_addr) & 0xff) << 24; - page_base |= ATI_PCIE_READ | ATI_PCIE_WRITE; + page_base |= ATI_GART_READ | ATI_GART_WRITE; + page_base |= ATI_GART_NOSNOOP; break; default: case DRM_ATI_GART_PCI: diff --git a/sys/dev/drm/drmP.h b/sys/dev/drm/drmP.h index f89e75032ec..af50893163a 100644 --- a/sys/dev/drm/drmP.h +++ b/sys/dev/drm/drmP.h @@ -49,6 +49,7 @@ struct drm_file; #include #include #include +#include #include #if __FreeBSD_version >= 700000 #include @@ -68,10 +69,13 @@ struct drm_file; #include #include #include +#include #include #include #include +#include #include +#include #include #include #include @@ -224,7 +228,7 @@ enum { #define DRM_MTRR_WC MDF_WRITECOMBINE #define jiffies ticks -typedef unsigned long dma_addr_t; +typedef vm_paddr_t dma_addr_t; typedef u_int64_t u64; typedef u_int32_t u32; typedef u_int16_t u16; @@ -239,22 +243,22 @@ typedef u_int8_t u8; #define DRM_MEMORYBARRIER() mb() #define DRM_READ8(map, offset) \ - *(volatile u_int8_t *)(((vm_offset_t)(map)->handle) + \ + *(volatile u_int8_t *)(((vm_offset_t)(map)->virtual) + \ (vm_offset_t)(offset)) #define 
DRM_READ16(map, offset) \ - *(volatile u_int16_t *)(((vm_offset_t)(map)->handle) + \ + *(volatile u_int16_t *)(((vm_offset_t)(map)->virtual) + \ (vm_offset_t)(offset)) #define DRM_READ32(map, offset) \ - *(volatile u_int32_t *)(((vm_offset_t)(map)->handle) + \ + *(volatile u_int32_t *)(((vm_offset_t)(map)->virtual) + \ (vm_offset_t)(offset)) #define DRM_WRITE8(map, offset, val) \ - *(volatile u_int8_t *)(((vm_offset_t)(map)->handle) + \ + *(volatile u_int8_t *)(((vm_offset_t)(map)->virtual) + \ (vm_offset_t)(offset)) = val #define DRM_WRITE16(map, offset, val) \ - *(volatile u_int16_t *)(((vm_offset_t)(map)->handle) + \ + *(volatile u_int16_t *)(((vm_offset_t)(map)->virtual) + \ (vm_offset_t)(offset)) = val #define DRM_WRITE32(map, offset, val) \ - *(volatile u_int32_t *)(((vm_offset_t)(map)->handle) + \ + *(volatile u_int32_t *)(((vm_offset_t)(map)->virtual) + \ (vm_offset_t)(offset)) = val #define DRM_VERIFYAREA_READ( uaddr, size ) \ @@ -474,25 +478,26 @@ typedef struct drm_agp_head { } drm_agp_head_t; typedef struct drm_sg_mem { - unsigned long handle; - void *virtual; - int pages; - dma_addr_t *busaddr; - struct drm_dma_handle *dmah; /* Handle to PCI memory */ + vm_offset_t vaddr; + vm_paddr_t *busaddr; + vm_pindex_t pages; } drm_sg_mem_t; +#define DRM_MAP_HANDLE_BITS (sizeof(void *) == 4 ? 
4 : 24) +#define DRM_MAP_HANDLE_SHIFT (sizeof(void *) * 8 - DRM_MAP_HANDLE_BITS) typedef TAILQ_HEAD(drm_map_list, drm_local_map) drm_map_list_t; typedef struct drm_local_map { - unsigned long offset; /* Physical address (0 for SAREA)*/ - unsigned long size; /* Physical size (bytes) */ - enum drm_map_type type; /* Type of memory mapped */ - enum drm_map_flags flags; /* Flags */ - void *handle; /* User-space: "Handle" to pass to mmap */ - /* Kernel-space: kernel-virtual address */ - int mtrr; /* Boolean: MTRR used */ - /* Private data */ - int rid; /* PCI resource ID for bus_space */ + unsigned long offset; /* Physical address (0 for SAREA) */ + unsigned long size; /* Physical size (bytes) */ + enum drm_map_type type; /* Type of memory mapped */ + enum drm_map_flags flags; /* Flags */ + void *handle; /* User-space: "Handle" to pass to mmap */ + /* Kernel-space: kernel-virtual address */ + int mtrr; /* Boolean: MTRR used */ + /* Private data */ + int rid; /* PCI resource ID for bus_space */ + void *virtual; /* Kernel-space: kernel-virtual address */ struct resource *bsr; bus_space_tag_t bst; bus_space_handle_t bsh; @@ -643,6 +648,7 @@ struct drm_device { /* Linked list of mappable regions. 
Protected by dev_lock */ drm_map_list_t maplist; + struct unrhdr *map_unrhdr; drm_local_map_t **context_sareas; int max_context; @@ -973,17 +979,17 @@ drm_free(void *pt, size_t size, struct malloc_type *area) static __inline__ void drm_core_ioremap_wc(struct drm_local_map *map, struct drm_device *dev) { - map->handle = drm_ioremap_wc(dev, map); + map->virtual = drm_ioremap_wc(dev, map); } static __inline__ void drm_core_ioremap(struct drm_local_map *map, struct drm_device *dev) { - map->handle = drm_ioremap(dev, map); + map->virtual = drm_ioremap(dev, map); } static __inline__ void drm_core_ioremapfree(struct drm_local_map *map, struct drm_device *dev) { - if ( map->handle && map->size ) + if ( map->virtual && map->size ) drm_ioremapfree(map); } @@ -994,7 +1000,7 @@ drm_core_findmap(struct drm_device *dev, unsigned long offset) DRM_SPINLOCK_ASSERT(&dev->dev_lock); TAILQ_FOREACH(map, &dev->maplist, link) { - if (map->offset == offset) + if (offset == (unsigned long)map->handle) return map; } return NULL; diff --git a/sys/dev/drm/drm_bufs.c b/sys/dev/drm/drm_bufs.c index bd31b0ae31a..2d27cd4b88b 100644 --- a/sys/dev/drm/drm_bufs.c +++ b/sys/dev/drm/drm_bufs.c @@ -156,10 +156,12 @@ int drm_addmap(struct drm_device * dev, unsigned long offset, map->size = size; map->type = type; map->flags = flags; + map->handle = (void *)((unsigned long)alloc_unr(dev->map_unrhdr) << + DRM_MAP_HANDLE_SHIFT); switch (map->type) { case _DRM_REGISTERS: - map->handle = drm_ioremap(dev, map); + map->virtual = drm_ioremap(dev, map); if (!(map->flags & _DRM_WRITE_COMBINING)) break; /* FALLTHROUGH */ @@ -168,25 +170,25 @@ int drm_addmap(struct drm_device * dev, unsigned long offset, map->mtrr = 1; break; case _DRM_SHM: - map->handle = malloc(map->size, DRM_MEM_MAPS, M_NOWAIT); + map->virtual = malloc(map->size, DRM_MEM_MAPS, M_NOWAIT); DRM_DEBUG("%lu %d %p\n", - map->size, drm_order(map->size), map->handle); - if (!map->handle) { + map->size, drm_order(map->size), map->virtual); + if 
(!map->virtual) { free(map, DRM_MEM_MAPS); DRM_LOCK(); return ENOMEM; } - map->offset = (unsigned long)map->handle; + map->offset = (unsigned long)map->virtual; if (map->flags & _DRM_CONTAINS_LOCK) { /* Prevent a 2nd X Server from creating a 2nd lock */ DRM_LOCK(); if (dev->lock.hw_lock != NULL) { DRM_UNLOCK(); - free(map->handle, DRM_MEM_MAPS); + free(map->virtual, DRM_MEM_MAPS); free(map, DRM_MEM_MAPS); return EBUSY; } - dev->lock.hw_lock = map->handle; /* Pointer to lock */ + dev->lock.hw_lock = map->virtual; /* Pointer to lock */ DRM_UNLOCK(); } break; @@ -224,7 +226,8 @@ int drm_addmap(struct drm_device * dev, unsigned long offset, DRM_LOCK(); return EINVAL; } - map->offset += dev->sg->handle; + map->virtual = (void *)(dev->sg->vaddr + offset); + map->offset = dev->sg->vaddr + offset; break; case _DRM_CONSISTENT: /* Unfortunately, we don't get any alignment specification from @@ -242,7 +245,7 @@ int drm_addmap(struct drm_device * dev, unsigned long offset, DRM_LOCK(); return ENOMEM; } - map->handle = map->dmah->vaddr; + map->virtual = map->dmah->vaddr; map->offset = map->dmah->busaddr; break; default: @@ -291,11 +294,7 @@ int drm_addmap_ioctl(struct drm_device *dev, void *data, request->type = map->type; request->flags = map->flags; request->mtrr = map->mtrr; - request->handle = map->handle; - - if (request->type != _DRM_SHM) { - request->handle = (void *)request->offset; - } + request->handle = (void *)map->handle; return 0; } @@ -324,7 +323,7 @@ void drm_rmmap(struct drm_device *dev, drm_local_map_t *map) } break; case _DRM_SHM: - free(map->handle, DRM_MEM_MAPS); + free(map->virtual, DRM_MEM_MAPS); break; case _DRM_AGP: case _DRM_SCATTER_GATHER: @@ -342,6 +341,12 @@ void drm_rmmap(struct drm_device *dev, drm_local_map_t *map) map->bsr); } + DRM_UNLOCK(); + if (map->handle) + free_unr(dev->map_unrhdr, (unsigned long)map->handle >> + DRM_MAP_HANDLE_SHIFT); + DRM_LOCK(); + free(map, DRM_MEM_MAPS); } @@ -739,7 +744,7 @@ static int drm_do_addbufs_sg(struct 
drm_device *dev, struct drm_buf_desc *reques buf->offset = (dma->byte_count + offset); buf->bus_address = agp_offset + offset; - buf->address = (void *)(agp_offset + offset + dev->sg->handle); + buf->address = (void *)(agp_offset + offset + dev->sg->vaddr); buf->next = NULL; buf->pending = 0; buf->file_priv = NULL; @@ -1054,7 +1059,7 @@ int drm_mapbufs(struct drm_device *dev, void *data, struct drm_file *file_priv) goto done; } size = round_page(map->size); - foff = map->offset; + foff = (unsigned long)map->handle; } else { size = round_page(dma->byte_count), foff = 0; diff --git a/sys/dev/drm/drm_context.c b/sys/dev/drm/drm_context.c index 398008bc187..3c13b43f9a7 100644 --- a/sys/dev/drm/drm_context.c +++ b/sys/dev/drm/drm_context.c @@ -147,7 +147,7 @@ int drm_getsareactx(struct drm_device *dev, void *data, map = dev->context_sareas[request->ctx_id]; DRM_UNLOCK(); - request->handle = map->handle; + request->handle = (void *)map->handle; return 0; } diff --git a/sys/dev/drm/drm_drv.c b/sys/dev/drm/drm_drv.c index c690c34dfa3..8d9bc69e0c4 100644 --- a/sys/dev/drm/drm_drv.c +++ b/sys/dev/drm/drm_drv.c @@ -434,6 +434,12 @@ static int drm_load(struct drm_device *dev) DRM_DEBUG("\n"); TAILQ_INIT(&dev->maplist); + dev->map_unrhdr = new_unrhdr(1, ((1 << DRM_MAP_HANDLE_BITS) - 1), NULL); + if (dev->map_unrhdr == NULL) { + DRM_ERROR("Couldn't allocate map number allocator\n"); + return EINVAL; + } + drm_mem_init(); drm_sysctl_init(dev); @@ -565,6 +571,7 @@ static void drm_unload(struct drm_device *dev) } delete_unrhdr(dev->drw_unrhdr); + delete_unrhdr(dev->map_unrhdr); drm_mem_uninit(); diff --git a/sys/dev/drm/drm_hashtab.c b/sys/dev/drm/drm_hashtab.c index e98f1026cf5..360c02bfada 100644 --- a/sys/dev/drm/drm_hashtab.c +++ b/sys/dev/drm/drm_hashtab.c @@ -46,7 +46,8 @@ int drm_ht_create(struct drm_open_hash *ht, unsigned int order) ht->size = 1 << order; ht->order = order; ht->table = NULL; - ht->table = hashinit(ht->size, DRM_MEM_HASHTAB, &ht->mask); + ht->table = 
hashinit_flags(ht->size, DRM_MEM_HASHTAB, &ht->mask, + HASH_NOWAIT); if (!ht->table) { DRM_ERROR("Out of memory for hash table\n"); return -ENOMEM; diff --git a/sys/dev/drm/drm_memory.c b/sys/dev/drm/drm_memory.c index 8eaf4d1e9e2..415b7741f71 100644 --- a/sys/dev/drm/drm_memory.c +++ b/sys/dev/drm/drm_memory.c @@ -83,7 +83,7 @@ void *drm_ioremap(struct drm_device *dev, drm_local_map_t *map) void drm_ioremapfree(drm_local_map_t *map) { - pmap_unmapdev((vm_offset_t) map->handle, map->size); + pmap_unmapdev((vm_offset_t) map->virtual, map->size); } int diff --git a/sys/dev/drm/drm_mm.c b/sys/dev/drm/drm_mm.c index 344436fd22d..bab36c1117a 100644 --- a/sys/dev/drm/drm_mm.c +++ b/sys/dev/drm/drm_mm.c @@ -333,7 +333,8 @@ int drm_mm_init(struct drm_mm * mm, unsigned long start, unsigned long size) mm->num_unused = 0; mtx_init(&mm->unused_lock, "drm_unused", NULL, MTX_DEF); - return drm_mm_create_tail_node(mm, start, size, 0); + /* XXX This could be non-atomic but gets called from a locked path */ + return drm_mm_create_tail_node(mm, start, size, 1); } void drm_mm_takedown(struct drm_mm * mm) diff --git a/sys/dev/drm/drm_scatter.c b/sys/dev/drm/drm_scatter.c index b3ab63b630c..9a1a4b1e1d3 100644 --- a/sys/dev/drm/drm_scatter.c +++ b/sys/dev/drm/drm_scatter.c @@ -1,5 +1,5 @@ /*- - * Copyright 2000 VA Linux Systems, Inc., Sunnyvale, California. + * Copyright (c) 2009 Robert C. Noland III * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a @@ -20,11 +20,6 @@ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. - * - * Authors: - * Gareth Hughes - * Eric Anholt - * */ #include @@ -32,96 +27,58 @@ __FBSDID("$FreeBSD$"); /** @file drm_scatter.c * Allocation of memory for scatter-gather mappings by the graphics chip. 
- * * The memory allocated here is then made into an aperture in the card - * by drm_ati_pcigart_init(). + * by mapping the pages into the GART. */ #include "dev/drm/drmP.h" -static void drm_sg_alloc_cb(void *arg, bus_dma_segment_t *segs, - int nsegs, int error); - int drm_sg_alloc(struct drm_device *dev, struct drm_scatter_gather *request) { struct drm_sg_mem *entry; - struct drm_dma_handle *dmah; - int ret; + vm_size_t size; + vm_pindex_t pindex; if (dev->sg) return EINVAL; - entry = malloc(sizeof(*entry), DRM_MEM_SGLISTS, M_WAITOK | M_ZERO); - entry->pages = round_page(request->size) / PAGE_SIZE; - DRM_DEBUG("sg size=%ld pages=%d\n", request->size, entry->pages); + DRM_DEBUG("request size=%ld\n", request->size); + entry = malloc(sizeof(*entry), DRM_MEM_DRIVER, M_WAITOK | M_ZERO); + + size = round_page(request->size); + entry->pages = OFF_TO_IDX(size); entry->busaddr = malloc(entry->pages * sizeof(*entry->busaddr), - DRM_MEM_PAGES, M_WAITOK | M_ZERO); - dmah = malloc(sizeof(struct drm_dma_handle), DRM_MEM_DMA, - M_WAITOK | M_ZERO); - entry->dmah = dmah; + DRM_MEM_SGLISTS, M_WAITOK | M_ZERO); - ret = bus_dma_tag_create(NULL, PAGE_SIZE, 0, /* tag, align, boundary */ - BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, /* lowaddr, highaddr */ - NULL, NULL, /* filtfunc, filtfuncargs */ - request->size, entry->pages, /* maxsize, nsegs */ - PAGE_SIZE, 0, /* maxsegsize, flags */ - NULL, NULL, /* lockfunc, lockfuncargs */ - &dmah->tag); - if (ret != 0) { + entry->vaddr = kmem_alloc_attr(kernel_map, size, M_WAITOK | M_ZERO, + 0, BUS_SPACE_MAXADDR_32BIT, VM_MEMATTR_WRITE_COMBINING); + if (entry->vaddr == 0) { drm_sg_cleanup(entry); - return ENOMEM; + return (ENOMEM); } - ret = bus_dmamem_alloc(dmah->tag, &dmah->vaddr, - BUS_DMA_WAITOK | BUS_DMA_ZERO | BUS_DMA_NOCACHE, &dmah->map); - if (ret != 0) { - drm_sg_cleanup(entry); - return ENOMEM; - } - - entry->handle = (unsigned long)dmah->vaddr; - entry->virtual = dmah->vaddr; - - ret = bus_dmamap_load(dmah->tag, dmah->map, 
dmah->vaddr, - request->size, drm_sg_alloc_cb, entry, BUS_DMA_NOWAIT); - if (ret != 0) { - drm_sg_cleanup(entry); - return ENOMEM; + for(pindex = 0; pindex < entry->pages; pindex++) { + entry->busaddr[pindex] = + vtophys(entry->vaddr + IDX_TO_OFF(pindex)); } DRM_LOCK(); if (dev->sg) { DRM_UNLOCK(); drm_sg_cleanup(entry); - return EINVAL; + return (EINVAL); } dev->sg = entry; DRM_UNLOCK(); - DRM_DEBUG("handle=%08lx, kva=%p, contents=%08lx\n", entry->handle, - entry->virtual, *(unsigned long *)entry->virtual); + request->handle = entry->vaddr; - request->handle = entry->handle; + DRM_DEBUG("allocated %ju pages @ 0x%08zx, contents=%08lx\n", + entry->pages, entry->vaddr, *(unsigned long *)entry->vaddr); - return 0; -} - -static void -drm_sg_alloc_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error) -{ - struct drm_sg_mem *entry = arg; - int i; - - if (error != 0) - return; - - for(i = 0 ; i < nsegs ; i++) { - entry->busaddr[i] = segs[i].ds_addr; - DRM_DEBUG("segment %d @ 0x%016lx\n", i, - (unsigned long)segs[i].ds_addr); - } + return (0); } int @@ -132,23 +89,22 @@ drm_sg_alloc_ioctl(struct drm_device *dev, void *data, DRM_DEBUG("\n"); - return drm_sg_alloc(dev, request); + return (drm_sg_alloc(dev, request)); } void drm_sg_cleanup(struct drm_sg_mem *entry) { - struct drm_dma_handle *dmah = entry->dmah; + if (entry == NULL) + return; - if (dmah->map != NULL) - bus_dmamap_unload(dmah->tag, dmah->map); - if (dmah->vaddr != NULL) - bus_dmamem_free(dmah->tag, dmah->vaddr, dmah->map); - if (dmah->tag != NULL) - bus_dma_tag_destroy(dmah->tag); - free(dmah, DRM_MEM_DMA); - free(entry->busaddr, DRM_MEM_PAGES); - free(entry, DRM_MEM_SGLISTS); + if (entry->vaddr != 0) + kmem_free(kernel_map, entry->vaddr, IDX_TO_OFF(entry->pages)); + + free(entry->busaddr, DRM_MEM_SGLISTS); + free(entry, DRM_MEM_DRIVER); + + return; } int @@ -162,12 +118,12 @@ drm_sg_free(struct drm_device *dev, void *data, struct drm_file *file_priv) dev->sg = NULL; DRM_UNLOCK(); - if (!entry || 
entry->handle != request->handle) - return EINVAL; + if (!entry || entry->vaddr != request->handle) + return (EINVAL); - DRM_DEBUG("sg free virtual = 0x%lx\n", entry->handle); + DRM_DEBUG("free 0x%zx\n", entry->vaddr); drm_sg_cleanup(entry); - return 0; + return (0); } diff --git a/sys/dev/drm/drm_sman.c b/sys/dev/drm/drm_sman.c index 9f1132ce2b2..32f9eb9e465 100644 --- a/sys/dev/drm/drm_sman.c +++ b/sys/dev/drm/drm_sman.c @@ -96,7 +96,8 @@ static void *drm_sman_mm_allocate(void *private, unsigned long size, if (!tmp) { return NULL; } - tmp = drm_mm_get_block(tmp, size, alignment); + /* This could be non-atomic, but we are called from a locked path */ + tmp = drm_mm_get_block_atomic(tmp, size, alignment); return tmp; } @@ -131,7 +132,7 @@ drm_sman_set_range(struct drm_sman * sman, unsigned int manager, KASSERT(manager < sman->num_managers, ("Invalid manager")); sman_mm = &sman->mm[manager]; - mm = malloc(sizeof(*mm), DRM_MEM_MM, M_WAITOK | M_ZERO); + mm = malloc(sizeof(*mm), DRM_MEM_MM, M_NOWAIT | M_ZERO); if (!mm) { return -ENOMEM; } @@ -174,7 +175,7 @@ static struct drm_owner_item *drm_sman_get_owner_item(struct drm_sman * sman, owner_hash); } - owner_item = malloc(sizeof(*owner_item), DRM_MEM_MM, M_WAITOK | M_ZERO); + owner_item = malloc(sizeof(*owner_item), DRM_MEM_MM, M_NOWAIT | M_ZERO); if (!owner_item) goto out; @@ -206,12 +207,11 @@ struct drm_memblock_item *drm_sman_alloc(struct drm_sman *sman, unsigned int man sman_mm = &sman->mm[manager]; tmp = sman_mm->allocate(sman_mm->private, size, alignment); - if (!tmp) { return NULL; } - memblock = malloc(sizeof(*memblock), DRM_MEM_MM, M_WAITOK | M_ZERO); + memblock = malloc(sizeof(*memblock), DRM_MEM_MM, M_NOWAIT | M_ZERO); DRM_DEBUG("allocated mem_block %p\n", memblock); if (!memblock) goto out; diff --git a/sys/dev/drm/drm_sysctl.c b/sys/dev/drm/drm_sysctl.c index cc332833fd6..4d9b0e8f0e0 100644 --- a/sys/dev/drm/drm_sysctl.c +++ b/sys/dev/drm/drm_sysctl.c @@ -188,7 +188,7 @@ static int drm_vm_info 
DRM_SYSCTL_HANDLER_ARGS DRM_UNLOCK(); DRM_SYSCTL_PRINT("\nslot offset size " - "type flags address mtrr\n"); + "type flags address handle mtrr\n"); for (i = 0; i < mapcount; i++) { map = &tempmaps[i]; @@ -204,9 +204,11 @@ static int drm_vm_info DRM_SYSCTL_HANDLER_ARGS yesno = "yes"; DRM_SYSCTL_PRINT( - "%4d 0x%016lx 0x%08lx %4.4s 0x%02x 0x%016lx %s\n", i, - map->offset, map->size, type, map->flags, - (unsigned long)map->handle, yesno); + "%4d 0x%016lx 0x%08lx %4.4s 0x%02x 0x%016lx %6d %s\n", + i, map->offset, map->size, type, map->flags, + (unsigned long)map->virtual, + (unsigned int)((unsigned long)map->handle >> + DRM_MAP_HANDLE_SHIFT), yesno); } SYSCTL_OUT(req, "", 1); diff --git a/sys/dev/drm/drm_vm.c b/sys/dev/drm/drm_vm.c index 8f260fd9b10..798685607af 100644 --- a/sys/dev/drm/drm_vm.c +++ b/sys/dev/drm/drm_vm.c @@ -54,6 +54,7 @@ int drm_mmap(struct cdev *kdev, vm_ooffset_t offset, vm_paddr_t *paddr, if (file_priv && !file_priv->authenticated) return EACCES; + DRM_DEBUG("called with offset %016jx\n", offset); if (dev->dma && offset < ptoa(dev->dma->page_count)) { drm_device_dma_t *dma = dev->dma; @@ -72,31 +73,31 @@ int drm_mmap(struct cdev *kdev, vm_ooffset_t offset, vm_paddr_t *paddr, } } - /* A sequential search of a linked list is - fine here because: 1) there will only be - about 5-10 entries in the list and, 2) a - DRI client only has to do this mapping - once, so it doesn't have to be optimized - for performance, even if the list was a - bit longer. */ + /* A sequential search of a linked list is + fine here because: 1) there will only be + about 5-10 entries in the list and, 2) a + DRI client only has to do this mapping + once, so it doesn't have to be optimized + for performance, even if the list was a + bit longer. 
+ */ DRM_LOCK(); TAILQ_FOREACH(map, &dev->maplist, link) { - if (offset >= map->offset && offset < map->offset + map->size) + if (offset >> DRM_MAP_HANDLE_SHIFT == + (unsigned long)map->handle >> DRM_MAP_HANDLE_SHIFT) break; } if (map == NULL) { - DRM_DEBUG("Can't find map, requested offset = %016lx\n", - (unsigned long)offset); + DRM_DEBUG("Can't find map, request offset = %016jx\n", offset); TAILQ_FOREACH(map, &dev->maplist, link) { DRM_DEBUG("map offset = %016lx, handle = %016lx\n", - (unsigned long)map->offset, - (unsigned long)map->handle); + map->offset, (unsigned long)map->handle); } DRM_UNLOCK(); return -1; } - if (((map->flags&_DRM_RESTRICTED) && !DRM_SUSER(DRM_CURPROC))) { + if (((map->flags & _DRM_RESTRICTED) && !DRM_SUSER(DRM_CURPROC))) { DRM_UNLOCK(); DRM_DEBUG("restricted map\n"); return -1; @@ -104,18 +105,22 @@ int drm_mmap(struct cdev *kdev, vm_ooffset_t offset, vm_paddr_t *paddr, type = map->type; DRM_UNLOCK(); + offset = offset & ((1ULL << DRM_MAP_HANDLE_SHIFT) - 1); + switch (type) { case _DRM_FRAME_BUFFER: - case _DRM_REGISTERS: case _DRM_AGP: - phys = offset; - break; - case _DRM_CONSISTENT: - phys = vtophys((char *)map->handle + (offset - map->offset)); + *memattr = VM_MEMATTR_WRITE_COMBINING; + /* FALLTHROUGH */ + case _DRM_REGISTERS: + phys = map->offset + offset; break; case _DRM_SCATTER_GATHER: + *memattr = VM_MEMATTR_WRITE_COMBINING; + /* FALLTHROUGH */ + case _DRM_CONSISTENT: case _DRM_SHM: - phys = vtophys(offset); + phys = vtophys((char *)map->virtual + offset); break; default: DRM_ERROR("bad map type %d\n", type); diff --git a/sys/dev/drm/i915_dma.c b/sys/dev/drm/i915_dma.c index 386c058b351..7f8ddc194ed 100644 --- a/sys/dev/drm/i915_dma.c +++ b/sys/dev/drm/i915_dma.c @@ -151,7 +151,7 @@ static int i915_dma_cleanup(struct drm_device * dev) if (dev_priv->ring.virtual_start) { drm_core_ioremapfree(&dev_priv->ring.map, dev); dev_priv->ring.virtual_start = NULL; - dev_priv->ring.map.handle = NULL; + dev_priv->ring.map.virtual = NULL; 
dev_priv->ring.map.size = 0; } @@ -174,7 +174,7 @@ static int i915_initialize(struct drm_device * dev, drm_i915_init_t * init) } dev_priv->sarea_priv = (drm_i915_sarea_t *) - ((u8 *) dev_priv->sarea->handle + init->sarea_priv_offset); + ((u8 *) dev_priv->sarea->virtual + init->sarea_priv_offset); if (init->ring_size != 0) { if (dev_priv->ring.ring_obj != NULL) { @@ -195,7 +195,7 @@ static int i915_initialize(struct drm_device * dev, drm_i915_init_t * init) drm_core_ioremap_wc(&dev_priv->ring.map, dev); - if (dev_priv->ring.map.handle == NULL) { + if (dev_priv->ring.map.virtual == NULL) { i915_dma_cleanup(dev); DRM_ERROR("can not ioremap virtual address for" " ring buffer\n"); @@ -203,7 +203,7 @@ static int i915_initialize(struct drm_device * dev, drm_i915_init_t * init) } } - dev_priv->ring.virtual_start = dev_priv->ring.map.handle; + dev_priv->ring.virtual_start = dev_priv->ring.map.virtual; dev_priv->cpp = init->cpp; dev_priv->back_offset = init->back_offset; @@ -229,7 +229,7 @@ static int i915_dma_resume(struct drm_device * dev) return -EINVAL; } - if (dev_priv->ring.map.handle == NULL) { + if (dev_priv->ring.map.virtual == NULL) { DRM_ERROR("can not ioremap virtual address for" " ring buffer\n"); return -ENOMEM; @@ -823,14 +823,14 @@ static int i915_set_status_page(struct drm_device *dev, void *data, dev_priv->hws_map.mtrr = 0; drm_core_ioremap_wc(&dev_priv->hws_map, dev); - if (dev_priv->hws_map.handle == NULL) { + if (dev_priv->hws_map.virtual == NULL) { i915_dma_cleanup(dev); dev_priv->status_gfx_addr = 0; DRM_ERROR("can not ioremap virtual address for" " G33 hw status page\n"); return -ENOMEM; } - dev_priv->hw_status_page = dev_priv->hws_map.handle; + dev_priv->hw_status_page = dev_priv->hws_map.virtual; memset(dev_priv->hw_status_page, 0, PAGE_SIZE); I915_WRITE(HWS_PGA, dev_priv->status_gfx_addr); diff --git a/sys/dev/drm/mach64_dma.c b/sys/dev/drm/mach64_dma.c index 6068c85b3fe..9aa0faa1981 100644 --- a/sys/dev/drm/mach64_dma.c +++ 
b/sys/dev/drm/mach64_dma.c @@ -1078,11 +1078,11 @@ static int mach64_do_dma_init(struct drm_device * dev, drm_mach64_init_t * init) } dev_priv->sarea_priv = (drm_mach64_sarea_t *) - ((u8 *) dev_priv->sarea->handle + init->sarea_priv_offset); + ((u8 *) dev_priv->sarea->virtual + init->sarea_priv_offset); if (!dev_priv->is_pci) { drm_core_ioremap(dev_priv->ring_map, dev); - if (!dev_priv->ring_map->handle) { + if (!dev_priv->ring_map->virtual) { DRM_ERROR("can not ioremap virtual address for" " descriptor ring\n"); dev->dev_private = (void *)dev_priv; @@ -1103,7 +1103,7 @@ static int mach64_do_dma_init(struct drm_device * dev, drm_mach64_init_t * init) dev_priv->dev_buffers = dev->agp_buffer_map; drm_core_ioremap(dev->agp_buffer_map, dev); - if (!dev->agp_buffer_map->handle) { + if (!dev->agp_buffer_map->virtual) { DRM_ERROR("can not ioremap virtual address for" " dma buffer\n"); dev->dev_private = (void *)dev_priv; @@ -1147,7 +1147,7 @@ static int mach64_do_dma_init(struct drm_device * dev, drm_mach64_init_t * init) } dev_priv->ring.size = 0x4000; /* 16KB */ - dev_priv->ring.start = dev_priv->ring_map->handle; + dev_priv->ring.start = dev_priv->ring_map->virtual; dev_priv->ring.start_addr = (u32) dev_priv->ring_map->offset; memset(dev_priv->ring.start, 0, dev_priv->ring.size); diff --git a/sys/dev/drm/mga_dma.c b/sys/dev/drm/mga_dma.c index e081e2c72c3..71775b65b19 100644 --- a/sys/dev/drm/mga_dma.c +++ b/sys/dev/drm/mga_dma.c @@ -585,11 +585,11 @@ static int mga_do_agp_dma_bootstrap(struct drm_device *dev, drm_core_ioremap(dev_priv->primary, dev); drm_core_ioremap(dev->agp_buffer_map, dev); - if (!dev_priv->warp->handle || - !dev_priv->primary->handle || !dev->agp_buffer_map->handle) { + if (!dev_priv->warp->virtual || + !dev_priv->primary->virtual || !dev->agp_buffer_map->virtual) { DRM_ERROR("failed to ioremap agp regions! 
(%p, %p, %p)\n", - dev_priv->warp->handle, dev_priv->primary->handle, - dev->agp_buffer_map->handle); + dev_priv->warp->virtual, dev_priv->primary->virtual, + dev->agp_buffer_map->virtual); return -ENOMEM; } @@ -878,14 +878,14 @@ static int mga_do_init_dma(struct drm_device * dev, drm_mga_init_t * init) } dev_priv->sarea_priv = - (drm_mga_sarea_t *) ((u8 *) dev_priv->sarea->handle + + (drm_mga_sarea_t *) ((u8 *) dev_priv->sarea->virtual + init->sarea_priv_offset); - if (!dev_priv->warp->handle || - !dev_priv->primary->handle || + if (!dev_priv->warp->virtual || + !dev_priv->primary->virtual || ((dev_priv->dma_access != 0) && ((dev->agp_buffer_map == NULL) || - (dev->agp_buffer_map->handle == NULL)))) { + (dev->agp_buffer_map->virtual == NULL)))) { DRM_ERROR("failed to ioremap agp regions!\n"); return -ENOMEM; } @@ -902,7 +902,7 @@ static int mga_do_init_dma(struct drm_device * dev, drm_mga_init_t * init) return ret; } - dev_priv->prim.status = (u32 *) dev_priv->status->handle; + dev_priv->prim.status = (u32 *) dev_priv->status->virtual; mga_do_wait_for_idle(dev_priv); @@ -910,8 +910,8 @@ static int mga_do_init_dma(struct drm_device * dev, drm_mga_init_t * init) */ MGA_WRITE(MGA_PRIMADDRESS, dev_priv->primary->offset | MGA_DMA_GENERAL); - dev_priv->prim.start = (u8 *) dev_priv->primary->handle; - dev_priv->prim.end = ((u8 *) dev_priv->primary->handle + dev_priv->prim.start = (u8 *) dev_priv->primary->virtual; + dev_priv->prim.end = ((u8 *) dev_priv->primary->virtual + dev_priv->primary->size); dev_priv->prim.size = dev_priv->primary->size; diff --git a/sys/dev/drm/mga_warp.c b/sys/dev/drm/mga_warp.c index dd3e7346931..98c1615f509 100644 --- a/sys/dev/drm/mga_warp.c +++ b/sys/dev/drm/mga_warp.c @@ -96,7 +96,7 @@ unsigned int mga_warp_microcode_size(const drm_mga_private_t * dev_priv) static int mga_warp_install_g400_microcode(drm_mga_private_t * dev_priv) { - unsigned char *vcbase = dev_priv->warp->handle; + unsigned char *vcbase = dev_priv->warp->virtual; unsigned 
long pcbase = dev_priv->warp->offset; memset(dev_priv->warp_pipe_phys, 0, sizeof(dev_priv->warp_pipe_phys)); @@ -124,7 +124,7 @@ static int mga_warp_install_g400_microcode(drm_mga_private_t * dev_priv) static int mga_warp_install_g200_microcode(drm_mga_private_t * dev_priv) { - unsigned char *vcbase = dev_priv->warp->handle; + unsigned char *vcbase = dev_priv->warp->virtual; unsigned long pcbase = dev_priv->warp->offset; memset(dev_priv->warp_pipe_phys, 0, sizeof(dev_priv->warp_pipe_phys)); diff --git a/sys/dev/drm/r128_cce.c b/sys/dev/drm/r128_cce.c index c799bd1d804..2bda4a58fd3 100644 --- a/sys/dev/drm/r128_cce.c +++ b/sys/dev/drm/r128_cce.c @@ -327,8 +327,7 @@ static void r128_cce_init_ring_buffer(struct drm_device * dev, ring_start = dev_priv->cce_ring->offset - dev->agp->base; else #endif - ring_start = dev_priv->cce_ring->offset - - (unsigned long)dev->sg->virtual; + ring_start = dev_priv->cce_ring->offset - dev->sg->vaddr; R128_WRITE(R128_PM4_BUFFER_OFFSET, ring_start | R128_AGP_OFFSET); @@ -509,7 +508,7 @@ static int r128_do_init_cce(struct drm_device * dev, drm_r128_init_t * init) } dev_priv->sarea_priv = - (drm_r128_sarea_t *) ((u8 *) dev_priv->sarea->handle + + (drm_r128_sarea_t *) ((u8 *) dev_priv->sarea->virtual + init->sarea_priv_offset); #if __OS_HAS_AGP @@ -517,9 +516,9 @@ static int r128_do_init_cce(struct drm_device * dev, drm_r128_init_t * init) drm_core_ioremap(dev_priv->cce_ring, dev); drm_core_ioremap(dev_priv->ring_rptr, dev); drm_core_ioremap(dev->agp_buffer_map, dev); - if (!dev_priv->cce_ring->handle || - !dev_priv->ring_rptr->handle || - !dev->agp_buffer_map->handle) { + if (!dev_priv->cce_ring->virtual || + !dev_priv->ring_rptr->virtual || + !dev->agp_buffer_map->virtual) { DRM_ERROR("Could not ioremap agp regions!\n"); dev->dev_private = (void *)dev_priv; r128_do_cleanup_cce(dev); @@ -528,10 +527,11 @@ static int r128_do_init_cce(struct drm_device * dev, drm_r128_init_t * init) } else #endif { - dev_priv->cce_ring->handle = (void 
*)dev_priv->cce_ring->offset; - dev_priv->ring_rptr->handle = + dev_priv->cce_ring->virtual = + (void *)dev_priv->cce_ring->offset; + dev_priv->ring_rptr->virtual = (void *)dev_priv->ring_rptr->offset; - dev->agp_buffer_map->handle = + dev->agp_buffer_map->virtual = (void *)dev->agp_buffer_map->offset; } @@ -540,10 +540,10 @@ static int r128_do_init_cce(struct drm_device * dev, drm_r128_init_t * init) dev_priv->cce_buffers_offset = dev->agp->base; else #endif - dev_priv->cce_buffers_offset = (unsigned long)dev->sg->virtual; + dev_priv->cce_buffers_offset = dev->sg->vaddr; - dev_priv->ring.start = (u32 *) dev_priv->cce_ring->handle; - dev_priv->ring.end = ((u32 *) dev_priv->cce_ring->handle + dev_priv->ring.start = (u32 *) dev_priv->cce_ring->virtual; + dev_priv->ring.end = ((u32 *) dev_priv->cce_ring->virtual + init->ring_size / sizeof(u32)); dev_priv->ring.size = init->ring_size; dev_priv->ring.size_l2qw = drm_order(init->ring_size / 8); diff --git a/sys/dev/drm/r128_state.c b/sys/dev/drm/r128_state.c index 6aef11cdb79..9632dec2a68 100644 --- a/sys/dev/drm/r128_state.c +++ b/sys/dev/drm/r128_state.c @@ -657,7 +657,7 @@ static void r128_cce_dispatch_indirect(struct drm_device * dev, */ if (dwords & 1) { u32 *data = (u32 *) - ((char *)dev->agp_buffer_map->handle + ((char *)dev->agp_buffer_map->virtual + buf->offset + start); data[dwords++] = cpu_to_le32(R128_CCE_PACKET2); } @@ -722,7 +722,7 @@ static void r128_cce_dispatch_indices(struct drm_device * dev, dwords = (end - start + 3) / sizeof(u32); - data = (u32 *) ((char *)dev->agp_buffer_map->handle + data = (u32 *) ((char *)dev->agp_buffer_map->virtual + buf->offset + start); data[0] = cpu_to_le32(CCE_PACKET3(R128_3D_RNDR_GEN_INDX_PRIM, diff --git a/sys/dev/drm/r600_blit.c b/sys/dev/drm/r600_blit.c index a26f2c445b9..d3c41ae9fc5 100644 --- a/sys/dev/drm/r600_blit.c +++ b/sys/dev/drm/r600_blit.c @@ -1290,8 +1290,8 @@ set_shaders(struct drm_device *dev) DRM_DEBUG("\n"); /* load shaders */ - vs = (u32 *) ((char 
*)dev->agp_buffer_map->handle + dev_priv->blit_vb->offset); - ps = (u32 *) ((char *)dev->agp_buffer_map->handle + dev_priv->blit_vb->offset + 256); + vs = (u32 *) ((char *)dev->agp_buffer_map->virtual + dev_priv->blit_vb->offset); + ps = (u32 *) ((char *)dev->agp_buffer_map->virtual + dev_priv->blit_vb->offset + 256); shader_size = sizeof(r6xx_vs) / 4; for (i= 0; i < shader_size; i++) @@ -1718,11 +1718,10 @@ r600_blit_copy(struct drm_device *dev, u64 vb_addr; u32 *vb; - vb = (u32 *) ((char *)dev->agp_buffer_map->handle + + vb = (u32 *) ((char *)dev->agp_buffer_map->virtual + dev_priv->blit_vb->offset + dev_priv->blit_vb->used); - DRM_DEBUG("src=0x%016llx, dst=0x%016llx, size=%d\n", - (unsigned long long)src_gpu_addr, - (unsigned long long)dst_gpu_addr, size_bytes); + DRM_DEBUG("src=0x%016jx, dst=0x%016jx, size=%d\n", + src_gpu_addr, dst_gpu_addr, size_bytes); if ((size_bytes & 3) || (src_gpu_addr & 3) || (dst_gpu_addr & 3)) { max_bytes = 8192; @@ -1759,7 +1758,7 @@ r600_blit_copy(struct drm_device *dev, if (!dev_priv->blit_vb) return; set_shaders(dev); - vb = (u32 *) ((char *)dev->agp_buffer_map->handle + + vb = (u32 *) ((char *)dev->agp_buffer_map->virtual + dev_priv->blit_vb->offset + dev_priv->blit_vb->used); } @@ -1849,7 +1848,7 @@ r600_blit_copy(struct drm_device *dev, if (!dev_priv->blit_vb) return; set_shaders(dev); - vb = (u32 *) ((char *)dev->agp_buffer_map->handle + + vb = (u32 *) ((char *)dev->agp_buffer_map->virtual + dev_priv->blit_vb->offset + dev_priv->blit_vb->used); } @@ -1928,7 +1927,7 @@ r600_blit_swap(struct drm_device *dev, return; set_shaders(dev); } - vb = (u32 *) ((char *)dev->agp_buffer_map->handle + + vb = (u32 *) ((char *)dev->agp_buffer_map->virtual + dev_priv->blit_vb->offset + dev_priv->blit_vb->used); sx2 = sx + w; diff --git a/sys/dev/drm/r600_cp.c b/sys/dev/drm/r600_cp.c index 2a4a6bd873c..c2b8770acdd 100644 --- a/sys/dev/drm/r600_cp.c +++ b/sys/dev/drm/r600_cp.c @@ -180,7 +180,7 @@ int r600_page_table_init(struct drm_device *dev) 
entry_addr = entry->busaddr[i]; for (j = 0; j < (PAGE_SIZE / ATI_PCIGART_PAGE_SIZE); j++) { page_base = (u64) entry_addr & ATI_PCIGART_PAGE_MASK; - page_base |= R600_PTE_VALID | R600_PTE_SYSTEM | R600_PTE_SNOOPED; + page_base |= R600_PTE_VALID | R600_PTE_SYSTEM; page_base |= R600_PTE_READABLE | R600_PTE_WRITEABLE; *pci_gart = page_base; @@ -1670,9 +1670,8 @@ static void r600_cp_init_ring_buffer(struct drm_device *dev, } else #endif { - rptr_addr = dev_priv->ring_rptr->offset - - ((unsigned long) dev->sg->virtual) - + dev_priv->gart_vm_start; + rptr_addr = dev_priv->ring_rptr->offset - dev->sg->vaddr + + dev_priv->gart_vm_start; } RADEON_WRITE(R600_CP_RB_RPTR_ADDR, rptr_addr & 0xffffffff); @@ -1706,9 +1705,8 @@ static void r600_cp_init_ring_buffer(struct drm_device *dev, + dev_priv->gart_vm_start); } else #endif - ring_start = (dev_priv->cp_ring->offset - - (unsigned long)dev->sg->virtual - + dev_priv->gart_vm_start); + ring_start = dev_priv->cp_ring->offset - dev->sg->vaddr + + dev_priv->gart_vm_start; RADEON_WRITE(R600_CP_RB_BASE, ring_start >> 8); @@ -1914,7 +1912,7 @@ int r600_do_init_cp(struct drm_device *dev, drm_radeon_init_t *init, } dev_priv->sarea_priv = - (drm_radeon_sarea_t *) ((u8 *) dev_priv->sarea->handle + + (drm_radeon_sarea_t *) ((u8 *) dev_priv->sarea->virtual + init->sarea_priv_offset); #if __OS_HAS_AGP @@ -1923,9 +1921,9 @@ int r600_do_init_cp(struct drm_device *dev, drm_radeon_init_t *init, drm_core_ioremap_wc(dev_priv->cp_ring, dev); drm_core_ioremap_wc(dev_priv->ring_rptr, dev); drm_core_ioremap_wc(dev->agp_buffer_map, dev); - if (!dev_priv->cp_ring->handle || - !dev_priv->ring_rptr->handle || - !dev->agp_buffer_map->handle) { + if (!dev_priv->cp_ring->virtual || + !dev_priv->ring_rptr->virtual || + !dev->agp_buffer_map->virtual) { DRM_ERROR("could not find ioremap agp regions!\n"); r600_do_cleanup_cp(dev); return -EINVAL; @@ -1933,18 +1931,19 @@ int r600_do_init_cp(struct drm_device *dev, drm_radeon_init_t *init, } else #endif { - 
dev_priv->cp_ring->handle = (void *)dev_priv->cp_ring->offset; - dev_priv->ring_rptr->handle = + dev_priv->cp_ring->virtual = + (void *)dev_priv->cp_ring->offset; + dev_priv->ring_rptr->virtual = (void *)dev_priv->ring_rptr->offset; - dev->agp_buffer_map->handle = + dev->agp_buffer_map->virtual = (void *)dev->agp_buffer_map->offset; - DRM_DEBUG("dev_priv->cp_ring->handle %p\n", - dev_priv->cp_ring->handle); - DRM_DEBUG("dev_priv->ring_rptr->handle %p\n", - dev_priv->ring_rptr->handle); - DRM_DEBUG("dev->agp_buffer_map->handle %p\n", - dev->agp_buffer_map->handle); + DRM_DEBUG("dev_priv->cp_ring->virtual %p\n", + dev_priv->cp_ring->virtual); + DRM_DEBUG("dev_priv->ring_rptr->virtual %p\n", + dev_priv->ring_rptr->virtual); + DRM_DEBUG("dev->agp_buffer_map->virtual %p\n", + dev->agp_buffer_map->virtual); } dev_priv->fb_location = (radeon_read_fb_location(dev_priv) & 0xffff) << 24; @@ -2011,9 +2010,8 @@ int r600_do_init_cp(struct drm_device *dev, drm_radeon_init_t *init, + dev_priv->gart_vm_start); else #endif - dev_priv->gart_buffers_offset = (dev->agp_buffer_map->offset - - (unsigned long)dev->sg->virtual - + dev_priv->gart_vm_start); + dev_priv->gart_buffers_offset = dev->agp_buffer_map->offset - + dev->sg->vaddr + dev_priv->gart_vm_start; DRM_DEBUG("fb 0x%08x size %d\n", (unsigned int) dev_priv->fb_location, @@ -2024,8 +2022,8 @@ int r600_do_init_cp(struct drm_device *dev, drm_radeon_init_t *init, DRM_DEBUG("dev_priv->gart_buffers_offset 0x%08lx\n", dev_priv->gart_buffers_offset); - dev_priv->ring.start = (u32 *) dev_priv->cp_ring->handle; - dev_priv->ring.end = ((u32 *) dev_priv->cp_ring->handle + dev_priv->ring.start = (u32 *) dev_priv->cp_ring->virtual; + dev_priv->ring.end = ((u32 *) dev_priv->cp_ring->virtual + init->ring_size / sizeof(u32)); dev_priv->ring.size = init->ring_size; dev_priv->ring.size_l2qw = drm_order(init->ring_size / 8); @@ -2064,14 +2062,14 @@ int r600_do_init_cp(struct drm_device *dev, drm_radeon_init_t *init, 
dev_priv->gart_info.table_size; drm_core_ioremap_wc(&dev_priv->gart_info.mapping, dev); - if (!dev_priv->gart_info.mapping.handle) { + if (!dev_priv->gart_info.mapping.virtual) { DRM_ERROR("ioremap failed.\n"); r600_do_cleanup_cp(dev); return -EINVAL; } dev_priv->gart_info.addr = - dev_priv->gart_info.mapping.handle; + dev_priv->gart_info.mapping.virtual; DRM_DEBUG("Setting phys_pci_gart to %p %08lX\n", dev_priv->gart_info.addr, @@ -2219,7 +2217,7 @@ int r600_cp_dispatch_indirect(struct drm_device *dev, */ while (dwords & 0xf) { u32 *data = (u32 *) - ((char *)dev->agp_buffer_map->handle + ((char *)dev->agp_buffer_map->virtual + buf->offset + start); data[dwords++] = RADEON_CP_PACKET2; } @@ -2343,7 +2341,8 @@ int r600_cp_dispatch_texture(struct drm_device * dev, /* Dispatch the indirect buffer. */ buffer = - (u32 *) ((char *)dev->agp_buffer_map->handle + buf->offset); + (u32 *) ((char *)dev->agp_buffer_map->virtual + + buf->offset); if (DRM_COPY_FROM_USER(buffer, data, pass_size)) { DRM_ERROR("EFAULT on pad, %d bytes\n", pass_size); diff --git a/sys/dev/drm/radeon_cp.c b/sys/dev/drm/radeon_cp.c index 2e85f115a6d..0a486afa4bd 100644 --- a/sys/dev/drm/radeon_cp.c +++ b/sys/dev/drm/radeon_cp.c @@ -53,7 +53,7 @@ u32 radeon_read_ring_rptr(drm_radeon_private_t *dev_priv, u32 off) val = DRM_READ32(dev_priv->ring_rptr, off); } else { val = *(((volatile u32 *) - dev_priv->ring_rptr->handle) + + dev_priv->ring_rptr->virtual) + (off / sizeof(u32))); val = le32_to_cpu(val); } @@ -77,7 +77,7 @@ void radeon_write_ring_rptr(drm_radeon_private_t *dev_priv, u32 off, u32 val) if (dev_priv->flags & RADEON_IS_AGP) DRM_WRITE32(dev_priv->ring_rptr, off, val); else - *(((volatile u32 *) dev_priv->ring_rptr->handle) + + *(((volatile u32 *) dev_priv->ring_rptr->virtual) + (off / sizeof(u32))) = cpu_to_le32(val); } @@ -720,9 +720,8 @@ static void radeon_cp_init_ring_buffer(struct drm_device * dev, + dev_priv->gart_vm_start); } else #endif - ring_start = (dev_priv->cp_ring->offset - - 
(unsigned long)dev->sg->virtual - + dev_priv->gart_vm_start); + ring_start = (dev_priv->cp_ring->offset - dev->sg->vaddr + + dev_priv->gart_vm_start); RADEON_WRITE(RADEON_CP_RB_BASE, ring_start); @@ -744,9 +743,8 @@ static void radeon_cp_init_ring_buffer(struct drm_device * dev, #endif { RADEON_WRITE(RADEON_CP_RB_RPTR_ADDR, - dev_priv->ring_rptr->offset - - ((unsigned long) dev->sg->virtual) - + dev_priv->gart_vm_start); + dev_priv->ring_rptr->offset - dev->sg->vaddr + + dev_priv->gart_vm_start); } /* Set ring buffer size */ @@ -1278,7 +1276,7 @@ static int radeon_do_init_cp(struct drm_device *dev, drm_radeon_init_t *init, } dev_priv->sarea_priv = - (drm_radeon_sarea_t *) ((u8 *) dev_priv->sarea->handle + + (drm_radeon_sarea_t *) ((u8 *) dev_priv->sarea->virtual + init->sarea_priv_offset); #if __OS_HAS_AGP @@ -1286,9 +1284,9 @@ static int radeon_do_init_cp(struct drm_device *dev, drm_radeon_init_t *init, drm_core_ioremap_wc(dev_priv->cp_ring, dev); drm_core_ioremap_wc(dev_priv->ring_rptr, dev); drm_core_ioremap_wc(dev->agp_buffer_map, dev); - if (!dev_priv->cp_ring->handle || - !dev_priv->ring_rptr->handle || - !dev->agp_buffer_map->handle) { + if (!dev_priv->cp_ring->virtual || + !dev_priv->ring_rptr->virtual || + !dev->agp_buffer_map->virtual) { DRM_ERROR("could not find ioremap agp regions!\n"); radeon_do_cleanup_cp(dev); return -EINVAL; @@ -1296,19 +1294,19 @@ static int radeon_do_init_cp(struct drm_device *dev, drm_radeon_init_t *init, } else #endif { - dev_priv->cp_ring->handle = + dev_priv->cp_ring->virtual = (void *)(unsigned long)dev_priv->cp_ring->offset; - dev_priv->ring_rptr->handle = + dev_priv->ring_rptr->virtual = (void *)(unsigned long)dev_priv->ring_rptr->offset; - dev->agp_buffer_map->handle = + dev->agp_buffer_map->virtual = (void *)(unsigned long)dev->agp_buffer_map->offset; - DRM_DEBUG("dev_priv->cp_ring->handle %p\n", - dev_priv->cp_ring->handle); - DRM_DEBUG("dev_priv->ring_rptr->handle %p\n", - dev_priv->ring_rptr->handle); - 
DRM_DEBUG("dev->agp_buffer_map->handle %p\n", - dev->agp_buffer_map->handle); + DRM_DEBUG("dev_priv->cp_ring->virtual %p\n", + dev_priv->cp_ring->virtual); + DRM_DEBUG("dev_priv->ring_rptr->virtual %p\n", + dev_priv->ring_rptr->virtual); + DRM_DEBUG("dev->agp_buffer_map->virtual %p\n", + dev->agp_buffer_map->virtual); } dev_priv->fb_location = (radeon_read_fb_location(dev_priv) & 0xffff) << 16; @@ -1377,17 +1375,16 @@ static int radeon_do_init_cp(struct drm_device *dev, drm_radeon_init_t *init, + dev_priv->gart_vm_start); else #endif - dev_priv->gart_buffers_offset = (dev->agp_buffer_map->offset - - (unsigned long)dev->sg->virtual - + dev_priv->gart_vm_start); + dev_priv->gart_buffers_offset = dev->agp_buffer_map->offset - + dev->sg->vaddr + dev_priv->gart_vm_start; DRM_DEBUG("dev_priv->gart_size %d\n", dev_priv->gart_size); DRM_DEBUG("dev_priv->gart_vm_start 0x%x\n", dev_priv->gart_vm_start); DRM_DEBUG("dev_priv->gart_buffers_offset 0x%lx\n", dev_priv->gart_buffers_offset); - dev_priv->ring.start = (u32 *) dev_priv->cp_ring->handle; - dev_priv->ring.end = ((u32 *) dev_priv->cp_ring->handle + dev_priv->ring.start = (u32 *) dev_priv->cp_ring->virtual; + dev_priv->ring.end = ((u32 *) dev_priv->cp_ring->virtual + init->ring_size / sizeof(u32)); dev_priv->ring.size = init->ring_size; dev_priv->ring.size_l2qw = drm_order(init->ring_size / 8); @@ -1423,7 +1420,7 @@ static int radeon_do_init_cp(struct drm_device *dev, drm_radeon_init_t *init, drm_core_ioremap_wc(&dev_priv->gart_info.mapping, dev); dev_priv->gart_info.addr = - dev_priv->gart_info.mapping.handle; + dev_priv->gart_info.mapping.virtual; if (dev_priv->flags & RADEON_IS_PCIE) dev_priv->gart_info.gart_reg_if = DRM_ATI_GART_PCIE; diff --git a/sys/dev/drm/radeon_cs.c b/sys/dev/drm/radeon_cs.c index b523126b947..14fe2fc2afb 100644 --- a/sys/dev/drm/radeon_cs.c +++ b/sys/dev/drm/radeon_cs.c @@ -821,7 +821,7 @@ static int r600_ib_get(struct drm_radeon_cs_parser *parser) } buf->file_priv = parser->file_priv; 
dev_priv->cs_buf = buf; - parser->ib = (void *)((vm_offset_t)dev->agp_buffer_map->handle + + parser->ib = (void *)((vm_offset_t)dev->agp_buffer_map->virtual + buf->offset); return 0; diff --git a/sys/dev/drm/radeon_state.c b/sys/dev/drm/radeon_state.c index fd8388ffa59..806150128dd 100644 --- a/sys/dev/drm/radeon_state.c +++ b/sys/dev/drm/radeon_state.c @@ -1420,7 +1420,7 @@ static void radeon_cp_dispatch_swap(struct drm_device *dev) static void radeon_cp_dispatch_flip(struct drm_device *dev) { drm_radeon_private_t *dev_priv = dev->dev_private; - struct drm_sarea *sarea = (struct drm_sarea *)dev_priv->sarea->handle; + struct drm_sarea *sarea = (struct drm_sarea *)dev_priv->sarea->virtual; int offset = (dev_priv->sarea_priv->pfCurrentPage == 1) ? dev_priv->front_offset : dev_priv->back_offset; RING_LOCALS; @@ -1582,7 +1582,7 @@ static void radeon_cp_dispatch_indirect(struct drm_device * dev, */ if (dwords & 1) { u32 *data = (u32 *) - ((char *)dev->agp_buffer_map->handle + ((char *)dev->agp_buffer_map->virtual + buf->offset + start); data[dwords++] = RADEON_CP_PACKET2; } @@ -1629,7 +1629,7 @@ static void radeon_cp_dispatch_indices(struct drm_device *dev, dwords = (prim->finish - prim->start + 3) / sizeof(u32); - data = (u32 *) ((char *)dev->agp_buffer_map->handle + + data = (u32 *) ((char *)dev->agp_buffer_map->virtual + elt_buf->offset + prim->start); data[0] = CP_PACKET3(RADEON_3D_RNDR_GEN_INDX_PRIM, dwords - 2); @@ -1781,7 +1781,7 @@ static int radeon_cp_dispatch_texture(struct drm_device * dev, /* Dispatch the indirect buffer. 
*/ buffer = - (u32 *) ((char *)dev->agp_buffer_map->handle + buf->offset); + (u32 *) ((char *)dev->agp_buffer_map->virtual + buf->offset); dwords = size / 4; #define RADEON_COPY_MT(_buf, _data, _width) \ diff --git a/sys/dev/drm/savage_bci.c b/sys/dev/drm/savage_bci.c index 4168ddf819a..0f8d66e1289 100644 --- a/sys/dev/drm/savage_bci.c +++ b/sys/dev/drm/savage_bci.c @@ -376,7 +376,7 @@ uint32_t *savage_dma_alloc(drm_savage_private_t *dev_priv, unsigned int n) cur, dev_priv->dma_pages[cur].used, n, rest, nr_pages); if (cur + nr_pages < dev_priv->nr_dma_pages) { - dma_ptr = (uint32_t *)dev_priv->cmd_dma->handle + + dma_ptr = (uint32_t *)dev_priv->cmd_dma->virtual + cur * SAVAGE_DMA_PAGE_SIZE + dev_priv->dma_pages[cur].used; if (n < rest) rest = n; @@ -392,7 +392,7 @@ uint32_t *savage_dma_alloc(drm_savage_private_t *dev_priv, unsigned int n) dev_priv->dma_pages[i].used = 0; dev_priv->dma_pages[i].flushed = 0; } - dma_ptr = (uint32_t *)dev_priv->cmd_dma->handle; + dma_ptr = (uint32_t *)dev_priv->cmd_dma->virtual; dev_priv->first_dma_page = cur = 0; } for (i = cur; nr_pages > 0; ++i, --nr_pages) { @@ -443,7 +443,7 @@ static void savage_dma_flush(drm_savage_private_t *dev_priv) /* pad with noops */ if (pad) { - uint32_t *dma_ptr = (uint32_t *)dev_priv->cmd_dma->handle + + uint32_t *dma_ptr = (uint32_t *)dev_priv->cmd_dma->virtual + cur * SAVAGE_DMA_PAGE_SIZE + dev_priv->dma_pages[cur].used; dev_priv->dma_pages[cur].used += pad; while (pad != 0) { @@ -517,7 +517,7 @@ static void savage_fake_dma_flush(drm_savage_private_t *dev_priv) for (i = dev_priv->first_dma_page; i <= dev_priv->current_dma_page && dev_priv->dma_pages[i].used; ++i) { - uint32_t *dma_ptr = (uint32_t *)dev_priv->cmd_dma->handle + + uint32_t *dma_ptr = (uint32_t *)dev_priv->cmd_dma->virtual + i * SAVAGE_DMA_PAGE_SIZE; #if SAVAGE_DMA_DEBUG /* Sanity check: all pages except the last one must be full. 
*/ @@ -784,7 +784,7 @@ static int savage_do_init_bci(struct drm_device *dev, drm_savage_init_t *init) return -EINVAL; } drm_core_ioremap(dev_priv->cmd_dma, dev); - if (!dev_priv->cmd_dma->handle) { + if (!dev_priv->cmd_dma->virtual) { DRM_ERROR("failed to ioremap command " "DMA region!\n"); savage_do_cleanup_bci(dev); @@ -806,9 +806,9 @@ static int savage_do_init_bci(struct drm_device *dev, drm_savage_init_t *init) dev_priv->fake_dma.offset = 0; dev_priv->fake_dma.size = SAVAGE_FAKE_DMA_SIZE; dev_priv->fake_dma.type = _DRM_SHM; - dev_priv->fake_dma.handle = drm_alloc(SAVAGE_FAKE_DMA_SIZE, + dev_priv->fake_dma.virtual = drm_alloc(SAVAGE_FAKE_DMA_SIZE, DRM_MEM_DRIVER); - if (!dev_priv->fake_dma.handle) { + if (!dev_priv->fake_dma.virtual) { DRM_ERROR("could not allocate faked DMA buffer!\n"); savage_do_cleanup_bci(dev); return -ENOMEM; @@ -818,7 +818,7 @@ static int savage_do_init_bci(struct drm_device *dev, drm_savage_init_t *init) } dev_priv->sarea_priv = - (drm_savage_sarea_t *)((uint8_t *)dev_priv->sarea->handle + + (drm_savage_sarea_t *)((uint8_t *)dev_priv->sarea->virtual + init->sarea_priv_offset); /* setup bitmap descriptors */ @@ -857,7 +857,7 @@ static int savage_do_init_bci(struct drm_device *dev, drm_savage_init_t *init) dev_priv->event_counter = 0; dev_priv->event_wrap = 0; dev_priv->bci_ptr = (volatile uint32_t *) - ((uint8_t *)dev_priv->mmio->handle + SAVAGE_BCI_OFFSET); + ((uint8_t *)dev_priv->mmio->virtual + SAVAGE_BCI_OFFSET); if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) { dev_priv->status_used_mask = SAVAGE_FIFO_USED_MASK_S3D; } else { @@ -865,7 +865,7 @@ static int savage_do_init_bci(struct drm_device *dev, drm_savage_init_t *init) } if (dev_priv->status != NULL) { dev_priv->status_ptr = - (volatile uint32_t *)dev_priv->status->handle; + (volatile uint32_t *)dev_priv->status->virtual; dev_priv->wait_fifo = savage_bci_wait_fifo_shadow; dev_priv->wait_evnt = savage_bci_wait_event_shadow; dev_priv->status_ptr[1023] = dev_priv->event_counter; @@ -905,16 
+905,16 @@ static int savage_do_cleanup_bci(struct drm_device *dev) drm_savage_private_t *dev_priv = dev->dev_private; if (dev_priv->cmd_dma == &dev_priv->fake_dma) { - if (dev_priv->fake_dma.handle) - drm_free(dev_priv->fake_dma.handle, + if (dev_priv->fake_dma.virtual) + drm_free(dev_priv->fake_dma.virtual, SAVAGE_FAKE_DMA_SIZE, DRM_MEM_DRIVER); - } else if (dev_priv->cmd_dma && dev_priv->cmd_dma->handle && + } else if (dev_priv->cmd_dma && dev_priv->cmd_dma->virtual && dev_priv->cmd_dma->type == _DRM_AGP && dev_priv->dma_type == SAVAGE_DMA_AGP) drm_core_ioremapfree(dev_priv->cmd_dma, dev); if (dev_priv->dma_type == SAVAGE_DMA_AGP && - dev->agp_buffer_map && dev->agp_buffer_map->handle) { + dev->agp_buffer_map && dev->agp_buffer_map->virtual) { drm_core_ioremapfree(dev->agp_buffer_map, dev); /* make sure the next instance (which may be running * in PCI mode) doesn't try to use an old diff --git a/sys/dev/drm/via_dma.c b/sys/dev/drm/via_dma.c index e1af2938072..6f435fe5986 100644 --- a/sys/dev/drm/via_dma.c +++ b/sys/dev/drm/via_dma.c @@ -158,6 +158,9 @@ static inline uint32_t *via_check_dma(drm_via_private_t * dev_priv, int via_dma_cleanup(struct drm_device * dev) { + drm_via_blitq_t *blitq; + int i; + if (dev->dev_private) { drm_via_private_t *dev_priv = (drm_via_private_t *) dev->dev_private; @@ -169,6 +172,10 @@ int via_dma_cleanup(struct drm_device * dev) dev_priv->ring.virtual_start = NULL; } + for (i=0; i< VIA_NUM_BLIT_ENGINES; ++i) { + blitq = dev_priv->blit_queues + i; + mtx_destroy(&blitq->blit_lock); + } } return 0; @@ -206,14 +213,14 @@ static int via_initialize(struct drm_device * dev, drm_core_ioremap_wc(&dev_priv->ring.map, dev); - if (dev_priv->ring.map.handle == NULL) { + if (dev_priv->ring.map.virtual == NULL) { via_dma_cleanup(dev); DRM_ERROR("can not ioremap virtual address for" " ring buffer\n"); return -ENOMEM; } - dev_priv->ring.virtual_start = dev_priv->ring.map.handle; + dev_priv->ring.virtual_start = dev_priv->ring.map.virtual; 
dev_priv->dma_ptr = dev_priv->ring.virtual_start; dev_priv->dma_low = 0; @@ -222,7 +229,7 @@ static int via_initialize(struct drm_device * dev, dev_priv->dma_offset = init->offset; dev_priv->last_pause_ptr = NULL; dev_priv->hw_addr_ptr = - (volatile uint32_t *)((char *)dev_priv->mmio->handle + + (volatile uint32_t *)((char *)dev_priv->mmio->virtual + init->reg_pause_addr); via_cmdbuf_start(dev_priv); diff --git a/sys/dev/drm/via_dmablit.c b/sys/dev/drm/via_dmablit.c index ea449f1f992..9be42ffa7f1 100644 --- a/sys/dev/drm/via_dmablit.c +++ b/sys/dev/drm/via_dmablit.c @@ -178,9 +178,9 @@ via_free_sg_info(drm_via_sg_info_t *vsg) case dr_via_pages_locked: for (i=0; i < vsg->num_pages; ++i) { if ( NULL != (page = vsg->pages[i])) { - vm_page_lock_queues(); + vm_page_lock(page); vm_page_unwire(page, 0); - vm_page_unlock_queues(); + vm_page_unlock(page); } } case dr_via_pages_alloc: @@ -248,10 +248,10 @@ via_lock_all_dma_pages(drm_via_sg_info_t *vsg, drm_via_dmablit_t *xfer) (vm_offset_t)xfer->mem_addr + IDX_TO_OFF(i), VM_PROT_RW); if (m == NULL) break; - vm_page_lock_queues(); + vm_page_lock(m); vm_page_wire(m); vm_page_unhold(m); - vm_page_unlock_queues(); + vm_page_unlock(m); vsg->pages[i] = m; } vsg->state = dr_via_pages_locked; diff --git a/sys/dev/drm/via_map.c b/sys/dev/drm/via_map.c index 5504a6b198d..687b8be8013 100644 --- a/sys/dev/drm/via_map.c +++ b/sys/dev/drm/via_map.c @@ -59,7 +59,7 @@ static int via_do_init_map(struct drm_device * dev, drm_via_init_t * init) } dev_priv->sarea_priv = - (drm_via_sarea_t *) ((u8 *) dev_priv->sarea->handle + + (drm_via_sarea_t *) ((u8 *) dev_priv->sarea->virtual + init->sarea_priv_offset); dev_priv->agpAddr = init->agpAddr; diff --git a/sys/dev/drm/via_mm.c b/sys/dev/drm/via_mm.c index 9aaee9f4445..6dc185ba125 100644 --- a/sys/dev/drm/via_mm.c +++ b/sys/dev/drm/via_mm.c @@ -45,7 +45,6 @@ int via_agp_init(struct drm_device *dev, void *data, struct drm_file *file_priv) ret = drm_sman_set_range(&dev_priv->sman, VIA_MEM_AGP, 0, 
agp->size >> VIA_MM_ALIGN_SHIFT); - if (ret) { DRM_ERROR("AGP memory manager initialisation error\n"); return ret; @@ -66,7 +65,6 @@ int via_fb_init(struct drm_device *dev, void *data, struct drm_file *file_priv) ret = drm_sman_set_range(&dev_priv->sman, VIA_MEM_VIDEO, 0, fb->size >> VIA_MM_ALIGN_SHIFT); - if (ret) { DRM_ERROR("VRAM memory manager initialisation error\n"); return ret; diff --git a/sys/dev/drm/via_video.c b/sys/dev/drm/via_video.c index f903cf55f10..3bd96051057 100644 --- a/sys/dev/drm/via_video.c +++ b/sys/dev/drm/via_video.c @@ -78,7 +78,7 @@ int via_decoder_futex(struct drm_device *dev, void *data, struct drm_file *file_ DRM_DEBUG("\n"); - if (fx->lock > VIA_NR_XVMC_LOCKS) + if (fx->lock >= VIA_NR_XVMC_LOCKS) return -EFAULT; lock = (volatile int *)XVMCLOCKPTR(sAPriv, fx->lock); diff --git a/sys/dev/e1000/if_em.c b/sys/dev/e1000/if_em.c index 834e676a6af..2fb4f174464 100644 --- a/sys/dev/e1000/if_em.c +++ b/sys/dev/e1000/if_em.c @@ -93,7 +93,7 @@ int em_display_debug_stats = 0; /********************************************************************* * Driver version: *********************************************************************/ -char em_driver_version[] = "7.0.4"; +char em_driver_version[] = "7.0.5"; /********************************************************************* @@ -347,8 +347,13 @@ static int em_debug_sbp = FALSE; TUNABLE_INT("hw.em.sbp", &em_debug_sbp); /* Local controls for MSI/MSIX */ +#ifdef EM_MULTIQUEUE static int em_enable_msix = TRUE; static int em_msix_queues = 2; /* for 82574, can be 1 or 2 */ +#else +static int em_enable_msix = FALSE; +static int em_msix_queues = 0; /* disable */ +#endif TUNABLE_INT("hw.em.enable_msix", &em_enable_msix); TUNABLE_INT("hw.em.msix_queues", &em_msix_queues); @@ -1371,9 +1376,7 @@ em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count) } EM_CORE_UNLOCK(adapter); - EM_RX_LOCK(rxr); rx_done = em_rxeof(rxr, count); - EM_RX_UNLOCK(rxr); EM_TX_LOCK(txr); em_txeof(txr); @@ -1449,9 +1452,7 @@ 
em_handle_que(void *context, int pending) if (ifp->if_drv_flags & IFF_DRV_RUNNING) { - EM_RX_LOCK(rxr); more_rx = em_rxeof(rxr, adapter->rx_process_limit); - EM_RX_UNLOCK(rxr); EM_TX_LOCK(txr); em_txeof(txr); @@ -1484,12 +1485,17 @@ em_msix_tx(void *arg) { struct tx_ring *txr = arg; struct adapter *adapter = txr->adapter; + bool more; ++txr->tx_irq; EM_TX_LOCK(txr); - em_txeof(txr); + more = em_txeof(txr); EM_TX_UNLOCK(txr); - E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims); + if (more) + taskqueue_enqueue(txr->tq, &txr->tx_task); + else + /* Reenable this interrupt */ + E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims); return; } @@ -1506,10 +1512,8 @@ em_msix_rx(void *arg) struct adapter *adapter = rxr->adapter; bool more; - EM_RX_LOCK(rxr); ++rxr->rx_irq; more = em_rxeof(rxr, adapter->rx_process_limit); - EM_RX_UNLOCK(rxr); if (more) taskqueue_enqueue(rxr->tq, &rxr->rx_task); else @@ -1548,9 +1552,7 @@ em_handle_rx(void *context, int pending) struct adapter *adapter = rxr->adapter; bool more; - EM_RX_LOCK(rxr); more = em_rxeof(rxr, adapter->rx_process_limit); - EM_RX_UNLOCK(rxr); if (more) taskqueue_enqueue(rxr->tq, &rxr->rx_task); else @@ -2702,10 +2704,10 @@ em_setup_interface(device_t dev, struct adapter *adapter) ifp->if_capabilities |= IFCAP_POLLING; #endif - /* Enable All WOL methods by default */ + /* Enable only WOL MAGIC by default */ if (adapter->wol) { ifp->if_capabilities |= IFCAP_WOL; - ifp->if_capenable |= IFCAP_WOL; + ifp->if_capenable |= IFCAP_WOL_MAGIC; } /* @@ -4095,7 +4097,7 @@ em_rxeof(struct rx_ring *rxr, int count) bool eop; struct e1000_rx_desc *cur; - EM_RX_LOCK_ASSERT(rxr); + EM_RX_LOCK(rxr); for (i = rxr->next_to_check, processed = 0; count != 0;) { @@ -4189,8 +4191,13 @@ skip: i = 0; /* Send to the stack */ - if (sendmp != NULL) + if (sendmp != NULL) { + rxr->next_to_check = i; + EM_RX_UNLOCK(rxr); (*ifp->if_input)(ifp, sendmp); + EM_RX_LOCK(rxr); + i = rxr->next_to_check; + } /* Only refresh mbufs every 8 descriptors */ if 
(processed == 8) { @@ -4206,6 +4213,7 @@ skip: } rxr->next_to_check = i; + EM_RX_UNLOCK(rxr); #ifdef DEVICE_POLLING return (rxdone); diff --git a/sys/dev/e1000/if_igb.c b/sys/dev/e1000/if_igb.c index 267590faf0e..e901bc24e2f 100644 --- a/sys/dev/e1000/if_igb.c +++ b/sys/dev/e1000/if_igb.c @@ -99,7 +99,7 @@ int igb_display_debug_stats = 0; /********************************************************************* * Driver version: *********************************************************************/ -char igb_driver_version[] = "version - 1.9.4"; +char igb_driver_version[] = "version - 1.9.5"; /********************************************************************* @@ -758,8 +758,15 @@ igb_start_locked(struct tx_ring *txr, struct ifnet *ifp) if (!adapter->link_active) return; - while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) { + /* Call cleanup if number of TX descriptors low */ + if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD) + igb_txeof(txr); + while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) { + if (txr->tx_avail <= IGB_TX_OP_THRESHOLD) { + ifp->if_drv_flags |= IFF_DRV_OACTIVE; + break; + } IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head); if (m_head == NULL) break; @@ -779,6 +786,7 @@ igb_start_locked(struct tx_ring *txr, struct ifnet *ifp) ETHER_BPF_MTAP(ifp, m_head); /* Set watchdog on */ + txr->watchdog_time = ticks; txr->watchdog_check = TRUE; } } @@ -817,8 +825,6 @@ igb_mq_start(struct ifnet *ifp, struct mbuf *m) /* Which queue to use */ if ((m->m_flags & M_FLOWID) != 0) i = m->m_pkthdr.flowid % adapter->num_queues; - else - i = curcpu % adapter->num_queues; txr = &adapter->tx_rings[i]; @@ -847,6 +853,10 @@ igb_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m) return (err); } + /* Call cleanup if number of TX descriptors low */ + if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD) + igb_txeof(txr); + enq = 0; if (m == NULL) { next = drbr_dequeue(ifp, txr->br); @@ -856,6 +866,7 @@ igb_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m) next = 
drbr_dequeue(ifp, txr->br); } else next = m; + /* Process the queue */ while (next != NULL) { if ((err = igb_xmit(txr, &next)) != 0) { @@ -877,6 +888,7 @@ igb_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m) if (enq > 0) { /* Set the watchdog */ txr->watchdog_check = TRUE; + txr->watchdog_time = ticks; } return (err); } @@ -1248,19 +1260,13 @@ igb_handle_que(void *context, int pending) struct adapter *adapter = que->adapter; struct tx_ring *txr = que->txr; struct ifnet *ifp = adapter->ifp; - u32 loop = IGB_MAX_LOOP; bool more; - /* RX first */ - do { + if (ifp->if_drv_flags & IFF_DRV_RUNNING) { more = igb_rxeof(que, -1); - } while (loop-- && more); - if (IGB_TX_TRYLOCK(txr)) { - loop = IGB_MAX_LOOP; - do { - more = igb_txeof(txr); - } while (loop-- && more); + IGB_TX_LOCK(txr); + igb_txeof(txr); #if __FreeBSD_version >= 800000 igb_mq_start_locked(ifp, txr, NULL); #else @@ -1268,6 +1274,10 @@ igb_handle_que(void *context, int pending) igb_start_locked(txr, ifp); #endif IGB_TX_UNLOCK(txr); + if (more) { + taskqueue_enqueue(que->tq, &que->que_task); + return; + } } /* Reenable this interrupt */ diff --git a/sys/dev/e1000/if_lem.c b/sys/dev/e1000/if_lem.c index cf71f2bb74b..825fb4ee6f0 100644 --- a/sys/dev/e1000/if_lem.c +++ b/sys/dev/e1000/if_lem.c @@ -39,9 +39,6 @@ #include #include -#if __FreeBSD_version >= 800000 -#include -#endif #include #include #include @@ -94,7 +91,7 @@ int lem_display_debug_stats = 0; /********************************************************************* * Legacy Em Driver version: *********************************************************************/ -char lem_driver_version[] = "1.0.0"; +char lem_driver_version[] = "1.0.1"; /********************************************************************* @@ -177,11 +174,6 @@ static int lem_suspend(device_t); static int lem_resume(device_t); static void lem_start(struct ifnet *); static void lem_start_locked(struct ifnet *ifp); -#if __FreeBSD_version >= 800000 -static int 
lem_mq_start(struct ifnet *, struct mbuf *); -static int lem_mq_start_locked(struct ifnet *, struct mbuf *); -static void lem_qflush(struct ifnet *); -#endif static int lem_ioctl(struct ifnet *, u_long, caddr_t); static void lem_init(void *); static void lem_init_locked(struct adapter *); @@ -304,12 +296,6 @@ MODULE_DEPEND(lem, ether, 1, 1, 1); #define EM_TICKS_TO_USECS(ticks) ((1024 * (ticks) + 500) / 1000) #define EM_USECS_TO_TICKS(usecs) ((1000 * (usecs) + 512) / 1024) -#define M_TSO_LEN 66 - -/* Allow common code without TSO */ -#ifndef CSUM_TSO -#define CSUM_TSO 0 -#endif static int lem_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV); static int lem_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR); @@ -827,118 +813,6 @@ lem_resume(device_t dev) } -/********************************************************************* - * Transmit entry point - * - * em_start is called by the stack to initiate a transmit. - * The driver will remain in this routine as long as there are - * packets to transmit and transmit resources are available. - * In case resources are not available stack is notified and - * the packet is requeued. 
- **********************************************************************/ - -#if __FreeBSD_version >= 800000 -static int -lem_mq_start_locked(struct ifnet *ifp, struct mbuf *m) -{ - struct adapter *adapter = ifp->if_softc; - struct mbuf *next; - int error = E1000_SUCCESS; - - EM_TX_LOCK_ASSERT(adapter); - /* To allow being called from a tasklet */ - if (m == NULL) - goto process; - - if (((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) != - IFF_DRV_RUNNING) - || (!adapter->link_active)) { - error = drbr_enqueue(ifp, adapter->br, m); - return (error); - } else if (drbr_empty(ifp, adapter->br) && - (adapter->num_tx_desc_avail > EM_TX_OP_THRESHOLD)) { - if ((error = lem_xmit(adapter, &m)) != 0) { - if (m) - error = drbr_enqueue(ifp, adapter->br, m); - return (error); - } else { - /* - * We've bypassed the buf ring so we need to update - * ifp directly - */ - drbr_stats_update(ifp, m->m_pkthdr.len, m->m_flags); - /* - ** Send a copy of the frame to the BPF - ** listener and set the watchdog on. - */ - ETHER_BPF_MTAP(ifp, m); - adapter->watchdog_check = TRUE; - } - } else if ((error = drbr_enqueue(ifp, adapter->br, m)) != 0) - return (error); - -process: - if (drbr_empty(ifp, adapter->br)) - return(error); - /* Process the queue */ - while (TRUE) { - if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) - break; - next = drbr_dequeue(ifp, adapter->br); - if (next == NULL) - break; - if ((error = lem_xmit(adapter, &next)) != 0) { - if (next != NULL) - error = drbr_enqueue(ifp, adapter->br, next); - break; - } - drbr_stats_update(ifp, next->m_pkthdr.len, next->m_flags); - ETHER_BPF_MTAP(ifp, next); - /* Set the watchdog */ - adapter->watchdog_check = TRUE; - } - - if (adapter->num_tx_desc_avail <= EM_TX_OP_THRESHOLD) - ifp->if_drv_flags |= IFF_DRV_OACTIVE; - - return (error); -} - -/* -** Multiqueue capable stack interface, this is not -** yet truely multiqueue, but that is coming... 
-*/ -static int -lem_mq_start(struct ifnet *ifp, struct mbuf *m) -{ - - struct adapter *adapter = ifp->if_softc; - int error = 0; - - if (EM_TX_TRYLOCK(adapter)) { - if (ifp->if_drv_flags & IFF_DRV_RUNNING) - error = lem_mq_start_locked(ifp, m); - EM_TX_UNLOCK(adapter); - } else - error = drbr_enqueue(ifp, adapter->br, m); - - return (error); -} - -static void -lem_qflush(struct ifnet *ifp) -{ - struct mbuf *m; - struct adapter *adapter = (struct adapter *)ifp->if_softc; - - EM_TX_LOCK(adapter); - while ((m = buf_ring_dequeue_sc(adapter->br)) != NULL) - m_freem(m); - if_qflush(ifp); - EM_TX_UNLOCK(adapter); -} -#endif /* FreeBSD_version */ - static void lem_start_locked(struct ifnet *ifp) { @@ -975,6 +849,7 @@ lem_start_locked(struct ifnet *ifp) /* Set timeout in case hardware has problems transmitting. */ adapter->watchdog_check = TRUE; + adapter->watchdog_time = ticks; } if (adapter->num_tx_desc_avail <= EM_TX_OP_THRESHOLD) ifp->if_drv_flags |= IFF_DRV_OACTIVE; @@ -1151,12 +1026,6 @@ lem_ioctl(struct ifnet *ifp, u_long command, caddr_t data) ifp->if_capenable ^= IFCAP_HWCSUM; reinit = 1; } -#if __FreeBSD_version >= 700000 - if (mask & IFCAP_TSO4) { - ifp->if_capenable ^= IFCAP_TSO4; - reinit = 1; - } -#endif if (mask & IFCAP_VLAN_HWTAGGING) { ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; reinit = 1; @@ -1279,10 +1148,6 @@ lem_init_locked(struct adapter *adapter) if (adapter->hw.mac.type >= e1000_82543) { if (ifp->if_capenable & IFCAP_TXCSUM) ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP); -#if __FreeBSD_version >= 700000 - if (ifp->if_capenable & IFCAP_TSO4) - ifp->if_hwassist |= CSUM_TSO; -#endif } /* Configure for OS presence */ @@ -1394,13 +1259,8 @@ lem_poll(struct ifnet *ifp, enum poll_cmd cmd, int count) EM_TX_LOCK(adapter); lem_txeof(adapter); -#if __FreeBSD_version >= 800000 - if (!drbr_empty(ifp, adapter->br)) - lem_mq_start_locked(ifp, NULL); -#else if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) lem_start_locked(ifp); -#endif EM_TX_UNLOCK(adapter); return (rx_done); } @@ 
-1494,14 +1354,8 @@ lem_handle_rxtx(void *context, int pending) taskqueue_enqueue(adapter->tq, &adapter->rxtx_task); EM_TX_LOCK(adapter); lem_txeof(adapter); - -#if __FreeBSD_version >= 800000 - if (!drbr_empty(ifp, adapter->br)) - lem_mq_start_locked(ifp, NULL); -#else if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) lem_start_locked(ifp); -#endif EM_TX_UNLOCK(adapter); } @@ -1852,15 +1706,17 @@ lem_xmit(struct adapter *adapter, struct mbuf **m_headp) if (mtag != NULL) { ctxd->upper.fields.special = htole16(VLAN_TAG_VALUE(mtag)); + ctxd->lower.data |= htole32(E1000_TXD_CMD_VLE); + } #else /* FreeBSD 7 */ if (m_head->m_flags & M_VLANTAG) { /* Set the vlan id. */ ctxd->upper.fields.special = htole16(m_head->m_pkthdr.ether_vtag); -#endif /* Tell hardware to add tag */ ctxd->lower.data |= htole32(E1000_TXD_CMD_VLE); } +#endif tx_buffer->m_head = m_head; tx_buffer_mapped->map = tx_buffer->map; @@ -2544,12 +2400,6 @@ lem_setup_interface(device_t dev, struct adapter *adapter) ifp->if_capabilities = ifp->if_capenable = 0; -#if __FreeBSD_version >= 800000 - /* Multiqueue tx functions */ - ifp->if_transmit = lem_mq_start; - ifp->if_qflush = lem_qflush; - adapter->br = buf_ring_alloc(4096, M_DEVBUF, M_WAITOK, &adapter->tx_mtx); -#endif if (adapter->hw.mac.type >= e1000_82543) { int version_cap; #if __FreeBSD_version < 700000 @@ -4549,10 +4399,6 @@ lem_print_hw_stats(struct adapter *adapter) (long long)adapter->stats.gprc); device_printf(dev, "Good Packets Xmtd = %lld\n", (long long)adapter->stats.gptc); - device_printf(dev, "TSO Contexts Xmtd = %lld\n", - (long long)adapter->stats.tsctc); - device_printf(dev, "TSO Contexts Failed = %lld\n", - (long long)adapter->stats.tsctfc); } /********************************************************************** diff --git a/sys/dev/ed/if_ed.c b/sys/dev/ed/if_ed.c index e065a670d5c..839e2a52cc9 100644 --- a/sys/dev/ed/if_ed.c +++ b/sys/dev/ed/if_ed.c @@ -283,8 +283,8 @@ ed_attach(device_t dev) ifp->if_start = ed_start; ifp->if_ioctl = ed_ioctl; 
ifp->if_init = ed_init; - IFQ_SET_MAXLEN(&ifp->if_snd, IFQ_MAXLEN); - ifp->if_snd.ifq_drv_maxlen = IFQ_MAXLEN; + IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen); + ifp->if_snd.ifq_drv_maxlen = ifqmaxlen; IFQ_SET_READY(&ifp->if_snd); ifp->if_linkmib = &sc->mibdata; ifp->if_linkmiblen = sizeof sc->mibdata; diff --git a/sys/dev/ep/if_ep.c b/sys/dev/ep/if_ep.c index 59dd1c4cc99..24362558ff3 100644 --- a/sys/dev/ep/if_ep.c +++ b/sys/dev/ep/if_ep.c @@ -306,8 +306,8 @@ ep_attach(struct ep_softc *sc) ifp->if_start = epstart; ifp->if_ioctl = epioctl; ifp->if_init = epinit; - IFQ_SET_MAXLEN(&ifp->if_snd, IFQ_MAXLEN); - ifp->if_snd.ifq_drv_maxlen = IFQ_MAXLEN; + IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen); + ifp->if_snd.ifq_drv_maxlen = ifqmaxlen; IFQ_SET_READY(&ifp->if_snd); callout_init_mtx(&sc->watchdog_timer, &sc->sc_mtx, 0); diff --git a/sys/dev/esp/esp_sbus.c b/sys/dev/esp/esp_sbus.c index 2033cc65531..3ba0d46ae71 100644 --- a/sys/dev/esp/esp_sbus.c +++ b/sys/dev/esp/esp_sbus.c @@ -466,9 +466,7 @@ espattach(struct esp_softc *esc, const struct ncr53c9x_glue *gluep) goto fail_lock; } - if (OF_getprop(ofw_bus_get_node(esc->sc_dev), "scsi-initiator-id", - &sc->sc_id, sizeof(sc->sc_id)) == -1) - sc->sc_id = 7; + sc->sc_id = OF_getscsinitid(esc->sc_dev); #ifdef ESP_SBUS_DEBUG device_printf(esc->sc_dev, "%s: sc_id %d, freq %d\n", diff --git a/sys/dev/ex/if_ex.c b/sys/dev/ex/if_ex.c index d9e0c98f9fa..efd54bca0e5 100644 --- a/sys/dev/ex/if_ex.c +++ b/sys/dev/ex/if_ex.c @@ -237,7 +237,7 @@ ex_attach(device_t dev) ifp->if_start = ex_start; ifp->if_ioctl = ex_ioctl; ifp->if_init = ex_init; - IFQ_SET_MAXLEN(&ifp->if_snd, IFQ_MAXLEN); + IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen); ifmedia_init(&sc->ifmedia, 0, ex_ifmedia_upd, ex_ifmedia_sts); mtx_init(&sc->lock, device_get_nameunit(dev), MTX_NETWORK_LOCK, diff --git a/sys/dev/fe/if_fe.c b/sys/dev/fe/if_fe.c index eff92b77366..9fe1f2c2a09 100644 --- a/sys/dev/fe/if_fe.c +++ b/sys/dev/fe/if_fe.c @@ -766,7 +766,7 @@ fe_attach (device_t dev) * Set 
fixed interface flags. */ ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; - IFQ_SET_MAXLEN(&ifp->if_snd, IFQ_MAXLEN); + IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen); #if FE_SINGLE_TRANSMISSION /* Override txb config to allocate minimum. */ diff --git a/sys/dev/fxp/if_fxp.c b/sys/dev/fxp/if_fxp.c index a8d961eb0fb..9c0b44aef75 100644 --- a/sys/dev/fxp/if_fxp.c +++ b/sys/dev/fxp/if_fxp.c @@ -106,9 +106,8 @@ static int tx_threshold = 64; /* * The configuration byte map has several undefined fields which - * must be one or must be zero. Set up a template for these bits - * only, (assuming a 82557 chip) leaving the actual configuration - * to fxp_init. + * must be one or must be zero. Set up a template for these bits. + * The actual configuration is performed in fxp_init. * * See struct fxp_cb_config for the bit definitions. */ @@ -137,7 +136,17 @@ static u_char fxp_cb_config_template[] = { 0xf0, /* 18 */ 0x0, /* 19 */ 0x3f, /* 20 */ - 0x5 /* 21 */ + 0x5, /* 21 */ + 0x0, /* 22 */ + 0x0, /* 23 */ + 0x0, /* 24 */ + 0x0, /* 25 */ + 0x0, /* 26 */ + 0x0, /* 27 */ + 0x0, /* 28 */ + 0x0, /* 29 */ + 0x0, /* 30 */ + 0x0 /* 31 */ }; /* @@ -1417,60 +1426,6 @@ fxp_encap(struct fxp_softc *sc, struct mbuf **m_head) FXP_IPCB_HARDWAREPARSING_ENABLE; m = *m_head; - /* - * Deal with TCP/IP checksum offload. Note that - * in order for TCP checksum offload to work, - * the pseudo header checksum must have already - * been computed and stored in the checksum field - * in the TCP header. The stack should have - * already done this for us. - */ - if (m->m_pkthdr.csum_flags & FXP_CSUM_FEATURES) { - txp->tx_cb->ipcb_ip_schedule = FXP_IPCB_TCPUDP_CHECKSUM_ENABLE; - if (m->m_pkthdr.csum_flags & CSUM_TCP) - txp->tx_cb->ipcb_ip_schedule |= FXP_IPCB_TCP_PACKET; - -#ifdef FXP_IP_CSUM_WAR - /* - * XXX The 82550 chip appears to have trouble - * dealing with IP header checksums in very small - * datagrams, namely fragments from 1 to 3 bytes - * in size. 
For example, say you want to transmit - * a UDP packet of 1473 bytes. The packet will be - * fragmented over two IP datagrams, the latter - * containing only one byte of data. The 82550 will - * botch the header checksum on the 1-byte fragment. - * As long as the datagram contains 4 or more bytes - * of data, you're ok. - * - * The following code attempts to work around this - * problem: if the datagram is less than 38 bytes - * in size (14 bytes ether header, 20 bytes IP header, - * plus 4 bytes of data), we punt and compute the IP - * header checksum by hand. This workaround doesn't - * work very well, however, since it can be fooled - * by things like VLAN tags and IP options that make - * the header sizes/offsets vary. - */ - - if (m->m_pkthdr.csum_flags & CSUM_IP) { - if (m->m_pkthdr.len < 38) { - struct ip *ip; - m->m_data += ETHER_HDR_LEN; - ip = mtod(m, struct ip *); - ip->ip_sum = in_cksum(m, ip->ip_hl << 2); - m->m_data -= ETHER_HDR_LEN; - m->m_pkthdr.csum_flags &= ~CSUM_IP; - } else { - txp->tx_cb->ipcb_ip_activation_high = - FXP_IPCB_HARDWAREPARSING_ENABLE; - txp->tx_cb->ipcb_ip_schedule |= - FXP_IPCB_IP_CHECKSUM_ENABLE; - } - } -#endif - } - if (m->m_pkthdr.csum_flags & CSUM_TSO) { /* * 82550/82551 requires ethernet/IP/TCP headers must be @@ -1539,6 +1494,58 @@ fxp_encap(struct fxp_softc *sc, struct mbuf **m_head) tcp_payload = m->m_pkthdr.len - ip_off - (ip->ip_hl << 2); tcp_payload -= tcp->th_off << 2; *m_head = m; + } else if (m->m_pkthdr.csum_flags & FXP_CSUM_FEATURES) { + /* + * Deal with TCP/IP checksum offload. Note that + * in order for TCP checksum offload to work, + * the pseudo header checksum must have already + * been computed and stored in the checksum field + * in the TCP header. The stack should have + * already done this for us. 
+ */ + txp->tx_cb->ipcb_ip_schedule = FXP_IPCB_TCPUDP_CHECKSUM_ENABLE; + if (m->m_pkthdr.csum_flags & CSUM_TCP) + txp->tx_cb->ipcb_ip_schedule |= FXP_IPCB_TCP_PACKET; + +#ifdef FXP_IP_CSUM_WAR + /* + * XXX The 82550 chip appears to have trouble + * dealing with IP header checksums in very small + * datagrams, namely fragments from 1 to 3 bytes + * in size. For example, say you want to transmit + * a UDP packet of 1473 bytes. The packet will be + * fragmented over two IP datagrams, the latter + * containing only one byte of data. The 82550 will + * botch the header checksum on the 1-byte fragment. + * As long as the datagram contains 4 or more bytes + * of data, you're ok. + * + * The following code attempts to work around this + * problem: if the datagram is less than 38 bytes + * in size (14 bytes ether header, 20 bytes IP header, + * plus 4 bytes of data), we punt and compute the IP + * header checksum by hand. This workaround doesn't + * work very well, however, since it can be fooled + * by things like VLAN tags and IP options that make + * the header sizes/offsets vary. + */ + + if (m->m_pkthdr.csum_flags & CSUM_IP) { + if (m->m_pkthdr.len < 38) { + struct ip *ip; + m->m_data += ETHER_HDR_LEN; + ip = mtod(m, struct ip *); + ip->ip_sum = in_cksum(m, ip->ip_hl << 2); + m->m_data -= ETHER_HDR_LEN; + m->m_pkthdr.csum_flags &= ~CSUM_IP; + } else { + txp->tx_cb->ipcb_ip_activation_high = + FXP_IPCB_HARDWAREPARSING_ENABLE; + txp->tx_cb->ipcb_ip_schedule |= + FXP_IPCB_IP_CHECKSUM_ENABLE; + } + } +#endif } error = bus_dmamap_load_mbuf_sg(sc->fxp_txmtag, txp->tx_map, *m_head, @@ -2347,7 +2354,7 @@ fxp_init_body(struct fxp_softc *sc) cbp->force_fdx = 0; /* (don't) force full duplex */ cbp->fdx_pin_en = 1; /* (enable) FDX# pin */ cbp->multi_ia = 0; /* (don't) accept multiple IAs */ - cbp->mc_all = ifp->if_flags & IFF_ALLMULTI ? 1 : 0; + cbp->mc_all = ifp->if_flags & IFF_ALLMULTI ? 1 : prm; cbp->gamla_rx = sc->flags & FXP_FLAG_EXT_RFA ? 
1 : 0; cbp->vlan_strip_en = ((sc->flags & FXP_FLAG_EXT_RFA) != 0 && (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0) ? 1 : 0; diff --git a/sys/dev/hwpmc/hwpmc_core.c b/sys/dev/hwpmc/hwpmc_core.c index 90d7c8bc23f..e7de09915b3 100644 --- a/sys/dev/hwpmc/hwpmc_core.c +++ b/sys/dev/hwpmc/hwpmc_core.c @@ -603,7 +603,7 @@ static struct iap_event_descr iap_events[] = { IAPDESCR(06H_0FH, 0x06, 0x0F, IAP_F_FM | IAP_F_I7O), IAPDESCR(07H_00H, 0x07, 0x00, IAP_F_FM | IAP_F_CC | IAP_F_CC2), - IAPDESCR(07H_01H, 0x07, 0x01, IAP_F_FM | IAP_F_ALLCPUSCORE2 | IAP_F_WM), + IAPDESCR(07H_01H, 0x07, 0x01, IAP_F_FM | IAP_F_ALLCPUSCORE2 | IAP_F_I7 | IAP_F_WM), IAPDESCR(07H_02H, 0x07, 0x02, IAP_F_FM | IAP_F_ALLCPUSCORE2), IAPDESCR(07H_03H, 0x07, 0x03, IAP_F_FM | IAP_F_ALLCPUSCORE2), IAPDESCR(07H_06H, 0x07, 0x06, IAP_F_FM | IAP_F_CA), @@ -1053,6 +1053,7 @@ static struct iap_event_descr iap_events[] = { IAPDESCR(B0H_02H, 0xB0, 0x02, IAP_F_FM | IAP_F_WM | IAP_F_I7O), IAPDESCR(B0H_04H, 0xB0, 0x04, IAP_F_FM | IAP_F_WM | IAP_F_I7O), IAPDESCR(B0H_08H, 0xB0, 0x08, IAP_F_FM | IAP_F_WM | IAP_F_I7O), + IAPDESCR(B0H_10H, 0xB0, 0x10, IAP_F_FM | IAP_F_WM | IAP_F_I7O), IAPDESCR(B0H_20H, 0xB0, 0x20, IAP_F_FM | IAP_F_I7O), IAPDESCR(B0H_40H, 0xB0, 0x40, IAP_F_FM | IAP_F_I7 | IAP_F_WM), IAPDESCR(B0H_80H, 0xB0, 0x80, IAP_F_FM | IAP_F_CA | IAP_F_WM | IAP_F_I7O), diff --git a/sys/dev/hwpmc/hwpmc_mod.c b/sys/dev/hwpmc/hwpmc_mod.c index e33b431e7e3..a19c85ec379 100644 --- a/sys/dev/hwpmc/hwpmc_mod.c +++ b/sys/dev/hwpmc/hwpmc_mod.c @@ -2675,16 +2675,16 @@ pmc_start(struct pmc *pm) PMCDBG(PMC,OPS,1, "po=%p in global list", po); } po->po_sscount++; - } - /* - * Log mapping information for all existing processes in the - * system. Subsequent mappings are logged as they happen; - * see pmc_process_mmap(). - */ - if (po->po_logprocmaps == 0) { - pmc_log_all_process_mappings(po); - po->po_logprocmaps = 1; + /* + * Log mapping information for all existing processes in the + * system. 
Subsequent mappings are logged as they happen; + * see pmc_process_mmap(). + */ + if (po->po_logprocmaps == 0) { + pmc_log_all_process_mappings(po); + po->po_logprocmaps = 1; + } } /* diff --git a/sys/dev/hwpmc/pmc_events.h b/sys/dev/hwpmc/pmc_events.h index e38772cd95d..df2d3efd7bf 100644 --- a/sys/dev/hwpmc/pmc_events.h +++ b/sys/dev/hwpmc/pmc_events.h @@ -2207,7 +2207,7 @@ __PMC_EV_ALIAS("OFFCORE_REQUESTS.DEMAND.READ_DATA", IAP_EVENT_B0H_01H) \ __PMC_EV_ALIAS("OFFCORE_REQUESTS.DEMAND.READ_CODE", IAP_EVENT_B0H_02H) \ __PMC_EV_ALIAS("OFFCORE_REQUESTS.DEMAND.RFO", IAP_EVENT_B0H_04H) \ __PMC_EV_ALIAS("OFFCORE_REQUESTS.ANY.READ", IAP_EVENT_B0H_08H) \ -__PMC_EV_ALIAS("OFFCORE_REQUESTS.ANY.RFO", IAP_EVENT_80H_10H) \ +__PMC_EV_ALIAS("OFFCORE_REQUESTS.ANY.RFO", IAP_EVENT_B0H_10H) \ __PMC_EV_ALIAS("OFFCORE_REQUESTS.L1D_WRITEBACK", IAP_EVENT_B0H_40H) \ __PMC_EV_ALIAS("OFFCORE_REQUESTS.ANY", IAP_EVENT_B0H_80H) \ __PMC_EV_ALIAS("UOPS_EXECUTED.PORT0", IAP_EVENT_B1H_01H) \ diff --git a/sys/dev/ie/if_ie.c b/sys/dev/ie/if_ie.c index 34c658eef3d..172bf9e5bdf 100644 --- a/sys/dev/ie/if_ie.c +++ b/sys/dev/ie/if_ie.c @@ -318,7 +318,7 @@ ie_attach(device_t dev) ifp->if_start = iestart; ifp->if_ioctl = ieioctl; ifp->if_init = ieinit; - IFQ_SET_MAXLEN(&ifp->if_snd, IFQ_MAXLEN); + IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen); ether_ifattach(ifp, sc->enaddr); diff --git a/sys/dev/iicbus/if_ic.c b/sys/dev/iicbus/if_ic.c index bb5186cc401..4a05b163f6b 100644 --- a/sys/dev/iicbus/if_ic.c +++ b/sys/dev/iicbus/if_ic.c @@ -181,7 +181,7 @@ icattach(device_t dev) ifp->if_output = icoutput; ifp->if_hdrlen = 0; ifp->if_addrlen = 0; - ifp->if_snd.ifq_maxlen = IFQ_MAXLEN; + ifp->if_snd.ifq_maxlen = ifqmaxlen; ic_alloc_buffers(sc, ICMTU); diff --git a/sys/dev/io/iodev.c b/sys/dev/io/iodev.c index b142a39c610..eae69f484a4 100644 --- a/sys/dev/io/iodev.c +++ b/sys/dev/io/iodev.c @@ -30,22 +30,27 @@ __FBSDID("$FreeBSD$"); #include #include -#include #include -#include -#include +#include #include -#include 
+#include #include -#include #include -#include - -#include -#include #include +#include + +static int ioopen(struct cdev *dev, int flags, int fmt, + struct thread *td); +static int ioclose(struct cdev *dev, int flags, int fmt, + struct thread *td); +static int ioioctl(struct cdev *dev, u_long cmd, caddr_t data, + int fflag, struct thread *td); + +static int iopio_read(struct iodev_pio_req *req); +static int iopio_write(struct iodev_pio_req *req); + static struct cdev *iodev; static struct cdevsw io_cdevsw = { @@ -56,6 +61,129 @@ static struct cdevsw io_cdevsw = { .d_name = "io", }; +/* ARGSUSED */ +static int +ioopen(struct cdev *dev __unused, int flags __unused, int fmt __unused, + struct thread *td) +{ + int error; + + error = priv_check(td, PRIV_IO); + if (error != 0) + return (error); + error = securelevel_gt(td->td_ucred, 0); + if (error != 0) + return (error); + error = iodev_open(td); + + return (error); +} + +/* ARGSUSED */ +static int +ioclose(struct cdev *dev __unused, int flags __unused, int fmt __unused, + struct thread *td) +{ + + return (iodev_close(td)); +} + +/* ARGSUSED */ +static int +ioioctl(struct cdev *dev __unused, u_long cmd, caddr_t data, + int fflag __unused, struct thread *td __unused) +{ + struct iodev_pio_req *pio_req; + int error; + + switch (cmd) { + case IODEV_PIO: + pio_req = (struct iodev_pio_req *)data; + switch (pio_req->access) { + case IODEV_PIO_READ: + error = iopio_read(pio_req); + break; + case IODEV_PIO_WRITE: + error = iopio_write(pio_req); + break; + default: + error = EINVAL; + break; + } + break; + default: + error = iodev_ioctl(cmd, data); + } + + return (error); +} + +static int +iopio_read(struct iodev_pio_req *req) +{ + + switch (req->width) { + case 1: + req->val = iodev_read_1(req->port); + break; + case 2: + if (req->port & 1) { + req->val = iodev_read_1(req->port); + req->val |= iodev_read_1(req->port + 1) << 8; + } else + req->val = iodev_read_2(req->port); + break; + case 4: + if (req->port & 1) { + req->val = 
iodev_read_1(req->port); + req->val |= iodev_read_2(req->port + 1) << 8; + req->val |= iodev_read_1(req->port + 3) << 24; + } else if (req->port & 2) { + req->val = iodev_read_2(req->port); + req->val |= iodev_read_2(req->port + 2) << 16; + } else + req->val = iodev_read_4(req->port); + break; + default: + return (EINVAL); + } + + return (0); +} + +static int +iopio_write(struct iodev_pio_req *req) +{ + + switch (req->width) { + case 1: + iodev_write_1(req->port, req->val); + break; + case 2: + if (req->port & 1) { + iodev_write_1(req->port, req->val); + iodev_write_1(req->port + 1, req->val >> 8); + } else + iodev_write_2(req->port, req->val); + break; + case 4: + if (req->port & 1) { + iodev_write_1(req->port, req->val); + iodev_write_2(req->port + 1, req->val >> 8); + iodev_write_1(req->port + 3, req->val >> 24); + } else if (req->port & 2) { + iodev_write_2(req->port, req->val); + iodev_write_2(req->port + 2, req->val >> 16); + } else + iodev_write_4(req->port, req->val); + break; + default: + return (EINVAL); + } + + return (0); +} + /* ARGSUSED */ static int io_modevent(module_t mod __unused, int type, void *data __unused) diff --git a/sys/mips/include/segments.h b/sys/dev/io/iodev.h similarity index 63% rename from sys/mips/include/segments.h rename to sys/dev/io/iodev.h index 406b965f2e0..d040fcccf48 100644 --- a/sys/mips/include/segments.h +++ b/sys/dev/io/iodev.h @@ -1,11 +1,7 @@ /*- - * Copyright (c) 1989, 1990 William F. Jolitz - * Copyright (c) 1990 The Regents of the University of California. + * Copyright (c) 2010 Marcel Moolenaar * All rights reserved. * - * This code is derived from software contributed to Berkeley by - * William Jolitz. - * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: @@ -14,14 +10,11 @@ * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) @@ -30,11 +23,22 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* - * from: @(#)segments.h 7.1 (Berkeley) 5/9/91 * $FreeBSD$ */ -#ifndef _MACHINE_SEGMENTS_H_ -#define _MACHINE_SEGMENTS_H_ +#ifndef _DEV_IODEV_H_ +#define _DEV_IODEV_H_ -#endif /* !_MACHINE_SEGMENTS_H_ */ +#define IODEV_PIO_READ 0 +#define IODEV_PIO_WRITE 1 + +struct iodev_pio_req { + u_int access; + u_int port; + u_int width; + u_int val; +}; + +#define IODEV_PIO _IOWR('I', 0, struct iodev_pio_req) + +#endif /* _DEV_IODEV_H_ */ diff --git a/sys/dev/ipw/if_ipw.c b/sys/dev/ipw/if_ipw.c index 0b217583f24..2329c3377e0 100644 --- a/sys/dev/ipw/if_ipw.c +++ b/sys/dev/ipw/if_ipw.c @@ -289,8 +289,8 @@ ipw_attach(device_t dev) ifp->if_init = ipw_init; ifp->if_ioctl = ipw_ioctl; ifp->if_start = ipw_start; - IFQ_SET_MAXLEN(&ifp->if_snd, IFQ_MAXLEN); - ifp->if_snd.ifq_drv_maxlen = IFQ_MAXLEN; + IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen); + ifp->if_snd.ifq_drv_maxlen = ifqmaxlen; IFQ_SET_READY(&ifp->if_snd); ic->ic_ifp = ifp; @@ -888,10 +888,10 @@ ipw_newstate(struct ieee80211vap *vap, enum ieee80211_state nstate, int arg) /* * XXX when joining an ibss network we are called * with a SCAN -> RUN transition on scan complete. - * Use that to call ipw_auth_and_assoc. On completing - * the join we are then called again with an - * AUTH -> RUN transition and we want to do nothing. - * This is all totally bogus and needs to be redone. + * Use that to call ipw_assoc. On completing the + * join we are then called again with an AUTH -> RUN + * transition and we want to do nothing. This is + * all totally bogus and needs to be redone. */ if (ostate == IEEE80211_S_SCAN) ipw_assoc(ic, vap); @@ -904,12 +904,19 @@ ipw_newstate(struct ieee80211vap *vap, enum ieee80211_state nstate, int arg) break; case IEEE80211_S_AUTH: + /* + * Move to ASSOC state after the ipw_assoc() call. Firmware + * takes care of authentication, after the call we'll receive + * only an assoc response which would otherwise be discarded + * if we are still in AUTH state.
+ */ + nstate = IEEE80211_S_ASSOC; ipw_assoc(ic, vap); break; case IEEE80211_S_ASSOC: /* - * If we are not transitioning from AUTH the resend the + * If we are not transitioning from AUTH then resend the * association request. */ if (ostate != IEEE80211_S_AUTH) @@ -1021,7 +1028,6 @@ ipw_rx_newstate_intr(struct ipw_softc *sc, struct ipw_soft_buf *sbuf) } sc->flags &= ~IPW_FLAG_ASSOCIATING; sc->flags |= IPW_FLAG_ASSOCIATED; - ieee80211_new_state(vap, IEEE80211_S_RUN, -1); break; case IPW_STATE_SCANNING: @@ -1034,8 +1040,10 @@ ipw_rx_newstate_intr(struct ipw_softc *sc, struct ipw_soft_buf *sbuf) * we checked the 802.11 layer state. */ if (sc->flags & IPW_FLAG_ASSOCIATED) { + IPW_UNLOCK(sc); /* XXX probably need to issue disassoc to fw */ ieee80211_beacon_miss(ic); + IPW_LOCK(sc); } break; @@ -1054,7 +1062,9 @@ ipw_rx_newstate_intr(struct ipw_softc *sc, struct ipw_soft_buf *sbuf) break; } if (sc->flags & IPW_FLAG_SCANNING) { + IPW_UNLOCK(sc); ieee80211_scan_done(vap); + IPW_LOCK(sc); sc->flags &= ~IPW_FLAG_SCANNING; sc->sc_scan_timer = 0; } @@ -1064,13 +1074,16 @@ ipw_rx_newstate_intr(struct ipw_softc *sc, struct ipw_soft_buf *sbuf) DPRINTFN(2, ("Association lost (%s flags 0x%x)\n", IEEESTATE(vap), sc->flags)); sc->flags &= ~(IPW_FLAG_ASSOCIATING | IPW_FLAG_ASSOCIATED); - if (vap->iv_state == IEEE80211_S_RUN) + if (vap->iv_state == IEEE80211_S_RUN) { + IPW_UNLOCK(sc); ieee80211_new_state(vap, IEEE80211_S_SCAN, -1); + IPW_LOCK(sc); + } break; case IPW_STATE_DISABLED: /* XXX? is this right? 
*/ - sc->flags &= ~(IPW_FLAG_HACK | IPW_FLAG_SCANNING | + sc->flags &= ~(IPW_FLAG_HACK | IPW_FLAG_SCANNING | IPW_FLAG_ASSOCIATING | IPW_FLAG_ASSOCIATED); DPRINTFN(2, ("Firmware disabled (%s flags 0x%x)\n", IEEESTATE(vap), sc->flags)); @@ -1164,7 +1177,6 @@ ipw_rx_data_intr(struct ipw_softc *sc, struct ipw_status *status, bus_addr_t physaddr; int error; int8_t rssi, nf; - IPW_LOCK_DECL; DPRINTFN(5, ("received frame len=%u, rssi=%u\n", le32toh(status->len), status->rssi)); @@ -1234,10 +1246,10 @@ ipw_rx_data_intr(struct ipw_softc *sc, struct ipw_status *status, IPW_UNLOCK(sc); ni = ieee80211_find_rxnode(ic, mtod(m, struct ieee80211_frame_min *)); if (ni != NULL) { - (void) ieee80211_input(ni, m, rssi, nf); + (void) ieee80211_input(ni, m, rssi - nf, nf); ieee80211_free_node(ni); } else - (void) ieee80211_input_all(ic, m, rssi, nf); + (void) ieee80211_input_all(ic, m, rssi - nf, nf); IPW_LOCK(sc); bus_dmamap_sync(sc->rbd_dmat, sc->rbd_map, BUS_DMASYNC_PREWRITE); @@ -1378,8 +1390,11 @@ ipw_fatal_error_intr(struct ipw_softc *sc) struct ieee80211vap *vap = TAILQ_FIRST(&ic->ic_vaps); device_printf(sc->sc_dev, "firmware error\n"); - if (vap != NULL) + if (vap != NULL) { + IPW_UNLOCK(sc); ieee80211_cancel_scan(vap); + IPW_LOCK(sc); + } ieee80211_runtask(ic, &sc->sc_init_task); } @@ -1388,7 +1403,6 @@ ipw_intr(void *arg) { struct ipw_softc *sc = arg; uint32_t r; - IPW_LOCK_DECL; IPW_LOCK(sc); @@ -1718,7 +1732,6 @@ static void ipw_start(struct ifnet *ifp) { struct ipw_softc *sc = ifp->if_softc; - IPW_LOCK_DECL; IPW_LOCK(sc); ipw_start_locked(ifp); @@ -1775,7 +1788,9 @@ ipw_watchdog(void *arg) DPRINTFN(3, ("Scan timeout\n")); /* End the scan */ if (sc->flags & IPW_FLAG_SCANNING) { + IPW_UNLOCK(sc); ieee80211_scan_done(TAILQ_FIRST(&ic->ic_vaps)); + IPW_LOCK(sc); sc->flags &= ~IPW_FLAG_SCANNING; } } @@ -1791,7 +1806,6 @@ ipw_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) struct ieee80211com *ic = ifp->if_l2com; struct ifreq *ifr = (struct ifreq *) data; int error = 0, 
startall = 0; - IPW_LOCK_DECL; switch (cmd) { case SIOCSIFFLAGS: @@ -2201,7 +2215,6 @@ ipw_assoc(struct ieee80211com *ic, struct ieee80211vap *vap) struct ipw_security security; uint32_t data; int error; - IPW_LOCK_DECL; IPW_LOCK(sc); error = ipw_disable(sc); @@ -2260,8 +2273,8 @@ ipw_assoc(struct ieee80211com *ic, struct ieee80211vap *vap) if (error != 0) goto done; - if (vap->iv_appie_assocreq != NULL) { - struct ieee80211_appie *ie = vap->iv_appie_assocreq; + if (vap->iv_appie_wpa != NULL) { + struct ieee80211_appie *ie = vap->iv_appie_wpa; error = ipw_setwpaie(sc, ie->ie_data, ie->ie_len); if (error != 0) goto done; @@ -2291,7 +2304,6 @@ ipw_disassoc(struct ieee80211com *ic, struct ieee80211vap *vap) struct ifnet *ifp = vap->iv_ic->ic_ifp; struct ieee80211_node *ni = vap->iv_bss; struct ipw_softc *sc = ifp->if_softc; - IPW_LOCK_DECL; IPW_LOCK(sc); DPRINTF(("Disassociate from %6D\n", ni->ni_bssid, ":")); @@ -2327,7 +2339,6 @@ ipw_init(void *priv) struct ipw_softc *sc = priv; struct ifnet *ifp = sc->sc_ifp; struct ieee80211com *ic = ifp->if_l2com; - IPW_LOCK_DECL; IPW_LOCK(sc); ipw_init_locked(sc); @@ -2534,7 +2545,6 @@ static void ipw_stop(void *priv) { struct ipw_softc *sc = priv; - IPW_LOCK_DECL; IPW_LOCK(sc); ipw_stop_locked(sc); @@ -2661,7 +2671,6 @@ ipw_scan_start(struct ieee80211com *ic) { struct ifnet *ifp = ic->ic_ifp; struct ipw_softc *sc = ifp->if_softc; - IPW_LOCK_DECL; IPW_LOCK(sc); ipw_scan(sc); @@ -2673,7 +2682,6 @@ ipw_set_channel(struct ieee80211com *ic) { struct ifnet *ifp = ic->ic_ifp; struct ipw_softc *sc = ifp->if_softc; - IPW_LOCK_DECL; IPW_LOCK(sc); if (ic->ic_opmode == IEEE80211_M_MONITOR) { @@ -2701,7 +2709,6 @@ ipw_scan_end(struct ieee80211com *ic) { struct ifnet *ifp = ic->ic_ifp; struct ipw_softc *sc = ifp->if_softc; - IPW_LOCK_DECL; IPW_LOCK(sc); sc->flags &= ~IPW_FLAG_SCANNING; diff --git a/sys/dev/ipw/if_ipwvar.h b/sys/dev/ipw/if_ipwvar.h index 8d9e049ccfb..89702d0c634 100644 --- a/sys/dev/ipw/if_ipwvar.h +++ 
b/sys/dev/ipw/if_ipwvar.h @@ -164,13 +164,6 @@ struct ipw_softc { * NB.: This models the only instance of async locking in ipw_init_locked * and must be kept in sync. */ -#define IPW_LOCK_DECL int __waslocked = 0 -#define IPW_LOCK(sc) do { \ - if (!(__waslocked = mtx_owned(&(sc)->sc_mtx))) \ - mtx_lock(&sc->sc_mtx); \ -} while (0) -#define IPW_UNLOCK(sc) do { \ - if (!__waslocked) \ - mtx_unlock(&sc->sc_mtx); \ -} while (0) +#define IPW_LOCK(sc) mtx_lock(&sc->sc_mtx); +#define IPW_UNLOCK(sc) mtx_unlock(&sc->sc_mtx); #define IPW_LOCK_ASSERT(sc) mtx_assert(&(sc)->sc_mtx, MA_OWNED) diff --git a/sys/dev/isp/isp_pci.c b/sys/dev/isp/isp_pci.c index 8410ea64d72..e152e99ee67 100644 --- a/sys/dev/isp/isp_pci.c +++ b/sys/dev/isp/isp_pci.c @@ -46,6 +46,11 @@ __FBSDID("$FreeBSD$"); #include #include +#ifdef __sparc64__ +#include +#include +#endif + #include static uint32_t isp_pci_rd_reg(ispsoftc_t *, int); @@ -517,7 +522,11 @@ isp_get_specific_options(device_t dev, int chan, ispsoftc_t *isp) if (IS_FC(isp)) { ISP_FC_PC(isp, chan)->default_id = 109 - chan; } else { +#ifdef __sparc64__ + ISP_SPI_PC(isp, chan)->iid = OF_getscsinitid(dev); +#else ISP_SPI_PC(isp, chan)->iid = 7; +#endif } } else { if (IS_FC(isp)) { diff --git a/sys/dev/isp/isp_sbus.c b/sys/dev/isp/isp_sbus.c index 895645a673a..34dcc09f9e5 100644 --- a/sys/dev/isp/isp_sbus.c +++ b/sys/dev/isp/isp_sbus.c @@ -41,8 +41,10 @@ __FBSDID("$FreeBSD$"); #include #include +#include #include +#include #include #include #include @@ -264,11 +266,7 @@ isp_sbus_attach(device_t dev) isp->isp_confopts |= ISP_CFG_OWNLOOPID; } if (default_id == -1) { - /* - * XXX: should be a way to get properties w/o having - * XXX: to call OF_xxx functions - */ - default_id = 7; + default_id = OF_getscsinitid(dev); } ISP_SPI_PC(isp, 0)->iid = default_id; diff --git a/sys/dev/iwi/if_iwi.c b/sys/dev/iwi/if_iwi.c index 1105425da63..4b765ef16e9 100644 --- a/sys/dev/iwi/if_iwi.c +++ b/sys/dev/iwi/if_iwi.c @@ -363,8 +363,8 @@ iwi_attach(device_t dev) 
ifp->if_init = iwi_init; ifp->if_ioctl = iwi_ioctl; ifp->if_start = iwi_start; - IFQ_SET_MAXLEN(&ifp->if_snd, IFQ_MAXLEN); - ifp->if_snd.ifq_drv_maxlen = IFQ_MAXLEN; + IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen); + ifp->if_snd.ifq_drv_maxlen = ifqmaxlen; IFQ_SET_READY(&ifp->if_snd); ic->ic_ifp = ifp; diff --git a/sys/dev/iwn/if_iwn.c b/sys/dev/iwn/if_iwn.c index 2e60da3bf1b..39b26b5d55d 100644 --- a/sys/dev/iwn/if_iwn.c +++ b/sys/dev/iwn/if_iwn.c @@ -627,8 +627,8 @@ iwn_attach(device_t dev) ifp->if_init = iwn_init; ifp->if_ioctl = iwn_ioctl; ifp->if_start = iwn_start; - IFQ_SET_MAXLEN(&ifp->if_snd, IFQ_MAXLEN); - ifp->if_snd.ifq_drv_maxlen = IFQ_MAXLEN; + IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen); + ifp->if_snd.ifq_drv_maxlen = ifqmaxlen; IFQ_SET_READY(&ifp->if_snd); ieee80211_ifattach(ic, macaddr); @@ -2282,6 +2282,7 @@ iwn4965_tx_done(struct iwn_softc *sc, struct iwn_rx_desc *desc, struct iwn_rx_data *data) { struct iwn4965_tx_stat *stat = (struct iwn4965_tx_stat *)(desc + 1); + struct iwn_tx_ring *ring = &sc->txq[desc->qid & 0xf]; DPRINTF(sc, IWN_DEBUG_XMIT, "%s: " "qid %d idx %d retries %d nkill %d rate %x duration %d status %x\n", @@ -2289,7 +2290,7 @@ iwn4965_tx_done(struct iwn_softc *sc, struct iwn_rx_desc *desc, stat->btkillcnt, stat->rate, le16toh(stat->duration), le32toh(stat->status)); - bus_dmamap_sync(sc->rxq.data_dmat, data->map, BUS_DMASYNC_POSTREAD); + bus_dmamap_sync(ring->data_dmat, data->map, BUS_DMASYNC_POSTREAD); iwn_tx_done(sc, desc, stat->ackfailcnt, le32toh(stat->status) & 0xff); } @@ -2298,6 +2299,7 @@ iwn5000_tx_done(struct iwn_softc *sc, struct iwn_rx_desc *desc, struct iwn_rx_data *data) { struct iwn5000_tx_stat *stat = (struct iwn5000_tx_stat *)(desc + 1); + struct iwn_tx_ring *ring = &sc->txq[desc->qid & 0xf]; DPRINTF(sc, IWN_DEBUG_XMIT, "%s: " "qid %d idx %d retries %d nkill %d rate %x duration %d status %x\n", @@ -2310,7 +2312,7 @@ iwn5000_tx_done(struct iwn_softc *sc, struct iwn_rx_desc *desc, iwn5000_reset_sched(sc, desc->qid & 0xf, 
desc->idx); #endif - bus_dmamap_sync(sc->rxq.data_dmat, data->map, BUS_DMASYNC_POSTREAD); + bus_dmamap_sync(ring->data_dmat, data->map, BUS_DMASYNC_POSTREAD); iwn_tx_done(sc, desc, stat->ackfailcnt, le16toh(stat->status) & 0xff); } @@ -4728,7 +4730,7 @@ iwn_scan(struct iwn_softc *sc) chan->passive = htole16(78); else chan->passive = htole16(110); - hdr->crc_threshold = htole16(1); + hdr->crc_threshold = 0xffff; } else if (!(c->ic_flags & IEEE80211_CHAN_PASSIVE)) { chan->rf_gain = 0x28; chan->active = htole16(36); @@ -4741,7 +4743,7 @@ iwn_scan(struct iwn_softc *sc) chan->passive = htole16(88); else chan->passive = htole16(120); - hdr->crc_threshold = htole16(1); + hdr->crc_threshold = 0xffff; } DPRINTF(sc, IWN_DEBUG_STATE, diff --git a/sys/dev/ixgbe/ixgbe.c b/sys/dev/ixgbe/ixgbe.c index f8338e9aee4..9cc392db955 100644 --- a/sys/dev/ixgbe/ixgbe.c +++ b/sys/dev/ixgbe/ixgbe.c @@ -759,6 +759,7 @@ ixgbe_start_locked(struct tx_ring *txr, struct ifnet * ifp) /* Set watchdog on */ txr->watchdog_check = TRUE; + txr->watchdog_time = ticks; } return; @@ -798,8 +799,6 @@ ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m) /* Which queue to use */ if ((m->m_flags & M_FLOWID) != 0) i = m->m_pkthdr.flowid % adapter->num_queues; - else /* use the cpu we're on */ - i = curcpu % adapter->num_queues; txr = &adapter->tx_rings[i]; @@ -856,8 +855,11 @@ ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m) next = drbr_dequeue(ifp, txr->br); } - if (enqueued > 0) + if (enqueued > 0) { + /* Set watchdog on */ txr->watchdog_check = TRUE; + txr->watchdog_time = ticks; + } return (err); } @@ -1251,16 +1253,12 @@ ixgbe_handle_que(void *context, int pending) struct adapter *adapter = que->adapter; struct tx_ring *txr = que->txr; struct ifnet *ifp = adapter->ifp; - u32 loop = MAX_LOOP; - bool more_rx, more_tx; - - IXGBE_TX_LOCK(txr); - do { - more_rx = ixgbe_rxeof(que, adapter->rx_process_limit); - more_tx = ixgbe_txeof(txr); - } while (loop-- && (more_rx || more_tx)); + 
bool more; if (ifp->if_drv_flags & IFF_DRV_RUNNING) { + more = ixgbe_rxeof(que, adapter->rx_process_limit); + IXGBE_TX_LOCK(txr); + ixgbe_txeof(txr); #if __FreeBSD_version >= 800000 if (!drbr_empty(ifp, txr->br)) ixgbe_mq_start_locked(ifp, txr, NULL); @@ -1268,11 +1266,16 @@ ixgbe_handle_que(void *context, int pending) if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) ixgbe_start_locked(txr, ifp); #endif + IXGBE_TX_UNLOCK(txr); + if (more) { + taskqueue_enqueue(que->tq, &que->que_task); + return; + } } - IXGBE_TX_UNLOCK(txr); /* Reenable this interrupt */ ixgbe_enable_queue(adapter, que->msix); + return; } @@ -1718,7 +1721,6 @@ ixgbe_xmit(struct tx_ring *txr, struct mbuf **m_headp) * hardware that this frame is available to transmit. */ ++txr->total_packets; - txr->watchdog_time = ticks; IXGBE_WRITE_REG(&adapter->hw, IXGBE_TDT(txr->me), i); /* Do a clean if descriptors are low */ diff --git a/sys/dev/le/lance.c b/sys/dev/le/lance.c index db200995fa7..8700aa2cd43 100644 --- a/sys/dev/le/lance.c +++ b/sys/dev/le/lance.c @@ -133,8 +133,8 @@ lance_config(struct lance_softc *sc, const char* name, int unit) ifp->if_flags &= ~IFF_MULTICAST; #endif ifp->if_baudrate = IF_Mbps(10); - IFQ_SET_MAXLEN(&ifp->if_snd, IFQ_MAXLEN); - ifp->if_snd.ifq_drv_maxlen = IFQ_MAXLEN; + IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen); + ifp->if_snd.ifq_drv_maxlen = ifqmaxlen; IFQ_SET_READY(&ifp->if_snd); /* Initialize ifmedia structures. 
*/ diff --git a/sys/dev/malo/if_malo.c b/sys/dev/malo/if_malo.c index 57e0d3d8fa3..ae6ef882652 100644 --- a/sys/dev/malo/if_malo.c +++ b/sys/dev/malo/if_malo.c @@ -275,8 +275,8 @@ malo_attach(uint16_t devid, struct malo_softc *sc) ifp->if_start = malo_start; ifp->if_ioctl = malo_ioctl; ifp->if_init = malo_init; - IFQ_SET_MAXLEN(&ifp->if_snd, IFQ_MAXLEN); - ifp->if_snd.ifq_drv_maxlen = IFQ_MAXLEN; + IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen); + ifp->if_snd.ifq_drv_maxlen = ifqmaxlen; IFQ_SET_READY(&ifp->if_snd); ic->ic_ifp = ifp; diff --git a/sys/dev/md/md.c b/sys/dev/md/md.c index 78f2af3673c..edd687fca9e 100644 --- a/sys/dev/md/md.c +++ b/sys/dev/md/md.c @@ -665,11 +665,13 @@ mdstart_swap(struct md_s *sc, struct bio *bp) sf_buf_free(sf); sched_unpin(); vm_page_wakeup(m); + vm_page_lock(m); vm_page_lock_queues(); vm_page_activate(m); if (bp->bio_cmd == BIO_WRITE) vm_page_dirty(m); vm_page_unlock_queues(); + vm_page_unlock(m); /* Actions on further pages start at offset 0 */ p += PAGE_SIZE - offs; diff --git a/sys/dev/mii/brgphy.c b/sys/dev/mii/brgphy.c index 97a423cd27e..a733597fe76 100644 --- a/sys/dev/mii/brgphy.c +++ b/sys/dev/mii/brgphy.c @@ -685,16 +685,15 @@ brgphy_status(struct mii_softc *sc) } -#if 0 - /* Todo: Change bge/bce to use these settings. */ + /* Todo: Change bge to use these settings. */ - /* Fetch flow control settings from the PHY */ + /* Fetch flow control settings from the copper PHY. 
*/ if ((sc->mii_flags & MIIF_HAVEFIBER) == 0) { - /* Set FLAG0 is RX is enabled and FLAG1 if TX is enabled */ + /* Set FLAG0 if RX is enabled and FLAG1 if TX is enabled */ if ((anar & BRGPHY_ANAR_PC) && (anlpar & BRGPHY_ANLPAR_PC)) { mii->mii_media_active |= IFM_FLAG0 | IFM_FLAG1; } else if (!(anar & BRGPHY_ANAR_PC) && (anlpar & BRGPHY_ANAR_ASP) && - (anlpar & BRPHY_ANLPAR_PC) && (anlpar & BRGPHY_ANLPAR_ASP)) { + (anlpar & BRGPHY_ANLPAR_PC) && (anlpar & BRGPHY_ANLPAR_ASP)) { mii->mii_media_active |= IFM_FLAG1; } else if ((anar & BRGPHY_ANAR_PC) && (anar & BRGPHY_ANAR_ASP) && !(anlpar & BRGPHY_ANLPAR_PC) && (anlpar & BRGPHY_ANLPAR_ASP)) { @@ -703,7 +702,6 @@ brgphy_status(struct mii_softc *sc) } /* Todo: Add support for fiber settings too. */ -#endif brgphy_status_exit: diff --git a/sys/dev/mii/e1000phy.c b/sys/dev/mii/e1000phy.c index b50eb07a651..f30f91ce7a0 100644 --- a/sys/dev/mii/e1000phy.c +++ b/sys/dev/mii/e1000phy.c @@ -112,6 +112,7 @@ static const struct mii_phydesc e1000phys[] = { MII_PHY_DESC(MARVELL, E1116R), MII_PHY_DESC(MARVELL, E1118), MII_PHY_DESC(MARVELL, E3016), + MII_PHY_DESC(MARVELL, PHYG65G), MII_PHY_DESC(xxMARVELL, E1000), MII_PHY_DESC(xxMARVELL, E1011), MII_PHY_DESC(xxMARVELL, E1000_3), @@ -230,6 +231,7 @@ e1000phy_reset(struct mii_softc *sc) case MII_MODEL_MARVELL_E1116: case MII_MODEL_MARVELL_E1118: case MII_MODEL_MARVELL_E1149: + case MII_MODEL_MARVELL_PHYG65G: /* Disable energy detect mode. 
*/ reg &= ~E1000_SCR_EN_DETECT_MASK; reg |= E1000_SCR_AUTO_X_MODE; diff --git a/sys/dev/mii/miidevs b/sys/dev/mii/miidevs index 30cd09c4071..bbf668b8c45 100644 --- a/sys/dev/mii/miidevs +++ b/sys/dev/mii/miidevs @@ -247,6 +247,7 @@ model MARVELL E1116 0x0021 Marvell 88E1116 Gigabit PHY model MARVELL E1116R 0x0024 Marvell 88E1116R Gigabit PHY model MARVELL E1118 0x0022 Marvell 88E1118 Gigabit PHY model MARVELL E3016 0x0026 Marvell 88E3016 10/100 Fast Ethernet PHY +model MARVELL PHYG65G 0x0027 Marvell PHYG65G Gigabit PHY model xxMARVELL E1000 0x0005 Marvell 88E1000 Gigabit PHY model xxMARVELL E1011 0x0002 Marvell 88E1011 Gigabit PHY model xxMARVELL E1000_3 0x0003 Marvell 88E1000 Gigabit PHY diff --git a/sys/dev/mpt/mpt.c b/sys/dev/mpt/mpt.c index 6a74dc3c54d..40c581c6564 100644 --- a/sys/dev/mpt/mpt.c +++ b/sys/dev/mpt/mpt.c @@ -2667,6 +2667,8 @@ mpt_configure_ioc(struct mpt_softc *mpt, int tn, int needreset) mpt->is_fc = 0; mpt->is_sas = 0; mpt->is_spi = 1; + if (mpt->mpt_ini_id == MPT_INI_ID_NONE) + mpt->mpt_ini_id = pfp->PortSCSIID; } else if (pfp->PortType == MPI_PORTFACTS_PORTTYPE_ISCSI) { mpt_prt(mpt, "iSCSI not supported yet\n"); return (ENXIO); diff --git a/sys/dev/mpt/mpt.h b/sys/dev/mpt/mpt.h index 1fa2d1aaded..ab4cfa6c4ff 100644 --- a/sys/dev/mpt/mpt.h +++ b/sys/dev/mpt/mpt.h @@ -130,6 +130,11 @@ #include #endif +#ifdef __sparc64__ +#include +#include +#endif + #include #if __FreeBSD_version < 500000 @@ -172,6 +177,8 @@ #define MPT_ROLE_BOTH 3 #define MPT_ROLE_DEFAULT MPT_ROLE_INITIATOR +#define MPT_INI_ID_NONE -1 + /**************************** Forward Declarations ****************************/ struct mpt_softc; struct mpt_personality; @@ -637,7 +644,6 @@ struct mpt_softc { * Port Facts */ MSG_PORT_FACTS_REPLY * port_facts; -#define mpt_ini_id port_facts[0].PortSCSIID #define mpt_max_tgtcmds port_facts[0].MaxPostedCmdBuffers /* @@ -650,6 +656,7 @@ struct mpt_softc { CONFIG_PAGE_SCSI_PORT_2 _port_page2; CONFIG_PAGE_SCSI_DEVICE_0 _dev_page0[16]; 
CONFIG_PAGE_SCSI_DEVICE_1 _dev_page1[16]; + int _ini_id; uint16_t _tag_enable; uint16_t _disc_enable; } spi; @@ -658,6 +665,7 @@ struct mpt_softc { #define mpt_port_page2 cfg.spi._port_page2 #define mpt_dev_page0 cfg.spi._dev_page0 #define mpt_dev_page1 cfg.spi._dev_page1 +#define mpt_ini_id cfg.spi._ini_id #define mpt_tag_enable cfg.spi._tag_enable #define mpt_disc_enable cfg.spi._disc_enable struct mpi_fc_cfg { diff --git a/sys/dev/mpt/mpt_cam.c b/sys/dev/mpt/mpt_cam.c index 5e63ef908bd..39b57de20ac 100644 --- a/sys/dev/mpt/mpt_cam.c +++ b/sys/dev/mpt/mpt_cam.c @@ -1058,12 +1058,13 @@ mpt_read_config_info_spi(struct mpt_softc *mpt) static int mpt_set_initial_config_spi(struct mpt_softc *mpt) { - int i, pp1val = ((1 << mpt->mpt_ini_id) << 16) | mpt->mpt_ini_id; - int error; + int error, i, pp1val; mpt->mpt_disc_enable = 0xff; mpt->mpt_tag_enable = 0; + pp1val = ((1 << mpt->mpt_ini_id) << + MPI_SCSIPORTPAGE1_CFG_SHIFT_PORT_RESPONSE_ID) | mpt->mpt_ini_id; if (mpt->mpt_port_page1.Configuration != pp1val) { CONFIG_PAGE_SCSI_PORT_1 tmp; @@ -2574,6 +2575,10 @@ mpt_cam_event(struct mpt_softc *mpt, request_t *req, CAMLOCK_2_MPTLOCK(mpt); break; } + case MPI_EVENT_IR_RESYNC_UPDATE: + mpt_prt(mpt, "IR resync update %d completed\n", + (data0 >> 16) & 0xff); + break; case MPI_EVENT_EVENT_CHANGE: case MPI_EVENT_INTEGRATED_RAID: case MPI_EVENT_SAS_DEVICE_STATUS_CHANGE: diff --git a/sys/dev/mpt/mpt_pci.c b/sys/dev/mpt/mpt_pci.c index 1e9dc921c09..e3b0ea35994 100644 --- a/sys/dev/mpt/mpt_pci.c +++ b/sys/dev/mpt/mpt_pci.c @@ -460,6 +460,11 @@ mpt_pci_attach(device_t dev) mpt->raid_queue_depth = MPT_RAID_QUEUE_DEPTH_DEFAULT; mpt->verbose = MPT_PRT_NONE; mpt->role = MPT_ROLE_NONE; + mpt->mpt_ini_id = MPT_INI_ID_NONE; +#ifdef __sparc64__ + if (mpt->is_spi) + mpt->mpt_ini_id = OF_getscsinitid(dev); +#endif mpt_set_options(mpt); if (mpt->verbose == MPT_PRT_NONE) { mpt->verbose = MPT_PRT_WARN; diff --git a/sys/dev/msk/if_msk.c b/sys/dev/msk/if_msk.c index 61b131bf451..3f97c3a9f5c 100644 
--- a/sys/dev/msk/if_msk.c +++ b/sys/dev/msk/if_msk.c @@ -223,6 +223,8 @@ static struct msk_product { "Marvell Yukon 88E8072 Gigabit Ethernet" }, { VENDORID_MARVELL, DEVICEID_MRVL_4380, "Marvell Yukon 88E8057 Gigabit Ethernet" }, + { VENDORID_MARVELL, DEVICEID_MRVL_4381, + "Marvell Yukon 88E8059 Gigabit Ethernet" }, { VENDORID_DLINK, DEVICEID_DLINK_DGE550SX, "D-Link 550SX Gigabit Ethernet" }, { VENDORID_DLINK, DEVICEID_DLINK_DGE560SX, @@ -239,7 +241,9 @@ static const char *model_name[] = { "Yukon FE", "Yukon FE+", "Yukon Supreme", - "Yukon Ultra 2" + "Yukon Ultra 2", + "Yukon Unknown", + "Yukon Optima", }; static int mskc_probe(device_t); @@ -1097,7 +1101,8 @@ msk_ioctl(struct ifnet *ifp, u_long command, caddr_t data) (IFCAP_VLAN_HWTAGGING & ifp->if_capabilities) != 0) { ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; if ((IFCAP_VLAN_HWTAGGING & ifp->if_capenable) == 0) - ifp->if_capenable &= ~IFCAP_VLAN_HWTSO; + ifp->if_capenable &= + ~(IFCAP_VLAN_HWTSO | IFCAP_VLAN_HWCSUM); msk_setvlan(sc_if, ifp); } if (ifp->if_mtu > ETHERMTU && @@ -1229,6 +1234,7 @@ msk_phy_power(struct msk_softc *sc, int mode) case CHIP_ID_YUKON_EX: case CHIP_ID_YUKON_FE_P: case CHIP_ID_YUKON_UL_2: + case CHIP_ID_YUKON_OPT: CSR_WRITE_2(sc, B0_CTST, Y2_HW_WOL_OFF); /* Enable all clocks. */ @@ -1372,6 +1378,10 @@ mskc_reset(struct msk_softc *sc) GMC_BYP_MACSECRX_ON | GMC_BYP_MACSECTX_ON | GMC_BYP_RETR_ON); } + if (sc->msk_hw_id == CHIP_ID_YUKON_OPT && sc->msk_hw_rev == 0) { + /* Disable PCIe PHY powerdown(reg 0x80, bit7). */ + CSR_WRITE_4(sc, Y2_PEX_PHY_DATA, (0x0080 << 16) | 0x0080); + } CSR_WRITE_1(sc, B2_TST_CTRL1, TST_CFG_WRITE_OFF); /* LED On. */ @@ -1705,8 +1715,9 @@ mskc_attach(device_t dev) sc->msk_hw_rev = (CSR_READ_1(sc, B2_MAC_CFG) >> 4) & 0x0f; /* Bail out if chip is not recognized. 
*/ if (sc->msk_hw_id < CHIP_ID_YUKON_XL || - sc->msk_hw_id > CHIP_ID_YUKON_UL_2 || - sc->msk_hw_id == CHIP_ID_YUKON_SUPR) { + sc->msk_hw_id > CHIP_ID_YUKON_OPT || + sc->msk_hw_id == CHIP_ID_YUKON_SUPR || + sc->msk_hw_id == CHIP_ID_YUKON_UNKNOWN) { device_printf(dev, "unknown device: id=0x%02x, rev=0x%02x\n", sc->msk_hw_id, sc->msk_hw_rev); mtx_destroy(&sc->msk_mtx); @@ -1819,6 +1830,10 @@ mskc_attach(device_t dev) sc->msk_clock = 125; /* 125 MHz */ sc->msk_pflags |= MSK_FLAG_JUMBO; break; + case CHIP_ID_YUKON_OPT: + sc->msk_clock = 125; /* 125 MHz */ + sc->msk_pflags |= MSK_FLAG_JUMBO | MSK_FLAG_DESCV2; + break; default: sc->msk_clock = 156; /* 156 MHz */ break; @@ -2605,23 +2620,32 @@ msk_encap(struct msk_if_softc *sc_if, struct mbuf **m_head) ip = (struct ip *)(mtod(m, char *) + offset); offset += (ip->ip_hl << 2); tcp_offset = offset; - /* - * It seems that Yukon II has Tx checksum offload bug for - * small TCP packets that's less than 60 bytes in size - * (e.g. TCP window probe packet, pure ACK packet). - * Common work around like padding with zeros to make the - * frame minimum ethernet frame size didn't work at all. - * Instead of disabling checksum offload completely we - * resort to S/W checksum routine when we encounter short - * TCP frames. - * Short UDP packets appear to be handled correctly by - * Yukon II. Also I assume this bug does not happen on - * controllers that use newer descriptor format or - * automatic Tx checksum calaulcation. 
- */ - if ((sc_if->msk_flags & MSK_FLAG_AUTOTX_CSUM) == 0 && + if ((m->m_pkthdr.csum_flags & CSUM_TSO) != 0) { + m = m_pullup(m, offset + sizeof(struct tcphdr)); + if (m == NULL) { + *m_head = NULL; + return (ENOBUFS); + } + tcp = (struct tcphdr *)(mtod(m, char *) + offset); + offset += (tcp->th_off << 2); + } else if ((sc_if->msk_flags & MSK_FLAG_AUTOTX_CSUM) == 0 && (m->m_pkthdr.len < MSK_MIN_FRAMELEN) && (m->m_pkthdr.csum_flags & CSUM_TCP) != 0) { + /* + * It seems that Yukon II has Tx checksum offload bug + * for small TCP packets that's less than 60 bytes in + * size (e.g. TCP window probe packet, pure ACK packet). + * Common work around like padding with zeros to make + * the frame minimum ethernet frame size didn't work at + * all. + * Instead of disabling checksum offload completely we + * resort to S/W checksum routine when we encounter + * short TCP frames. + * Short UDP packets appear to be handled correctly by + * Yukon II. Also I assume this bug does not happen on + * controllers that use newer descriptor format or + * automatic Tx checksum calaulcation. + */ m = m_pullup(m, offset + sizeof(struct tcphdr)); if (m == NULL) { *m_head = NULL; @@ -2632,15 +2656,6 @@ msk_encap(struct msk_if_softc *sc_if, struct mbuf **m_head) m->m_pkthdr.len, offset); m->m_pkthdr.csum_flags &= ~CSUM_TCP; } - if ((m->m_pkthdr.csum_flags & CSUM_TSO) != 0) { - m = m_pullup(m, offset + sizeof(struct tcphdr)); - if (m == NULL) { - *m_head = NULL; - return (ENOBUFS); - } - tcp = (struct tcphdr *)(mtod(m, char *) + offset); - offset += (tcp->th_off << 2); - } *m_head = m; } @@ -2890,20 +2905,15 @@ mskc_shutdown(device_t dev) sc = device_get_softc(dev); MSK_LOCK(sc); for (i = 0; i < sc->msk_num_port; i++) { - if (sc->msk_if[i] != NULL) + if (sc->msk_if[i] != NULL && sc->msk_if[i]->msk_ifp != NULL && + ((sc->msk_if[i]->msk_ifp->if_drv_flags & + IFF_DRV_RUNNING) != 0)) msk_stop(sc->msk_if[i]); } - - /* Disable all interrupts. 
*/ - CSR_WRITE_4(sc, B0_IMSK, 0); - CSR_READ_4(sc, B0_IMSK); - CSR_WRITE_4(sc, B0_HWE_IMSK, 0); - CSR_READ_4(sc, B0_HWE_IMSK); + MSK_UNLOCK(sc); /* Put hardware reset. */ CSR_WRITE_2(sc, B0_CTST, CS_RST_SET); - - MSK_UNLOCK(sc); return (0); } @@ -3511,6 +3521,8 @@ msk_handle_events(struct msk_softc *sc) sc_if->msk_csum = status; break; case OP_RXSTAT: + if (!(sc_if->msk_ifp->if_drv_flags & IFF_DRV_RUNNING)) + break; if (sc_if->msk_framesize > (MCLBYTES - MSK_RX_BUF_ALIGN)) msk_jumbo_rxeof(sc_if, status, control, len); @@ -3580,6 +3592,7 @@ msk_intr(void *xsc) (sc->msk_pflags & MSK_FLAG_SUSPEND) != 0 || (status & sc->msk_intrmask) == 0) { CSR_WRITE_4(sc, B0_Y2_SP_ICR, 2); + MSK_UNLOCK(sc); return; } @@ -3822,9 +3835,9 @@ msk_init_locked(struct msk_if_softc *sc_if) if ((sc_if->msk_flags & MSK_FLAG_RAMBUF) == 0) { /* Set Rx Pause threshould. */ - CSR_WRITE_1(sc, MR_ADDR(sc_if->msk_port, RX_GMF_LP_THR), + CSR_WRITE_2(sc, MR_ADDR(sc_if->msk_port, RX_GMF_LP_THR), MSK_ECU_LLPP); - CSR_WRITE_1(sc, MR_ADDR(sc_if->msk_port, RX_GMF_UP_THR), + CSR_WRITE_2(sc, MR_ADDR(sc_if->msk_port, RX_GMF_UP_THR), MSK_ECU_ULPP); /* Configure store-and-forward for Tx. */ msk_set_tx_stfwd(sc_if); @@ -3917,6 +3930,11 @@ msk_init_locked(struct msk_if_softc *sc_if) msk_stop(sc_if); return; } + if (sc->msk_hw_id == CHIP_ID_YUKON_EX) { + /* Disable flushing of non-ASF packets. */ + CSR_WRITE_4(sc, MR_ADDR(sc_if->msk_port, RX_GMF_CTRL_T), + GMF_RX_MACSEC_FLUSH_OFF); + } /* Configure interrupt handling. 
*/ if (sc_if->msk_port == MSK_PORT_A) { diff --git a/sys/dev/msk/if_mskreg.h b/sys/dev/msk/if_mskreg.h index 34cac676305..3b05b9ba858 100644 --- a/sys/dev/msk/if_mskreg.h +++ b/sys/dev/msk/if_mskreg.h @@ -145,6 +145,7 @@ #define DEVICEID_MRVL_436B 0x436B #define DEVICEID_MRVL_436C 0x436C #define DEVICEID_MRVL_4380 0x4380 +#define DEVICEID_MRVL_4381 0x4381 /* * D-Link gigabit ethernet device ID @@ -621,8 +622,8 @@ #define RX_GMF_FL_MSK 0x0c4c /* 32 bit Rx GMAC FIFO Flush Mask */ #define RX_GMF_FL_THR 0x0c50 /* 32 bit Rx GMAC FIFO Flush Threshold */ #define RX_GMF_TR_THR 0x0c54 /* 32 bit Rx Truncation Threshold (Yukon-2) */ -#define RX_GMF_UP_THR 0x0c58 /* 8 bit Rx Upper Pause Thr (Yukon-EC_U) */ -#define RX_GMF_LP_THR 0x0c5a /* 8 bit Rx Lower Pause Thr (Yukon-EC_U) */ +#define RX_GMF_UP_THR 0x0c58 /* 16 bit Rx Upper Pause Thr (Yukon-EC_U) */ +#define RX_GMF_LP_THR 0x0c5a /* 16 bit Rx Lower Pause Thr (Yukon-EC_U) */ #define RX_GMF_VLAN 0x0c5c /* 32 bit Rx VLAN Type Register (Yukon-2) */ #define RX_GMF_WP 0x0c60 /* 32 bit Rx GMAC FIFO Write Pointer */ #define RX_GMF_WLEV 0x0c68 /* 32 bit Rx GMAC FIFO Write Level */ @@ -828,6 +829,9 @@ #define Y2_IS_CHK_RX2 BIT_10 /* Descriptor error Rx 2 */ #define Y2_IS_CHK_TXS2 BIT_9 /* Descriptor error TXS 2 */ #define Y2_IS_CHK_TXA2 BIT_8 /* Descriptor error TXA 2 */ +#define Y2_IS_PSM_ACK BIT_7 /* PSM Ack (Yukon Optima) */ +#define Y2_IS_PTP_TIST BIT_6 /* PTP Time Stamp (Yukon Optima) */ +#define Y2_IS_PHY_QLNK BIT_5 /* PHY Quick Link (Yukon Optima) */ #define Y2_IS_IRQ_PHY1 BIT_4 /* Interrupt from PHY 1 */ #define Y2_IS_IRQ_MAC1 BIT_3 /* Interrupt from MAC 1 */ #define Y2_IS_CHK_RX1 BIT_2 /* Descriptor error Rx 1 */ @@ -894,6 +898,8 @@ #define CHIP_ID_YUKON_FE_P 0xb8 /* Chip ID for YUKON-2 FE+ */ #define CHIP_ID_YUKON_SUPR 0xb9 /* Chip ID for YUKON-2 Supreme */ #define CHIP_ID_YUKON_UL_2 0xba /* Chip ID for YUKON-2 Ultra 2 */ +#define CHIP_ID_YUKON_UNKNOWN 0xbb +#define CHIP_ID_YUKON_OPT 0xbc /* Chip ID for YUKON-2 Optima */
#define CHIP_REV_YU_XL_A0 0 /* Chip Rev. for Yukon-2 A0 */ #define CHIP_REV_YU_XL_A1 1 /* Chip Rev. for Yukon-2 A1 */ @@ -1941,6 +1947,8 @@ #define RX_TRUNC_OFF BIT_26 /* disable packet truncation */ #define RX_VLAN_STRIP_ON BIT_25 /* enable VLAN stripping */ #define RX_VLAN_STRIP_OFF BIT_24 /* disable VLAN stripping */ +#define GMF_RX_MACSEC_FLUSH_ON BIT_23 +#define GMF_RX_MACSEC_FLUSH_OFF BIT_22 #define GMF_RX_OVER_ON BIT_19 /* enable flushing on receive overrun */ #define GMF_RX_OVER_OFF BIT_18 /* disable flushing on receive overrun */ #define GMF_ASF_RX_OVER_ON BIT_17 /* enable flushing of ASF when overrun */ diff --git a/sys/dev/mvs/mvs.c b/sys/dev/mvs/mvs.c new file mode 100644 index 00000000000..2694d9cc380 --- /dev/null +++ b/sys/dev/mvs/mvs.c @@ -0,0 +1,2173 @@ +/*- + * Copyright (c) 2010 Alexander Motin + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer, + * without modification, immediately at the beginning of the file. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "mvs.h" + +#include +#include +#include +#include +#include + +/* local prototypes */ +static int mvs_ch_suspend(device_t dev); +static int mvs_ch_resume(device_t dev); +static void mvs_dmainit(device_t dev); +static void mvs_dmasetupc_cb(void *xsc, bus_dma_segment_t *segs, int nsegs, int error); +static void mvs_dmafini(device_t dev); +static void mvs_slotsalloc(device_t dev); +static void mvs_slotsfree(device_t dev); +static void mvs_setup_edma_queues(device_t dev); +static void mvs_set_edma_mode(device_t dev, enum mvs_edma_mode mode); +static void mvs_ch_pm(void *arg); +static void mvs_ch_intr_locked(void *data); +static void mvs_ch_intr(void *data); +static void mvs_reset(device_t dev); +static void mvs_softreset(device_t dev, union ccb *ccb); + +static int mvs_sata_connect(struct mvs_channel *ch); +static int mvs_sata_phy_reset(device_t dev); +static int mvs_wait(device_t dev, u_int s, u_int c, int t); +static void mvs_tfd_read(device_t dev, union ccb *ccb); +static void mvs_tfd_write(device_t dev, union ccb *ccb); +static void mvs_legacy_intr(device_t dev); +static void mvs_crbq_intr(device_t dev); +static void mvs_begin_transaction(device_t dev, union ccb *ccb); +static void mvs_legacy_execute_transaction(struct mvs_slot *slot); +static void 
mvs_timeout(struct mvs_slot *slot); +static void mvs_dmasetprd(void *arg, bus_dma_segment_t *segs, int nsegs, int error); +static void mvs_requeue_frozen(device_t dev); +static void mvs_execute_transaction(struct mvs_slot *slot); +static void mvs_end_transaction(struct mvs_slot *slot, enum mvs_err_type et); + +static void mvs_issue_read_log(device_t dev); +static void mvs_process_read_log(device_t dev, union ccb *ccb); + +static void mvsaction(struct cam_sim *sim, union ccb *ccb); +static void mvspoll(struct cam_sim *sim); + +MALLOC_DEFINE(M_MVS, "MVS driver", "MVS driver data buffers"); + +static int +mvs_ch_probe(device_t dev) +{ + + device_set_desc_copy(dev, "Marvell SATA channel"); + return (0); +} + +static int +mvs_ch_attach(device_t dev) +{ + struct mvs_controller *ctlr = device_get_softc(device_get_parent(dev)); + struct mvs_channel *ch = device_get_softc(dev); + struct cam_devq *devq; + int rid, error, i, sata_rev = 0; + + ch->dev = dev; + ch->unit = (intptr_t)device_get_ivars(dev); + ch->quirks = ctlr->quirks; + mtx_init(&ch->mtx, "MVS channel lock", NULL, MTX_DEF); + resource_int_value(device_get_name(dev), + device_get_unit(dev), "pm_level", &ch->pm_level); + if (ch->pm_level > 3) + callout_init_mtx(&ch->pm_timer, &ch->mtx, 0); + resource_int_value(device_get_name(dev), + device_get_unit(dev), "sata_rev", &sata_rev); + for (i = 0; i < 16; i++) { + ch->user[i].revision = sata_rev; + ch->user[i].mode = 0; + ch->user[i].bytecount = (ch->quirks & MVS_Q_GENIIE) ? 
8192 : 2048; + ch->user[i].tags = MVS_MAX_SLOTS; + ch->curr[i] = ch->user[i]; + if (ch->pm_level) { + ch->user[i].caps = CTS_SATA_CAPS_H_PMREQ | + CTS_SATA_CAPS_H_APST | + CTS_SATA_CAPS_D_PMREQ | CTS_SATA_CAPS_D_APST; + } + } + rid = ch->unit; + if (!(ch->r_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, + &rid, RF_ACTIVE))) { + /* Do not leak the channel mutex initialized above. */ + mtx_destroy(&ch->mtx); + return (ENXIO); + } + mvs_dmainit(dev); + mvs_slotsalloc(dev); + mvs_ch_resume(dev); + mtx_lock(&ch->mtx); + rid = ATA_IRQ_RID; + if (!(ch->r_irq = bus_alloc_resource_any(dev, SYS_RES_IRQ, + &rid, RF_SHAREABLE | RF_ACTIVE))) { + device_printf(dev, "Unable to map interrupt\n"); + error = ENXIO; + goto err0; + } + if ((bus_setup_intr(dev, ch->r_irq, ATA_INTR_FLAGS, NULL, + mvs_ch_intr_locked, dev, &ch->ih))) { + device_printf(dev, "Unable to setup interrupt\n"); + error = ENXIO; + goto err1; + } + /* Create the device queue for our SIM. */ + devq = cam_simq_alloc(MVS_MAX_SLOTS - 1); + if (devq == NULL) { + device_printf(dev, "Unable to allocate simq\n"); + error = ENOMEM; + goto err1; + } + /* Construct SIM entry */ + ch->sim = cam_sim_alloc(mvsaction, mvspoll, "mvsch", ch, + device_get_unit(dev), &ch->mtx, + 2, (ch->quirks & MVS_Q_GENI) ? 0 : MVS_MAX_SLOTS - 1, + devq); + if (ch->sim == NULL) { + cam_simq_free(devq); + device_printf(dev, "unable to allocate sim\n"); + error = ENOMEM; + goto err1; + } + if (xpt_bus_register(ch->sim, dev, 0) != CAM_SUCCESS) { + device_printf(dev, "unable to register xpt bus\n"); + error = ENXIO; + goto err2; + } + if (xpt_create_path(&ch->path, /*periph*/NULL, cam_sim_path(ch->sim), + CAM_TARGET_WILDCARD, CAM_LUN_WILDCARD) != CAM_REQ_CMP) { + device_printf(dev, "unable to create path\n"); + error = ENXIO; + goto err3; + } + if (ch->pm_level > 3) { + callout_reset(&ch->pm_timer, + (ch->pm_level == 4) ?
hz / 1000 : hz / 8, + mvs_ch_pm, dev); + } + mtx_unlock(&ch->mtx); + return (0); + +err3: + xpt_bus_deregister(cam_sim_path(ch->sim)); +err2: + cam_sim_free(ch->sim, /*free_devq*/TRUE); +err1: + bus_release_resource(dev, SYS_RES_IRQ, ATA_IRQ_RID, ch->r_irq); +err0: + bus_release_resource(dev, SYS_RES_MEMORY, ch->unit, ch->r_mem); + mtx_unlock(&ch->mtx); + mtx_destroy(&ch->mtx); + return (error); +} + +static int +mvs_ch_detach(device_t dev) +{ + struct mvs_channel *ch = device_get_softc(dev); + + mtx_lock(&ch->mtx); + xpt_async(AC_LOST_DEVICE, ch->path, NULL); + xpt_free_path(ch->path); + xpt_bus_deregister(cam_sim_path(ch->sim)); + cam_sim_free(ch->sim, /*free_devq*/TRUE); + mtx_unlock(&ch->mtx); + + if (ch->pm_level > 3) + callout_drain(&ch->pm_timer); + bus_teardown_intr(dev, ch->r_irq, ch->ih); + bus_release_resource(dev, SYS_RES_IRQ, ATA_IRQ_RID, ch->r_irq); + + mvs_ch_suspend(dev); + mvs_slotsfree(dev); + mvs_dmafini(dev); + + bus_release_resource(dev, SYS_RES_MEMORY, ch->unit, ch->r_mem); + mtx_destroy(&ch->mtx); + return (0); +} + +static int +mvs_ch_suspend(device_t dev) +{ + struct mvs_channel *ch = device_get_softc(dev); + + /* Stop EDMA */ + mvs_set_edma_mode(dev, MVS_EDMA_OFF); + /* Disable port interrupts. */ + ATA_OUTL(ch->r_mem, EDMA_IEM, 0); + return (0); +} + +static int +mvs_ch_resume(device_t dev) +{ + struct mvs_channel *ch = device_get_softc(dev); + uint32_t reg; + + /* Disable port interrupts */ + ATA_OUTL(ch->r_mem, EDMA_IEM, 0); + /* Stop EDMA */ + ch->curr_mode = MVS_EDMA_UNKNOWN; + mvs_set_edma_mode(dev, MVS_EDMA_OFF); + /* Clear and configure FIS interrupts. */ + ATA_OUTL(ch->r_mem, SATA_FISIC, 0); + reg = ATA_INL(ch->r_mem, SATA_FISC); + reg |= SATA_FISC_FISWAIT4HOSTRDYEN_B1; + ATA_OUTL(ch->r_mem, SATA_FISC, reg); + /* Set the same bit in the FIS interrupt mask register; write the result back to SATA_FISIM, where it was read from, instead of overwriting SATA_FISC. */ + reg = ATA_INL(ch->r_mem, SATA_FISIM); + reg |= SATA_FISC_FISWAIT4HOSTRDYEN_B1; + ATA_OUTL(ch->r_mem, SATA_FISIM, reg); + /* Clear SATA error register.
*/ + ATA_OUTL(ch->r_mem, SATA_SE, 0xffffffff); + /* Clear any outstanding error interrupts. */ + ATA_OUTL(ch->r_mem, EDMA_IEC, 0); + /* Unmask all error interrupts */ + ATA_OUTL(ch->r_mem, EDMA_IEM, ~EDMA_IE_TRANSIENT); + return (0); +} + +struct mvs_dc_cb_args { + bus_addr_t maddr; + int error; +}; + +static void +mvs_dmainit(device_t dev) +{ + struct mvs_channel *ch = device_get_softc(dev); + struct mvs_dc_cb_args dcba; + + /* EDMA command request area. */ + if (bus_dma_tag_create(bus_get_dma_tag(dev), 1024, 0, + BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, + NULL, NULL, MVS_WORKRQ_SIZE, 1, MVS_WORKRQ_SIZE, + 0, NULL, NULL, &ch->dma.workrq_tag)) + goto error; + if (bus_dmamem_alloc(ch->dma.workrq_tag, (void **)&ch->dma.workrq, 0, + &ch->dma.workrq_map)) + goto error; + if (bus_dmamap_load(ch->dma.workrq_tag, ch->dma.workrq_map, ch->dma.workrq, + MVS_WORKRQ_SIZE, mvs_dmasetupc_cb, &dcba, 0) || dcba.error) { + bus_dmamem_free(ch->dma.workrq_tag, ch->dma.workrq, ch->dma.workrq_map); + goto error; + } + ch->dma.workrq_bus = dcba.maddr; + /* EDMA command response area. */ + if (bus_dma_tag_create(bus_get_dma_tag(dev), 256, 0, + BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, + NULL, NULL, MVS_WORKRP_SIZE, 1, MVS_WORKRP_SIZE, + 0, NULL, NULL, &ch->dma.workrp_tag)) + goto error; + if (bus_dmamem_alloc(ch->dma.workrp_tag, (void **)&ch->dma.workrp, 0, + &ch->dma.workrp_map)) + goto error; + if (bus_dmamap_load(ch->dma.workrp_tag, ch->dma.workrp_map, ch->dma.workrp, + MVS_WORKRP_SIZE, mvs_dmasetupc_cb, &dcba, 0) || dcba.error) { + bus_dmamem_free(ch->dma.workrp_tag, ch->dma.workrp, ch->dma.workrp_map); + goto error; + } + ch->dma.workrp_bus = dcba.maddr; + /* Data area. 
*/ + if (bus_dma_tag_create(bus_get_dma_tag(dev), 2, MVS_EPRD_MAX, + BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, + NULL, NULL, + MVS_SG_ENTRIES * PAGE_SIZE * MVS_MAX_SLOTS, + MVS_SG_ENTRIES, MVS_EPRD_MAX, + 0, busdma_lock_mutex, &ch->mtx, &ch->dma.data_tag)) { + goto error; + } + return; + +error: + device_printf(dev, "WARNING - DMA initialization failed\n"); + mvs_dmafini(dev); +} + +static void +mvs_dmasetupc_cb(void *xsc, bus_dma_segment_t *segs, int nsegs, int error) +{ + struct mvs_dc_cb_args *dcba = (struct mvs_dc_cb_args *)xsc; + + if (!(dcba->error = error)) + dcba->maddr = segs[0].ds_addr; +} + +static void +mvs_dmafini(device_t dev) +{ + struct mvs_channel *ch = device_get_softc(dev); + + if (ch->dma.data_tag) { + bus_dma_tag_destroy(ch->dma.data_tag); + ch->dma.data_tag = NULL; + } + if (ch->dma.workrp_bus) { + bus_dmamap_unload(ch->dma.workrp_tag, ch->dma.workrp_map); + bus_dmamem_free(ch->dma.workrp_tag, ch->dma.workrp, ch->dma.workrp_map); + ch->dma.workrp_bus = 0; + ch->dma.workrp_map = NULL; + ch->dma.workrp = NULL; + } + if (ch->dma.workrp_tag) { + bus_dma_tag_destroy(ch->dma.workrp_tag); + ch->dma.workrp_tag = NULL; + } + if (ch->dma.workrq_bus) { + bus_dmamap_unload(ch->dma.workrq_tag, ch->dma.workrq_map); + bus_dmamem_free(ch->dma.workrq_tag, ch->dma.workrq, ch->dma.workrq_map); + ch->dma.workrq_bus = 0; + ch->dma.workrq_map = NULL; + ch->dma.workrq = NULL; + } + if (ch->dma.workrq_tag) { + bus_dma_tag_destroy(ch->dma.workrq_tag); + ch->dma.workrq_tag = NULL; + } +} + +static void +mvs_slotsalloc(device_t dev) +{ + struct mvs_channel *ch = device_get_softc(dev); + int i; + + /* Alloc and setup command/dma slots */ + bzero(ch->slot, sizeof(ch->slot)); + for (i = 0; i < MVS_MAX_SLOTS; i++) { + struct mvs_slot *slot = &ch->slot[i]; + + slot->dev = dev; + slot->slot = i; + slot->state = MVS_SLOT_EMPTY; + slot->ccb = NULL; + callout_init_mtx(&slot->timeout, &ch->mtx, 0); + + if (bus_dmamap_create(ch->dma.data_tag, 0, &slot->dma.data_map)) + 
device_printf(ch->dev, "FAILURE - create data_map\n"); + } +} + +static void +mvs_slotsfree(device_t dev) +{ + struct mvs_channel *ch = device_get_softc(dev); + int i; + + /* Free all dma slots */ + for (i = 0; i < MVS_MAX_SLOTS; i++) { + struct mvs_slot *slot = &ch->slot[i]; + + callout_drain(&slot->timeout); + if (slot->dma.data_map) { + bus_dmamap_destroy(ch->dma.data_tag, slot->dma.data_map); + slot->dma.data_map = NULL; + } + } +} + +static void +mvs_setup_edma_queues(device_t dev) +{ + struct mvs_channel *ch = device_get_softc(dev); + uint64_t work; + + /* Requests queue. */ + work = ch->dma.workrq_bus; + ATA_OUTL(ch->r_mem, EDMA_REQQBAH, work >> 32); + ATA_OUTL(ch->r_mem, EDMA_REQQIP, work & 0xffffffff); + ATA_OUTL(ch->r_mem, EDMA_REQQOP, work & 0xffffffff); + bus_dmamap_sync(ch->dma.workrq_tag, ch->dma.workrq_map, BUS_DMASYNC_PREWRITE); + /* Reponses queue. */ + bzero(ch->dma.workrp, 256); + work = ch->dma.workrp_bus; + ATA_OUTL(ch->r_mem, EDMA_RESQBAH, work >> 32); + ATA_OUTL(ch->r_mem, EDMA_RESQIP, work & 0xffffffff); + ATA_OUTL(ch->r_mem, EDMA_RESQOP, work & 0xffffffff); + bus_dmamap_sync(ch->dma.workrp_tag, ch->dma.workrp_map, BUS_DMASYNC_PREREAD); + ch->out_idx = 0; + ch->in_idx = 0; +} + +static void +mvs_set_edma_mode(device_t dev, enum mvs_edma_mode mode) +{ + struct mvs_channel *ch = device_get_softc(dev); + int timeout; + uint32_t ecfg, fcfg, hc, ltm, unkn; + + if (mode == ch->curr_mode) + return; + /* If we are running, we should stop first. */ + if (ch->curr_mode != MVS_EDMA_OFF) { + ATA_OUTL(ch->r_mem, EDMA_CMD, EDMA_CMD_EDSEDMA); + timeout = 0; + while (ATA_INL(ch->r_mem, EDMA_CMD) & EDMA_CMD_EENEDMA) { + DELAY(1000); + if (timeout++ > 1000) { + device_printf(dev, "stopping EDMA engine failed\n"); + break; + } + }; + } + ch->curr_mode = mode; + ch->fbs_enabled = 0; + ch->fake_busy = 0; + /* Report mode to controller. Needed for correct CCC operation. */ + MVS_EDMA(device_get_parent(dev), dev, mode); + /* Configure new mode. 
*/ + ecfg = EDMA_CFG_RESERVED | EDMA_CFG_RESERVED2 | EDMA_CFG_EHOSTQUEUECACHEEN; + if (ch->pm_present) { + ecfg |= EDMA_CFG_EMASKRXPM; + if (ch->quirks & MVS_Q_GENIIE) { + ecfg |= EDMA_CFG_EEDMAFBS; + ch->fbs_enabled = 1; + } + } + if (ch->quirks & MVS_Q_GENI) + ecfg |= EDMA_CFG_ERDBSZ; + else if (ch->quirks & MVS_Q_GENII) + ecfg |= EDMA_CFG_ERDBSZEXT | EDMA_CFG_EWRBUFFERLEN; + if (ch->quirks & MVS_Q_CT) + ecfg |= EDMA_CFG_ECUTTHROUGHEN; + if (mode != MVS_EDMA_OFF) + ecfg |= EDMA_CFG_EEARLYCOMPLETIONEN; + if (mode == MVS_EDMA_QUEUED) + ecfg |= EDMA_CFG_EQUE; + else if (mode == MVS_EDMA_NCQ) + ecfg |= EDMA_CFG_ESATANATVCMDQUE; + ATA_OUTL(ch->r_mem, EDMA_CFG, ecfg); + mvs_setup_edma_queues(dev); + if (ch->quirks & MVS_Q_GENIIE) { + /* Configure FBS-related registers */ + fcfg = ATA_INL(ch->r_mem, SATA_FISC); + ltm = ATA_INL(ch->r_mem, SATA_LTM); + hc = ATA_INL(ch->r_mem, EDMA_HC); + if (ch->fbs_enabled) { + fcfg |= SATA_FISC_FISDMAACTIVATESYNCRESP; + if (mode == MVS_EDMA_NCQ) { + fcfg &= ~SATA_FISC_FISWAIT4HOSTRDYEN_B0; + hc &= ~EDMA_IE_EDEVERR; + } else { + fcfg |= SATA_FISC_FISWAIT4HOSTRDYEN_B0; + hc |= EDMA_IE_EDEVERR; + } + ltm |= (1 << 8); + } else { + fcfg &= ~SATA_FISC_FISDMAACTIVATESYNCRESP; + fcfg &= ~SATA_FISC_FISWAIT4HOSTRDYEN_B0; + hc |= EDMA_IE_EDEVERR; + ltm &= ~(1 << 8); + } + ATA_OUTL(ch->r_mem, SATA_FISC, fcfg); + ATA_OUTL(ch->r_mem, SATA_LTM, ltm); + ATA_OUTL(ch->r_mem, EDMA_HC, hc); + /* This is some magic, required to handle several DRQs + * with basic DMA. */ + unkn = ATA_INL(ch->r_mem, EDMA_UNKN_RESD); + if (mode == MVS_EDMA_OFF) + unkn |= 1; + else + unkn &= ~1; + ATA_OUTL(ch->r_mem, EDMA_UNKN_RESD, unkn); + } + /* Run EDMA. 
*/ + if (mode != MVS_EDMA_OFF) + ATA_OUTL(ch->r_mem, EDMA_CMD, EDMA_CMD_EENEDMA); +} + +devclass_t mvs_devclass; +devclass_t mvsch_devclass; +static device_method_t mvsch_methods[] = { + DEVMETHOD(device_probe, mvs_ch_probe), + DEVMETHOD(device_attach, mvs_ch_attach), + DEVMETHOD(device_detach, mvs_ch_detach), + DEVMETHOD(device_suspend, mvs_ch_suspend), + DEVMETHOD(device_resume, mvs_ch_resume), + { 0, 0 } +}; +static driver_t mvsch_driver = { + "mvsch", + mvsch_methods, + sizeof(struct mvs_channel) +}; +DRIVER_MODULE(mvsch, mvs, mvsch_driver, mvsch_devclass, 0, 0); +DRIVER_MODULE(mvsch, sata, mvsch_driver, mvsch_devclass, 0, 0); + +static void +mvs_phy_check_events(device_t dev, u_int32_t serr) +{ + struct mvs_channel *ch = device_get_softc(dev); + + if (ch->pm_level == 0) { + u_int32_t status = ATA_INL(ch->r_mem, SATA_SS); + union ccb *ccb; + + if (bootverbose) { + if (((status & SATA_SS_DET_MASK) == SATA_SS_DET_PHY_ONLINE) && + ((status & SATA_SS_SPD_MASK) != SATA_SS_SPD_NO_SPEED) && + ((status & SATA_SS_IPM_MASK) == SATA_SS_IPM_ACTIVE)) { + device_printf(dev, "CONNECT requested\n"); + } else + device_printf(dev, "DISCONNECT requested\n"); + } + mvs_reset(dev); + if ((ccb = xpt_alloc_ccb_nowait()) == NULL) + return; + if (xpt_create_path(&ccb->ccb_h.path, NULL, + cam_sim_path(ch->sim), + CAM_TARGET_WILDCARD, CAM_LUN_WILDCARD) != CAM_REQ_CMP) { + xpt_free_ccb(ccb); + return; + } + xpt_rescan(ccb); + } +} + +static void +mvs_notify_events(device_t dev) +{ + struct mvs_channel *ch = device_get_softc(dev); + struct cam_path *dpath; + uint32_t fis; + int d; + + /* Try to read PMP field from SDB FIS. Present only for Gen-IIe. */ + fis = ATA_INL(ch->r_mem, SATA_FISDW0); + if ((fis & 0x80ff) == 0x80a1) + d = (fis & 0x0f00) >> 8; + else + d = ch->pm_present ? 
15 : 0; + if (bootverbose) + device_printf(dev, "SNTF %d\n", d); + if (xpt_create_path(&dpath, NULL, + xpt_path_path_id(ch->path), d, 0) == CAM_REQ_CMP) { + xpt_async(AC_SCSI_AEN, dpath, NULL); + xpt_free_path(dpath); + } +} + +static void +mvs_ch_intr_locked(void *data) +{ + struct mvs_intr_arg *arg = (struct mvs_intr_arg *)data; + device_t dev = (device_t)arg->arg; + struct mvs_channel *ch = device_get_softc(dev); + + mtx_lock(&ch->mtx); + mvs_ch_intr(data); + mtx_unlock(&ch->mtx); +} + +static void +mvs_ch_pm(void *arg) +{ + device_t dev = (device_t)arg; + struct mvs_channel *ch = device_get_softc(dev); + uint32_t work; + + if (ch->numrslots != 0) + return; + /* If we are idle - request power state transition. */ + work = ATA_INL(ch->r_mem, SATA_SC); + work &= ~SATA_SC_SPM_MASK; + if (ch->pm_level == 4) + work |= SATA_SC_SPM_PARTIAL; + else + work |= SATA_SC_SPM_SLUMBER; + ATA_OUTL(ch->r_mem, SATA_SC, work); +} + +static void +mvs_ch_pm_wake(device_t dev) +{ + struct mvs_channel *ch = device_get_softc(dev); + uint32_t work; + int timeout = 0; + + work = ATA_INL(ch->r_mem, SATA_SS); + if (work & SATA_SS_IPM_ACTIVE) + return; + /* If we are not in active state - request power state transition. */ + work = ATA_INL(ch->r_mem, SATA_SC); + work &= ~SATA_SC_SPM_MASK; + work |= SATA_SC_SPM_ACTIVE; + ATA_OUTL(ch->r_mem, SATA_SC, work); + /* Wait for transition to happen. */ + while ((ATA_INL(ch->r_mem, SATA_SS) & SATA_SS_IPM_ACTIVE) == 0 && + timeout++ < 100) { + DELAY(100); + } +} + +static void +mvs_ch_intr(void *data) +{ + struct mvs_intr_arg *arg = (struct mvs_intr_arg *)data; + device_t dev = (device_t)arg->arg; + struct mvs_channel *ch = device_get_softc(dev); + uint32_t iec, serr = 0, fisic = 0; + enum mvs_err_type et; + int i, ccs, port = -1, selfdis = 0; + int edma = (ch->numtslots != 0 || ch->numdslots != 0); + +//device_printf(dev, "irq cause %02x EDMA %d IEC %08x\n", +// arg->cause, edma, ATA_INL(ch->r_mem, EDMA_IEC)); + /* New item in response queue. 
*/ + if ((arg->cause & 2) && edma) + mvs_crbq_intr(dev); + /* Some error or special event. */ + if (arg->cause & 1) { + iec = ATA_INL(ch->r_mem, EDMA_IEC); +//device_printf(dev, "irq cause %02x EDMA %d IEC %08x\n", +// arg->cause, edma, iec); + if (iec & EDMA_IE_SERRINT) { + serr = ATA_INL(ch->r_mem, SATA_SE); + ATA_OUTL(ch->r_mem, SATA_SE, serr); +//device_printf(dev, "SERR %08x\n", serr); + } + /* EDMA self-disabled due to error. */ + if (iec & EDMA_IE_ESELFDIS) + selfdis = 1; + /* Transport interrupt. */ + if (iec & EDMA_IE_ETRANSINT) { + /* For Gen-I this bit means self-disable. */ + if (ch->quirks & MVS_Q_GENI) + selfdis = 1; + /* For Gen-II this bit means SDB-N. */ + else if (ch->quirks & MVS_Q_GENII) + fisic = SATA_FISC_FISWAIT4HOSTRDYEN_B1; + else /* For Gen-IIe - read FIS interrupt cause. */ + fisic = ATA_INL(ch->r_mem, SATA_FISIC); +//device_printf(dev, "FISIC %08x\n", fisic); + } + if (selfdis) + ch->curr_mode = MVS_EDMA_UNKNOWN; + ATA_OUTL(ch->r_mem, EDMA_IEC, ~iec); + /* Interface errors or Device error. */ + if (iec & (0xfc1e9000 | EDMA_IE_EDEVERR)) { + port = -1; + if (ch->numpslots != 0) { + ccs = 0; + } else { + if (ch->quirks & MVS_Q_GENIIE) + ccs = EDMA_S_EIOID(ATA_INL(ch->r_mem, EDMA_S)); + else + ccs = EDMA_S_EDEVQUETAG(ATA_INL(ch->r_mem, EDMA_S)); + /* Check if error is one-PMP-port-specific, */ + if (ch->fbs_enabled) { + /* Which ports were active. */ + for (i = 0; i < 16; i++) { + if (ch->numrslotspd[i] == 0) + continue; + if (port == -1) + port = i; + else if (port != i) { + port = -2; + break; + } + } + /* If several ports were active and EDMA still enabled - + * other ports are probably unaffected and may continue. + */ + if (port == -2 && !selfdis) { + uint16_t p = ATA_INL(ch->r_mem, SATA_SATAITC) >> 16; + port = ffs(p) - 1; + if (port != (fls(p) - 1)) + port = -2; + } + } + } +//device_printf(dev, "err slot %d port %d\n", ccs, port); + mvs_requeue_frozen(dev); + for (i = 0; i < MVS_MAX_SLOTS; i++) { + /* XXX: reqests in loading state. 
*/ + if (((ch->rslots >> i) & 1) == 0) + continue; + if (port >= 0 && + ch->slot[i].ccb->ccb_h.target_id != port) + continue; + if (iec & EDMA_IE_EDEVERR) { /* Device error. */ + if (port != -2) { + if (ch->numtslots == 0) { + /* Untagged operation. */ + if (i == ccs) + et = MVS_ERR_TFE; + else + et = MVS_ERR_INNOCENT; + } else { + /* Tagged operation. */ + et = MVS_ERR_NCQ; + } + } else { + et = MVS_ERR_TFE; + ch->fatalerr = 1; + } + } else if (iec & 0xfc1e9000) { + if (ch->numtslots == 0 && i != ccs && port != -2) + et = MVS_ERR_INNOCENT; + else + et = MVS_ERR_SATA; + } else + et = MVS_ERR_INVALID; + mvs_end_transaction(&ch->slot[i], et); + } + } + /* Process SDB-N. */ + if (fisic & SATA_FISC_FISWAIT4HOSTRDYEN_B1) + mvs_notify_events(dev); + if (fisic) + ATA_OUTL(ch->r_mem, SATA_FISIC, ~fisic); + /* Process hot-plug. */ + if ((iec & (EDMA_IE_EDEVDIS | EDMA_IE_EDEVCON)) || + (serr & SATA_SE_PHY_CHANGED)) + mvs_phy_check_events(dev, serr); + } + /* Legacy mode device interrupt. */ + if ((arg->cause & 2) && !edma) + mvs_legacy_intr(dev); +} + +static uint8_t +mvs_getstatus(device_t dev, int clear) +{ + struct mvs_channel *ch = device_get_softc(dev); + uint8_t status = ATA_INB(ch->r_mem, clear ? ATA_STATUS : ATA_ALTSTAT); + + if (ch->fake_busy) { + if (status & (ATA_S_BUSY | ATA_S_DRQ | ATA_S_ERROR)) + ch->fake_busy = 0; + else + status |= ATA_S_BUSY; + } + return (status); +} + +static void +mvs_legacy_intr(device_t dev) +{ + struct mvs_channel *ch = device_get_softc(dev); + struct mvs_slot *slot = &ch->slot[0]; /* PIO is always in slot 0. */ + union ccb *ccb = slot->ccb; + enum mvs_err_type et = MVS_ERR_NONE; + int port; + u_int length; + uint8_t status, ireason; + + /* Clear interrupt and get status. */ + status = mvs_getstatus(dev, 1); +// device_printf(dev, "Legacy intr status %02x\n", +// status); + if (slot->state < MVS_SLOT_RUNNING) + return; + port = ccb->ccb_h.target_id & 0x0f; + /* Wait a bit for late !BUSY status update. 
*/ + if (status & ATA_S_BUSY) { + DELAY(100); + if ((status = mvs_getstatus(dev, 1)) & ATA_S_BUSY) { + DELAY(1000); + if ((status = mvs_getstatus(dev, 1)) & ATA_S_BUSY) + return; + } + } + /* If we got an error, we are done. */ + if (status & ATA_S_ERROR) { + et = MVS_ERR_TFE; + goto end_finished; + } + if (ccb->ccb_h.func_code == XPT_ATA_IO) { /* ATA PIO */ + ccb->ataio.res.status = status; + /* Are we moving data? */ + if ((ccb->ccb_h.flags & CAM_DIR_MASK) != CAM_DIR_NONE) { + /* If data read command - get them. */ + if ((ccb->ccb_h.flags & CAM_DIR_MASK) == CAM_DIR_IN) { + if (mvs_wait(dev, ATA_S_DRQ, ATA_S_BUSY, 1000) < 0) { + device_printf(dev, "timeout waiting for read DRQ\n"); + et = MVS_ERR_TIMEOUT; + goto end_finished; + } + ATA_INSW_STRM(ch->r_mem, ATA_DATA, + (uint16_t *)(ccb->ataio.data_ptr + ch->donecount), + ch->transfersize / 2); + } + /* Update how far we've gotten. */ + ch->donecount += ch->transfersize; + /* Do we need more? */ + if (ccb->ataio.dxfer_len > ch->donecount) { + /* Set this transfer size according to HW capabilities */ + ch->transfersize = min(ccb->ataio.dxfer_len - ch->donecount, + ch->curr[ccb->ccb_h.target_id].bytecount); + /* If data write command - put them */ + if ((ccb->ccb_h.flags & CAM_DIR_MASK) == CAM_DIR_OUT) { + if (mvs_wait(dev, ATA_S_DRQ, ATA_S_BUSY, 1000) < 0) { + device_printf(dev, "timeout waiting for write DRQ\n"); + et = MVS_ERR_TIMEOUT; + goto end_finished; + } + ATA_OUTSW_STRM(ch->r_mem, ATA_DATA, + (uint16_t *)(ccb->ataio.data_ptr + ch->donecount), + ch->transfersize / 2); + return; + } + /* If data read command, return & wait for interrupt */ + if ((ccb->ccb_h.flags & CAM_DIR_MASK) == CAM_DIR_IN) + return; + } + } + } else if (ch->basic_dma) { /* ATAPI DMA */ + if (status & ATA_S_DWF) + et = MVS_ERR_TFE; + else if (ATA_INL(ch->r_mem, DMA_S) & DMA_S_ERR) + et = MVS_ERR_TFE; + /* Stop basic DMA. 
*/ + ATA_OUTL(ch->r_mem, DMA_C, 0); + goto end_finished; + } else { /* ATAPI PIO */ + length = ATA_INB(ch->r_mem,ATA_CYL_LSB) | (ATA_INB(ch->r_mem,ATA_CYL_MSB) << 8); + ireason = ATA_INB(ch->r_mem,ATA_IREASON); +//device_printf(dev, "status %02x, ireason %02x, length %d\n", status, ireason, length); + switch ((ireason & (ATA_I_CMD | ATA_I_IN)) | + (status & ATA_S_DRQ)) { + + case ATAPI_P_CMDOUT: +device_printf(dev, "ATAPI CMDOUT\n"); + /* Return wait for interrupt */ + return; + + case ATAPI_P_WRITE: +//device_printf(dev, "ATAPI WRITE\n"); + if ((ccb->ccb_h.flags & CAM_DIR_MASK) == CAM_DIR_IN) { + device_printf(dev, "trying to write on read buffer\n"); + et = MVS_ERR_TFE; + goto end_finished; + break; + } + ATA_OUTSW_STRM(ch->r_mem, ATA_DATA, + (uint16_t *)(ccb->csio.data_ptr + ch->donecount), + length / 2); + ch->donecount += length; + /* Set next transfer size according to HW capabilities */ + ch->transfersize = min(ccb->csio.dxfer_len - ch->donecount, + ch->curr[ccb->ccb_h.target_id].bytecount); + /* Return wait for interrupt */ + return; + + case ATAPI_P_READ: +//device_printf(dev, "ATAPI READ\n"); + if ((ccb->ccb_h.flags & CAM_DIR_MASK) == CAM_DIR_OUT) { + device_printf(dev, "trying to read on write buffer\n"); + et = MVS_ERR_TFE; + goto end_finished; + } + ATA_INSW_STRM(ch->r_mem, ATA_DATA, + (uint16_t *)(ccb->csio.data_ptr + ch->donecount), + length / 2); + ch->donecount += length; + /* Set next transfer size according to HW capabilities */ + ch->transfersize = min(ccb->csio.dxfer_len - ch->donecount, + ch->curr[ccb->ccb_h.target_id].bytecount); + /* Return wait for interrupt */ + return; + + case ATAPI_P_DONEDRQ: +device_printf(dev, "ATAPI DONEDRQ\n"); + device_printf(dev, + "WARNING - DONEDRQ non conformant device\n"); + if ((ccb->ccb_h.flags & CAM_DIR_MASK) == CAM_DIR_IN) { + ATA_INSW_STRM(ch->r_mem, ATA_DATA, + (uint16_t *)(ccb->csio.data_ptr + ch->donecount), + length / 2); + ch->donecount += length; + } + else if ((ccb->ccb_h.flags & CAM_DIR_MASK) == 
CAM_DIR_OUT) { + ATA_OUTSW_STRM(ch->r_mem, ATA_DATA, + (uint16_t *)(ccb->csio.data_ptr + ch->donecount), + length / 2); + ch->donecount += length; + } + else + et = MVS_ERR_TFE; + /* FALLTHROUGH */ + + case ATAPI_P_ABORT: + case ATAPI_P_DONE: +//device_printf(dev, "ATAPI ABORT/DONE\n"); + if (status & (ATA_S_ERROR | ATA_S_DWF)) + et = MVS_ERR_TFE; + goto end_finished; + + default: + device_printf(dev, "unknown transfer phase (status %02x, ireason %02x)\n", + status, ireason); + et = MVS_ERR_TFE; + } + } + +end_finished: + mvs_end_transaction(slot, et); +} + +static void +mvs_crbq_intr(device_t dev) +{ + struct mvs_channel *ch = device_get_softc(dev); + struct mvs_crpb *crpb; + union ccb *ccb; + int in_idx, cin_idx, slot; + uint16_t flags; + + in_idx = (ATA_INL(ch->r_mem, EDMA_RESQIP) & EDMA_RESQP_ERPQP_MASK) >> + EDMA_RESQP_ERPQP_SHIFT; + bus_dmamap_sync(ch->dma.workrp_tag, ch->dma.workrp_map, + BUS_DMASYNC_POSTREAD); + cin_idx = ch->in_idx; + ch->in_idx = in_idx; + while (in_idx != cin_idx) { + crpb = (struct mvs_crpb *) + (ch->dma.workrp + MVS_CRPB_OFFSET + (MVS_CRPB_SIZE * cin_idx)); + slot = le16toh(crpb->id) & MVS_CRPB_TAG_MASK; + flags = le16toh(crpb->rspflg); +//device_printf(dev, "CRPB %d %d %04x\n", cin_idx, slot, flags); + /* + * Handle only successfull completions here. + * Errors will be handled by main intr handler. 
+ */ + if (ch->numtslots != 0 || (flags & EDMA_IE_EDEVERR) == 0) { +if ((flags >> 8) & ATA_S_ERROR) +device_printf(dev, "ERROR STATUS CRPB %d %d %04x\n", cin_idx, slot, flags); + if (ch->slot[slot].state >= MVS_SLOT_RUNNING) { + ccb = ch->slot[slot].ccb; + ccb->ataio.res.status = (flags & MVS_CRPB_ATASTS_MASK) >> + MVS_CRPB_ATASTS_SHIFT; + mvs_end_transaction(&ch->slot[slot], MVS_ERR_NONE); + } else +device_printf(dev, "EMPTY CRPB %d (->%d) %d %04x\n", cin_idx, in_idx, slot, flags); + } else +device_printf(dev, "ERROR FLAGS CRPB %d %d %04x\n", cin_idx, slot, flags); + + cin_idx = (cin_idx + 1) & (MVS_MAX_SLOTS - 1); + } + bus_dmamap_sync(ch->dma.workrp_tag, ch->dma.workrp_map, + BUS_DMASYNC_PREREAD); + if (cin_idx == ch->in_idx) { + ATA_OUTL(ch->r_mem, EDMA_RESQOP, + ch->dma.workrp_bus | (cin_idx << EDMA_RESQP_ERPQP_SHIFT)); + } +} + +/* Must be called with channel locked. */ +static int +mvs_check_collision(device_t dev, union ccb *ccb) +{ + struct mvs_channel *ch = device_get_softc(dev); + + if (ccb->ccb_h.func_code == XPT_ATA_IO) { + /* NCQ DMA */ + if (ccb->ataio.cmd.flags & CAM_ATAIO_FPDMA) { + /* Can't mix NCQ and non-NCQ DMA commands. */ + if (ch->numdslots != 0) + return (1); + /* Can't mix NCQ and PIO commands. */ + if (ch->numpslots != 0) + return (1); + /* If we have no FBS */ + if (!ch->fbs_enabled) { + /* Tagged command while tagged to other target is active. */ + if (ch->numtslots != 0 && + ch->taggedtarget != ccb->ccb_h.target_id) + return (1); + } + /* Non-NCQ DMA */ + } else if (ccb->ataio.cmd.flags & CAM_ATAIO_DMA) { + /* Can't mix non-NCQ DMA and NCQ commands. */ + if (ch->numtslots != 0) + return (1); + /* Can't mix non-NCQ DMA and PIO commands. */ + if (ch->numpslots != 0) + return (1); + /* PIO */ + } else { + /* Can't mix PIO with anything. */ + if (ch->numrslots != 0) + return (1); + } + if (ccb->ataio.cmd.flags & (CAM_ATAIO_CONTROL | CAM_ATAIO_NEEDRESULT)) { + /* Atomic command while anything active. 
*/ + if (ch->numrslots != 0) + return (1); + } + } else { /* ATAPI */ + /* ATAPI goes without EDMA, so can't mix it with anything. */ + if (ch->numrslots != 0) + return (1); + } + /* We have some atomic command running. */ + if (ch->aslots != 0) + return (1); + return (0); +} + +static void +mvs_tfd_read(device_t dev, union ccb *ccb) +{ + struct mvs_channel *ch = device_get_softc(dev); + struct ata_res *res = &ccb->ataio.res; + + res->status = ATA_INB(ch->r_mem, ATA_ALTSTAT); + res->error = ATA_INB(ch->r_mem, ATA_ERROR); + res->device = ATA_INB(ch->r_mem, ATA_DRIVE); + ATA_OUTB(ch->r_mem, ATA_CONTROL, ATA_A_HOB); + res->sector_count_exp = ATA_INB(ch->r_mem, ATA_COUNT); + res->lba_low_exp = ATA_INB(ch->r_mem, ATA_SECTOR); + res->lba_mid_exp = ATA_INB(ch->r_mem, ATA_CYL_LSB); + res->lba_high_exp = ATA_INB(ch->r_mem, ATA_CYL_MSB); + ATA_OUTB(ch->r_mem, ATA_CONTROL, 0); + res->sector_count = ATA_INB(ch->r_mem, ATA_COUNT); + res->lba_low = ATA_INB(ch->r_mem, ATA_SECTOR); + res->lba_mid = ATA_INB(ch->r_mem, ATA_CYL_LSB); + res->lba_high = ATA_INB(ch->r_mem, ATA_CYL_MSB); +} + +static void +mvs_tfd_write(device_t dev, union ccb *ccb) +{ + struct mvs_channel *ch = device_get_softc(dev); + struct ata_cmd *cmd = &ccb->ataio.cmd; + + ATA_OUTB(ch->r_mem, ATA_DRIVE, cmd->device); + ATA_OUTB(ch->r_mem, ATA_CONTROL, cmd->control); + ATA_OUTB(ch->r_mem, ATA_FEATURE, cmd->features_exp); + ATA_OUTB(ch->r_mem, ATA_FEATURE, cmd->features); + ATA_OUTB(ch->r_mem, ATA_COUNT, cmd->sector_count_exp); + ATA_OUTB(ch->r_mem, ATA_COUNT, cmd->sector_count); + ATA_OUTB(ch->r_mem, ATA_SECTOR, cmd->lba_low_exp); + ATA_OUTB(ch->r_mem, ATA_SECTOR, cmd->lba_low); + ATA_OUTB(ch->r_mem, ATA_CYL_LSB, cmd->lba_mid_exp); + ATA_OUTB(ch->r_mem, ATA_CYL_LSB, cmd->lba_mid); + ATA_OUTB(ch->r_mem, ATA_CYL_MSB, cmd->lba_high_exp); + ATA_OUTB(ch->r_mem, ATA_CYL_MSB, cmd->lba_high); + ATA_OUTB(ch->r_mem, ATA_COMMAND, cmd->command); +} + + +/* Must be called with channel locked. 
*/ +static void +mvs_begin_transaction(device_t dev, union ccb *ccb) +{ + struct mvs_channel *ch = device_get_softc(dev); + struct mvs_slot *slot; + int slotn, tag; + + if (ch->pm_level > 0) + mvs_ch_pm_wake(dev); + /* Softreset is a special case. */ + if (ccb->ccb_h.func_code == XPT_ATA_IO && + (ccb->ataio.cmd.flags & CAM_ATAIO_CONTROL)) { + mvs_softreset(dev, ccb); + return; + } + /* Choose empty slot. */ + slotn = ffs(~ch->oslots) - 1; + if ((ccb->ccb_h.func_code == XPT_ATA_IO) && + (ccb->ataio.cmd.flags & CAM_ATAIO_FPDMA)) { + if (ch->quirks & MVS_Q_GENIIE) + tag = ffs(~ch->otagspd[ccb->ccb_h.target_id]) - 1; + else + tag = slotn; + } else + tag = 0; + /* Occupy chosen slot. */ + slot = &ch->slot[slotn]; + slot->ccb = ccb; + slot->tag = tag; + /* Stop PM timer. */ + if (ch->numrslots == 0 && ch->pm_level > 3) + callout_stop(&ch->pm_timer); + /* Update channel stats. */ + ch->oslots |= (1 << slot->slot); + ch->numrslots++; + ch->numrslotspd[ccb->ccb_h.target_id]++; + if (ccb->ccb_h.func_code == XPT_ATA_IO) { + if (ccb->ataio.cmd.flags & CAM_ATAIO_FPDMA) { + ch->otagspd[ccb->ccb_h.target_id] |= (1 << slot->tag); + ch->numtslots++; + ch->numtslotspd[ccb->ccb_h.target_id]++; + ch->taggedtarget = ccb->ccb_h.target_id; + mvs_set_edma_mode(dev, MVS_EDMA_NCQ); + } else if (ccb->ataio.cmd.flags & CAM_ATAIO_DMA) { + ch->numdslots++; + mvs_set_edma_mode(dev, MVS_EDMA_ON); + } else { + ch->numpslots++; + mvs_set_edma_mode(dev, MVS_EDMA_OFF); + } + if (ccb->ataio.cmd.flags & + (CAM_ATAIO_CONTROL | CAM_ATAIO_NEEDRESULT)) { + ch->aslots |= (1 << slot->slot); + } + } else { + uint8_t *cdb = (ccb->ccb_h.flags & CAM_CDB_POINTER) ? + ccb->csio.cdb_io.cdb_ptr : ccb->csio.cdb_io.cdb_bytes; + ch->numpslots++; + /* Use ATAPI DMA only for commands without under-/overruns. 
*/ + if ((ccb->ccb_h.flags & CAM_DIR_MASK) != CAM_DIR_NONE && + ch->curr[ccb->ccb_h.target_id].mode >= ATA_DMA && + (ch->quirks & MVS_Q_SOC) == 0 && + (cdb[0] == 0x08 || + cdb[0] == 0x0a || + cdb[0] == 0x28 || + cdb[0] == 0x2a || + cdb[0] == 0x88 || + cdb[0] == 0x8a || + cdb[0] == 0xa8 || + cdb[0] == 0xaa || + cdb[0] == 0xbe)) { + ch->basic_dma = 1; + } + mvs_set_edma_mode(dev, MVS_EDMA_OFF); + } + if (ch->numpslots == 0 || ch->basic_dma) { + void *buf; + bus_size_t size; + + slot->state = MVS_SLOT_LOADING; + if (ccb->ccb_h.func_code == XPT_ATA_IO) { + buf = ccb->ataio.data_ptr; + size = ccb->ataio.dxfer_len; + } else { + buf = ccb->csio.data_ptr; + size = ccb->csio.dxfer_len; + } + bus_dmamap_load(ch->dma.data_tag, slot->dma.data_map, + buf, size, mvs_dmasetprd, slot, 0); + } else + mvs_legacy_execute_transaction(slot); +} + +/* Locked by busdma engine. */ +static void +mvs_dmasetprd(void *arg, bus_dma_segment_t *segs, int nsegs, int error) +{ + struct mvs_slot *slot = arg; + struct mvs_channel *ch = device_get_softc(slot->dev); + struct mvs_eprd *eprd; + int i; + + if (error) { + device_printf(slot->dev, "DMA load error\n"); + mvs_end_transaction(slot, MVS_ERR_INVALID); + return; + } + KASSERT(nsegs <= MVS_SG_ENTRIES, ("too many DMA segment entries\n")); + /* If there is only one segment - no need to use S/G table on Gen-IIe. 
*/ + if (nsegs == 1 && ch->basic_dma == 0 && (ch->quirks & MVS_Q_GENIIE)) { + slot->dma.addr = segs[0].ds_addr; + slot->dma.len = segs[0].ds_len; + } else { + slot->dma.addr = 0; + /* Get a piece of the workspace for this EPRD */ + eprd = (struct mvs_eprd *) + (ch->dma.workrq + MVS_EPRD_OFFSET + (MVS_EPRD_SIZE * slot->slot)); + /* Fill S/G table */ + for (i = 0; i < nsegs; i++) { + eprd[i].prdbal = htole32(segs[i].ds_addr); + eprd[i].bytecount = htole32(segs[i].ds_len & MVS_EPRD_MASK); + eprd[i].prdbah = htole32((segs[i].ds_addr >> 16) >> 16); + } + eprd[i - 1].bytecount |= htole32(MVS_EPRD_EOF); + } + bus_dmamap_sync(ch->dma.data_tag, slot->dma.data_map, + ((slot->ccb->ccb_h.flags & CAM_DIR_IN) ? + BUS_DMASYNC_PREREAD : BUS_DMASYNC_PREWRITE)); + if (ch->basic_dma) + mvs_legacy_execute_transaction(slot); + else + mvs_execute_transaction(slot); +} + +static void +mvs_legacy_execute_transaction(struct mvs_slot *slot) +{ + device_t dev = slot->dev; + struct mvs_channel *ch = device_get_softc(dev); + bus_addr_t eprd; + union ccb *ccb = slot->ccb; + int port = ccb->ccb_h.target_id & 0x0f; + int timeout; + + slot->state = MVS_SLOT_RUNNING; + ch->rslots |= (1 << slot->slot); + ATA_OUTB(ch->r_mem, SATA_SATAICTL, port << SATA_SATAICTL_PMPTX_SHIFT); + if (ccb->ccb_h.func_code == XPT_ATA_IO) { +// device_printf(dev, "%d Legacy command %02x size %d\n", +// port, ccb->ataio.cmd.command, ccb->ataio.dxfer_len); + mvs_tfd_write(dev, ccb); + /* Device reset doesn't interrupt. 
*/ + if (ccb->ataio.cmd.command == ATA_DEVICE_RESET) { + int timeout = 1000000; + do { + DELAY(10); + ccb->ataio.res.status = ATA_INB(ch->r_mem, ATA_STATUS); + } while (ccb->ataio.res.status & ATA_S_BUSY && timeout--); + mvs_legacy_intr(dev); + return; + } + ch->donecount = 0; + ch->transfersize = min(ccb->ataio.dxfer_len, + ch->curr[port].bytecount); + if ((ccb->ccb_h.flags & CAM_DIR_MASK) != CAM_DIR_NONE) + ch->fake_busy = 1; + /* If data write command - output the data */ + if ((ccb->ccb_h.flags & CAM_DIR_MASK) == CAM_DIR_OUT) { + if (mvs_wait(dev, ATA_S_DRQ, ATA_S_BUSY, 1000) < 0) { + device_printf(dev, "timeout waiting for write DRQ\n"); + mvs_end_transaction(slot, MVS_ERR_TIMEOUT); + return; + } + ATA_OUTSW_STRM(ch->r_mem, ATA_DATA, + (uint16_t *)(ccb->ataio.data_ptr + ch->donecount), + ch->transfersize / 2); + } + } else { +// device_printf(dev, "%d ATAPI command %02x size %d dma %d\n", +// port, ccb->csio.cdb_io.cdb_bytes[0], ccb->csio.dxfer_len, +// ch->basic_dma); + ch->donecount = 0; + ch->transfersize = min(ccb->csio.dxfer_len, + ch->curr[port].bytecount); + /* Write ATA PACKET command. 
*/ + if (ch->basic_dma) { + ATA_OUTB(ch->r_mem, ATA_FEATURE, ATA_F_DMA); + ATA_OUTB(ch->r_mem, ATA_CYL_LSB, 0); + ATA_OUTB(ch->r_mem, ATA_CYL_MSB, 0); + } else { + ATA_OUTB(ch->r_mem, ATA_FEATURE, 0); + ATA_OUTB(ch->r_mem, ATA_CYL_LSB, ch->transfersize); + ATA_OUTB(ch->r_mem, ATA_CYL_MSB, ch->transfersize >> 8); + } + ATA_OUTB(ch->r_mem, ATA_COMMAND, ATA_PACKET_CMD); + ch->fake_busy = 1; + /* Wait for ready to write ATAPI command block */ + if (mvs_wait(dev, 0, ATA_S_BUSY, 1000) < 0) { + device_printf(dev, "timeout waiting for ATAPI !BUSY\n"); + mvs_end_transaction(slot, MVS_ERR_TIMEOUT); + return; + } + timeout = 5000; + while (timeout--) { + int reason = ATA_INB(ch->r_mem, ATA_IREASON); + int status = ATA_INB(ch->r_mem, ATA_STATUS); + + if (((reason & (ATA_I_CMD | ATA_I_IN)) | + (status & (ATA_S_DRQ | ATA_S_BUSY))) == ATAPI_P_CMDOUT) + break; + DELAY(20); + } + if (timeout <= 0) { + device_printf(dev, "timeout waiting for ATAPI command ready\n"); + mvs_end_transaction(slot, MVS_ERR_TIMEOUT); + return; + } + /* Write ATAPI command. */ + ATA_OUTSW_STRM(ch->r_mem, ATA_DATA, + (uint16_t *)((ccb->ccb_h.flags & CAM_CDB_POINTER) ? + ccb->csio.cdb_io.cdb_ptr : ccb->csio.cdb_io.cdb_bytes), + ch->curr[port].atapi / 2); + DELAY(10); + if (ch->basic_dma) { + /* Start basic DMA. */ + eprd = ch->dma.workrq_bus + MVS_EPRD_OFFSET + + (MVS_EPRD_SIZE * slot->slot); + ATA_OUTL(ch->r_mem, DMA_DTLBA, eprd); + ATA_OUTL(ch->r_mem, DMA_DTHBA, (eprd >> 16) >> 16); + ATA_OUTL(ch->r_mem, DMA_C, DMA_C_START | + (((ccb->ccb_h.flags & CAM_DIR_MASK) == CAM_DIR_IN) ? + DMA_C_READ : 0)); + } else if ((ccb->ccb_h.flags & CAM_DIR_MASK) != CAM_DIR_NONE) + ch->fake_busy = 1; + } + /* Start command execution timeout */ + callout_reset(&slot->timeout, (int)ccb->ccb_h.timeout * hz / 1000, + (timeout_t*)mvs_timeout, slot); +} + +/* Must be called with channel locked. 
*/ +static void +mvs_execute_transaction(struct mvs_slot *slot) +{ + device_t dev = slot->dev; + struct mvs_channel *ch = device_get_softc(dev); + bus_addr_t eprd; + struct mvs_crqb *crqb; + struct mvs_crqb_gen2e *crqb2e; + union ccb *ccb = slot->ccb; + int port = ccb->ccb_h.target_id & 0x0f; + int i; + +// device_printf(dev, "%d EDMA command %02x size %d slot %d tag %d\n", +// port, ccb->ataio.cmd.command, ccb->ataio.dxfer_len, slot->slot, slot->tag); + /* Get address of the prepared EPRD */ + eprd = ch->dma.workrq_bus + MVS_EPRD_OFFSET + (MVS_EPRD_SIZE * slot->slot); + /* Prepare CRQB. Gen IIe uses different CRQB format. */ + if (ch->quirks & MVS_Q_GENIIE) { + crqb2e = (struct mvs_crqb_gen2e *) + (ch->dma.workrq + MVS_CRQB_OFFSET + (MVS_CRQB_SIZE * ch->out_idx)); + crqb2e->ctrlflg = htole32( + ((ccb->ccb_h.flags & CAM_DIR_IN) ? MVS_CRQB2E_READ : 0) | + (slot->tag << MVS_CRQB2E_DTAG_SHIFT) | + (port << MVS_CRQB2E_PMP_SHIFT) | + (slot->slot << MVS_CRQB2E_HTAG_SHIFT)); + /* If there is only one segment - no need to use S/G table. 
*/ + if (slot->dma.addr != 0) { + eprd = slot->dma.addr; + crqb2e->ctrlflg |= htole32(MVS_CRQB2E_CPRD); + crqb2e->drbc = slot->dma.len; + } + crqb2e->cprdbl = htole32(eprd); + crqb2e->cprdbh = htole32((eprd >> 16) >> 16); + crqb2e->cmd[0] = 0; + crqb2e->cmd[1] = 0; + crqb2e->cmd[2] = ccb->ataio.cmd.command; + crqb2e->cmd[3] = ccb->ataio.cmd.features; + crqb2e->cmd[4] = ccb->ataio.cmd.lba_low; + crqb2e->cmd[5] = ccb->ataio.cmd.lba_mid; + crqb2e->cmd[6] = ccb->ataio.cmd.lba_high; + crqb2e->cmd[7] = ccb->ataio.cmd.device; + crqb2e->cmd[8] = ccb->ataio.cmd.lba_low_exp; + crqb2e->cmd[9] = ccb->ataio.cmd.lba_mid_exp; + crqb2e->cmd[10] = ccb->ataio.cmd.lba_high_exp; + crqb2e->cmd[11] = ccb->ataio.cmd.features_exp; + if (ccb->ataio.cmd.flags & CAM_ATAIO_FPDMA) { + crqb2e->cmd[12] = slot->tag << 3; + crqb2e->cmd[13] = 0; + } else { + crqb2e->cmd[12] = ccb->ataio.cmd.sector_count; + crqb2e->cmd[13] = ccb->ataio.cmd.sector_count_exp; + } + crqb2e->cmd[14] = 0; + crqb2e->cmd[15] = 0; + } else { + crqb = (struct mvs_crqb *) + (ch->dma.workrq + MVS_CRQB_OFFSET + (MVS_CRQB_SIZE * ch->out_idx)); + crqb->cprdbl = htole32(eprd); + crqb->cprdbh = htole32((eprd >> 16) >> 16); + crqb->ctrlflg = htole16( + ((ccb->ccb_h.flags & CAM_DIR_IN) ? MVS_CRQB_READ : 0) | + (slot->slot << MVS_CRQB_TAG_SHIFT) | + (port << MVS_CRQB_PMP_SHIFT)); + i = 0; + /* + * Controller can handle only 11 of 12 ATA registers, + * so we have to choose which one to skip. 
+ */ + if (ccb->ataio.cmd.flags & CAM_ATAIO_FPDMA) { + crqb->cmd[i++] = ccb->ataio.cmd.features_exp; + crqb->cmd[i++] = 0x11; + } + crqb->cmd[i++] = ccb->ataio.cmd.features; + crqb->cmd[i++] = 0x11; + if (ccb->ataio.cmd.flags & CAM_ATAIO_FPDMA) { + crqb->cmd[i++] = slot->tag << 3; + crqb->cmd[i++] = 0x12; + } else { + crqb->cmd[i++] = ccb->ataio.cmd.sector_count_exp; + crqb->cmd[i++] = 0x12; + crqb->cmd[i++] = ccb->ataio.cmd.sector_count; + crqb->cmd[i++] = 0x12; + } + crqb->cmd[i++] = ccb->ataio.cmd.lba_low_exp; + crqb->cmd[i++] = 0x13; + crqb->cmd[i++] = ccb->ataio.cmd.lba_low; + crqb->cmd[i++] = 0x13; + crqb->cmd[i++] = ccb->ataio.cmd.lba_mid_exp; + crqb->cmd[i++] = 0x14; + crqb->cmd[i++] = ccb->ataio.cmd.lba_mid; + crqb->cmd[i++] = 0x14; + crqb->cmd[i++] = ccb->ataio.cmd.lba_high_exp; + crqb->cmd[i++] = 0x15; + crqb->cmd[i++] = ccb->ataio.cmd.lba_high; + crqb->cmd[i++] = 0x15; + crqb->cmd[i++] = ccb->ataio.cmd.device; + crqb->cmd[i++] = 0x16; + crqb->cmd[i++] = ccb->ataio.cmd.command; + crqb->cmd[i++] = 0x97; + } + bus_dmamap_sync(ch->dma.workrq_tag, ch->dma.workrq_map, + BUS_DMASYNC_PREWRITE); + bus_dmamap_sync(ch->dma.workrp_tag, ch->dma.workrp_map, + BUS_DMASYNC_PREREAD); + slot->state = MVS_SLOT_RUNNING; + ch->rslots |= (1 << slot->slot); + /* Issue command to the controller. */ + ch->out_idx = (ch->out_idx + 1) & (MVS_MAX_SLOTS - 1); + ATA_OUTL(ch->r_mem, EDMA_REQQIP, + ch->dma.workrq_bus + MVS_CRQB_OFFSET + (MVS_CRQB_SIZE * ch->out_idx)); + /* Start command execution timeout */ + callout_reset(&slot->timeout, (int)ccb->ccb_h.timeout * hz / 1000, + (timeout_t*)mvs_timeout, slot); + return; +} + +/* Must be called with channel locked. */ +static void +mvs_process_timeout(device_t dev) +{ + struct mvs_channel *ch = device_get_softc(dev); + int i; + + mtx_assert(&ch->mtx, MA_OWNED); + /* Handle the rest of commands. */ + for (i = 0; i < MVS_MAX_SLOTS; i++) { + /* Do we have a running request on slot? 
*/ + if (ch->slot[i].state < MVS_SLOT_RUNNING) + continue; + mvs_end_transaction(&ch->slot[i], MVS_ERR_TIMEOUT); + } +} + +/* Must be called with channel locked. */ +static void +mvs_rearm_timeout(device_t dev) +{ + struct mvs_channel *ch = device_get_softc(dev); + int i; + + mtx_assert(&ch->mtx, MA_OWNED); + for (i = 0; i < MVS_MAX_SLOTS; i++) { + struct mvs_slot *slot = &ch->slot[i]; + + /* Do we have a running request on slot? */ + if (slot->state < MVS_SLOT_RUNNING) + continue; + if ((ch->toslots & (1 << i)) == 0) + continue; + callout_reset(&slot->timeout, + (int)slot->ccb->ccb_h.timeout * hz / 2000, + (timeout_t*)mvs_timeout, slot); + } +} + +/* Locked by callout mechanism. */ +static void +mvs_timeout(struct mvs_slot *slot) +{ + device_t dev = slot->dev; + struct mvs_channel *ch = device_get_softc(dev); + + /* Check for stale timeout. */ + if (slot->state < MVS_SLOT_RUNNING) + return; + device_printf(dev, "Timeout on slot %d\n", slot->slot); + device_printf(dev, "iec %08x sstat %08x serr %08x edma_s %08x " + "dma_c %08x dma_s %08x rs %08x status %02x\n", + ATA_INL(ch->r_mem, EDMA_IEC), + ATA_INL(ch->r_mem, SATA_SS), ATA_INL(ch->r_mem, SATA_SE), + ATA_INL(ch->r_mem, EDMA_S), ATA_INL(ch->r_mem, DMA_C), + ATA_INL(ch->r_mem, DMA_S), ch->rslots, + ATA_INB(ch->r_mem, ATA_ALTSTAT)); + /* Handle frozen command. */ + mvs_requeue_frozen(dev); + /* We wait for other commands timeout and pray. */ + if (ch->toslots == 0) + xpt_freeze_simq(ch->sim, 1); + ch->toslots |= (1 << slot->slot); + if ((ch->rslots & ~ch->toslots) == 0) + mvs_process_timeout(dev); + else + device_printf(dev, " ... waiting for slots %08x\n", + ch->rslots & ~ch->toslots); +} + +/* Must be called with channel locked. 
*/ +static void +mvs_end_transaction(struct mvs_slot *slot, enum mvs_err_type et) +{ + device_t dev = slot->dev; + struct mvs_channel *ch = device_get_softc(dev); + union ccb *ccb = slot->ccb; + +//device_printf(dev, "cmd done status %d\n", et); + bus_dmamap_sync(ch->dma.workrq_tag, ch->dma.workrq_map, + BUS_DMASYNC_POSTWRITE); + /* Read result registers to the result struct + * May be incorrect if several commands finished same time, + * so read only when sure or have to. + */ + if (ccb->ccb_h.func_code == XPT_ATA_IO) { + struct ata_res *res = &ccb->ataio.res; + + if ((et == MVS_ERR_TFE) || + (ccb->ataio.cmd.flags & CAM_ATAIO_NEEDRESULT)) { + mvs_tfd_read(dev, ccb); + } else + bzero(res, sizeof(*res)); + } + if (ch->numpslots == 0 || ch->basic_dma) { + if ((ccb->ccb_h.flags & CAM_DIR_MASK) != CAM_DIR_NONE) { + bus_dmamap_sync(ch->dma.data_tag, slot->dma.data_map, + (ccb->ccb_h.flags & CAM_DIR_IN) ? + BUS_DMASYNC_POSTREAD : BUS_DMASYNC_POSTWRITE); + bus_dmamap_unload(ch->dma.data_tag, slot->dma.data_map); + } + } + if (et != MVS_ERR_NONE) + ch->eslots |= (1 << slot->slot); + /* In case of error, freeze device for proper recovery. */ + if ((et != MVS_ERR_NONE) && (!ch->readlog) && + !(ccb->ccb_h.status & CAM_DEV_QFRZN)) { + xpt_freeze_devq(ccb->ccb_h.path, 1); + ccb->ccb_h.status |= CAM_DEV_QFRZN; + } + /* Set proper result status. 
*/ + ccb->ccb_h.status &= ~CAM_STATUS_MASK; + switch (et) { + case MVS_ERR_NONE: + ccb->ccb_h.status |= CAM_REQ_CMP; + if (ccb->ccb_h.func_code == XPT_SCSI_IO) + ccb->csio.scsi_status = SCSI_STATUS_OK; + break; + case MVS_ERR_INVALID: + ch->fatalerr = 1; + ccb->ccb_h.status |= CAM_REQ_INVALID; + break; + case MVS_ERR_INNOCENT: + ccb->ccb_h.status |= CAM_REQUEUE_REQ; + break; + case MVS_ERR_TFE: + case MVS_ERR_NCQ: + if (ccb->ccb_h.func_code == XPT_SCSI_IO) { + ccb->ccb_h.status |= CAM_SCSI_STATUS_ERROR; + ccb->csio.scsi_status = SCSI_STATUS_CHECK_COND; + } else { + ccb->ccb_h.status |= CAM_ATA_STATUS_ERROR; + } + break; + case MVS_ERR_SATA: + ch->fatalerr = 1; + if (!ch->readlog) { + xpt_freeze_simq(ch->sim, 1); + ccb->ccb_h.status &= ~CAM_STATUS_MASK; + ccb->ccb_h.status |= CAM_RELEASE_SIMQ; + } + ccb->ccb_h.status |= CAM_UNCOR_PARITY; + break; + case MVS_ERR_TIMEOUT: + if (!ch->readlog) { + xpt_freeze_simq(ch->sim, 1); + ccb->ccb_h.status &= ~CAM_STATUS_MASK; + ccb->ccb_h.status |= CAM_RELEASE_SIMQ; + } + ccb->ccb_h.status |= CAM_CMD_TIMEOUT; + break; + default: + ch->fatalerr = 1; + ccb->ccb_h.status |= CAM_REQ_CMP_ERR; + } + /* Free slot. */ + ch->oslots &= ~(1 << slot->slot); + ch->rslots &= ~(1 << slot->slot); + ch->aslots &= ~(1 << slot->slot); + if (et != MVS_ERR_TIMEOUT) { + if (ch->toslots == (1 << slot->slot)) + xpt_release_simq(ch->sim, TRUE); + ch->toslots &= ~(1 << slot->slot); + } + slot->state = MVS_SLOT_EMPTY; + slot->ccb = NULL; + /* Update channel stats. */ + ch->numrslots--; + ch->numrslotspd[ccb->ccb_h.target_id]--; + if (ccb->ccb_h.func_code == XPT_ATA_IO) { + if (ccb->ataio.cmd.flags & CAM_ATAIO_FPDMA) { + ch->otagspd[ccb->ccb_h.target_id] &= ~(1 << slot->tag); + ch->numtslots--; + ch->numtslotspd[ccb->ccb_h.target_id]--; + } else if (ccb->ataio.cmd.flags & CAM_ATAIO_DMA) { + ch->numdslots--; + } else { + ch->numpslots--; + } + } else { + ch->numpslots--; + ch->basic_dma = 0; + } + /* If it was our READ LOG command - process it. 
*/ + if (ch->readlog) { + mvs_process_read_log(dev, ccb); + /* If it was NCQ command error, put result on hold. */ + } else if (et == MVS_ERR_NCQ) { + ch->hold[slot->slot] = ccb; + ch->holdtag[slot->slot] = slot->tag; + ch->numhslots++; + } else + xpt_done(ccb); + /* Unfreeze frozen command. */ + if (ch->frozen && !mvs_check_collision(dev, ch->frozen)) { + union ccb *fccb = ch->frozen; + ch->frozen = NULL; + mvs_begin_transaction(dev, fccb); + xpt_release_simq(ch->sim, TRUE); + } + /* If we have no other active commands, ... */ + if (ch->rslots == 0) { + /* if there was fatal error - reset port. */ + if (ch->toslots != 0 || ch->fatalerr) { + mvs_reset(dev); + } else { + /* if we have slots in error, we can reinit port. */ + if (ch->eslots != 0) { + mvs_set_edma_mode(dev, MVS_EDMA_OFF); + ch->eslots = 0; + } + /* if there commands on hold, we can do READ LOG. */ + if (!ch->readlog && ch->numhslots) + mvs_issue_read_log(dev); + } + /* If all the rest of commands are in timeout - give them chance. */ + } else if ((ch->rslots & ~ch->toslots) == 0 && + et != MVS_ERR_TIMEOUT) + mvs_rearm_timeout(dev); + /* Start PM timer. */ + if (ch->numrslots == 0 && ch->pm_level > 3 && + (ch->curr[ch->pm_present ? 15 : 0].caps & CTS_SATA_CAPS_D_PMREQ)) { + callout_schedule(&ch->pm_timer, + (ch->pm_level == 4) ? hz / 1000 : hz / 8); + } +} + +static void +mvs_issue_read_log(device_t dev) +{ + struct mvs_channel *ch = device_get_softc(dev); + union ccb *ccb; + struct ccb_ataio *ataio; + int i; + + ch->readlog = 1; + /* Find some holden command. */ + for (i = 0; i < MVS_MAX_SLOTS; i++) { + if (ch->hold[i]) + break; + } + ccb = xpt_alloc_ccb_nowait(); + if (ccb == NULL) { + device_printf(dev, "Unable allocate READ LOG command"); + return; /* XXX */ + } + ccb->ccb_h = ch->hold[i]->ccb_h; /* Reuse old header. */ + ccb->ccb_h.func_code = XPT_ATA_IO; + ccb->ccb_h.flags = CAM_DIR_IN; + ccb->ccb_h.timeout = 1000; /* 1s should be enough. 
*/ + ataio = &ccb->ataio; + ataio->data_ptr = malloc(512, M_MVS, M_NOWAIT); + if (ataio->data_ptr == NULL) { + device_printf(dev, "Unable allocate memory for READ LOG command"); + return; /* XXX */ + } + ataio->dxfer_len = 512; + bzero(&ataio->cmd, sizeof(ataio->cmd)); + ataio->cmd.flags = CAM_ATAIO_48BIT; + ataio->cmd.command = 0x2F; /* READ LOG EXT */ + ataio->cmd.sector_count = 1; + ataio->cmd.sector_count_exp = 0; + ataio->cmd.lba_low = 0x10; + ataio->cmd.lba_mid = 0; + ataio->cmd.lba_mid_exp = 0; + /* Freeze SIM while doing READ LOG EXT. */ + xpt_freeze_simq(ch->sim, 1); + mvs_begin_transaction(dev, ccb); +} + +static void +mvs_process_read_log(device_t dev, union ccb *ccb) +{ + struct mvs_channel *ch = device_get_softc(dev); + uint8_t *data; + struct ata_res *res; + int i; + + ch->readlog = 0; + + data = ccb->ataio.data_ptr; + if ((ccb->ccb_h.status & CAM_STATUS_MASK) == CAM_REQ_CMP && + (data[0] & 0x80) == 0) { + for (i = 0; i < MVS_MAX_SLOTS; i++) { + if (!ch->hold[i]) + continue; + if (ch->hold[i]->ccb_h.target_id != ccb->ccb_h.target_id) + continue; + if ((data[0] & 0x1F) == ch->holdtag[i]) { + res = &ch->hold[i]->ataio.res; + res->status = data[2]; + res->error = data[3]; + res->lba_low = data[4]; + res->lba_mid = data[5]; + res->lba_high = data[6]; + res->device = data[7]; + res->lba_low_exp = data[8]; + res->lba_mid_exp = data[9]; + res->lba_high_exp = data[10]; + res->sector_count = data[12]; + res->sector_count_exp = data[13]; + } else { + ch->hold[i]->ccb_h.status &= ~CAM_STATUS_MASK; + ch->hold[i]->ccb_h.status |= CAM_REQUEUE_REQ; + } + xpt_done(ch->hold[i]); + ch->hold[i] = NULL; + ch->numhslots--; + } + } else { + if ((ccb->ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) + device_printf(dev, "Error while READ LOG EXT\n"); + else if ((data[0] & 0x80) == 0) { + device_printf(dev, "Non-queued command error in READ LOG EXT\n"); + } + for (i = 0; i < MVS_MAX_SLOTS; i++) { + if (!ch->hold[i]) + continue; + if (ch->hold[i]->ccb_h.target_id != 
ccb->ccb_h.target_id) + continue; + xpt_done(ch->hold[i]); + ch->hold[i] = NULL; + ch->numhslots--; + } + } + free(ccb->ataio.data_ptr, M_MVS); + xpt_free_ccb(ccb); + xpt_release_simq(ch->sim, TRUE); +} + +static int +mvs_wait(device_t dev, u_int s, u_int c, int t) +{ + int timeout = 0; + uint8_t st; + + while (((st = mvs_getstatus(dev, 0)) & (s | c)) != s) { + DELAY(1000); + if (timeout++ > t) { + device_printf(dev, "Wait status %02x\n", st); + return (-1); + } + } + return (timeout); +} + +static void +mvs_requeue_frozen(device_t dev) +{ + struct mvs_channel *ch = device_get_softc(dev); + union ccb *fccb = ch->frozen; + + if (fccb) { + ch->frozen = NULL; + fccb->ccb_h.status = CAM_REQUEUE_REQ | CAM_RELEASE_SIMQ; + if (!(fccb->ccb_h.status & CAM_DEV_QFRZN)) { + xpt_freeze_devq(fccb->ccb_h.path, 1); + fccb->ccb_h.status |= CAM_DEV_QFRZN; + } + xpt_done(fccb); + } +} + +static void +mvs_reset(device_t dev) +{ + struct mvs_channel *ch = device_get_softc(dev); + int i; + + xpt_freeze_simq(ch->sim, 1); + if (bootverbose) + device_printf(dev, "MVS reset...\n"); + /* Requeue freezed command. */ + mvs_requeue_frozen(dev); + /* Kill the engine and requeue all running commands. */ + mvs_set_edma_mode(dev, MVS_EDMA_OFF); + ATA_OUTL(ch->r_mem, DMA_C, 0); + for (i = 0; i < MVS_MAX_SLOTS; i++) { + /* Do we have a running request on slot? */ + if (ch->slot[i].state < MVS_SLOT_RUNNING) + continue; + /* XXX; Commands in loading state. 
*/ + mvs_end_transaction(&ch->slot[i], MVS_ERR_INNOCENT); + } + for (i = 0; i < MVS_MAX_SLOTS; i++) { + if (!ch->hold[i]) + continue; + xpt_done(ch->hold[i]); + ch->hold[i] = NULL; + ch->numhslots--; + } + if (ch->toslots != 0) + xpt_release_simq(ch->sim, TRUE); + ch->eslots = 0; + ch->toslots = 0; + ch->fatalerr = 0; + /* Tell the XPT about the event */ + xpt_async(AC_BUS_RESET, ch->path, NULL); + ATA_OUTL(ch->r_mem, EDMA_IEM, 0); + ATA_OUTL(ch->r_mem, EDMA_CMD, EDMA_CMD_EATARST); + DELAY(25); + ATA_OUTL(ch->r_mem, EDMA_CMD, 0); + /* Reset and reconnect PHY, */ + if (!mvs_sata_phy_reset(dev)) { + if (bootverbose) + device_printf(dev, + "MVS reset done: phy reset found no device\n"); + ch->devices = 0; + ATA_OUTL(ch->r_mem, SATA_SE, 0xffffffff); + ATA_OUTL(ch->r_mem, EDMA_IEC, 0); + ATA_OUTL(ch->r_mem, EDMA_IEM, ~EDMA_IE_TRANSIENT); + xpt_release_simq(ch->sim, TRUE); + return; + } + /* Wait for clearing busy status. */ + if ((i = mvs_wait(dev, 0, ATA_S_BUSY | ATA_S_DRQ, 15000)) < 0) + device_printf(dev, "device is not ready\n"); + else if (bootverbose) + device_printf(dev, "ready wait time=%dms\n", i); + ch->devices = 1; + ATA_OUTL(ch->r_mem, SATA_SE, 0xffffffff); + ATA_OUTL(ch->r_mem, EDMA_IEC, 0); + ATA_OUTL(ch->r_mem, EDMA_IEM, ~EDMA_IE_TRANSIENT); + if (bootverbose) + device_printf(dev, "MVS reset done: device found\n"); + xpt_release_simq(ch->sim, TRUE); +} + +static void +mvs_softreset(device_t dev, union ccb *ccb) +{ + struct mvs_channel *ch = device_get_softc(dev); + int port = ccb->ccb_h.target_id & 0x0f; + int i; + + mvs_set_edma_mode(dev, MVS_EDMA_OFF); + ATA_OUTB(ch->r_mem, SATA_SATAICTL, port << SATA_SATAICTL_PMPTX_SHIFT); + ATA_OUTB(ch->r_mem, ATA_CONTROL, ATA_A_RESET); + DELAY(10000); + ATA_OUTB(ch->r_mem, ATA_CONTROL, 0); + ccb->ccb_h.status &= ~CAM_STATUS_MASK; + /* Wait for clearing busy status. 
*/ + if ((i = mvs_wait(dev, 0, ATA_S_BUSY | ATA_S_DRQ, ccb->ccb_h.timeout)) < 0) { + ccb->ccb_h.status |= CAM_CMD_TIMEOUT; + } else { + ccb->ccb_h.status |= CAM_REQ_CMP; + } + mvs_tfd_read(dev, ccb); + xpt_done(ccb); +} + +static int +mvs_sata_connect(struct mvs_channel *ch) +{ + u_int32_t status; + int timeout; + + /* Wait up to 100ms for "connect well" */ + for (timeout = 0; timeout < 100 ; timeout++) { + status = ATA_INL(ch->r_mem, SATA_SS); + if (((status & SATA_SS_DET_MASK) == SATA_SS_DET_PHY_ONLINE) && + ((status & SATA_SS_SPD_MASK) != SATA_SS_SPD_NO_SPEED) && + ((status & SATA_SS_IPM_MASK) == SATA_SS_IPM_ACTIVE)) + break; + if ((status & SATA_SS_DET_MASK) == SATA_SS_DET_PHY_OFFLINE) { + if (bootverbose) { + device_printf(ch->dev, "SATA offline status=%08x\n", + status); + } + return (0); + } + DELAY(1000); + } + if (timeout >= 100) { + if (bootverbose) { + device_printf(ch->dev, "SATA connect timeout status=%08x\n", + status); + } + return (0); + } + if (bootverbose) { + device_printf(ch->dev, "SATA connect time=%dms status=%08x\n", + timeout, status); + } + /* Clear SATA error register */ + ATA_OUTL(ch->r_mem, SATA_SE, 0xffffffff); + return (1); +} + +static int +mvs_sata_phy_reset(device_t dev) +{ + struct mvs_channel *ch = device_get_softc(dev); + int sata_rev; + uint32_t val; + + sata_rev = ch->user[ch->pm_present ? 15 : 0].revision; + if (sata_rev == 1) + val = SATA_SC_SPD_SPEED_GEN1; + else if (sata_rev == 2) + val = SATA_SC_SPD_SPEED_GEN2; + else if (sata_rev == 3) + val = SATA_SC_SPD_SPEED_GEN3; + else + val = 0; + ATA_OUTL(ch->r_mem, SATA_SC, + SATA_SC_DET_RESET | val | + SATA_SC_IPM_DIS_PARTIAL | SATA_SC_IPM_DIS_SLUMBER); + DELAY(5000); + ATA_OUTL(ch->r_mem, SATA_SC, + SATA_SC_DET_IDLE | val | ((ch->pm_level > 0) ? 
0 : + (SATA_SC_IPM_DIS_PARTIAL | SATA_SC_IPM_DIS_SLUMBER))); + DELAY(5000); + if (!mvs_sata_connect(ch)) { + if (ch->pm_level > 0) + ATA_OUTL(ch->r_mem, SATA_SC, SATA_SC_DET_DISABLE); + return (0); + } + return (1); +} + +static int +mvs_check_ids(device_t dev, union ccb *ccb) +{ + struct mvs_channel *ch = device_get_softc(dev); + + if (ccb->ccb_h.target_id > ((ch->quirks & MVS_Q_GENI) ? 0 : 15)) { + ccb->ccb_h.status = CAM_TID_INVALID; + xpt_done(ccb); + return (-1); + } + if (ccb->ccb_h.target_lun != 0) { + ccb->ccb_h.status = CAM_LUN_INVALID; + xpt_done(ccb); + return (-1); + } + return (0); +} + +static void +mvsaction(struct cam_sim *sim, union ccb *ccb) +{ + device_t dev; + struct mvs_channel *ch; + + CAM_DEBUG(ccb->ccb_h.path, CAM_DEBUG_TRACE, ("mvsaction func_code=%x\n", + ccb->ccb_h.func_code)); + + ch = (struct mvs_channel *)cam_sim_softc(sim); + dev = ch->dev; + switch (ccb->ccb_h.func_code) { + /* Common cases first */ + case XPT_ATA_IO: /* Execute the requested I/O operation */ + case XPT_SCSI_IO: + if (mvs_check_ids(dev, ccb)) + return; + if (ch->devices == 0 || + (ch->pm_present == 0 && + ccb->ccb_h.target_id > 0 && ccb->ccb_h.target_id < 15)) { + ccb->ccb_h.status = CAM_SEL_TIMEOUT; + break; + } + /* Check for command collision. */ + if (mvs_check_collision(dev, ccb)) { + /* Freeze command. */ + ch->frozen = ccb; + /* We have only one frozen slot, so freeze simq also. 
*/ + xpt_freeze_simq(ch->sim, 1); + return; + } + mvs_begin_transaction(dev, ccb); + return; + case XPT_EN_LUN: /* Enable LUN as a target */ + case XPT_TARGET_IO: /* Execute target I/O request */ + case XPT_ACCEPT_TARGET_IO: /* Accept Host Target Mode CDB */ + case XPT_CONT_TARGET_IO: /* Continue Host Target I/O Connection*/ + case XPT_ABORT: /* Abort the specified CCB */ + /* XXX Implement */ + ccb->ccb_h.status = CAM_REQ_INVALID; + break; + case XPT_SET_TRAN_SETTINGS: + { + struct ccb_trans_settings *cts = &ccb->cts; + struct mvs_device *d; + + if (mvs_check_ids(dev, ccb)) + return; + if (cts->type == CTS_TYPE_CURRENT_SETTINGS) + d = &ch->curr[ccb->ccb_h.target_id]; + else + d = &ch->user[ccb->ccb_h.target_id]; + if (cts->xport_specific.sata.valid & CTS_SATA_VALID_REVISION) + d->revision = cts->xport_specific.sata.revision; + if (cts->xport_specific.sata.valid & CTS_SATA_VALID_MODE) + d->mode = cts->xport_specific.sata.mode; + if (cts->xport_specific.sata.valid & CTS_SATA_VALID_BYTECOUNT) { + d->bytecount = min((ch->quirks & MVS_Q_GENIIE) ? 
8192 : 2048, + cts->xport_specific.sata.bytecount); + } + if (cts->xport_specific.sata.valid & CTS_SATA_VALID_TAGS) + d->tags = min(MVS_MAX_SLOTS, cts->xport_specific.sata.tags); + if (cts->xport_specific.sata.valid & CTS_SATA_VALID_PM) + ch->pm_present = cts->xport_specific.sata.pm_present; + if (cts->xport_specific.sata.valid & CTS_SATA_VALID_ATAPI) + d->atapi = cts->xport_specific.sata.atapi; + if (cts->xport_specific.sata.valid & CTS_SATA_VALID_CAPS) + d->caps = cts->xport_specific.sata.caps; + ccb->ccb_h.status = CAM_REQ_CMP; + break; + } + case XPT_GET_TRAN_SETTINGS: + /* Get default/user set transfer settings for the target */ + { + struct ccb_trans_settings *cts = &ccb->cts; + struct mvs_device *d; + uint32_t status; + + if (mvs_check_ids(dev, ccb)) + return; + if (cts->type == CTS_TYPE_CURRENT_SETTINGS) + d = &ch->curr[ccb->ccb_h.target_id]; + else + d = &ch->user[ccb->ccb_h.target_id]; + cts->protocol = PROTO_ATA; + cts->protocol_version = PROTO_VERSION_UNSPECIFIED; + cts->transport = XPORT_SATA; + cts->transport_version = XPORT_VERSION_UNSPECIFIED; + cts->proto_specific.valid = 0; + cts->xport_specific.sata.valid = 0; + if (cts->type == CTS_TYPE_CURRENT_SETTINGS && + (ccb->ccb_h.target_id == 15 || + (ccb->ccb_h.target_id == 0 && !ch->pm_present))) { + status = ATA_INL(ch->r_mem, SATA_SS) & SATA_SS_SPD_MASK; + if (status & 0x0f0) { + cts->xport_specific.sata.revision = + (status & 0x0f0) >> 4; + cts->xport_specific.sata.valid |= + CTS_SATA_VALID_REVISION; + } + cts->xport_specific.sata.caps = d->caps & CTS_SATA_CAPS_D; +// if (ch->pm_level) +// cts->xport_specific.sata.caps |= CTS_SATA_CAPS_H_PMREQ; + cts->xport_specific.sata.caps &= + ch->user[ccb->ccb_h.target_id].caps; + cts->xport_specific.sata.valid |= CTS_SATA_VALID_CAPS; + } else { + cts->xport_specific.sata.revision = d->revision; + cts->xport_specific.sata.valid |= CTS_SATA_VALID_REVISION; + cts->xport_specific.sata.caps = d->caps; + cts->xport_specific.sata.valid |= CTS_SATA_VALID_CAPS; + } + 
cts->xport_specific.sata.mode = d->mode; + cts->xport_specific.sata.valid |= CTS_SATA_VALID_MODE; + cts->xport_specific.sata.bytecount = d->bytecount; + cts->xport_specific.sata.valid |= CTS_SATA_VALID_BYTECOUNT; + cts->xport_specific.sata.pm_present = ch->pm_present; + cts->xport_specific.sata.valid |= CTS_SATA_VALID_PM; + cts->xport_specific.sata.tags = d->tags; + cts->xport_specific.sata.valid |= CTS_SATA_VALID_TAGS; + cts->xport_specific.sata.atapi = d->atapi; + cts->xport_specific.sata.valid |= CTS_SATA_VALID_ATAPI; + ccb->ccb_h.status = CAM_REQ_CMP; + break; + } + case XPT_RESET_BUS: /* Reset the specified SCSI bus */ + case XPT_RESET_DEV: /* Bus Device Reset the specified SCSI device */ + mvs_reset(dev); + ccb->ccb_h.status = CAM_REQ_CMP; + break; + case XPT_TERM_IO: /* Terminate the I/O process */ + /* XXX Implement */ + ccb->ccb_h.status = CAM_REQ_INVALID; + break; + case XPT_PATH_INQ: /* Path routing inquiry */ + { + struct ccb_pathinq *cpi = &ccb->cpi; + + cpi->version_num = 1; /* XXX??? */ + cpi->hba_inquiry = PI_SDTR_ABLE; + if (!(ch->quirks & MVS_Q_GENI)) { + cpi->hba_inquiry |= PI_SATAPM; + /* Gen-II is extremely slow with NCQ on PMP. 
*/ + if ((ch->quirks & MVS_Q_GENIIE) || ch->pm_present == 0) + cpi->hba_inquiry |= PI_TAG_ABLE; + } + cpi->target_sprt = 0; + cpi->hba_misc = PIM_SEQSCAN; + cpi->hba_eng_cnt = 0; + if (!(ch->quirks & MVS_Q_GENI)) + cpi->max_target = 15; + else + cpi->max_target = 0; + cpi->max_lun = 0; + cpi->initiator_id = 0; + cpi->bus_id = cam_sim_bus(sim); + cpi->base_transfer_speed = 150000; + strncpy(cpi->sim_vid, "FreeBSD", SIM_IDLEN); + strncpy(cpi->hba_vid, "Marvell", HBA_IDLEN); + strncpy(cpi->dev_name, cam_sim_name(sim), DEV_IDLEN); + cpi->unit_number = cam_sim_unit(sim); + cpi->transport = XPORT_SATA; + cpi->transport_version = XPORT_VERSION_UNSPECIFIED; + cpi->protocol = PROTO_ATA; + cpi->protocol_version = PROTO_VERSION_UNSPECIFIED; + cpi->maxio = MAXPHYS; + cpi->ccb_h.status = CAM_REQ_CMP; + break; + } + default: + ccb->ccb_h.status = CAM_REQ_INVALID; + break; + } + xpt_done(ccb); +} + +static void +mvspoll(struct cam_sim *sim) +{ + struct mvs_channel *ch = (struct mvs_channel *)cam_sim_softc(sim); + struct mvs_intr_arg arg; + + arg.arg = ch->dev; + arg.cause = 2; /* XXX */ + mvs_ch_intr(&arg); +} + diff --git a/sys/dev/mvs/mvs.h b/sys/dev/mvs/mvs.h new file mode 100644 index 00000000000..9ec4e3d265a --- /dev/null +++ b/sys/dev/mvs/mvs.h @@ -0,0 +1,650 @@ +/*- + * Copyright (c) 2010 Alexander Motin + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer, + * without modification, immediately at the beginning of the file. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#include "mvs_if.h" + +/* Chip registers */ +#define CHIP_PCIEIC 0x1900 /* PCIe Interrupt Cause */ +#define CHIP_PCIEIM 0x1910 /* PCIe Interrupt Mask */ +#define CHIP_PCIIC 0x1d58 /* PCI Interrupt Cause */ +#define CHIP_PCIIM 0x1d5c /* PCI Interrupt Mask */ +#define CHIP_MIC 0x1d60 /* Main Interrupt Cause */ +#define CHIP_MIM 0x1d64 /* Main Interrupt Mask */ +#define CHIP_SOC_MIC 0x20 /* SoC Main Interrupt Cause */ +#define CHIP_SOC_MIM 0x24 /* SoC Main Interrupt Mask */ +#define IC_ERR_IRQ (1 << 0) /* shift by (2 * port #) */ +#define IC_DONE_IRQ (1 << 1) /* shift by (2 * port #) */ +#define IC_HC0 0x000001ff /* bits 0-8 = HC0 */ +#define IC_HC_SHIFT 9 /* HC1 shift */ +#define IC_HC1 (IC_HC0 << IC_HC_SHIFT) /* 9-17 = HC1 */ +#define IC_ERR_HC0 0x00000055 /* HC0 ERR_IRQ */ +#define IC_DONE_HC0 0x000000aa /* HC0 DONE_IRQ */ +#define IC_ERR_HC1 (IC_ERR_HC0 << IC_HC_SHIFT) /* HC1 ERR_IRQ */ +#define IC_DONE_HC1 (IC_DONE_HC0 << IC_HC_SHIFT) /* HC1 DONE_IRQ */ +#define IC_HC0_COAL_DONE (1 << 8) /* HC0 IRQ coalescing */ +#define IC_HC1_COAL_DONE (1 << 17) /* HC1 IRQ coalescing */ +#define IC_PCI_ERR (1 << 18) +#define IC_TRAN_COAL_LO_DONE (1 << 19) /* transaction coalescing */ +#define IC_TRAN_COAL_HI_DONE (1 << 
20) /* transaction coalescing */ +#define IC_ALL_PORTS_COAL_DONE (1 << 21) /* GEN_II(E) IRQ coalescing */ +#define IC_GPIO_INT (1 << 22) +#define IC_SELF_INT (1 << 23) +#define IC_TWSI_INT (1 << 24) +#define IC_MAIN_RSVD (0xfe000000) /* bits 31-25 */ +#define IC_MAIN_RSVD_5 (0xfff10000) /* bits 31-19 */ +#define IC_MAIN_RSVD_SOC (0xfffffec0) /* bits 31-9, 7-6 */ + +#define CHIP_SOC_LED 0x2C /* SoC LED Configuration */ + +/* Chip CCC registers */ +#define CHIP_ICC 0x18008 +#define CHIP_ICC_ALL_PORTS (1 << 4) /* all ports irq event */ +#define CHIP_ICT 0x180cc +#define CHIP_ITT 0x180d0 +#define CHIP_TRAN_COAL_CAUSE_LO 0x18088 +#define CHIP_TRAN_COAL_CAUSE_HI 0x1808c + +/* Host Controller registers */ +#define HC_SIZE 0x10000 +#define HC_OFFSET 0x20000 +#define HC_BASE(hc) ((hc) * HC_SIZE + HC_OFFSET) + +#define HC_CFG 0x0 /* Configuration */ +#define HC_CFG_TIMEOUT_MASK (0xff << 0) +#define HC_CFG_NODMABS (1 << 8) +#define HC_CFG_NOEDMABS (1 << 9) +#define HC_CFG_NOPRDBS (1 << 10) +#define HC_CFG_TIMEOUTEN (1 << 16) /* Timer Enable */ +#define HC_CFG_COALDIS(p) (1 << ((p) + 24))/* Coalescing Disable*/ +#define HC_RQOP 0x4 /* Request Queue Out-Pointer */ +#define HC_RQIP 0x8 /* Response Queue In-Pointer */ +#define HC_ICT 0xc /* Interrupt Coalescing Threshold */ +#define HC_ICT_SAICOALT_MASK 0x000000ff +#define HC_ITT 0x10 /* Interrupt Time Threshold */ +#define HC_ITT_SAITMTH_MASK 0x00ffffff +#define HC_IC 0x14 /* Interrupt Cause */ +#define HC_IC_DONE(p) (1 << (p)) /* SaCrpb/DMA Done */ +#define HC_IC_COAL (1 << 4) /* Intr Coalescing */ +#define HC_IC_DEV(p) (1 << ((p) + 8)) /* Device Intr */ + +/* Port registers */ +#define PORT_SIZE 0x2000 +#define PORT_OFFSET 0x2000 +#define PORT_BASE(hc) ((hc) * PORT_SIZE + PORT_OFFSET) + +#define EDMA_CFG 0x0 /* Configuration */ +#define EDMA_CFG_RESERVED (0x1f << 0) /* Queue len ? 
*/ +#define EDMA_CFG_ESATANATVCMDQUE (1 << 5) +#define EDMA_CFG_ERDBSZ (1 << 8) +#define EDMA_CFG_EQUE (1 << 9) +#define EDMA_CFG_ERDBSZEXT (1 << 11) +#define EDMA_CFG_RESERVED2 (1 << 12) +#define EDMA_CFG_EWRBUFFERLEN (1 << 13) +#define EDMA_CFG_EDEVERR (1 << 14) +#define EDMA_CFG_EEDMAFBS (1 << 16) +#define EDMA_CFG_ECUTTHROUGHEN (1 << 17) +#define EDMA_CFG_EEARLYCOMPLETIONEN (1 << 18) +#define EDMA_CFG_EEDMAQUELEN (1 << 19) +#define EDMA_CFG_EHOSTQUEUECACHEEN (1 << 22) +#define EDMA_CFG_EMASKRXPM (1 << 23) +#define EDMA_CFG_RESUMEDIS (1 << 24) +#define EDMA_CFG_EDMAFBS (1 << 26) +#define EDMA_T 0x4 /* Timer */ +#define EDMA_IEC 0x8 /* Interrupt Error Cause */ +#define EDMA_IEM 0xc /* Interrupt Error Mask */ +#define EDMA_IE_EDEVERR (1 << 2) /* EDMA Device Error */ +#define EDMA_IE_EDEVDIS (1 << 3) /* EDMA Dev Disconn */ +#define EDMA_IE_EDEVCON (1 << 4) /* EDMA Dev Conn */ +#define EDMA_IE_SERRINT (1 << 5) +#define EDMA_IE_ESELFDIS (1 << 7) /* EDMA Self Disable */ +#define EDMA_IE_ETRANSINT (1 << 8) /* Transport Layer */ +#define EDMA_IE_EIORDYERR (1 << 12) /* EDMA IORdy Error */ +#define EDMA_IE_LINKXERR_SATACRC (1 << 0) /* SATA CRC error */ +#define EDMA_IE_LINKXERR_INTERNALFIFO (1 << 1) /* internal FIFO err */ +#define EDMA_IE_LINKXERR_LINKLAYERRESET (1 << 2) + /* Link Layer is reset by the reception of SYNC primitive from device */ +#define EDMA_IE_LINKXERR_OTHERERRORS (1 << 3) + /* + * Link state errors, coding errors, or running disparity errors occur + * during FIS reception. 
+ */ +#define EDMA_IE_LINKTXERR_FISTXABORTED (1 << 4) /* FIS Tx is aborted */ +#define EDMA_IE_LINKCTLRXERR(x) ((x) << 13) /* Link Ctrl Recv Err */ +#define EDMA_IE_LINKDATARXERR(x) ((x) << 17) /* Link Data Recv Err */ +#define EDMA_IE_LINKCTLTXERR(x) ((x) << 21) /* Link Ctrl Tx Error */ +#define EDMA_IE_LINKDATATXERR(x) ((x) << 26) /* Link Data Tx Error */ +#define EDMA_IE_TRANSPROTERR (1 << 31) /* Transport Proto E */ +#define EDMA_IE_TRANSIENT (EDMA_IE_LINKCTLRXERR(0x0b) | \ + EDMA_IE_LINKCTLTXERR(0x1f)) + /* Non-fatal Errors */ +#define EDMA_REQQBAH 0x10 /* Request Queue Base Address High */ +#define EDMA_REQQIP 0x14 /* Request Queue In-Pointer */ +#define EDMA_REQQOP 0x18 /* Request Queue Out-Pointer */ +#define EDMA_REQQP_ERQQP_SHIFT 5 +#define EDMA_REQQP_ERQQP_MASK 0x000003e0 +#define EDMA_REQQP_ERQQBAP_MASK 0x00000c00 +#define EDMA_REQQP_ERQQBA_MASK 0xfffff000 +#define EDMA_RESQBAH 0x1c /* Response Queue Base Address High */ +#define EDMA_RESQIP 0x20 /* Response Queue In-Pointer */ +#define EDMA_RESQOP 0x24 /* Response Queue Out-Pointer */ +#define EDMA_RESQP_ERPQP_SHIFT 3 +#define EDMA_RESQP_ERPQP_MASK 0x000000f8 +#define EDMA_RESQP_ERPQBAP_MASK 0x00000300 +#define EDMA_RESQP_ERPQBA_MASK 0xfffffc00 +#define EDMA_CMD 0x28 /* Command */ +#define EDMA_CMD_EENEDMA (1 << 0) /* Enable EDMA */ +#define EDMA_CMD_EDSEDMA (1 << 1) /* Disable EDMA */ +#define EDMA_CMD_EATARST (1 << 2) /* ATA Device Reset */ +#define EDMA_CMD_EEDMAFRZ (1 << 4) /* EDMA Freeze */ +#define EDMA_TC 0x2c /* Test Control */ +#define EDMA_S 0x30 /* Status */ +#define EDMA_S_EDEVQUETAG(s) ((s) & 0x0000001f) +#define EDMA_S_EDEVDIR_WRITE (0 << 5) +#define EDMA_S_EDEVDIR_READ (1 << 5) +#define EDMA_S_ECACHEEMPTY (1 << 6) +#define EDMA_S_EDMAIDLE (1 << 7) +#define EDMA_S_ESTATE(s) (((s) & 0x0000ff00) >> 8) +#define EDMA_S_EIOID(s) (((s) & 0x003f0000) >> 16) +#define EDMA_IORT 0x34 /* IORdy Timeout */ +#define EDMA_CDT 0x40 /* Command Delay Threshold */ +#define EDMA_HC 0x60 /* Halt Condition */ 
+#define EDMA_UNKN_RESD 0x6C /* Unknown register */ +#define EDMA_CQDCQOS(x) (0x90 + ((x) << 2)) + /* NCQ Done/TCQ Outstanding Status */ + +/* ATA register defines */ +#define ATA_DATA 0x100 /* (RW) data */ +#define ATA_FEATURE 0x104 /* (W) feature */ +#define ATA_F_DMA 0x01 /* enable DMA */ +#define ATA_F_OVL 0x02 /* enable overlap */ +#define ATA_ERROR 0x104 /* (R) error */ +#define ATA_E_ILI 0x01 /* illegal length */ +#define ATA_E_NM 0x02 /* no media */ +#define ATA_E_ABORT 0x04 /* command aborted */ +#define ATA_E_MCR 0x08 /* media change request */ +#define ATA_E_IDNF 0x10 /* ID not found */ +#define ATA_E_MC 0x20 /* media changed */ +#define ATA_E_UNC 0x40 /* uncorrectable data */ +#define ATA_E_ICRC 0x80 /* UDMA crc error */ +#define ATA_E_ATAPI_SENSE_MASK 0xf0 /* ATAPI sense key mask */ +#define ATA_COUNT 0x108 /* (W) sector count */ +#define ATA_IREASON 0x108 /* (R) interrupt reason */ +#define ATA_I_CMD 0x01 /* cmd (1) | data (0) */ +#define ATA_I_IN 0x02 /* read (1) | write (0) */ +#define ATA_I_RELEASE 0x04 /* released bus (1) */ +#define ATA_I_TAGMASK 0xf8 /* tag mask */ +#define ATA_SECTOR 0x10c /* (RW) sector # */ +#define ATA_CYL_LSB 0x110 /* (RW) cylinder# LSB */ +#define ATA_CYL_MSB 0x114 /* (RW) cylinder# MSB */ +#define ATA_DRIVE 0x118 /* (W) Sector/Drive/Head */ +#define ATA_D_LBA 0x40 /* use LBA addressing */ +#define ATA_D_IBM 0xa0 /* 512 byte sectors, ECC */ +#define ATA_COMMAND 0x11c /* (W) command */ +#define ATA_STATUS 0x11c /* (R) status */ +#define ATA_S_ERROR 0x01 /* error */ +#define ATA_S_INDEX 0x02 /* index */ +#define ATA_S_CORR 0x04 /* data corrected */ +#define ATA_S_DRQ 0x08 /* data request */ +#define ATA_S_DSC 0x10 /* drive seek completed */ +#define ATA_S_SERVICE 0x10 /* drive needs service */ +#define ATA_S_DWF 0x20 /* drive write fault */ +#define ATA_S_DMA 0x20 /* DMA ready */ +#define ATA_S_READY 0x40 /* drive ready */ +#define ATA_S_BUSY 0x80 /* busy */ +#define ATA_CONTROL 0x120 /* (W) control */ +#define ATA_A_IDS 0x02
/* disable interrupts */ +#define ATA_A_RESET 0x04 /* RESET controller */ +#define ATA_A_4BIT 0x08 /* 4 head bits */ +#define ATA_A_HOB 0x80 /* High Order Byte enable */ +#define ATA_ALTSTAT 0x120 /* (R) alternate status */ +#define ATAPI_P_READ (ATA_S_DRQ | ATA_I_IN) +#define ATAPI_P_WRITE (ATA_S_DRQ) +#define ATAPI_P_CMDOUT (ATA_S_DRQ | ATA_I_CMD) +#define ATAPI_P_DONEDRQ (ATA_S_DRQ | ATA_I_CMD | ATA_I_IN) +#define ATAPI_P_DONE (ATA_I_CMD | ATA_I_IN) +#define ATAPI_P_ABORT 0 + +/* Basic DMA Registers */ +#define DMA_C 0x224 /* Basic DMA Command */ +#define DMA_C_START (1 << 0) +#define DMA_C_READ (1 << 3) +#define DMA_C_DREGIONVALID (1 << 8) +#define DMA_C_DREGIONLAST (1 << 9) +#define DMA_C_CONTFROMPREV (1 << 10) +#define DMA_C_DRBC(n) (((n) & 0xffff) << 16) +#define DMA_S 0x228 /* Basic DMA Status */ +#define DMA_S_ACT (1 << 0) /* Active */ +#define DMA_S_ERR (1 << 1) /* Error */ +#define DMA_S_PAUSED (1 << 2) /* Paused */ +#define DMA_S_LAST (1 << 3) /* Last */ +#define DMA_DTLBA 0x22c /* Descriptor Table Low Base Address */ +#define DMA_DTLBA_MASK 0xfffffff0 +#define DMA_DTHBA 0x230 /* Descriptor Table High Base Address */ +#define DMA_DRLA 0x234 /* Data Region Low Address */ +#define DMA_DRHA 0x238 /* Data Region High Address */ + +/* Serial-ATA Registers */ +#define SATA_SS 0x300 /* SStatus */ +#define SATA_SS_DET_MASK 0x0000000f +#define SATA_SS_DET_NO_DEVICE 0x00000000 +#define SATA_SS_DET_DEV_PRESENT 0x00000001 +#define SATA_SS_DET_PHY_ONLINE 0x00000003 +#define SATA_SS_DET_PHY_OFFLINE 0x00000004 + +#define SATA_SS_SPD_MASK 0x000000f0 +#define SATA_SS_SPD_NO_SPEED 0x00000000 +#define SATA_SS_SPD_GEN1 0x00000010 +#define SATA_SS_SPD_GEN2 0x00000020 +#define SATA_SS_SPD_GEN3 0x00000040 + +#define SATA_SS_IPM_MASK 0x00000f00 +#define SATA_SS_IPM_NO_DEVICE 0x00000000 +#define SATA_SS_IPM_ACTIVE 0x00000100 +#define SATA_SS_IPM_PARTIAL 0x00000200 +#define SATA_SS_IPM_SLUMBER 0x00000600 +#define SATA_SE 0x304 /* SError */ +#define SATA_SEIM 0x340 /* SError 
Interrupt Mask */ +#define SATA_SE_DATA_CORRECTED 0x00000001 +#define SATA_SE_COMM_CORRECTED 0x00000002 +#define SATA_SE_DATA_ERR 0x00000100 +#define SATA_SE_COMM_ERR 0x00000200 +#define SATA_SE_PROT_ERR 0x00000400 +#define SATA_SE_HOST_ERR 0x00000800 +#define SATA_SE_PHY_CHANGED 0x00010000 +#define SATA_SE_PHY_IERROR 0x00020000 +#define SATA_SE_COMM_WAKE 0x00040000 +#define SATA_SE_DECODE_ERR 0x00080000 +#define SATA_SE_PARITY_ERR 0x00100000 +#define SATA_SE_CRC_ERR 0x00200000 +#define SATA_SE_HANDSHAKE_ERR 0x00400000 +#define SATA_SE_LINKSEQ_ERR 0x00800000 +#define SATA_SE_TRANSPORT_ERR 0x01000000 +#define SATA_SE_UNKNOWN_FIS 0x02000000 +#define SATA_SC 0x308 /* SControl */ +#define SATA_SC_DET_MASK 0x0000000f +#define SATA_SC_DET_IDLE 0x00000000 +#define SATA_SC_DET_RESET 0x00000001 +#define SATA_SC_DET_DISABLE 0x00000004 + +#define SATA_SC_SPD_MASK 0x000000f0 +#define SATA_SC_SPD_NO_SPEED 0x00000000 +#define SATA_SC_SPD_SPEED_GEN1 0x00000010 +#define SATA_SC_SPD_SPEED_GEN2 0x00000020 +#define SATA_SC_SPD_SPEED_GEN3 0x00000040 + +#define SATA_SC_IPM_MASK 0x00000f00 +#define SATA_SC_IPM_NONE 0x00000000 +#define SATA_SC_IPM_DIS_PARTIAL 0x00000100 +#define SATA_SC_IPM_DIS_SLUMBER 0x00000200 + +#define SATA_SC_SPM_MASK 0x0000f000 +#define SATA_SC_SPM_NONE 0x00000000 +#define SATA_SC_SPM_PARTIAL 0x00001000 +#define SATA_SC_SPM_SLUMBER 0x00002000 +#define SATA_SC_SPM_ACTIVE 0x00004000 +#define SATA_LTM 0x30c /* LTMode */ +#define SATA_PHYM3 0x310 /* PHY Mode 3 */ +#define SATA_PHYM4 0x314 /* PHY Mode 4 */ +#define SATA_PHYM1 0x32c /* PHY Mode 1 */ +#define SATA_PHYM2 0x330 /* PHY Mode 2 */ +#define SATA_BISTC 0x334 /* BIST Control */ +#define SATA_BISTDW1 0x338 /* BIST DW1 */ +#define SATA_BISTDW2 0x33c /* BIST DW2 */ +#define SATA_SATAICFG 0x050 /* Serial-ATA Interface Configuration */ +#define SATA_SATAICFG_REFCLKCNF_20MHZ (0 << 0) +#define SATA_SATAICFG_REFCLKCNF_25MHZ (1 << 0) +#define SATA_SATAICFG_REFCLKCNF_30MHZ (2 << 0) +#define SATA_SATAICFG_REFCLKCNF_40MHZ 
(3 << 0) +#define SATA_SATAICFG_REFCLKCNF_MASK (3 << 0) +#define SATA_SATAICFG_REFCLKDIV_1 (0 << 2) +#define SATA_SATAICFG_REFCLKDIV_2 (1 << 2) /* Used 20 or 25MHz */ +#define SATA_SATAICFG_REFCLKDIV_4 (2 << 2) /* Used 40MHz */ +#define SATA_SATAICFG_REFCLKDIV_3 (3 << 2) /* Used 30MHz */ +#define SATA_SATAICFG_REFCLKDIV_MASK (3 << 2) +#define SATA_SATAICFG_REFCLKFEEDDIV_50 (0 << 4) /* or 100, when Gen2En is 1 */ +#define SATA_SATAICFG_REFCLKFEEDDIV_60 (1 << 4) /* or 120. Used 25MHz */ +#define SATA_SATAICFG_REFCLKFEEDDIV_75 (2 << 4) /* or 150. Used 20MHz */ +#define SATA_SATAICFG_REFCLKFEEDDIV_90 (3 << 4) /* or 180 */ +#define SATA_SATAICFG_REFCLKFEEDDIV_MASK (3 << 4) +#define SATA_SATAICFG_PHYSSCEN (1 << 6) +#define SATA_SATAICFG_GEN2EN (1 << 7) +#define SATA_SATAICFG_COMMEN (1 << 8) +#define SATA_SATAICFG_PHYSHUTDOWN (1 << 9) +#define SATA_SATAICFG_TARGETMODE (1 << 10) /* 1 = Initiator */ +#define SATA_SATAICFG_COMCHANNEL (1 << 11) +#define SATA_SATAICFG_IGNOREBSY (1 << 24) +#define SATA_SATAICFG_LINKRSTEN (1 << 25) +#define SATA_SATAICFG_CMDRETXDS (1 << 26) +#define SATA_SATAICTL 0x344 /* Serial-ATA Interface Control */ +#define SATA_SATAICTL_PMPTX_MASK 0x0000000f +#define SATA_SATAICTL_PMPTX_SHIFT 0 +#define SATA_SATAICTL_VUM (1 << 8) +#define SATA_SATAICTL_VUS (1 << 9) +#define SATA_SATAICTL_EDMAACT (1 << 16) +#define SATA_SATAICTL_CLEARSTAT (1 << 24) +#define SATA_SATAICTL_SRST (1 << 25) +#define SATA_SATAITC 0x348 /* Serial-ATA Interface Test Control */ +#define SATA_SATAIS 0x34c /* Serial-ATA Interface Status */ +#define SATA_VU 0x35c /* Vendor Unique */ +#define SATA_FISC 0x360 /* FIS Configuration */ +#define SATA_FISC_FISWAIT4RDYEN_B0 (1 << 0) /* Device to Host FIS */ +#define SATA_FISC_FISWAIT4RDYEN_B1 (1 << 1) /* SDB FIS rcv with bit 0 */ +#define SATA_FISC_FISWAIT4RDYEN_B2 (1 << 2) /* DMA Activate FIS */ +#define SATA_FISC_FISWAIT4RDYEN_B3 (1 << 3) /* DMA Setup FIS */ +#define SATA_FISC_FISWAIT4RDYEN_B4 (1 << 4) /* Data FIS first DW */ +#define 
SATA_FISC_FISWAIT4RDYEN_B5 (1 << 5) /* Data FIS entire FIS */ +#define SATA_FISC_FISWAIT4HOSTRDYEN_B0 (1 << 8) + /* Device to Host FIS with or */ +#define SATA_FISC_FISWAIT4HOSTRDYEN_B1 (1 << 9) /* SDB FIS rcv with bit */ +#define SATA_FISC_FISWAIT4HOSTRDYEN_B2 (1 << 10) /* SDB FIS rcv with */ +#define SATA_FISC_FISWAIT4HOSTRDYEN_B3 (1 << 11) /* BIST Activate FIS */ +#define SATA_FISC_FISWAIT4HOSTRDYEN_B4 (1 << 12) /* PIO Setup FIS */ +#define SATA_FISC_FISWAIT4HOSTRDYEN_B5 (1 << 13) /* Data FIS with Link error */ +#define SATA_FISC_FISWAIT4HOSTRDYEN_B6 (1 << 14) /* Unrecognized FIS type */ +#define SATA_FISC_FISWAIT4HOSTRDYEN_B7 (1 << 15) /* Any FIS */ +#define SATA_FISC_FISDMAACTIVATESYNCRESP (1 << 16) +#define SATA_FISC_FISUNRECTYPECONT (1 << 17) +#define SATA_FISIC 0x364 /* FIS Interrupt Cause */ +#define SATA_FISIM 0x368 /* FIS Interrupt Mask */ +#define SATA_FISDW0 0x370 /* FIS DW0 */ +#define SATA_FISDW1 0x374 /* FIS DW1 */ +#define SATA_FISDW2 0x378 /* FIS DW2 */ +#define SATA_FISDW3 0x37c /* FIS DW3 */ +#define SATA_FISDW4 0x380 /* FIS DW4 */ +#define SATA_FISDW5 0x384 /* FIS DW5 */ +#define SATA_FISDW6 0x388 /* FIS DW6 */ + +#define MVS_MAX_PORTS 8 +#define MVS_MAX_SLOTS 32 + +/* Pessimistic prognosis on number of required S/G entries */ +#define MVS_SG_ENTRIES (btoc(MAXPHYS) + 1) + +/* EDMA Command Request Block (CRQB) Data */ +struct mvs_crqb { + uint32_t cprdbl; /* cPRD Descriptor Table Base Low Address */ + uint32_t cprdbh; /* cPRD Descriptor Table Base High Address */ + uint16_t ctrlflg; /* Control Flags */ +#define MVS_CRQB_READ 0x0001 +#define MVS_CRQB_TAG_MASK 0x003e +#define MVS_CRQB_TAG_SHIFT 1 +#define MVS_CRQB_PMP_MASK 0xf000 +#define MVS_CRQB_PMP_SHIFT 12 + uint8_t cmd[22]; +} __packed; + +struct mvs_crqb_gen2e { + uint32_t cprdbl; /* cPRD Descriptor Table Base Low Address */ + uint32_t cprdbh; /* cPRD Descriptor Table Base High Address */ + uint32_t ctrlflg; /* Control Flags */ +#define MVS_CRQB2E_READ 0x00000001 +#define MVS_CRQB2E_DTAG_MASK
0x0000003e +#define MVS_CRQB2E_DTAG_SHIFT 1 +#define MVS_CRQB2E_PMP_MASK 0x0000f000 +#define MVS_CRQB2E_PMP_SHIFT 12 +#define MVS_CRQB2E_CPRD 0x00010000 +#define MVS_CRQB2E_HTAG_MASK 0x003e0000 +#define MVS_CRQB2E_HTAG_SHIFT 17 + uint32_t drbc; /* Data Region Byte Count */ + uint8_t cmd[16]; +} __packed; + +/* EDMA Physical Region Descriptors (ePRD) Table Data Structure */ +struct mvs_eprd { + uint32_t prdbal; /* Address bits[31:1] */ + uint32_t bytecount; /* Byte Count */ +#define MVS_EPRD_MASK 0x0000ffff /* max 64KB */ +#define MVS_EPRD_MAX (MVS_EPRD_MASK + 1) +#define MVS_EPRD_EOF 0x80000000 + uint32_t prdbah; /* Address bits[63:32] */ + uint32_t resv; +} __packed; + +/* Command request blocks. 32 commands. First 1Kbyte aligned. */ +#define MVS_CRQB_OFFSET 0 +#define MVS_CRQB_SIZE 32 /* sizeof(struct mvs_crqb) */ +#define MVS_CRQB_MASK 0x000003e0 +#define MVS_CRQB_SHIFT 5 +#define MVS_CRQB_TO_ADDR(slot) ((slot) << MVS_CRQB_SHIFT) +#define MVS_ADDR_TO_CRQB(addr) (((addr) & MVS_CRQB_MASK) >> MVS_CRQB_SHIFT) +/* ePRD blocks. Up to 32 commands, each 16byte aligned. */ +#define MVS_EPRD_OFFSET (MVS_CRQB_OFFSET + MVS_CRQB_SIZE * MVS_MAX_SLOTS) +#define MVS_EPRD_SIZE (MVS_SG_ENTRIES * 16) /* sizeof(struct mvs_eprd) */ +/* Request work area. */ +#define MVS_WORKRQ_SIZE (MVS_EPRD_OFFSET + MVS_EPRD_SIZE * MVS_MAX_SLOTS) + +/* EDMA Command Response Block (CRPB) Data */ +struct mvs_crpb { + uint16_t id; /* CRPB ID */ +#define MVS_CRPB_TAG_MASK 0x001F +#define MVS_CRPB_TAG_SHIFT 0 + uint16_t rspflg; /* CRPB Response Flags */ +#define MVS_CRPB_EDMASTS_MASK 0x007F +#define MVS_CRPB_EDMASTS_SHIFT 0 +#define MVS_CRPB_ATASTS_MASK 0xFF00 +#define MVS_CRPB_ATASTS_SHIFT 8 + uint32_t ts; /* CRPB Time Stamp */ +} __packed; + +/* Command response blocks. 32 commands. First 256byte aligned.
*/ +#define MVS_CRPB_OFFSET 0 +#define MVS_CRPB_SIZE sizeof(struct mvs_crpb) +#define MVS_CRPB_MASK 0x000000f8 +#define MVS_CRPB_SHIFT 3 +#define MVS_CRPB_TO_ADDR(slot) ((slot) << MVS_CRPB_SHIFT) +#define MVS_ADDR_TO_CRPB(addr) (((addr) & MVS_CRPB_MASK) >> MVS_CRPB_SHIFT) +/* Request work area. */ +#define MVS_WORKRP_SIZE (MVS_CRPB_OFFSET + MVS_CRPB_SIZE * MVS_MAX_SLOTS) + +/* misc defines */ +#define ATA_IRQ_RID 0 +#define ATA_INTR_FLAGS (INTR_MPSAFE|INTR_TYPE_BIO|INTR_ENTROPY) + +struct ata_dmaslot { + bus_dmamap_t data_map; /* Data DMA map */ + bus_addr_t addr; /* Data address */ + uint16_t len; /* Data size */ +}; + +/* structure holding DMA related information */ +struct mvs_dma { + bus_dma_tag_t workrq_tag; /* Request workspace DMA tag */ + bus_dmamap_t workrq_map; /* Request workspace DMA map */ + uint8_t *workrq; /* Request workspace */ + bus_addr_t workrq_bus; /* Request bus address */ + bus_dma_tag_t workrp_tag; /* Reply workspace DMA tag */ + bus_dmamap_t workrp_map; /* Reply workspace DMA map */ + uint8_t *workrp; /* Reply workspace */ + bus_addr_t workrp_bus; /* Reply bus address */ + bus_dma_tag_t data_tag; /* Data DMA tag */ +}; + +enum mvs_slot_states { + MVS_SLOT_EMPTY, + MVS_SLOT_LOADING, + MVS_SLOT_RUNNING, + MVS_SLOT_EXECUTING +}; + +struct mvs_slot { + device_t dev; /* Device handle */ + int slot; /* Number of this slot */ + int tag; /* Used command tag */ + enum mvs_slot_states state; /* Slot state */ + union ccb *ccb; /* CCB occupying slot */ + struct ata_dmaslot dma; /* DMA data of this slot */ + struct callout timeout; /* Execution timeout */ +}; + +struct mvs_device { + int revision; + int mode; + u_int bytecount; + u_int atapi; + u_int tags; + u_int caps; +}; + +enum mvs_edma_mode { + MVS_EDMA_UNKNOWN, + MVS_EDMA_OFF, + MVS_EDMA_ON, + MVS_EDMA_QUEUED, + MVS_EDMA_NCQ, +}; + +/* structure describing an ATA channel */ +struct mvs_channel { + device_t dev; /* Device handle */ + int unit; /* Physical channel */ + struct resource *r_mem; /* 
Memory of this channel */ + struct resource *r_irq; /* Interrupt of this channel */ + void *ih; /* Interrupt handle */ + struct mvs_dma dma; /* DMA data */ + struct cam_sim *sim; + struct cam_path *path; + int quirks; +#define MVS_Q_GENI 1 +#define MVS_Q_GENII 2 +#define MVS_Q_GENIIE 4 +#define MVS_Q_SOC 8 +#define MVS_Q_CT 16 + int pm_level; /* power management level */ + + struct mvs_slot slot[MVS_MAX_SLOTS]; + union ccb *hold[MVS_MAX_SLOTS]; + int holdtag[MVS_MAX_SLOTS]; /* Tags used for held commands. */ + struct mtx mtx; /* state lock */ + int devices; /* What is present */ + int pm_present; /* PM presence reported */ + enum mvs_edma_mode curr_mode; /* Current EDMA mode */ + int fbs_enabled; /* FIS-based switching enabled */ + uint32_t oslots; /* Occupied slots */ + uint32_t otagspd[16]; /* Occupied device tags */ + uint32_t rslots; /* Running slots */ + uint32_t aslots; /* Slots with atomic commands */ + uint32_t eslots; /* Slots in error */ + uint32_t toslots; /* Slots in timeout */ + int numrslots; /* Number of running slots */ + int numrslotspd[16];/* Number of running slots per dev */ + int numpslots; /* Number of PIO slots */ + int numdslots; /* Number of DMA slots */ + int numtslots; /* Number of NCQ slots */ + int numtslotspd[16];/* Number of NCQ slots per dev */ + int numhslots; /* Number of held slots */ + int readlog; /* Our READ LOG active */ + int fatalerr; /* Fatal error happened */ + int lastslot; /* Last used slot */ + int taggedtarget; /* Last tagged target */ + int out_idx; /* Next written CRQB */ + int in_idx; /* Next read CRPB */ + u_int transfersize; /* PIO transfer size */ + u_int donecount; /* PIO bytes sent/received */ + u_int basic_dma; /* Basic DMA used for ATAPI */ + u_int fake_busy; /* Fake busy bit after command submission */ + union ccb *frozen; /* Frozen command */ + struct callout pm_timer; /* Power management events */ + + struct mvs_device user[16]; /* User-specified settings */ + struct mvs_device curr[16]; /* Current
settings */ +}; + +/* structure describing a MVS controller */ +struct mvs_controller { + device_t dev; + int r_rid; + struct resource *r_mem; + struct rman sc_iomem; + struct mvs_controller_irq { + struct resource *r_irq; + void *handle; + int r_irq_rid; + } irq; + int quirks; + int channels; + int ccc; /* CCC timeout */ + int cccc; /* CCC commands */ + struct mtx mtx; /* MIM access lock */ + int gmim; /* Globally wanted MIM bits */ + int pmim; /* Port wanted MIM bits */ + int mim; /* Current MIM bits */ + int msi; /* MSI enabled */ + int msia; /* MSI active */ + struct { + void (*function)(void *); + void *argument; + } interrupt[MVS_MAX_PORTS]; +}; + +enum mvs_err_type { + MVS_ERR_NONE, /* No error */ + MVS_ERR_INVALID, /* Error detected by us before submitting. */ + MVS_ERR_INNOCENT, /* Innocent victim. */ + MVS_ERR_TFE, /* Task File Error. */ + MVS_ERR_SATA, /* SATA error. */ + MVS_ERR_TIMEOUT, /* Command execution timeout. */ + MVS_ERR_NCQ, /* NCQ command error. CCB should be put on hold + * until READ LOG executed to reveal error. 
*/ +}; + +struct mvs_intr_arg { + void *arg; + u_int cause; +}; + +extern devclass_t mvs_devclass; + +/* macros to hide busspace ugliness */ +#define ATA_INB(res, offset) \ + bus_read_1((res), (offset)) +#define ATA_INW(res, offset) \ + bus_read_2((res), (offset)) +#define ATA_INL(res, offset) \ + bus_read_4((res), (offset)) +#define ATA_INSW(res, offset, addr, count) \ + bus_read_multi_2((res), (offset), (addr), (count)) +#define ATA_INSW_STRM(res, offset, addr, count) \ + bus_read_multi_stream_2((res), (offset), (addr), (count)) +#define ATA_INSL(res, offset, addr, count) \ + bus_read_multi_4((res), (offset), (addr), (count)) +#define ATA_INSL_STRM(res, offset, addr, count) \ + bus_read_multi_stream_4((res), (offset), (addr), (count)) +#define ATA_OUTB(res, offset, value) \ + bus_write_1((res), (offset), (value)) +#define ATA_OUTW(res, offset, value) \ + bus_write_2((res), (offset), (value)) +#define ATA_OUTL(res, offset, value) \ + bus_write_4((res), (offset), (value)) +#define ATA_OUTSW(res, offset, addr, count) \ + bus_write_multi_2((res), (offset), (addr), (count)) +#define ATA_OUTSW_STRM(res, offset, addr, count) \ + bus_write_multi_stream_2((res), (offset), (addr), (count)) +#define ATA_OUTSL(res, offset, addr, count) \ + bus_write_multi_4((res), (offset), (addr), (count)) +#define ATA_OUTSL_STRM(res, offset, addr, count) \ + bus_write_multi_stream_4((res), (offset), (addr), (count)) diff --git a/sys/dev/mvs/mvs_if.m b/sys/dev/mvs/mvs_if.m new file mode 100644 index 00000000000..e7442192065 --- /dev/null +++ b/sys/dev/mvs/mvs_if.m @@ -0,0 +1,34 @@ +# Copyright (c) 2010 Alexander Motin +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# 1.
Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer, +# without modification, immediately at the beginning of the file. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# $FreeBSD$ + +INTERFACE mvs; + +METHOD void edma { + device_t dev; + device_t child; + int mode; +}; + diff --git a/sys/dev/mvs/mvs_pci.c b/sys/dev/mvs/mvs_pci.c new file mode 100644 index 00000000000..4fae627c6aa --- /dev/null +++ b/sys/dev/mvs/mvs_pci.c @@ -0,0 +1,507 @@ +/*- + * Copyright (c) 2010 Alexander Motin + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer, + * without modification, immediately at the beginning of the file. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "mvs.h" + +/* local prototypes */ +static int mvs_setup_interrupt(device_t dev); +static void mvs_intr(void *data); +static int mvs_suspend(device_t dev); +static int mvs_resume(device_t dev); +static int mvs_ctlr_setup(device_t dev); + +static struct { + uint32_t id; + uint8_t rev; + const char *name; + int ports; + int quirks; +} mvs_ids[] = { + {0x504011ab, 0x00, "Marvell 88SX5040", 4, MVS_Q_GENI}, + {0x504111ab, 0x00, "Marvell 88SX5041", 4, MVS_Q_GENI}, + {0x508011ab, 0x00, "Marvell 88SX5080", 8, MVS_Q_GENI}, + {0x508111ab, 0x00, "Marvell 88SX5081", 8, MVS_Q_GENI}, + {0x604011ab, 0x00, "Marvell 88SX6040", 4, MVS_Q_GENII}, + {0x604111ab, 0x00, "Marvell 88SX6041", 4, MVS_Q_GENII}, + {0x604211ab, 0x00, "Marvell 88SX6042", 4, MVS_Q_GENIIE}, + {0x608011ab, 0x00, "Marvell 88SX6080", 8, MVS_Q_GENII}, + {0x608111ab, 
0x00, "Marvell 88SX6081", 8, MVS_Q_GENII}, + {0x704211ab, 0x00, "Marvell 88SX7042", 4, MVS_Q_GENIIE|MVS_Q_CT}, + {0x02419005, 0x00, "Adaptec 1420SA", 4, MVS_Q_GENII}, + {0x02439005, 0x00, "Adaptec 1430SA", 4, MVS_Q_GENIIE|MVS_Q_CT}, + {0x00000000, 0x00, NULL, 0, 0} +}; + +static int +mvs_probe(device_t dev) +{ + char buf[64]; + int i; + uint32_t devid = pci_get_devid(dev); + uint8_t revid = pci_get_revid(dev); + + for (i = 0; mvs_ids[i].id != 0; i++) { + if (mvs_ids[i].id == devid && + mvs_ids[i].rev <= revid) { + snprintf(buf, sizeof(buf), "%s SATA controller", + mvs_ids[i].name); + device_set_desc_copy(dev, buf); + return (BUS_PROBE_VENDOR); + } + } + return (ENXIO); +} + +static int +mvs_attach(device_t dev) +{ + struct mvs_controller *ctlr = device_get_softc(dev); + device_t child; + int error, unit, i; + uint32_t devid = pci_get_devid(dev); + uint8_t revid = pci_get_revid(dev); + + ctlr->dev = dev; + i = 0; + while (mvs_ids[i].id != 0 && + (mvs_ids[i].id != devid || + mvs_ids[i].rev > revid)) + i++; + ctlr->channels = mvs_ids[i].ports; + ctlr->quirks = mvs_ids[i].quirks; + resource_int_value(device_get_name(dev), + device_get_unit(dev), "ccc", &ctlr->ccc); + ctlr->cccc = 8; + resource_int_value(device_get_name(dev), + device_get_unit(dev), "cccc", &ctlr->cccc); + if (ctlr->ccc == 0 || ctlr->cccc == 0) { + ctlr->ccc = 0; + ctlr->cccc = 0; + } + if (ctlr->ccc > 100000) + ctlr->ccc = 100000; + device_printf(dev, + "Gen-%s, %d %sGbps ports, Port Multiplier %s%s\n", + ((ctlr->quirks & MVS_Q_GENI) ? "I" : + ((ctlr->quirks & MVS_Q_GENII) ? "II" : "IIe")), + ctlr->channels, + ((ctlr->quirks & MVS_Q_GENI) ? "1.5" : "3"), + ((ctlr->quirks & MVS_Q_GENI) ? + "not supported" : "supported"), + ((ctlr->quirks & MVS_Q_GENIIE) ? + " with FBS" : "")); + mtx_init(&ctlr->mtx, "MVS controller lock", NULL, MTX_DEF); + /* We should have a memory BAR(0). 
*/ + ctlr->r_rid = PCIR_BAR(0); + if (!(ctlr->r_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, + &ctlr->r_rid, RF_ACTIVE))) + return ENXIO; + /* Setup our own memory management for channels. */ + ctlr->sc_iomem.rm_type = RMAN_ARRAY; + ctlr->sc_iomem.rm_descr = "I/O memory addresses"; + if ((error = rman_init(&ctlr->sc_iomem)) != 0) { + bus_release_resource(dev, SYS_RES_MEMORY, ctlr->r_rid, ctlr->r_mem); + return (error); + } + if ((error = rman_manage_region(&ctlr->sc_iomem, + rman_get_start(ctlr->r_mem), rman_get_end(ctlr->r_mem))) != 0) { + bus_release_resource(dev, SYS_RES_MEMORY, ctlr->r_rid, ctlr->r_mem); + rman_fini(&ctlr->sc_iomem); + return (error); + } + pci_enable_busmaster(dev); + mvs_ctlr_setup(dev); + /* Setup interrupts. */ + if (mvs_setup_interrupt(dev)) { + bus_release_resource(dev, SYS_RES_MEMORY, ctlr->r_rid, ctlr->r_mem); + rman_fini(&ctlr->sc_iomem); + return ENXIO; + } + /* Attach all channels on this controller */ + for (unit = 0; unit < ctlr->channels; unit++) { + child = device_add_child(dev, "mvsch", -1); + if (child == NULL) + device_printf(dev, "failed to add channel device\n"); + else + device_set_ivars(child, (void *)(intptr_t)unit); + } + bus_generic_attach(dev); + return 0; +} + +static int +mvs_detach(device_t dev) +{ + struct mvs_controller *ctlr = device_get_softc(dev); + device_t *children; + int nchildren, i; + + /* Detach & delete all children */ + if (!device_get_children(dev, &children, &nchildren)) { + for (i = 0; i < nchildren; i++) + device_delete_child(dev, children[i]); + free(children, M_TEMP); + } + /* Free interrupt. */ + if (ctlr->irq.r_irq) { + bus_teardown_intr(dev, ctlr->irq.r_irq, + ctlr->irq.handle); + bus_release_resource(dev, SYS_RES_IRQ, + ctlr->irq.r_irq_rid, ctlr->irq.r_irq); + } + pci_release_msi(dev); + /* Free memory. 
*/ + rman_fini(&ctlr->sc_iomem); + if (ctlr->r_mem) + bus_release_resource(dev, SYS_RES_MEMORY, ctlr->r_rid, ctlr->r_mem); + mtx_destroy(&ctlr->mtx); + return (0); +} + +static int +mvs_ctlr_setup(device_t dev) +{ + struct mvs_controller *ctlr = device_get_softc(dev); + int i, ccc = ctlr->ccc, cccc = ctlr->cccc, ccim = 0; + + /* Mask chip interrupts */ + ATA_OUTL(ctlr->r_mem, CHIP_MIM, 0x00000000); + /* Mask PCI interrupts */ + ATA_OUTL(ctlr->r_mem, CHIP_PCIIM, 0x00000000); + /* Clear PCI interrupts */ + ATA_OUTL(ctlr->r_mem, CHIP_PCIIC, 0x00000000); + if (ccc && bootverbose) { + device_printf(dev, + "CCC with %dus/%dcmd enabled\n", + ctlr->ccc, ctlr->cccc); + } + ccc *= 150; + /* Configure chip-global CCC */ + if (ctlr->channels > 4 && (ctlr->quirks & MVS_Q_GENI) == 0) { + ATA_OUTL(ctlr->r_mem, CHIP_ICT, cccc); + ATA_OUTL(ctlr->r_mem, CHIP_ITT, ccc); + ATA_OUTL(ctlr->r_mem, CHIP_ICC, ~CHIP_ICC_ALL_PORTS); + if (ccc) + ccim |= IC_ALL_PORTS_COAL_DONE; + ccc = 0; + cccc = 0; + } + for (i = 0; i < ctlr->channels / 4; i++) { + /* Configure per-HC CCC */ + ATA_OUTL(ctlr->r_mem, HC_BASE(i) + HC_ICT, cccc); + ATA_OUTL(ctlr->r_mem, HC_BASE(i) + HC_ITT, ccc); + if (ccc) + ccim |= (IC_HC0_COAL_DONE << (i * IC_HC_SHIFT)); + /* Clear HC interrupts */ + ATA_OUTL(ctlr->r_mem, HC_BASE(i) + HC_IC, 0x00000000); + } + /* Enable chip interrupts */ + ctlr->gmim = (ccim ? ccim : (IC_DONE_HC0 | IC_DONE_HC1)) | + IC_ERR_HC0 | IC_ERR_HC1; + ctlr->mim = ctlr->gmim | ctlr->pmim; + ATA_OUTL(ctlr->r_mem, CHIP_MIM, ctlr->mim); + /* Enable PCI interrupts */ + ATA_OUTL(ctlr->r_mem, CHIP_PCIIM, 0x007fffff); + return (0); +} + +static void +mvs_edma(device_t dev, device_t child, int mode) +{ + struct mvs_controller *ctlr = device_get_softc(dev); + int unit = ((struct mvs_channel *)device_get_softc(child))->unit; + int bit = IC_DONE_IRQ << (unit * 2 + unit / 4) ; + + if (ctlr->ccc == 0) + return; + /* CCC is not working for non-EDMA mode. Unmask device interrupts. 
*/ + mtx_lock(&ctlr->mtx); + if (mode == MVS_EDMA_OFF) + ctlr->pmim |= bit; + else + ctlr->pmim &= ~bit; + ctlr->mim = ctlr->gmim | ctlr->pmim; + if (!ctlr->msia) + ATA_OUTL(ctlr->r_mem, CHIP_MIM, ctlr->mim); + mtx_unlock(&ctlr->mtx); +} + +static int +mvs_suspend(device_t dev) +{ + struct mvs_controller *ctlr = device_get_softc(dev); + + bus_generic_suspend(dev); + /* Mask chip interrupts */ + ATA_OUTL(ctlr->r_mem, CHIP_MIM, 0x00000000); + /* Mask PCI interrupts */ + ATA_OUTL(ctlr->r_mem, CHIP_PCIIM, 0x00000000); + return 0; +} + +static int +mvs_resume(device_t dev) +{ + + mvs_ctlr_setup(dev); + return (bus_generic_resume(dev)); +} + +static int +mvs_setup_interrupt(device_t dev) +{ + struct mvs_controller *ctlr = device_get_softc(dev); + int msi = 0; + + /* Process hints. */ + resource_int_value(device_get_name(dev), + device_get_unit(dev), "msi", &msi); + if (msi < 0) + msi = 0; + else if (msi > 0) + msi = min(1, pci_msi_count(dev)); + /* Allocate MSI if needed/present. */ + if (msi && pci_alloc_msi(dev, &msi) != 0) + msi = 0; + ctlr->msi = msi; + /* Allocate all IRQs. */ + ctlr->irq.r_irq_rid = msi ? 1 : 0; + if (!(ctlr->irq.r_irq = bus_alloc_resource_any(dev, SYS_RES_IRQ, + &ctlr->irq.r_irq_rid, RF_SHAREABLE | RF_ACTIVE))) { + device_printf(dev, "unable to map interrupt\n"); + return (ENXIO); + } + if ((bus_setup_intr(dev, ctlr->irq.r_irq, ATA_INTR_FLAGS, NULL, + mvs_intr, ctlr, &ctlr->irq.handle))) { + device_printf(dev, "unable to setup interrupt\n"); + bus_release_resource(dev, SYS_RES_IRQ, + ctlr->irq.r_irq_rid, ctlr->irq.r_irq); + ctlr->irq.r_irq = 0; + return (ENXIO); + } + return (0); +} + +/* + * Common case interrupt handler. + */ +static void +mvs_intr(void *data) +{ + struct mvs_controller *ctlr = data; + struct mvs_intr_arg arg; + void (*function)(void *); + int p; + u_int32_t ic, aic; + + ic = ATA_INL(ctlr->r_mem, CHIP_MIC); +//device_printf(ctlr->dev, "irq MIC:%08x\n", ic); + if (ctlr->msi) { + /* We have to mask MSI during processing. 
*/ + mtx_lock(&ctlr->mtx); + ATA_OUTL(ctlr->r_mem, CHIP_MIM, 0); + ctlr->msia = 1; /* Deny MIM update during processing. */ + mtx_unlock(&ctlr->mtx); + } else if (ic == 0) + return; + /* Acknowledge all-ports CCC interrupt. */ + if (ic & IC_ALL_PORTS_COAL_DONE) + ATA_OUTL(ctlr->r_mem, CHIP_ICC, ~CHIP_ICC_ALL_PORTS); + for (p = 0; p < ctlr->channels; p++) { + if ((p & 3) == 0) { + if (p != 0) + ic >>= 1; + if ((ic & IC_HC0) == 0) { + p += 3; + ic >>= 8; + continue; + } + /* Acknowledge interrupts of this HC. */ + aic = 0; + if (ic & (IC_DONE_IRQ << 0)) + aic |= HC_IC_DONE(0) | HC_IC_DEV(0); + if (ic & (IC_DONE_IRQ << 2)) + aic |= HC_IC_DONE(1) | HC_IC_DEV(1); + if (ic & (IC_DONE_IRQ << 4)) + aic |= HC_IC_DONE(2) | HC_IC_DEV(2); + if (ic & (IC_DONE_IRQ << 6)) + aic |= HC_IC_DONE(3) | HC_IC_DEV(3); + if (ic & IC_HC0_COAL_DONE) + aic |= HC_IC_COAL; + ATA_OUTL(ctlr->r_mem, HC_BASE(p == 4) + HC_IC, ~aic); + } + /* Call per-port interrupt handler. */ + arg.cause = ic & (IC_ERR_IRQ|IC_DONE_IRQ); + if ((arg.cause != 0) && + (function = ctlr->interrupt[p].function)) { + arg.arg = ctlr->interrupt[p].argument; + function(&arg); + } + ic >>= 2; + } + if (ctlr->msi) { + /* Unmasking MSI triggers next interrupt, if needed. */ + mtx_lock(&ctlr->mtx); + ctlr->msia = 0; /* Allow MIM update. 
*/ + ATA_OUTL(ctlr->r_mem, CHIP_MIM, ctlr->mim); + mtx_unlock(&ctlr->mtx); + } +} + +static struct resource * +mvs_alloc_resource(device_t dev, device_t child, int type, int *rid, + u_long start, u_long end, u_long count, u_int flags) +{ + struct mvs_controller *ctlr = device_get_softc(dev); + int unit = ((struct mvs_channel *)device_get_softc(child))->unit; + struct resource *res = NULL; + int offset = HC_BASE(unit >> 2) + PORT_BASE(unit & 0x03); + long st; + + switch (type) { + case SYS_RES_MEMORY: + st = rman_get_start(ctlr->r_mem); + res = rman_reserve_resource(&ctlr->sc_iomem, st + offset, + st + offset + PORT_SIZE - 1, PORT_SIZE, RF_ACTIVE, child); + if (res) { + bus_space_handle_t bsh; + bus_space_tag_t bst; + bsh = rman_get_bushandle(ctlr->r_mem); + bst = rman_get_bustag(ctlr->r_mem); + bus_space_subregion(bst, bsh, offset, PORT_SIZE, &bsh); + rman_set_bushandle(res, bsh); + rman_set_bustag(res, bst); + } + break; + case SYS_RES_IRQ: + if (*rid == ATA_IRQ_RID) + res = ctlr->irq.r_irq; + break; + } + return (res); +} + +static int +mvs_release_resource(device_t dev, device_t child, int type, int rid, + struct resource *r) +{ + + switch (type) { + case SYS_RES_MEMORY: + rman_release_resource(r); + return (0); + case SYS_RES_IRQ: + if (rid != ATA_IRQ_RID) + return ENOENT; + return (0); + } + return (EINVAL); +} + +static int +mvs_setup_intr(device_t dev, device_t child, struct resource *irq, + int flags, driver_filter_t *filter, driver_intr_t *function, + void *argument, void **cookiep) +{ + struct mvs_controller *ctlr = device_get_softc(dev); + int unit = (intptr_t)device_get_ivars(child); + + if (filter != NULL) { + printf("mvs.c: we cannot use a filter here\n"); + return (EINVAL); + } + ctlr->interrupt[unit].function = function; + ctlr->interrupt[unit].argument = argument; + return (0); +} + +static int +mvs_teardown_intr(device_t dev, device_t child, struct resource *irq, + void *cookie) +{ + struct mvs_controller *ctlr = device_get_softc(dev); + int unit 
= (intptr_t)device_get_ivars(child); + + ctlr->interrupt[unit].function = NULL; + ctlr->interrupt[unit].argument = NULL; + return (0); +} + +static int +mvs_print_child(device_t dev, device_t child) +{ + int retval; + + retval = bus_print_child_header(dev, child); + retval += printf(" at channel %d", + (int)(intptr_t)device_get_ivars(child)); + retval += bus_print_child_footer(dev, child); + + return (retval); +} + +static device_method_t mvs_methods[] = { + DEVMETHOD(device_probe, mvs_probe), + DEVMETHOD(device_attach, mvs_attach), + DEVMETHOD(device_detach, mvs_detach), + DEVMETHOD(device_suspend, mvs_suspend), + DEVMETHOD(device_resume, mvs_resume), + DEVMETHOD(bus_print_child, mvs_print_child), + DEVMETHOD(bus_alloc_resource, mvs_alloc_resource), + DEVMETHOD(bus_release_resource, mvs_release_resource), + DEVMETHOD(bus_setup_intr, mvs_setup_intr), + DEVMETHOD(bus_teardown_intr,mvs_teardown_intr), + DEVMETHOD(mvs_edma, mvs_edma), + { 0, 0 } +}; +static driver_t mvs_driver = { + "mvs", + mvs_methods, + sizeof(struct mvs_controller) +}; +DRIVER_MODULE(mvs, pci, mvs_driver, mvs_devclass, 0, 0); +MODULE_VERSION(mvs, 1); +MODULE_DEPEND(mvs, cam, 1, 1, 1); + diff --git a/sys/dev/mvs/mvs_soc.c b/sys/dev/mvs/mvs_soc.c new file mode 100644 index 00000000000..298a87336a7 --- /dev/null +++ b/sys/dev/mvs/mvs_soc.c @@ -0,0 +1,437 @@ +/*- + * Copyright (c) 2010 Alexander Motin + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer, + * without modification, immediately at the beginning of the file. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "mvs.h" + +/* local prototypes */ +static int mvs_setup_interrupt(device_t dev); +static void mvs_intr(void *data); +static int mvs_suspend(device_t dev); +static int mvs_resume(device_t dev); +static int mvs_ctlr_setup(device_t dev); + +static struct { + uint32_t id; + uint8_t rev; + const char *name; + int ports; + int quirks; +} mvs_ids[] = { + {MV_DEV_88F5182, 0x00, "Marvell 88F5182", 2, MVS_Q_GENIIE|MVS_Q_SOC}, + {MV_DEV_88F6281, 0x00, "Marvell 88F6281", 2, MVS_Q_GENIIE|MVS_Q_SOC}, + {MV_DEV_MV78100, 0x00, "Marvell MV78100", 2, MVS_Q_GENIIE|MVS_Q_SOC}, + {MV_DEV_MV78100_Z0, 0x00,"Marvell MV78100", 2, MVS_Q_GENIIE|MVS_Q_SOC}, + {0, 0x00, NULL, 0, 0} +}; + +static int +mvs_probe(device_t dev) +{ + char buf[64]; + int i; + uint32_t devid, revid; + + soc_id(&devid, &revid); + for (i = 0; mvs_ids[i].id != 0; i++) { + if (mvs_ids[i].id == devid && + mvs_ids[i].rev <= revid) { + snprintf(buf, sizeof(buf), "%s SATA controller", + mvs_ids[i].name); + device_set_desc_copy(dev, buf); + return (BUS_PROBE_VENDOR); + } + } + return 
(ENXIO); +} + +static int +mvs_attach(device_t dev) +{ + struct mvs_controller *ctlr = device_get_softc(dev); + device_t child; + int error, unit, i; + uint32_t devid, revid; + + soc_id(&devid, &revid); + ctlr->dev = dev; + i = 0; + while (mvs_ids[i].id != 0 && + (mvs_ids[i].id != devid || + mvs_ids[i].rev > revid)) + i++; + ctlr->channels = mvs_ids[i].ports; + ctlr->quirks = mvs_ids[i].quirks; + resource_int_value(device_get_name(dev), + device_get_unit(dev), "ccc", &ctlr->ccc); + ctlr->cccc = 8; + resource_int_value(device_get_name(dev), + device_get_unit(dev), "cccc", &ctlr->cccc); + if (ctlr->ccc == 0 || ctlr->cccc == 0) { + ctlr->ccc = 0; + ctlr->cccc = 0; + } + if (ctlr->ccc > 100000) + ctlr->ccc = 100000; + device_printf(dev, + "Gen-%s, %d %sGbps ports, Port Multiplier %s%s\n", + ((ctlr->quirks & MVS_Q_GENI) ? "I" : + ((ctlr->quirks & MVS_Q_GENII) ? "II" : "IIe")), + ctlr->channels, + ((ctlr->quirks & MVS_Q_GENI) ? "1.5" : "3"), + ((ctlr->quirks & MVS_Q_GENI) ? + "not supported" : "supported"), + ((ctlr->quirks & MVS_Q_GENIIE) ? + " with FBS" : "")); + mtx_init(&ctlr->mtx, "MVS controller lock", NULL, MTX_DEF); + /* We should have a memory BAR(0). */ + ctlr->r_rid = 0; + if (!(ctlr->r_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, + &ctlr->r_rid, RF_ACTIVE))) + return ENXIO; + /* Setup our own memory management for channels. */ + ctlr->sc_iomem.rm_type = RMAN_ARRAY; + ctlr->sc_iomem.rm_descr = "I/O memory addresses"; + if ((error = rman_init(&ctlr->sc_iomem)) != 0) { + bus_release_resource(dev, SYS_RES_MEMORY, ctlr->r_rid, ctlr->r_mem); + return (error); + } + if ((error = rman_manage_region(&ctlr->sc_iomem, + rman_get_start(ctlr->r_mem), rman_get_end(ctlr->r_mem))) != 0) { + bus_release_resource(dev, SYS_RES_MEMORY, ctlr->r_rid, ctlr->r_mem); + rman_fini(&ctlr->sc_iomem); + return (error); + } + mvs_ctlr_setup(dev); + /* Setup interrupts. 
*/ + if (mvs_setup_interrupt(dev)) { + bus_release_resource(dev, SYS_RES_MEMORY, ctlr->r_rid, ctlr->r_mem); + rman_fini(&ctlr->sc_iomem); + return ENXIO; + } + /* Attach all channels on this controller */ + for (unit = 0; unit < ctlr->channels; unit++) { + child = device_add_child(dev, "mvsch", -1); + if (child == NULL) + device_printf(dev, "failed to add channel device\n"); + else + device_set_ivars(child, (void *)(intptr_t)unit); + } + bus_generic_attach(dev); + return 0; +} + +static int +mvs_detach(device_t dev) +{ + struct mvs_controller *ctlr = device_get_softc(dev); + device_t *children; + int nchildren, i; + + /* Detach & delete all children */ + if (!device_get_children(dev, &children, &nchildren)) { + for (i = 0; i < nchildren; i++) + device_delete_child(dev, children[i]); + free(children, M_TEMP); + } + /* Free interrupt. */ + if (ctlr->irq.r_irq) { + bus_teardown_intr(dev, ctlr->irq.r_irq, + ctlr->irq.handle); + bus_release_resource(dev, SYS_RES_IRQ, + ctlr->irq.r_irq_rid, ctlr->irq.r_irq); + } + /* Free memory. */ + rman_fini(&ctlr->sc_iomem); + if (ctlr->r_mem) + bus_release_resource(dev, SYS_RES_MEMORY, ctlr->r_rid, ctlr->r_mem); + mtx_destroy(&ctlr->mtx); + return (0); +} + +static int +mvs_ctlr_setup(device_t dev) +{ + struct mvs_controller *ctlr = device_get_softc(dev); + int ccc = ctlr->ccc, cccc = ctlr->cccc, ccim = 0; + + /* Mask chip interrupts */ + ATA_OUTL(ctlr->r_mem, CHIP_SOC_MIM, 0x00000000); + /* Clear HC interrupts */ + ATA_OUTL(ctlr->r_mem, HC_IC, 0x00000000); + /* Clear chip interrupts */ + ATA_OUTL(ctlr->r_mem, CHIP_SOC_MIC, 0); + /* Configure per-HC CCC */ + if (ccc && bootverbose) { + device_printf(dev, + "CCC with %dus/%dcmd enabled\n", + ctlr->ccc, ctlr->cccc); + } + ccc *= 150; + ATA_OUTL(ctlr->r_mem, HC_ICT, cccc); + ATA_OUTL(ctlr->r_mem, HC_ITT, ccc); + if (ccc) + ccim |= IC_HC0_COAL_DONE; + /* Enable chip interrupts */ + ctlr->gmim = (ccc ? 
IC_HC0_COAL_DONE : IC_DONE_HC0) | IC_ERR_HC0; + ATA_OUTL(ctlr->r_mem, CHIP_SOC_MIM, ctlr->gmim | ctlr->pmim); + return (0); +} + +static void +mvs_edma(device_t dev, device_t child, int mode) +{ + struct mvs_controller *ctlr = device_get_softc(dev); + int unit = ((struct mvs_channel *)device_get_softc(child))->unit; + int bit = IC_DONE_IRQ << (unit * 2); + + if (ctlr->ccc == 0) + return; + /* CCC is not working for non-EDMA mode. Unmask device interrupts. */ + mtx_lock(&ctlr->mtx); + if (mode == MVS_EDMA_OFF) + ctlr->pmim |= bit; + else + ctlr->pmim &= ~bit; + ATA_OUTL(ctlr->r_mem, CHIP_SOC_MIM, ctlr->gmim | ctlr->pmim); + mtx_unlock(&ctlr->mtx); +} + +static int +mvs_suspend(device_t dev) +{ + struct mvs_controller *ctlr = device_get_softc(dev); + + bus_generic_suspend(dev); + /* Mask chip interrupts */ + ATA_OUTL(ctlr->r_mem, CHIP_SOC_MIM, 0x00000000); + return 0; +} + +static int +mvs_resume(device_t dev) +{ + + mvs_ctlr_setup(dev); + return (bus_generic_resume(dev)); +} + +static int +mvs_setup_interrupt(device_t dev) +{ + struct mvs_controller *ctlr = device_get_softc(dev); + + /* Allocate all IRQs. */ + ctlr->irq.r_irq_rid = 0; + if (!(ctlr->irq.r_irq = bus_alloc_resource_any(dev, SYS_RES_IRQ, + &ctlr->irq.r_irq_rid, RF_SHAREABLE | RF_ACTIVE))) { + device_printf(dev, "unable to map interrupt\n"); + return (ENXIO); + } + if ((bus_setup_intr(dev, ctlr->irq.r_irq, ATA_INTR_FLAGS, NULL, + mvs_intr, ctlr, &ctlr->irq.handle))) { + device_printf(dev, "unable to setup interrupt\n"); + bus_release_resource(dev, SYS_RES_IRQ, + ctlr->irq.r_irq_rid, ctlr->irq.r_irq); + ctlr->irq.r_irq = 0; + return (ENXIO); + } + return (0); +} + +/* + * Common case interrupt handler. 
+ */ +static void +mvs_intr(void *data) +{ + struct mvs_controller *ctlr = data; + struct mvs_intr_arg arg; + void (*function)(void *); + int p; + u_int32_t ic, aic; + + ic = ATA_INL(ctlr->r_mem, CHIP_SOC_MIC); +//device_printf(ctlr->dev, "irq MIC:%08x\n", ic); + if ((ic & IC_HC0) == 0) + return; + /* Acknowledge interrupts of this HC. */ + aic = 0; + if (ic & (IC_DONE_IRQ << 0)) + aic |= HC_IC_DONE(0) | HC_IC_DEV(0); + if (ic & (IC_DONE_IRQ << 2)) + aic |= HC_IC_DONE(1) | HC_IC_DEV(1); + if (ic & (IC_DONE_IRQ << 4)) + aic |= HC_IC_DONE(2) | HC_IC_DEV(2); + if (ic & (IC_DONE_IRQ << 6)) + aic |= HC_IC_DONE(3) | HC_IC_DEV(3); + if (ic & IC_HC0_COAL_DONE) + aic |= HC_IC_COAL; + ATA_OUTL(ctlr->r_mem, HC_IC, ~aic); + /* Call per-port interrupt handler. */ + for (p = 0; p < ctlr->channels; p++) { + arg.cause = ic & (IC_ERR_IRQ|IC_DONE_IRQ); + if ((arg.cause != 0) && + (function = ctlr->interrupt[p].function)) { + arg.arg = ctlr->interrupt[p].argument; + function(&arg); + } + ic >>= 2; + } +} + +static struct resource * +mvs_alloc_resource(device_t dev, device_t child, int type, int *rid, + u_long start, u_long end, u_long count, u_int flags) +{ + struct mvs_controller *ctlr = device_get_softc(dev); + int unit = ((struct mvs_channel *)device_get_softc(child))->unit; + struct resource *res = NULL; + int offset = PORT_BASE(unit & 0x03); + long st; + + switch (type) { + case SYS_RES_MEMORY: + st = rman_get_start(ctlr->r_mem); + res = rman_reserve_resource(&ctlr->sc_iomem, st + offset, + st + offset + PORT_SIZE - 1, PORT_SIZE, RF_ACTIVE, child); + if (res) { + bus_space_handle_t bsh; + bus_space_tag_t bst; + bsh = rman_get_bushandle(ctlr->r_mem); + bst = rman_get_bustag(ctlr->r_mem); + bus_space_subregion(bst, bsh, offset, PORT_SIZE, &bsh); + rman_set_bushandle(res, bsh); + rman_set_bustag(res, bst); + } + break; + case SYS_RES_IRQ: + if (*rid == ATA_IRQ_RID) + res = ctlr->irq.r_irq; + break; + } + return (res); +} + +static int +mvs_release_resource(device_t dev, device_t 
child, int type, int rid, + struct resource *r) +{ + + switch (type) { + case SYS_RES_MEMORY: + rman_release_resource(r); + return (0); + case SYS_RES_IRQ: + if (rid != ATA_IRQ_RID) + return ENOENT; + return (0); + } + return (EINVAL); +} + +static int +mvs_setup_intr(device_t dev, device_t child, struct resource *irq, + int flags, driver_filter_t *filter, driver_intr_t *function, + void *argument, void **cookiep) +{ + struct mvs_controller *ctlr = device_get_softc(dev); + int unit = (intptr_t)device_get_ivars(child); + + if (filter != NULL) { + printf("mvs.c: we cannot use a filter here\n"); + return (EINVAL); + } + ctlr->interrupt[unit].function = function; + ctlr->interrupt[unit].argument = argument; + return (0); +} + +static int +mvs_teardown_intr(device_t dev, device_t child, struct resource *irq, + void *cookie) +{ + struct mvs_controller *ctlr = device_get_softc(dev); + int unit = (intptr_t)device_get_ivars(child); + + ctlr->interrupt[unit].function = NULL; + ctlr->interrupt[unit].argument = NULL; + return (0); +} + +static int +mvs_print_child(device_t dev, device_t child) +{ + int retval; + + retval = bus_print_child_header(dev, child); + retval += printf(" at channel %d", + (int)(intptr_t)device_get_ivars(child)); + retval += bus_print_child_footer(dev, child); + + return (retval); +} + +static device_method_t mvs_methods[] = { + DEVMETHOD(device_probe, mvs_probe), + DEVMETHOD(device_attach, mvs_attach), + DEVMETHOD(device_detach, mvs_detach), + DEVMETHOD(device_suspend, mvs_suspend), + DEVMETHOD(device_resume, mvs_resume), + DEVMETHOD(bus_print_child, mvs_print_child), + DEVMETHOD(bus_alloc_resource, mvs_alloc_resource), + DEVMETHOD(bus_release_resource, mvs_release_resource), + DEVMETHOD(bus_setup_intr, mvs_setup_intr), + DEVMETHOD(bus_teardown_intr,mvs_teardown_intr), + DEVMETHOD(mvs_edma, mvs_edma), + { 0, 0 } +}; +static driver_t mvs_driver = { + "sata", + mvs_methods, + sizeof(struct mvs_controller) +}; +DRIVER_MODULE(sata, mbus, mvs_driver, 
mvs_devclass, 0, 0); +MODULE_VERSION(sata, 1); + diff --git a/sys/dev/mwl/if_mwl.c b/sys/dev/mwl/if_mwl.c index e4e469b5bcf..662f201c195 100644 --- a/sys/dev/mwl/if_mwl.c +++ b/sys/dev/mwl/if_mwl.c @@ -404,8 +404,8 @@ mwl_attach(uint16_t devid, struct mwl_softc *sc) ifp->if_start = mwl_start; ifp->if_ioctl = mwl_ioctl; ifp->if_init = mwl_init; - IFQ_SET_MAXLEN(&ifp->if_snd, IFQ_MAXLEN); - ifp->if_snd.ifq_drv_maxlen = IFQ_MAXLEN; + IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen); + ifp->if_snd.ifq_drv_maxlen = ifqmaxlen; IFQ_SET_READY(&ifp->if_snd); ic->ic_ifp = ifp; diff --git a/sys/dev/mxge/if_mxge.c b/sys/dev/mxge/if_mxge.c index d7f58255241..e5a34bc70ed 100644 --- a/sys/dev/mxge/if_mxge.c +++ b/sys/dev/mxge/if_mxge.c @@ -883,6 +883,9 @@ mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data) case MXGEFW_CMD_ERROR_BUSY: err = EBUSY; break; + case MXGEFW_CMD_ERROR_I2C_ABSENT: + err = ENXIO; + break; default: device_printf(sc->dev, "mxge: command %d " @@ -2397,10 +2400,7 @@ mxge_get_buf_big(struct mxge_slice_state *ss, bus_dmamap_t map, int idx) mxge_rx_ring_t *rx = &ss->rx_big; int cnt, err, i; - if (rx->cl_size == MCLBYTES) - m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR); - else - m = m_getjcl(M_DONTWAIT, MT_DATA, M_PKTHDR, rx->cl_size); + m = m_getjcl(M_DONTWAIT, MT_DATA, M_PKTHDR, rx->cl_size); if (m == NULL) { rx->alloc_fail++; err = ENOBUFS; @@ -2782,37 +2782,25 @@ static struct mxge_media_type mxge_sfp_media_types[] = }; static void -mxge_set_media(mxge_softc_t *sc, int type) +mxge_media_set(mxge_softc_t *sc, int media_type) { - sc->media_flags |= type; - ifmedia_add(&sc->media, sc->media_flags, 0, NULL); - ifmedia_set(&sc->media, sc->media_flags); + + + ifmedia_add(&sc->media, IFM_ETHER | IFM_FDX | media_type, + 0, NULL); + ifmedia_set(&sc->media, IFM_ETHER | IFM_FDX | media_type); + sc->current_media = media_type; + sc->media.ifm_media = sc->media.ifm_cur->ifm_media; } - -/* - * Determine the media type for a NIC. 
Some XFPs will identify - * themselves only when their link is up, so this is initiated via a - * link up interrupt. However, this can potentially take up to - * several milliseconds, so it is run via the watchdog routine, rather - * than in the interrupt handler itself. This need only be done - * once, not each time the link is up. - */ static void -mxge_media_probe(mxge_softc_t *sc) +mxge_media_init(mxge_softc_t *sc) { - mxge_cmd_t cmd; - char *cage_type; char *ptr; - struct mxge_media_type *mxge_media_types = NULL; - int i, err, ms, mxge_media_type_entries; - uint32_t byte; + int i; - sc->need_media_probe = 0; - - /* if we've already set a media type, we're done */ - if (sc->media_flags != (IFM_ETHER | IFM_AUTO)) - return; + ifmedia_removeall(&sc->media); + mxge_media_set(sc, IFM_AUTO); /* * parse the product code to deterimine the interface type @@ -2823,6 +2811,7 @@ mxge_media_probe(mxge_softc_t *sc) ptr = sc->product_code_string; if (ptr == NULL) { device_printf(sc->dev, "Missing product code\n"); + return; } for (i = 0; i < 3; i++, ptr++) { @@ -2835,17 +2824,44 @@ mxge_media_probe(mxge_softc_t *sc) } if (*ptr == 'C') { /* -C is CX4 */ - mxge_set_media(sc, IFM_10G_CX4); - return; - } - else if (*ptr == 'Q') { + sc->connector = MXGE_CX4; + mxge_media_set(sc, IFM_10G_CX4); + } else if (*ptr == 'Q') { /* -Q is Quad Ribbon Fiber */ + sc->connector = MXGE_QRF; device_printf(sc->dev, "Quad Ribbon Fiber Media\n"); /* FreeBSD has no media type for Quad ribbon fiber */ - return; + } else if (*ptr == 'R') { + /* -R is XFP */ + sc->connector = MXGE_XFP; + } else if (*ptr == 'S' || *(ptr +1) == 'S') { + /* -S or -2S is SFP+ */ + sc->connector = MXGE_SFP; + } else { + device_printf(sc->dev, "Unknown media type: %c\n", *ptr); } +} - if (*ptr == 'R') { +/* + * Determine the media type for a NIC. Some XFPs will identify + * themselves only when their link is up, so this is initiated via a + * link up interrupt. 
However, this can potentially take up to + * several milliseconds, so it is run via the watchdog routine, rather + * than in the interrupt handler itself. + */ +static void +mxge_media_probe(mxge_softc_t *sc) +{ + mxge_cmd_t cmd; + char *cage_type; + + struct mxge_media_type *mxge_media_types = NULL; + int i, err, ms, mxge_media_type_entries; + uint32_t byte; + + sc->need_media_probe = 0; + + if (sc->connector == MXGE_XFP) { /* -R is XFP */ mxge_media_types = mxge_xfp_media_types; mxge_media_type_entries = @@ -2853,9 +2869,7 @@ mxge_media_probe(mxge_softc_t *sc) sizeof (mxge_xfp_media_types[0]); byte = MXGE_XFP_COMPLIANCE_BYTE; cage_type = "XFP"; - } - - if (*ptr == 'S' || *(ptr +1) == 'S') { + } else if (sc->connector == MXGE_SFP) { /* -S or -2S is SFP+ */ mxge_media_types = mxge_sfp_media_types; mxge_media_type_entries = @@ -2863,10 +2877,8 @@ mxge_media_probe(mxge_softc_t *sc) sizeof (mxge_sfp_media_types[0]); cage_type = "SFP+"; byte = 3; - } - - if (mxge_media_types == NULL) { - device_printf(sc->dev, "Unknown media type: %c\n", *ptr); + } else { + /* nothing to do; media type cannot change */ return; } @@ -2909,7 +2921,10 @@ mxge_media_probe(mxge_softc_t *sc) if (mxge_verbose) device_printf(sc->dev, "%s:%s\n", cage_type, mxge_media_types[0].name); - mxge_set_media(sc, mxge_media_types[0].flag); + if (sc->current_media != mxge_media_types[0].flag) { + mxge_media_init(sc); + mxge_media_set(sc, mxge_media_types[0].flag); + } return; } for (i = 1; i < mxge_media_type_entries; i++) { @@ -2919,12 +2934,16 @@ mxge_media_probe(mxge_softc_t *sc) cage_type, mxge_media_types[i].name); - mxge_set_media(sc, mxge_media_types[i].flag); + if (sc->current_media != mxge_media_types[i].flag) { + mxge_media_init(sc); + mxge_media_set(sc, mxge_media_types[i].flag); + } return; } } - device_printf(sc->dev, "%s media 0x%x unknown\n", cage_type, - cmd.data0); + if (mxge_verbose) + device_printf(sc->dev, "%s media 0x%x unknown\n", + cage_type, cmd.data0); return; } @@ -2988,10 
+3007,12 @@ mxge_intr(void *arg) sc->link_state = stats->link_up; if (sc->link_state) { if_link_state_change(sc->ifp, LINK_STATE_UP); + sc->ifp->if_baudrate = IF_Gbps(10UL); if (mxge_verbose) device_printf(sc->dev, "link up\n"); } else { if_link_state_change(sc->ifp, LINK_STATE_DOWN); + sc->ifp->if_baudrate = 0; if (mxge_verbose) device_printf(sc->dev, "link down\n"); } @@ -4026,9 +4047,9 @@ mxge_media_status(struct ifnet *ifp, struct ifmediareq *ifmr) if (sc == NULL) return; ifmr->ifm_status = IFM_AVALID; + ifmr->ifm_active = IFM_ETHER | IFM_FDX; ifmr->ifm_status |= sc->link_state ? IFM_ACTIVE : 0; - ifmr->ifm_active = IFM_AUTO | IFM_ETHER; - ifmr->ifm_active |= sc->link_state ? IFM_FDX : 0; + ifmr->ifm_active |= sc->current_media; } static int @@ -4135,6 +4156,9 @@ mxge_ioctl(struct ifnet *ifp, u_long command, caddr_t data) break; case SIOCGIFMEDIA: + mtx_lock(&sc->driver_mtx); + mxge_media_probe(sc); + mtx_unlock(&sc->driver_mtx); err = ifmedia_ioctl(ifp, (struct ifreq *)data, &sc->media, command); break; @@ -4732,7 +4756,7 @@ mxge_attach(device_t dev) ifp->if_baudrate = IF_Gbps(10UL); ifp->if_capabilities = IFCAP_RXCSUM | IFCAP_TXCSUM | IFCAP_TSO4 | - IFCAP_VLAN_MTU; + IFCAP_VLAN_MTU | IFCAP_LINKSTATE; #ifdef INET ifp->if_capabilities |= IFCAP_LRO; #endif @@ -4766,7 +4790,7 @@ mxge_attach(device_t dev) /* Initialise the ifmedia structure */ ifmedia_init(&sc->media, 0, mxge_media_change, mxge_media_status); - mxge_set_media(sc, IFM_ETHER | IFM_AUTO); + mxge_media_init(sc); mxge_media_probe(sc); sc->dying = 0; ether_ifattach(ifp, sc->mac_addr); diff --git a/sys/dev/mxge/if_mxge_var.h b/sys/dev/mxge/if_mxge_var.h index 5c1627f71e5..c85a29bfb58 100644 --- a/sys/dev/mxge/if_mxge_var.h +++ b/sys/dev/mxge/if_mxge_var.h @@ -268,6 +268,8 @@ struct mxge_softc { int num_slices; int rx_ring_size; int dying; + int connector; + int current_media; mxge_dma_t dmabench_dma; struct callout co_hdl; struct taskqueue *tq; @@ -293,6 +295,12 @@ struct mxge_softc { #define 
MXGE_MIN_THROTTLE 416 #define MXGE_MAX_THROTTLE 4096 +/* Types of connectors on NICs supported by this driver */ +#define MXGE_CX4 0 +#define MXGE_XFP 1 +#define MXGE_SFP 2 +#define MXGE_QRF 3 + #define MXGE_HIGHPART_TO_U32(X) \ (sizeof (X) == 8) ? ((uint32_t)((uint64_t)(X) >> 32)) : (0) #define MXGE_LOWPART_TO_U32(X) ((uint32_t)(X)) diff --git a/sys/dev/my/if_my.c b/sys/dev/my/if_my.c index 99f6071ba20..951473a3ba7 100644 --- a/sys/dev/my/if_my.c +++ b/sys/dev/my/if_my.c @@ -902,8 +902,8 @@ my_attach(device_t dev) ifp->if_start = my_start; ifp->if_init = my_init; ifp->if_baudrate = 10000000; - IFQ_SET_MAXLEN(&ifp->if_snd, IFQ_MAXLEN); - ifp->if_snd.ifq_drv_maxlen = IFQ_MAXLEN; + IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen); + ifp->if_snd.ifq_drv_maxlen = ifqmaxlen; IFQ_SET_READY(&ifp->if_snd); if (sc->my_info->my_did == MTD803ID) diff --git a/sys/dev/nfe/if_nfe.c b/sys/dev/nfe/if_nfe.c index 5d0bfd268f6..7d77754b90c 100644 --- a/sys/dev/nfe/if_nfe.c +++ b/sys/dev/nfe/if_nfe.c @@ -2366,7 +2366,12 @@ nfe_encap(struct nfe_softc *sc, struct mbuf **m_head) m = *m_head; cflags = flags = 0; tso_segsz = 0; - if ((m->m_pkthdr.csum_flags & NFE_CSUM_FEATURES) != 0) { + if ((m->m_pkthdr.csum_flags & CSUM_TSO) != 0) { + tso_segsz = (uint32_t)m->m_pkthdr.tso_segsz << + NFE_TX_TSO_SHIFT; + cflags &= ~(NFE_TX_IP_CSUM | NFE_TX_TCP_UDP_CSUM); + cflags |= NFE_TX_TSO; + } else if ((m->m_pkthdr.csum_flags & NFE_CSUM_FEATURES) != 0) { if ((m->m_pkthdr.csum_flags & CSUM_IP) != 0) cflags |= NFE_TX_IP_CSUM; if ((m->m_pkthdr.csum_flags & CSUM_TCP) != 0) @@ -2374,12 +2379,6 @@ nfe_encap(struct nfe_softc *sc, struct mbuf **m_head) if ((m->m_pkthdr.csum_flags & CSUM_UDP) != 0) cflags |= NFE_TX_TCP_UDP_CSUM; } - if ((m->m_pkthdr.csum_flags & CSUM_TSO) != 0) { - tso_segsz = (uint32_t)m->m_pkthdr.tso_segsz << - NFE_TX_TSO_SHIFT; - cflags &= ~(NFE_TX_IP_CSUM | NFE_TX_TCP_UDP_CSUM); - cflags |= NFE_TX_TSO; - } for (i = 0; i < nsegs; i++) { if (sc->nfe_flags & NFE_40BIT_ADDR) { diff --git 
a/sys/dev/nxge/if_nxge.c b/sys/dev/nxge/if_nxge.c index c86d7b61bad..0e67413747c 100644 --- a/sys/dev/nxge/if_nxge.c +++ b/sys/dev/nxge/if_nxge.c @@ -1190,7 +1190,7 @@ xge_interface_setup(device_t dev) ifnetp->if_start = xge_send; /* TODO: Check and assign optimal value */ - ifnetp->if_snd.ifq_maxlen = IFQ_MAXLEN; + ifnetp->if_snd.ifq_maxlen = ifqmaxlen; ifnetp->if_capabilities = IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM; diff --git a/sys/dev/pdq/pdq_ifsubr.c b/sys/dev/pdq/pdq_ifsubr.c index ec141502a9e..4df00825fe4 100644 --- a/sys/dev/pdq/pdq_ifsubr.c +++ b/sys/dev/pdq/pdq_ifsubr.c @@ -471,7 +471,7 @@ pdq_ifattach(pdq_softc_t *sc, const pdq_uint8_t *llc, pdq_type_t type) if_initname(ifp, device_get_name(sc->dev), device_get_unit(sc->dev)); ifp->if_softc = sc; ifp->if_init = pdq_ifinit; - ifp->if_snd.ifq_maxlen = IFQ_MAXLEN; + ifp->if_snd.ifq_maxlen = ifqmaxlen; ifp->if_flags = IFF_BROADCAST|IFF_SIMPLEX|IFF_NOTRAILERS|IFF_MULTICAST; ifp->if_ioctl = pdq_ifioctl; diff --git a/sys/dev/ppbus/if_plip.c b/sys/dev/ppbus/if_plip.c index 3e4c1ca3b2b..fc81aec97b8 100644 --- a/sys/dev/ppbus/if_plip.c +++ b/sys/dev/ppbus/if_plip.c @@ -262,7 +262,7 @@ lp_attach(device_t dev) ifp->if_output = lpoutput; ifp->if_hdrlen = 0; ifp->if_addrlen = 0; - ifp->if_snd.ifq_maxlen = IFQ_MAXLEN; + ifp->if_snd.ifq_maxlen = ifqmaxlen; if_attach(ifp); bpfattach(ifp, DLT_NULL, sizeof(u_int32_t)); diff --git a/sys/dev/quicc/quicc_bfe.h b/sys/dev/quicc/quicc_bfe.h index 9ba622c6794..5c589146ac4 100644 --- a/sys/dev/quicc/quicc_bfe.h +++ b/sys/dev/quicc/quicc_bfe.h @@ -50,7 +50,6 @@ struct quicc_softc { u_int sc_clock; int sc_fastintr:1; - int sc_leaving:1; int sc_polled:1; }; diff --git a/sys/dev/ral/rt2560.c b/sys/dev/ral/rt2560.c index 9549107005f..8589768d422 100644 --- a/sys/dev/ral/rt2560.c +++ b/sys/dev/ral/rt2560.c @@ -267,8 +267,8 @@ rt2560_attach(device_t dev, int id) ifp->if_init = rt2560_init; ifp->if_ioctl = rt2560_ioctl; ifp->if_start = rt2560_start; - 
IFQ_SET_MAXLEN(&ifp->if_snd, IFQ_MAXLEN); - ifp->if_snd.ifq_drv_maxlen = IFQ_MAXLEN; + IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen); + ifp->if_snd.ifq_drv_maxlen = ifqmaxlen; IFQ_SET_READY(&ifp->if_snd); ic->ic_ifp = ifp; diff --git a/sys/dev/ral/rt2661.c b/sys/dev/ral/rt2661.c index 9bc88385768..9b77c23dc0e 100644 --- a/sys/dev/ral/rt2661.c +++ b/sys/dev/ral/rt2661.c @@ -269,8 +269,8 @@ rt2661_attach(device_t dev, int id) ifp->if_init = rt2661_init; ifp->if_ioctl = rt2661_ioctl; ifp->if_start = rt2661_start; - IFQ_SET_MAXLEN(&ifp->if_snd, IFQ_MAXLEN); - ifp->if_snd.ifq_drv_maxlen = IFQ_MAXLEN; + IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen); + ifp->if_snd.ifq_drv_maxlen = ifqmaxlen; IFQ_SET_READY(&ifp->if_snd); ic->ic_ifp = ifp; diff --git a/sys/dev/re/if_re.c b/sys/dev/re/if_re.c index 80915061c7d..ef68f145af0 100644 --- a/sys/dev/re/if_re.c +++ b/sys/dev/re/if_re.c @@ -1162,9 +1162,11 @@ re_attach(device_t dev) msic = 0; if (pci_find_extcap(dev, PCIY_EXPRESS, ®) == 0) { sc->rl_flags |= RL_FLAG_PCIE; - /* Set PCIe maximum read request size to 2048. */ - if (pci_get_max_read_req(dev) < 2048) - pci_set_max_read_req(dev, 2048); + if (devid != RT_DEVICEID_8101E) { + /* Set PCIe maximum read request size to 2048. 
*/ + if (pci_get_max_read_req(dev) < 2048) + pci_set_max_read_req(dev, 2048); + } msic = pci_msi_count(dev); if (bootverbose) device_printf(dev, "MSI count : %d\n", msic); diff --git a/sys/dev/sbni/if_sbni.c b/sys/dev/sbni/if_sbni.c index 8c8687c1295..7ca46d31115 100644 --- a/sys/dev/sbni/if_sbni.c +++ b/sys/dev/sbni/if_sbni.c @@ -235,7 +235,7 @@ sbni_attach(struct sbni_softc *sc, int unit, struct sbni_flags flags) ifp->if_init = sbni_init; ifp->if_start = sbni_start; ifp->if_ioctl = sbni_ioctl; - IFQ_SET_MAXLEN(&ifp->if_snd, IFQ_MAXLEN); + IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen); /* report real baud rate */ csr0 = sbni_inb(sc, CSR0); diff --git a/sys/dev/sge/if_sge.c b/sys/dev/sge/if_sge.c new file mode 100644 index 00000000000..ee88cb6fb1d --- /dev/null +++ b/sys/dev/sge/if_sge.c @@ -0,0 +1,1857 @@ +/*- + * Copyright (c) 2008-2010 Nikolay Denev + * Copyright (c) 2007-2008 Alexander Pohoyda + * Copyright (c) 1997, 1998, 1999 + * Bill Paul . All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by Bill Paul. + * 4. Neither the name of the author nor the names of any co-contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY Bill Paul AND CONTRIBUTORS ``AS IS'' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL AUTHORS OR + * THE VOICES IN THEIR HEADS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +/* + * SiS 190/191 PCI Ethernet NIC driver. + * + * Adapted to SiS 190 NIC by Alexander Pohoyda based on the original + * SiS 900 driver by Bill Paul, using SiS 190/191 Solaris driver by + * Masayuki Murayama and SiS 190/191 GNU/Linux driver by K.M. Liu + * . Thanks to Pyun YongHyeon for + * review and very useful comments. + * + * Adapted to SiS 191 NIC by Nikolay Denev with further ideas from the + * Linux and Solaris drivers. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include + +#include +#include + +#include + +MODULE_DEPEND(sge, pci, 1, 1, 1); +MODULE_DEPEND(sge, ether, 1, 1, 1); +MODULE_DEPEND(sge, miibus, 1, 1, 1); + +/* "device miibus0" required. See GENERIC if you get errors here. */ +#include "miibus_if.h" + +/* + * Various supported device vendors/types and their names. 
+ */
+static struct sge_type sge_devs[] = {
+	{ SIS_VENDORID, SIS_DEVICEID_190, "SiS190 Fast Ethernet" },
+	{ SIS_VENDORID, SIS_DEVICEID_191, "SiS191 Fast/Gigabit Ethernet" },
+	{ 0, 0, NULL }
+};
+
+static int	sge_probe(device_t);
+static int	sge_attach(device_t);
+static int	sge_detach(device_t);
+static int	sge_shutdown(device_t);
+static int	sge_suspend(device_t);
+static int	sge_resume(device_t);
+
+static int	sge_miibus_readreg(device_t, int, int);
+static int	sge_miibus_writereg(device_t, int, int, int);
+static void	sge_miibus_statchg(device_t);
+
+static int	sge_newbuf(struct sge_softc *, int);
+static int	sge_encap(struct sge_softc *, struct mbuf **);
+#ifndef __NO_STRICT_ALIGNMENT
+static __inline void
+		sge_fixup_rx(struct mbuf *);
+#endif
+static __inline void
+		sge_discard_rxbuf(struct sge_softc *, int);
+static void	sge_rxeof(struct sge_softc *);
+static void	sge_txeof(struct sge_softc *);
+static void	sge_intr(void *);
+static void	sge_tick(void *);
+static void	sge_start(struct ifnet *);
+static void	sge_start_locked(struct ifnet *);
+static int	sge_ioctl(struct ifnet *, u_long, caddr_t);
+static void	sge_init(void *);
+static void	sge_init_locked(struct sge_softc *);
+static void	sge_stop(struct sge_softc *);
+static void	sge_watchdog(struct sge_softc *);
+static int	sge_ifmedia_upd(struct ifnet *);
+static void	sge_ifmedia_sts(struct ifnet *, struct ifmediareq *);
+
+static int	sge_get_mac_addr_apc(struct sge_softc *, uint8_t *);
+static int	sge_get_mac_addr_eeprom(struct sge_softc *, uint8_t *);
+static uint16_t	sge_read_eeprom(struct sge_softc *, int);
+
+static void	sge_rxfilter(struct sge_softc *);
+static void	sge_setvlan(struct sge_softc *);
+static void	sge_reset(struct sge_softc *);
+static int	sge_list_rx_init(struct sge_softc *);
+static int	sge_list_rx_free(struct sge_softc *);
+static int	sge_list_tx_init(struct sge_softc *);
+static int	sge_list_tx_free(struct sge_softc *);
+
+static int	sge_dma_alloc(struct sge_softc *);
+static void	sge_dma_free(struct sge_softc *);
+static void	sge_dma_map_addr(void *, bus_dma_segment_t *, int, int);
+
+static device_method_t sge_methods[] = {
+	/* Device interface */
+	DEVMETHOD(device_probe,		sge_probe),
+	DEVMETHOD(device_attach,	sge_attach),
+	DEVMETHOD(device_detach,	sge_detach),
+	DEVMETHOD(device_suspend,	sge_suspend),
+	DEVMETHOD(device_resume,	sge_resume),
+	DEVMETHOD(device_shutdown,	sge_shutdown),
+
+	/* Bus interface */
+	DEVMETHOD(bus_print_child,	bus_generic_print_child),
+	DEVMETHOD(bus_driver_added,	bus_generic_driver_added),
+
+	/* MII interface */
+	DEVMETHOD(miibus_readreg,	sge_miibus_readreg),
+	DEVMETHOD(miibus_writereg,	sge_miibus_writereg),
+	DEVMETHOD(miibus_statchg,	sge_miibus_statchg),
+
+	KOBJMETHOD_END
+};
+
+static driver_t sge_driver = {
+	"sge", sge_methods, sizeof(struct sge_softc)
+};
+
+static devclass_t sge_devclass;
+
+DRIVER_MODULE(sge, pci, sge_driver, sge_devclass, 0, 0);
+DRIVER_MODULE(miibus, sge, miibus_driver, miibus_devclass, 0, 0);
+
+/*
+ * Register space access macros.  Each macro takes the softc pointer as
+ * its first argument and accesses the bus resource stored in sge_res.
+ * The argument is parenthesized so that any pointer expression may be
+ * passed, not only a local variable that happens to be named "sc".
+ */
+#define	CSR_WRITE_4(sc, reg, val)	bus_write_4((sc)->sge_res, reg, val)
+#define	CSR_WRITE_2(sc, reg, val)	bus_write_2((sc)->sge_res, reg, val)
+#define	CSR_WRITE_1(sc, reg, val)	bus_write_1((sc)->sge_res, reg, val)
+
+#define	CSR_READ_4(sc, reg)		bus_read_4((sc)->sge_res, reg)
+#define	CSR_READ_2(sc, reg)		bus_read_2((sc)->sge_res, reg)
+#define	CSR_READ_1(sc, reg)		bus_read_1((sc)->sge_res, reg)
+
+/* Define to show Tx/Rx error status. */
+#undef SGE_SHOW_ERRORS
+
+#define	SGE_CSUM_FEATURES	(CSUM_IP | CSUM_TCP | CSUM_UDP)
+
+/*
+ * bus_dmamap_load() callback.  "arg" points to a bus_addr_t which
+ * receives the bus address of the first (and only expected) segment.
+ * When the load reports an error the destination is left untouched.
+ */
+static void
+sge_dma_map_addr(void *arg, bus_dma_segment_t *segs, int nseg, int error)
+{
+	bus_addr_t *p;
+
+	if (error != 0)
+		return;
+	KASSERT(nseg == 1, ("too many DMA segments, %d should be 1", nseg));
+	p = arg;
+	*p = segs->ds_addr;
+}
+
+/*
+ * Read a single 16-bit word from the EEPROM.
+ */
+static uint16_t
+sge_read_eeprom(struct sge_softc *sc, int offset)
+{
+	uint32_t val;
+	int tries;
+
+	KASSERT(offset <= EI_OFFSET, ("EEPROM offset too big"));
+	/* Post the read request, then poll until EI_REQ reads back clear. */
+	CSR_WRITE_4(sc, ROMInterface,
+	    EI_REQ | EI_OP_RD | (offset << EI_OFFSET_SHIFT));
+	DELAY(500);
+	tries = 0;
+	while (tries < SGE_TIMEOUT) {
+		val = CSR_READ_4(sc, ROMInterface);
+		if ((val & EI_REQ) == 0)
+			return ((val & EI_DATA) >> EI_DATA_SHIFT);
+		DELAY(100);
+		tries++;
+	}
+
+	/* The request bit never cleared: log it and return all-ones. */
+	device_printf(sc->sge_dev, "EEPROM read timeout : 0x%08x\n", val);
+	return (0xffff);
+}
+
+/*
+ * Fetch the station address from the EEPROM into dest[] and latch the
+ * RGMII flag from the EEPROMInfo word.  Returns EINVAL when the
+ * signature word is 0 or 0xffff (the latter is also what a timed-out
+ * sge_read_eeprom() returns), 0 otherwise.
+ */
+static int
+sge_get_mac_addr_eeprom(struct sge_softc *sc, uint8_t *dest)
+{
+	uint16_t word;
+	int idx;
+
+	word = sge_read_eeprom(sc, EEPROMSignature);
+	if (word == 0 || word == 0xffff) {
+		device_printf(sc->sge_dev,
+		    "invalid EEPROM signature : 0x%04x\n", word);
+		return (EINVAL);
+	}
+
+	/* Each EEPROM word supplies two address bytes, low byte first. */
+	for (idx = 0; idx < ETHER_ADDR_LEN; idx += 2) {
+		word = sge_read_eeprom(sc, EEPROMMACAddr + idx / 2);
+		dest[idx] = (uint8_t)word;
+		dest[idx + 1] = (uint8_t)(word >> 8);
+	}
+
+	if ((sge_read_eeprom(sc, EEPROMInfo) & 0x80) == 0)
+		return (0);
+	sc->sge_flags |= SGE_FLAG_RGMII;
+	return (0);
+}
+
+/*
+ * For SiS96x, APC CMOS RAM is used to store ethernet address.
+ * APC CMOS RAM is accessed through ISA bridge.
+ */
+static int
+sge_get_mac_addr_apc(struct sge_softc *sc, uint8_t *dest)
+{
+#if defined(__amd64__) || defined(__i386__)
+	devclass_t pci;
+	device_t bus, dev = NULL;
+	device_t *kids;
+	struct apc_tbl {
+		uint16_t vid;
+		uint16_t did;
+	} *tp, apc_tbls[] = {
+		{ SIS_VENDORID, 0x0965 },
+		{ SIS_VENDORID, 0x0966 },
+		{ SIS_VENDORID, 0x0968 }
+	};
+	uint8_t reg;
+	int busnum, cnt, i, j, numkids;
+
+	/* Walk every PCI bus looking for one of the listed ISA bridges. */
+	cnt = sizeof(apc_tbls) / sizeof(apc_tbls[0]);
+	pci = devclass_find("pci");
+	for (busnum = 0; busnum < devclass_get_maxunit(pci); busnum++) {
+		bus = devclass_get_device(pci, busnum);
+		if (!bus)
+			continue;
+		if (device_get_children(bus, &kids, &numkids) != 0)
+			continue;
+		for (i = 0; i < numkids; i++) {
+			dev = kids[i];
+			if (pci_get_class(dev) == PCIC_BRIDGE &&
+			    pci_get_subclass(dev) == PCIS_BRIDGE_ISA) {
+				tp = apc_tbls;
+				for (j = 0; j < cnt; j++) {
+					if (pci_get_vendor(dev) == tp->vid &&
+					    pci_get_device(dev) == tp->did) {
+						/* "dev" stays valid; only the list is freed. */
+						free(kids, M_TEMP);
+						goto apc_found;
+					}
+					tp++;
+				}
+			}
+		}
+		free(kids, M_TEMP);
+	}
+	device_printf(sc->sge_dev, "couldn't find PCI-ISA bridge\n");
+	return (EINVAL);
+apc_found:
+	/* Enable port 0x78 and 0x79 to access APC registers. */
+	reg = pci_read_config(dev, 0x48, 1);
+	pci_write_config(dev, 0x48, reg & ~0x02, 1);
+	DELAY(50);
+	pci_read_config(dev, 0x48, 1);
+	/* Read stored ethernet address. */
+	for (i = 0; i < ETHER_ADDR_LEN; i++) {
+		outb(0x78, 0x09 + i);
+		dest[i] = inb(0x79);
+	}
+	outb(0x78, 0x12);
+	if ((inb(0x79) & 0x80) != 0)
+		sc->sge_flags |= SGE_FLAG_RGMII;
+	/* Restore the bridge register saved above. */
+	pci_write_config(dev, 0x48, reg, 1);
+
+	return (0);
+#else
+	return (EINVAL);
+#endif
+}
+
+/*
+ * MII read accessor: post a read request through GMIIControl and poll
+ * until GMI_REQ clears.  Returns the data field, or 0 on timeout.
+ */
+static int
+sge_miibus_readreg(device_t dev, int phy, int reg)
+{
+	struct sge_softc *sc;
+	uint32_t val;
+	int i;
+
+	sc = device_get_softc(dev);
+	CSR_WRITE_4(sc, GMIIControl, (phy << GMI_PHY_SHIFT) |
+	    (reg << GMI_REG_SHIFT) | GMI_OP_RD | GMI_REQ);
+	DELAY(10);
+	for (i = 0; i < SGE_TIMEOUT; i++) {
+		val = CSR_READ_4(sc, GMIIControl);
+		if ((val & GMI_REQ) == 0)
+			break;
+		DELAY(10);
+	}
+	if (i == SGE_TIMEOUT) {
+		device_printf(sc->sge_dev, "PHY read timeout : %d\n", reg);
+		return (0);
+	}
+	return ((val & GMI_DATA) >> GMI_DATA_SHIFT);
+}
+
+/*
+ * MII write accessor: post a write request through GMIIControl and poll
+ * until GMI_REQ clears.  A timeout is logged; the function always
+ * returns 0.
+ */
+static int
+sge_miibus_writereg(device_t dev, int phy, int reg, int data)
+{
+	struct sge_softc *sc;
+	uint32_t val;
+	int i;
+
+	sc = device_get_softc(dev);
+	CSR_WRITE_4(sc, GMIIControl, (phy << GMI_PHY_SHIFT) |
+	    (reg << GMI_REG_SHIFT) | (data << GMI_DATA_SHIFT) |
+	    GMI_OP_WR | GMI_REQ);
+	DELAY(10);
+	for (i = 0; i < SGE_TIMEOUT; i++) {
+		val = CSR_READ_4(sc, GMIIControl);
+		if ((val & GMI_REQ) == 0)
+			break;
+		DELAY(10);
+	}
+	if (i == SGE_TIMEOUT)
+		device_printf(sc->sge_dev, "PHY write timeout : %d\n", reg);
+	return (0);
+}
+
+/*
+ * MII status change callback.  Recomputes SGE_FLAG_LINK from the
+ * resolved media and, when a link is present, reprograms
+ * StationControl with the matching speed and duplex bits.
+ */
+static void
+sge_miibus_statchg(device_t dev)
+{
+	struct sge_softc *sc;
+	struct mii_data *mii;
+	struct ifnet *ifp;
+	uint32_t ctl, speed;
+
+	sc = device_get_softc(dev);
+	mii = device_get_softc(sc->sge_miibus);
+	ifp = sc->sge_ifp;
+	if (mii == NULL || ifp == NULL ||
+	    (ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
+		return;
+	speed = 0;
+	sc->sge_flags &= ~SGE_FLAG_LINK;
+	if ((mii->mii_media_status & (IFM_ACTIVE | IFM_AVALID)) ==
+	    (IFM_ACTIVE | IFM_AVALID)) {
+		switch (IFM_SUBTYPE(mii->mii_media_active)) {
+		case IFM_10_T:
+			sc->sge_flags |= SGE_FLAG_LINK;
+			speed = SC_SPEED_10;
+			break;
+		case IFM_100_TX:
+			sc->sge_flags |= SGE_FLAG_LINK;
+			speed = SC_SPEED_100;
+			break;
+		case IFM_1000_T:
+			/* A 1000baseT result is ignored on Fast Ethernet parts. */
+			if ((sc->sge_flags & SGE_FLAG_FASTETHER) == 0) {
+				sc->sge_flags |= SGE_FLAG_LINK;
+				speed = SC_SPEED_1000;
+			}
+			break;
+		default:
+			break;
+		}
+	}
+	if ((sc->sge_flags & SGE_FLAG_LINK) == 0)
+		return;
+	/* Reprogram MAC to resolved speed/duplex/flow-control parameters. */
+	ctl = CSR_READ_4(sc, StationControl);
+	ctl &= ~(0x0f000000 | SC_FDX | SC_SPEED_MASK);
+	if (speed == SC_SPEED_1000) {
+		ctl |= 0x07000000;
+		sc->sge_flags |= SGE_FLAG_SPEED_1000;
+	} else {
+		ctl |= 0x04000000;
+		sc->sge_flags &= ~SGE_FLAG_SPEED_1000;
+	}
+#ifdef notyet
+	if ((sc->sge_flags & SGE_FLAG_GMII) != 0)
+		ctl |= 0x03000000;
+#endif
+	ctl |= speed;
+	if ((IFM_OPTIONS(mii->mii_media_active) & IFM_FDX) != 0) {
+		ctl |= SC_FDX;
+		sc->sge_flags |= SGE_FLAG_FDX;
+	} else
+		sc->sge_flags &= ~SGE_FLAG_FDX;
+	CSR_WRITE_4(sc, StationControl, ctl);
+	if ((sc->sge_flags & SGE_FLAG_RGMII) != 0) {
+		CSR_WRITE_4(sc, RGMIIDelay, 0x0441);
+		CSR_WRITE_4(sc, RGMIIDelay, 0x0440);
+	}
+}
+
+/*
+ * Program the receive filter (RxMacControl) and the two 32-bit
+ * multicast hash words from the interface flags and the multicast
+ * address list.  Must be called with the driver lock held.
+ */
+static void
+sge_rxfilter(struct sge_softc *sc)
+{
+	struct ifnet *ifp;
+	struct ifmultiaddr *ifma;
+	uint32_t crc, hashes[2];
+	uint16_t rxfilt;
+
+	SGE_LOCK_ASSERT(sc);
+
+	ifp = sc->sge_ifp;
+	rxfilt = CSR_READ_2(sc, RxMacControl);
+	rxfilt &= ~(AcceptBroadcast | AcceptAllPhys | AcceptMulticast);
+	rxfilt |= AcceptMyPhys;
+	if ((ifp->if_flags & IFF_BROADCAST) != 0)
+		rxfilt |= AcceptBroadcast;
+	if ((ifp->if_flags & (IFF_PROMISC | IFF_ALLMULTI)) != 0) {
+		if ((ifp->if_flags & IFF_PROMISC) != 0)
+			rxfilt |= AcceptAllPhys;
+		rxfilt |= AcceptMulticast;
+		/* Set every hash bit so that no multicast frame is filtered. */
+		hashes[0] = 0xFFFFFFFF;
+		hashes[1] = 0xFFFFFFFF;
+	} else {
+		rxfilt |= AcceptMulticast;
+		hashes[0] = hashes[1] = 0;
+		/* Build the hash table from the current multicast list. */
+		if_maddr_rlock(ifp);
+		TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
+			if (ifma->ifma_addr->sa_family != AF_LINK)
+				continue;
+			crc = ether_crc32_be(LLADDR((struct sockaddr_dl *)
+			    ifma->ifma_addr), ETHER_ADDR_LEN);
+			/*
+			 * CRC bit 31 selects the hash word, bits 30..26 the
+			 * bit within it.  The constant must be unsigned:
+			 * shifting a signed 1 into bit 31 is undefined.
+			 */
+			hashes[crc >> 31] |= 1U << ((crc >> 26) & 0x1f);
+		}
+		if_maddr_runlock(ifp);
+	}
+	CSR_WRITE_2(sc, RxMacControl, rxfilt | 0x02);
+	CSR_WRITE_4(sc, RxHashTable, hashes[0]);
+	CSR_WRITE_4(sc, RxHashTable2, hashes[1]);
+}
+
+/*
+ * Turn RXMAC_STRIP_VLAN in RxMacControl on or off to follow the
+ * IFCAP_VLAN_HWTAGGING enable bit.  Does nothing when the interface
+ * does not advertise that capability.  Must be called with the driver
+ * lock held.
+ */
+static void
+sge_setvlan(struct sge_softc *sc)
+{
+	struct ifnet *ifp;
+	uint16_t rxfilt;
+
+	SGE_LOCK_ASSERT(sc);
+
+	ifp = sc->sge_ifp;
+	if ((ifp->if_capabilities & IFCAP_VLAN_HWTAGGING) == 0)
+		return;
+	rxfilt = CSR_READ_2(sc, RxMacControl);
+	if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0)
+		rxfilt |= RXMAC_STRIP_VLAN;
+	else
+		rxfilt &= ~RXMAC_STRIP_VLAN;
+	CSR_WRITE_2(sc, RxMacControl, rxfilt);
+}
+
+/*
+ * Mask and acknowledge all interrupts, soft-reset the chip, stop the
+ * Tx/Rx MAC and clear GMIIControl.
+ */
+static void
+sge_reset(struct sge_softc *sc)
+{
+
+	CSR_WRITE_4(sc, IntrMask, 0);
+	CSR_WRITE_4(sc, IntrStatus, 0xffffffff);
+
+	/* Soft reset. */
+	CSR_WRITE_4(sc, IntrControl, 0x8000);
+	CSR_READ_4(sc, IntrControl);
+	DELAY(100);
+	CSR_WRITE_4(sc, IntrControl, 0);
+	/* Stop MAC. */
+	CSR_WRITE_4(sc, TX_CTL, 0x1a00);
+	CSR_WRITE_4(sc, RX_CTL, 0x1a00);
+
+	CSR_WRITE_4(sc, IntrMask, 0);
+	CSR_WRITE_4(sc, IntrStatus, 0xffffffff);
+
+	CSR_WRITE_4(sc, GMIIControl, 0);
+}
+
+/*
+ * Probe for an SiS chip. Check the PCI vendor and device
+ * IDs against our list and return a device name if we find a match.
+ */
+static int
+sge_probe(device_t dev)
+{
+	struct sge_type *t;
+
+	/* The table ends at the entry whose name is NULL. */
+	t = sge_devs;
+	while (t->sge_name != NULL) {
+		if ((pci_get_vendor(dev) == t->sge_vid) &&
+		    (pci_get_device(dev) == t->sge_did)) {
+			device_set_desc(dev, t->sge_name);
+			return (BUS_PROBE_DEFAULT);
+		}
+		t++;
+	}
+
+	return (ENXIO);
+}
+
+/*
+ * Attach the interface. Allocate softc structures, do ifmedia
+ * setup and ethernet/BPF attach.
+ */
+static int
+sge_attach(device_t dev)
+{
+	struct sge_softc *sc;
+	struct ifnet *ifp;
+	uint8_t eaddr[ETHER_ADDR_LEN];
+	int error = 0, rid;
+
+	sc = device_get_softc(dev);
+	sc->sge_dev = dev;
+
+	mtx_init(&sc->sge_mtx, device_get_nameunit(dev), MTX_NETWORK_LOCK,
+	    MTX_DEF);
+	callout_init_mtx(&sc->sge_stat_ch, &sc->sge_mtx, 0);
+
+	/*
+	 * Enable bus mastering before touching the device.
+	 */
+	pci_enable_busmaster(dev);
+
+	/* Allocate resources. */
+	sc->sge_res_id = PCIR_BAR(0);
+	sc->sge_res_type = SYS_RES_MEMORY;
+	sc->sge_res = bus_alloc_resource_any(dev, sc->sge_res_type,
+	    &sc->sge_res_id, RF_ACTIVE);
+	if (sc->sge_res == NULL) {
+		device_printf(dev, "couldn't allocate resource\n");
+		error = ENXIO;
+		goto fail;
+	}
+
+	rid = 0;
+	sc->sge_irq = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid,
+	    RF_SHAREABLE | RF_ACTIVE);
+	if (sc->sge_irq == NULL) {
+		device_printf(dev, "couldn't allocate IRQ resources\n");
+		error = ENXIO;
+		goto fail;
+	}
+	sc->sge_rev = pci_get_revid(dev);
+	if (pci_get_device(dev) == SIS_DEVICEID_190)
+		sc->sge_flags |= SGE_FLAG_FASTETHER | SGE_FLAG_SIS190;
+	/* Reset the adapter. */
+	sge_reset(sc);
+
+	/*
+	 * Get the station address from APC CMOS RAM or from the EEPROM,
+	 * as selected by bit 0 of PCI config register 0x73.  Both readers
+	 * return before writing anything when they fail, so start from a
+	 * zeroed buffer: a failed read must never hand uninitialized
+	 * stack contents to ether_ifattach().
+	 */
+	bzero(eaddr, sizeof(eaddr));
+	if ((pci_read_config(dev, 0x73, 1) & 0x01) != 0)
+		error = sge_get_mac_addr_apc(sc, eaddr);
+	else
+		error = sge_get_mac_addr_eeprom(sc, eaddr);
+	if (error != 0) {
+		device_printf(dev, "couldn't read station address\n");
+		/* Not fatal: attach continues, as it always has. */
+		error = 0;
+	}
+
+	if ((error = sge_dma_alloc(sc)) != 0)
+		goto fail;
+
+	ifp = sc->sge_ifp = if_alloc(IFT_ETHER);
+	if (ifp == NULL) {
+		device_printf(dev, "cannot allocate ifnet structure.\n");
+		error = ENOSPC;
+		goto fail;
+	}
+	ifp->if_softc = sc;
+	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
+	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
+	ifp->if_ioctl = sge_ioctl;
+	ifp->if_start = sge_start;
+	ifp->if_init = sge_init;
+	ifp->if_snd.ifq_drv_maxlen = SGE_TX_RING_CNT - 1;
+	IFQ_SET_MAXLEN(&ifp->if_snd, ifp->if_snd.ifq_drv_maxlen);
+	IFQ_SET_READY(&ifp->if_snd);
+	ifp->if_capabilities = IFCAP_TXCSUM | IFCAP_RXCSUM;
+	ifp->if_hwassist = SGE_CSUM_FEATURES;
+	ifp->if_capenable = ifp->if_capabilities;
+	/*
+	 * Do MII setup.
+	 */
+	if (mii_phy_probe(dev, &sc->sge_miibus, sge_ifmedia_upd,
+	    sge_ifmedia_sts)) {
+		device_printf(dev, "no PHY found!\n");
+		error = ENXIO;
+		goto fail;
+	}
+
+	/*
+	 * Call MI attach routine.
+	 */
+	ether_ifattach(ifp, eaddr);
+
+	/* VLAN setup: hardware tagging is not advertised on SiS190. */
+	if ((sc->sge_flags & SGE_FLAG_SIS190) == 0)
+		ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING |
+		    IFCAP_VLAN_HWCSUM;
+	ifp->if_capabilities |= IFCAP_VLAN_MTU;
+	ifp->if_capenable = ifp->if_capabilities;
+	/* Tell the upper layer(s) we support long frames. */
+	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
+
+	/* Hook interrupt last to avoid having to lock softc */
+	error = bus_setup_intr(dev, sc->sge_irq, INTR_TYPE_NET | INTR_MPSAFE,
+	    NULL, sge_intr, sc, &sc->sge_intrhand);
+	if (error) {
+		device_printf(dev, "couldn't set up irq\n");
+		ether_ifdetach(ifp);
+		goto fail;
+	}
+
+fail:
+	/* Every failure path funnels through sge_detach() for cleanup. */
+	if (error)
+		sge_detach(dev);
+
+	return (error);
+}
+
+/*
+ * Shutdown hardware and free up resources. This can be called any
+ * time after the mutex has been initialized.  It is called in both
+ * the error case in attach and the normal detach case so it needs
+ * to be careful about only freeing resources that have actually been
+ * allocated.
+ */
+static int
+sge_detach(device_t dev)
+{
+	struct sge_softc *sc;
+	struct ifnet *ifp;
+
+	sc = device_get_softc(dev);
+	ifp = sc->sge_ifp;
+	/* These should only be active if attach succeeded. */
+	if (device_is_attached(dev)) {
+		ether_ifdetach(ifp);
+		SGE_LOCK(sc);
+		sge_stop(sc);
+		SGE_UNLOCK(sc);
+		callout_drain(&sc->sge_stat_ch);
+	}
+	if (sc->sge_miibus)
+		device_delete_child(dev, sc->sge_miibus);
+	bus_generic_detach(dev);
+
+	/* Each resource below is released only if it was obtained. */
+	if (sc->sge_intrhand)
+		bus_teardown_intr(dev, sc->sge_irq, sc->sge_intrhand);
+	if (sc->sge_irq)
+		bus_release_resource(dev, SYS_RES_IRQ, 0, sc->sge_irq);
+	if (sc->sge_res)
+		bus_release_resource(dev, sc->sge_res_type, sc->sge_res_id,
+		    sc->sge_res);
+	if (ifp)
+		if_free(ifp);
+	sge_dma_free(sc);
+	mtx_destroy(&sc->sge_mtx);
+
+	return (0);
+}
+
+/*
+ * Stop all chip I/O so that the kernel's probe routines don't
+ * get confused by errant DMAs when rebooting.
+ */
+static int
+sge_shutdown(device_t dev)
+{
+	struct sge_softc *sc;
+
+	sc = device_get_softc(dev);
+	SGE_LOCK(sc);
+	sge_stop(sc);
+	SGE_UNLOCK(sc);
+	return (0);
+}
+
+/*
+ * Suspend method: stop the chip if the interface is running.
+ */
+static int
+sge_suspend(device_t dev)
+{
+	struct sge_softc *sc;
+	struct ifnet *ifp;
+
+	sc = device_get_softc(dev);
+	SGE_LOCK(sc);
+	ifp = sc->sge_ifp;
+	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0)
+		sge_stop(sc);
+	SGE_UNLOCK(sc);
+	return (0);
+}
+
+/*
+ * Resume method: reinitialize the chip if the interface is marked up.
+ */
+static int
+sge_resume(device_t dev)
+{
+	struct sge_softc *sc;
+	struct ifnet *ifp;
+
+	sc = device_get_softc(dev);
+	SGE_LOCK(sc);
+	ifp = sc->sge_ifp;
+	if ((ifp->if_flags & IFF_UP) != 0)
+		sge_init_locked(sc);
+	SGE_UNLOCK(sc);
+	return (0);
+}
+
+/*
+ * Create every DMA tag and map the driver needs: the parent tag, the
+ * Rx and Tx descriptor rings (allocated and loaded here), the mbuf
+ * tags, and one map per Tx/Rx slot plus a spare Rx map.
+ *
+ * Returns 0 on success or the first busdma error.  Nothing is undone
+ * here on failure; sge_dma_free() releases whatever was created.
+ */
+static int
+sge_dma_alloc(struct sge_softc *sc)
+{
+	struct sge_chain_data *cd;
+	struct sge_list_data *ld;
+	struct sge_rxdesc *rxd;
+	struct sge_txdesc *txd;
+	int error, i;
+
+	cd = &sc->sge_cdata;
+	ld = &sc->sge_ldata;
+	error = bus_dma_tag_create(bus_get_dma_tag(sc->sge_dev),
+	    1, 0,			/* alignment, boundary */
+	    BUS_SPACE_MAXADDR_32BIT,	/* lowaddr */
+	    BUS_SPACE_MAXADDR,		/* highaddr */
+	    NULL, NULL,			/* filter, filterarg */
+	    BUS_SPACE_MAXSIZE_32BIT,	/* maxsize */
+	    1,				/* nsegments */
+	    BUS_SPACE_MAXSIZE_32BIT,	/* maxsegsize */
+	    0,				/* flags */
+	    NULL,			/* lockfunc */
+	    NULL,			/* lockarg */
+	    &cd->sge_tag);
+	if (error != 0) {
+		device_printf(sc->sge_dev,
+		    "could not create parent DMA tag.\n");
+		goto fail;
+	}
+
+	/* RX descriptor ring */
+	error = bus_dma_tag_create(cd->sge_tag,
+	    SGE_DESC_ALIGN, 0,		/* alignment, boundary */
+	    BUS_SPACE_MAXADDR,		/* lowaddr */
+	    BUS_SPACE_MAXADDR,		/* highaddr */
+	    NULL, NULL,			/* filter, filterarg */
+	    SGE_RX_RING_SZ, 1,		/* maxsize,nsegments */
+	    SGE_RX_RING_SZ,		/* maxsegsize */
+	    0,				/* flags */
+	    NULL,			/* lockfunc */
+	    NULL,			/* lockarg */
+	    &cd->sge_rx_tag);
+	if (error != 0) {
+		device_printf(sc->sge_dev,
+		    "could not create Rx ring DMA tag.\n");
+		goto fail;
+	}
+	/* Allocate DMA'able memory and load DMA map for RX ring. */
+	error = bus_dmamem_alloc(cd->sge_rx_tag, (void **)&ld->sge_rx_ring,
+	    BUS_DMA_NOWAIT | BUS_DMA_ZERO | BUS_DMA_COHERENT,
+	    &cd->sge_rx_dmamap);
+	if (error != 0) {
+		device_printf(sc->sge_dev,
+		    "could not allocate DMA'able memory for Rx ring.\n");
+		goto fail;
+	}
+	error = bus_dmamap_load(cd->sge_rx_tag, cd->sge_rx_dmamap,
+	    ld->sge_rx_ring, SGE_RX_RING_SZ, sge_dma_map_addr,
+	    &ld->sge_rx_paddr, BUS_DMA_NOWAIT);
+	if (error != 0) {
+		device_printf(sc->sge_dev,
+		    "could not load DMA'able memory for Rx ring.\n");
+		/*
+		 * Bail out: falling through would overwrite "error" and
+		 * leave sge_rx_paddr unset while attach carries on.
+		 */
+		goto fail;
+	}
+
+	/* TX descriptor ring */
+	error = bus_dma_tag_create(cd->sge_tag,
+	    SGE_DESC_ALIGN, 0,		/* alignment, boundary */
+	    BUS_SPACE_MAXADDR,		/* lowaddr */
+	    BUS_SPACE_MAXADDR,		/* highaddr */
+	    NULL, NULL,			/* filter, filterarg */
+	    SGE_TX_RING_SZ, 1,		/* maxsize,nsegments */
+	    SGE_TX_RING_SZ,		/* maxsegsize */
+	    0,				/* flags */
+	    NULL,			/* lockfunc */
+	    NULL,			/* lockarg */
+	    &cd->sge_tx_tag);
+	if (error != 0) {
+		device_printf(sc->sge_dev,
+		    "could not create Tx ring DMA tag.\n");
+		goto fail;
+	}
+	/* Allocate DMA'able memory and load DMA map for TX ring. */
+	error = bus_dmamem_alloc(cd->sge_tx_tag, (void **)&ld->sge_tx_ring,
+	    BUS_DMA_NOWAIT | BUS_DMA_ZERO | BUS_DMA_COHERENT,
+	    &cd->sge_tx_dmamap);
+	if (error != 0) {
+		device_printf(sc->sge_dev,
+		    "could not allocate DMA'able memory for Tx ring.\n");
+		goto fail;
+	}
+	error = bus_dmamap_load(cd->sge_tx_tag, cd->sge_tx_dmamap,
+	    ld->sge_tx_ring, SGE_TX_RING_SZ, sge_dma_map_addr,
+	    &ld->sge_tx_paddr, BUS_DMA_NOWAIT);
+	if (error != 0) {
+		device_printf(sc->sge_dev,
+		    "could not load DMA'able memory for Tx ring.\n");
+		goto fail;
+	}
+
+	/* Create DMA tag for Tx buffers. */
+	error = bus_dma_tag_create(cd->sge_tag, 1, 0, BUS_SPACE_MAXADDR,
+	    BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES * SGE_MAXTXSEGS,
+	    SGE_MAXTXSEGS, MCLBYTES, 0, NULL, NULL, &cd->sge_txmbuf_tag);
+	if (error != 0) {
+		device_printf(sc->sge_dev,
+		    "could not create Tx mbuf DMA tag.\n");
+		goto fail;
+	}
+
+	/* Create DMA tag for Rx buffers. */
+	error = bus_dma_tag_create(cd->sge_tag, SGE_RX_BUF_ALIGN, 0,
+	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES, 1,
+	    MCLBYTES, 0, NULL, NULL, &cd->sge_rxmbuf_tag);
+	if (error != 0) {
+		device_printf(sc->sge_dev,
+		    "could not create Rx mbuf DMA tag.\n");
+		goto fail;
+	}
+
+	/* Create DMA maps for Tx buffers. */
+	for (i = 0; i < SGE_TX_RING_CNT; i++) {
+		txd = &cd->sge_txdesc[i];
+		txd->tx_m = NULL;
+		txd->tx_dmamap = NULL;
+		txd->tx_ndesc = 0;
+		error = bus_dmamap_create(cd->sge_txmbuf_tag, 0,
+		    &txd->tx_dmamap);
+		if (error != 0) {
+			device_printf(sc->sge_dev,
+			    "could not create Tx DMA map.\n");
+			goto fail;
+		}
+	}
+	/* Create spare DMA map for Rx buffer. */
+	error = bus_dmamap_create(cd->sge_rxmbuf_tag, 0, &cd->sge_rx_spare_map);
+	if (error != 0) {
+		device_printf(sc->sge_dev,
+		    "could not create spare Rx DMA map.\n");
+		goto fail;
+	}
+	/* Create DMA maps for Rx buffers. */
+	for (i = 0; i < SGE_RX_RING_CNT; i++) {
+		rxd = &cd->sge_rxdesc[i];
+		rxd->rx_m = NULL;
+		rxd->rx_dmamap = NULL;
+		error = bus_dmamap_create(cd->sge_rxmbuf_tag, 0,
+		    &rxd->rx_dmamap);
+		if (error) {
+			device_printf(sc->sge_dev,
+			    "could not create Rx DMA map.\n");
+			goto fail;
+		}
+	}
+fail:
+	return (error);
+}
+
+/*
+ * Release everything sge_dma_alloc() created.  Each tag and map is
+ * tested against NULL first and reset to NULL afterwards, so the
+ * function copes with a partially completed allocation.
+ */
+static void
+sge_dma_free(struct sge_softc *sc)
+{
+	struct sge_chain_data *cd;
+	struct sge_list_data *ld;
+	struct sge_rxdesc *rxd;
+	struct sge_txdesc *txd;
+	int i;
+
+	cd = &sc->sge_cdata;
+	ld = &sc->sge_ldata;
+	/* Rx ring. */
+	if (cd->sge_rx_tag != NULL) {
+		if (cd->sge_rx_dmamap != NULL)
+			bus_dmamap_unload(cd->sge_rx_tag, cd->sge_rx_dmamap);
+		if (cd->sge_rx_dmamap != NULL && ld->sge_rx_ring != NULL)
+			bus_dmamem_free(cd->sge_rx_tag, ld->sge_rx_ring,
+			    cd->sge_rx_dmamap);
+		ld->sge_rx_ring = NULL;
+		cd->sge_rx_dmamap = NULL;
+		bus_dma_tag_destroy(cd->sge_rx_tag);
+		cd->sge_rx_tag = NULL;
+	}
+	/* Tx ring. */
+	if (cd->sge_tx_tag != NULL) {
+		if (cd->sge_tx_dmamap != NULL)
+			bus_dmamap_unload(cd->sge_tx_tag, cd->sge_tx_dmamap);
+		if (cd->sge_tx_dmamap != NULL && ld->sge_tx_ring != NULL)
+			bus_dmamem_free(cd->sge_tx_tag, ld->sge_tx_ring,
+			    cd->sge_tx_dmamap);
+		ld->sge_tx_ring = NULL;
+		cd->sge_tx_dmamap = NULL;
+		bus_dma_tag_destroy(cd->sge_tx_tag);
+		cd->sge_tx_tag = NULL;
+	}
+	/* Rx buffers. */
+	if (cd->sge_rxmbuf_tag != NULL) {
+		for (i = 0; i < SGE_RX_RING_CNT; i++) {
+			rxd = &cd->sge_rxdesc[i];
+			if (rxd->rx_dmamap != NULL) {
+				bus_dmamap_destroy(cd->sge_rxmbuf_tag,
+				    rxd->rx_dmamap);
+				rxd->rx_dmamap = NULL;
+			}
+		}
+		if (cd->sge_rx_spare_map != NULL) {
+			bus_dmamap_destroy(cd->sge_rxmbuf_tag,
+			    cd->sge_rx_spare_map);
+			cd->sge_rx_spare_map = NULL;
+		}
+		bus_dma_tag_destroy(cd->sge_rxmbuf_tag);
+		cd->sge_rxmbuf_tag = NULL;
+	}
+	/* Tx buffers. */
+	if (cd->sge_txmbuf_tag != NULL) {
+		for (i = 0; i < SGE_TX_RING_CNT; i++) {
+			txd = &cd->sge_txdesc[i];
+			if (txd->tx_dmamap != NULL) {
+				bus_dmamap_destroy(cd->sge_txmbuf_tag,
+				    txd->tx_dmamap);
+				txd->tx_dmamap = NULL;
+			}
+		}
+		bus_dma_tag_destroy(cd->sge_txmbuf_tag);
+		cd->sge_txmbuf_tag = NULL;
+	}
+	/* The parent tag goes last, after every tag derived from it. */
+	if (cd->sge_tag != NULL)
+		bus_dma_tag_destroy(cd->sge_tag);
+	cd->sge_tag = NULL;
+}
+
+/*
+ * Initialize the TX descriptors.
+ */
+static int
+sge_list_tx_init(struct sge_softc *sc)
+{
+	struct sge_chain_data *chain;
+	struct sge_list_data *list;
+
+	SGE_LOCK_ASSERT(sc);
+	chain = &sc->sge_cdata;
+	list = &sc->sge_ldata;
+
+	/* Clear the whole ring and flag its final slot as the ring end. */
+	bzero(list->sge_tx_ring, SGE_TX_RING_SZ);
+	list->sge_tx_ring[SGE_TX_RING_CNT - 1].sge_flags = htole32(RING_END);
+	bus_dmamap_sync(chain->sge_tx_tag, chain->sge_tx_dmamap,
+	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
+
+	/* Producer index, consumer index and in-flight count restart at 0. */
+	chain->sge_tx_cnt = 0;
+	chain->sge_tx_cons = 0;
+	chain->sge_tx_prod = 0;
+	return (0);
+}
+
+/*
+ * Unload and free every mbuf still attached to a Tx slot.  Always
+ * returns 0.  Must be called with the driver lock held.
+ */
+static int
+sge_list_tx_free(struct sge_softc *sc)
+{
+	struct sge_chain_data *chain;
+	struct sge_txdesc *slot;
+	int n;
+
+	SGE_LOCK_ASSERT(sc);
+	chain = &sc->sge_cdata;
+	for (n = 0; n < SGE_TX_RING_CNT; n++) {
+		slot = &chain->sge_txdesc[n];
+		/* Slots that hold no mbuf need no work. */
+		if (slot->tx_m == NULL)
+			continue;
+		bus_dmamap_sync(chain->sge_txmbuf_tag, slot->tx_dmamap,
+		    BUS_DMASYNC_POSTWRITE);
+		bus_dmamap_unload(chain->sge_txmbuf_tag, slot->tx_dmamap);
+		m_freem(slot->tx_m);
+		slot->tx_m = NULL;
+		slot->tx_ndesc = 0;
+	}
+
+	return (0);
+}
+
+/*
+ * Initialize the RX descriptors and allocate mbufs for them. Note that
+ * we arrange the descriptors in a closed ring, so that the last descriptor
+ * has RING_END flag set.
+ */
+static int
+sge_list_rx_init(struct sge_softc *sc)
+{
+	struct sge_chain_data *cd;
+	int i;
+
+	SGE_LOCK_ASSERT(sc);
+	cd = &sc->sge_cdata;
+	cd->sge_rx_cons = 0;
+	bzero(sc->sge_ldata.sge_rx_ring, SGE_RX_RING_SZ);
+	/* Any sge_newbuf() failure is reported to the caller as ENOBUFS. */
+	for (i = 0; i < SGE_RX_RING_CNT; i++) {
+		if (sge_newbuf(sc, i) != 0)
+			return (ENOBUFS);
+	}
+	bus_dmamap_sync(cd->sge_rx_tag, cd->sge_rx_dmamap,
+	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
+	return (0);
+}
+
+/*
+ * Unload and free every mbuf still attached to an Rx slot.  Always
+ * returns 0.  Must be called with the driver lock held.
+ */
+static int
+sge_list_rx_free(struct sge_softc *sc)
+{
+	struct sge_chain_data *cd;
+	struct sge_rxdesc *rxd;
+	int i;
+
+	SGE_LOCK_ASSERT(sc);
+	cd = &sc->sge_cdata;
+	for (i = 0; i < SGE_RX_RING_CNT; i++) {
+		rxd = &cd->sge_rxdesc[i];
+		if (rxd->rx_m != NULL) {
+			bus_dmamap_sync(cd->sge_rxmbuf_tag, rxd->rx_dmamap,
+			    BUS_DMASYNC_POSTREAD);
+			bus_dmamap_unload(cd->sge_rxmbuf_tag,
+			    rxd->rx_dmamap);
+			m_freem(rxd->rx_m);
+			rxd->rx_m = NULL;
+		}
+	}
+	return (0);
+}
+
+/*
+ * Initialize an RX descriptor and attach an MBUF cluster.
+ *
+ * The new cluster is loaded into the spare DMA map first, so a failed
+ * allocation or load leaves slot "prod" and its current mbuf untouched.
+ * Returns 0 on success, ENOBUFS when no cluster is available, or the
+ * error from bus_dmamap_load_mbuf_sg().
+ */
+static int
+sge_newbuf(struct sge_softc *sc, int prod)
+{
+	struct mbuf *m;
+	struct sge_desc *desc;
+	struct sge_chain_data *cd;
+	struct sge_rxdesc *rxd;
+	bus_dma_segment_t segs[1];
+	bus_dmamap_t map;
+	int error, nsegs;
+
+	SGE_LOCK_ASSERT(sc);
+
+	cd = &sc->sge_cdata;
+	m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
+	if (m == NULL)
+		return (ENOBUFS);
+	m->m_len = m->m_pkthdr.len = MCLBYTES;
+	/* Trim SGE_RX_BUF_ALIGN bytes from the front of the cluster. */
+	m_adj(m, SGE_RX_BUF_ALIGN);
+	error = bus_dmamap_load_mbuf_sg(cd->sge_rxmbuf_tag,
+	    cd->sge_rx_spare_map, m, segs, &nsegs, 0);
+	if (error != 0) {
+		m_freem(m);
+		return (error);
+	}
+	KASSERT(nsegs == 1, ("%s: %d segments returned!", __func__, nsegs));
+	rxd = &cd->sge_rxdesc[prod];
+	/* Release the mapping of the mbuf this slot held before. */
+	if (rxd->rx_m != NULL) {
+		bus_dmamap_sync(cd->sge_rxmbuf_tag, rxd->rx_dmamap,
+		    BUS_DMASYNC_POSTREAD);
+		bus_dmamap_unload(cd->sge_rxmbuf_tag, rxd->rx_dmamap);
+	}
+	/* Swap maps: the loaded spare becomes the slot's map and vice versa. */
+	map = rxd->rx_dmamap;
+	rxd->rx_dmamap = cd->sge_rx_spare_map;
+	cd->sge_rx_spare_map = map;
+	bus_dmamap_sync(cd->sge_rxmbuf_tag, rxd->rx_dmamap,
+	    BUS_DMASYNC_PREREAD);
+	rxd->rx_m = m;
+
+	desc = &sc->sge_ldata.sge_rx_ring[prod];
+	desc->sge_sts_size = 0;
+	desc->sge_ptr = htole32(SGE_ADDR_LO(segs[0].ds_addr));
+	desc->sge_flags = htole32(segs[0].ds_len);
+	if (prod == SGE_RX_RING_CNT - 1)
+		desc->sge_flags |= htole32(RING_END);
+	/* sge_cmdsts is written last; RDC_OWN is set by this store. */
+	desc->sge_cmdsts = htole32(RDC_OWN | RDC_INTR | RDC_IP_CSUM |
+	    RDC_TCP_CSUM | RDC_UDP_CSUM);
+	return (0);
+}
+
+#ifndef __NO_STRICT_ALIGNMENT
+/*
+ * Copy the frame three 16-bit words (6 bytes) toward the start of the
+ * buffer, then pull m_data back by SGE_RX_BUF_ALIGN - ETHER_ALIGN.
+ * NOTE(review): the hard-coded 3 presumably equals
+ * (SGE_RX_BUF_ALIGN - ETHER_ALIGN) / 2 -- confirm against the header.
+ */
+static __inline void
+sge_fixup_rx(struct mbuf *m)
+{
+	int i;
+	uint16_t *src, *dst;
+
+	src = mtod(m, uint16_t *);
+	dst = src - 3;
+
+	/* The "+ 1" copies one extra word, covering an odd m_len. */
+	for (i = 0; i < (m->m_len / sizeof(uint16_t) + 1); i++)
+		*dst++ = *src++;
+
+	m->m_data -= (SGE_RX_BUF_ALIGN - ETHER_ALIGN);
+}
+#endif
+
+/*
+ * Re-arm Rx descriptor "index" without replacing its mbuf.  sge_ptr is
+ * left as it was; status, length, flags and command bits are rewritten
+ * to the same values sge_newbuf() programs for a fresh buffer.
+ */
+static __inline void
+sge_discard_rxbuf(struct sge_softc *sc, int index)
+{
+	struct sge_desc *desc;
+
+	desc = &sc->sge_ldata.sge_rx_ring[index];
+	desc->sge_sts_size = 0;
+	desc->sge_flags = htole32(MCLBYTES - SGE_RX_BUF_ALIGN);
+	if (index == SGE_RX_RING_CNT - 1)
+		desc->sge_flags |= htole32(RING_END);
+	desc->sge_cmdsts = htole32(RDC_OWN | RDC_INTR | RDC_IP_CSUM |
+	    RDC_TCP_CSUM | RDC_UDP_CSUM);
+}
+
+/*
+ * A frame has been uploaded: pass the resulting mbuf chain up to
+ * the higher level protocols.
+ */
+static void
+sge_rxeof(struct sge_softc *sc)
+{
+	struct ifnet *ifp;
+	struct mbuf *m;
+	struct sge_chain_data *cd;
+	struct sge_desc *cur_rx;
+	uint32_t rxinfo, rxstat;
+	int cons, prog;
+
+	SGE_LOCK_ASSERT(sc);
+
+	ifp = sc->sge_ifp;
+	cd = &sc->sge_cdata;
+
+	bus_dmamap_sync(cd->sge_rx_tag, cd->sge_rx_dmamap,
+	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
+	cons = cd->sge_rx_cons;
+	/*
+	 * Walk at most one full ring of descriptors, starting at the
+	 * saved consumer index.
+	 */
+	for (prog = 0; prog < SGE_RX_RING_CNT; prog++,
+	    SGE_INC(cons, SGE_RX_RING_CNT)) {
+		/* Rechecked every pass: the lock is dropped further down. */
+		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
+			break;
+		cur_rx = &sc->sge_ldata.sge_rx_ring[cons];
+		rxinfo = le32toh(cur_rx->sge_cmdsts);
+		/* Stop at the first descriptor that still has RDC_OWN set. */
+		if ((rxinfo & RDC_OWN) != 0)
+			break;
+		rxstat = le32toh(cur_rx->sge_sts_size);
+		if ((rxstat & RDS_CRCOK) == 0 || SGE_RX_ERROR(rxstat) != 0 ||
+		    SGE_RX_NSEGS(rxstat) != 1) {
+			/* XXX We don't support multi-segment frames yet. */
+#ifdef SGE_SHOW_ERRORS
+			device_printf(sc->sge_dev, "Rx error : 0x%b\n", rxstat,
+			    RX_ERR_BITS);
+#endif
+			sge_discard_rxbuf(sc, cons);
+			ifp->if_ierrors++;
+			continue;
+		}
+		/*
+		 * Remember the received mbuf, then try to put a fresh
+		 * buffer into the slot.  If that fails the current buffer
+		 * is recycled and the frame is dropped.
+		 */
+		m = cd->sge_rxdesc[cons].rx_m;
+		if (sge_newbuf(sc, cons) != 0) {
+			sge_discard_rxbuf(sc, cons);
+			ifp->if_iqdrops++;
+			continue;
+		}
+		if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) {
+			if ((rxinfo & RDC_IP_CSUM) != 0 &&
+			    (rxinfo & RDC_IP_CSUM_OK) != 0)
+				m->m_pkthdr.csum_flags |=
+				    CSUM_IP_CHECKED | CSUM_IP_VALID;
+			if (((rxinfo & RDC_TCP_CSUM) != 0 &&
+			    (rxinfo & RDC_TCP_CSUM_OK) != 0) ||
+			    ((rxinfo & RDC_UDP_CSUM) != 0 &&
+			    (rxinfo & RDC_UDP_CSUM_OK) != 0)) {
+				m->m_pkthdr.csum_flags |=
+				    CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
+				m->m_pkthdr.csum_data = 0xffff;
+			}
+		}
+		/* Check for VLAN tagged frame. */
+		if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
+		    (rxstat & RDS_VLAN) != 0) {
+			m->m_pkthdr.ether_vtag = rxinfo & RDC_VLAN_MASK;
+			m->m_flags |= M_VLANTAG;
+		}
+		if ((sc->sge_flags & SGE_FLAG_SIS190) == 0) {
+			/*
+			 * Account for the 10 bytes of auto padding which are
+			 * used to align the IP header on a 32bit boundary.
+			 * Also note, the CRC bytes are automatically removed
+			 * by the hardware.
+			 */
+			m->m_data += SGE_RX_PAD_BYTES;
+			m->m_pkthdr.len = m->m_len = SGE_RX_BYTES(rxstat) -
+			    SGE_RX_PAD_BYTES;
+		} else {
+			m->m_pkthdr.len = m->m_len = SGE_RX_BYTES(rxstat) -
+			    ETHER_CRC_LEN;
+#ifndef __NO_STRICT_ALIGNMENT
+			sge_fixup_rx(m);
+#endif
+		}
+		m->m_pkthdr.rcvif = ifp;
+		ifp->if_ipackets++;
+		/* The softc lock is not held across the if_input() call. */
+		SGE_UNLOCK(sc);
+		(*ifp->if_input)(ifp, m);
+		SGE_LOCK(sc);
+	}
+
+	/* Publish ring updates and the new consumer index if anything moved. */
+	if (prog > 0) {
+		bus_dmamap_sync(cd->sge_rx_tag, cd->sge_rx_dmamap,
+		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
+		cd->sge_rx_cons = cons;
+	}
+}
+
+/*
+ * A frame was downloaded to the chip. It's safe for us to clean up
+ * the list buffers.
+ */
+static void
+sge_txeof(struct sge_softc *sc)
+{
+	struct ifnet *ifp;
+	struct sge_list_data *ld;
+	struct sge_chain_data *cd;
+	struct sge_txdesc *txd;
+	uint32_t txstat;
+	int cons, nsegs, prod;
+
+	SGE_LOCK_ASSERT(sc);
+
+	ifp = sc->sge_ifp;
+	ld = &sc->sge_ldata;
+	cd = &sc->sge_cdata;
+
+	/* No descriptors outstanding: nothing to reclaim. */
+	if (cd->sge_tx_cnt == 0)
+		return;
+	bus_dmamap_sync(cd->sge_tx_tag, cd->sge_tx_dmamap,
+	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
+	cons = cd->sge_tx_cons;
+	prod = cd->sge_tx_prod;
+	for (; cons != prod;) {
+		txstat = le32toh(ld->sge_tx_ring[cons].sge_cmdsts);
+		if ((txstat & TDC_OWN) != 0)
+			break;
+		/*
+		 * Only the first descriptor of multi-descriptor transmission
+		 * is updated by controller.  Driver should skip entire
+		 * chained buffers for the transmitted frame.  In other words
+		 * TDC_OWN bit is valid only at the first descriptor of a
+		 * multi-descriptor transmission.
+		 */
+		if (SGE_TX_ERROR(txstat) != 0) {
+#ifdef SGE_SHOW_ERRORS
+			device_printf(sc->sge_dev, "Tx error : 0x%b\n",
+			    txstat, TX_ERR_BITS);
+#endif
+			ifp->if_oerrors++;
+		} else {
+#ifdef notyet
+			ifp->if_collisions += (txstat & 0xFFFF) - 1;
+#endif
+			ifp->if_opackets++;
+		}
+		/*
+		 * The per-frame bookkeeping lives in the sge_txdesc entry
+		 * of the frame's first descriptor; tx_ndesc is the number
+		 * of ring slots the frame used (set by sge_encap()).
+		 */
+		txd = &cd->sge_txdesc[cons];
+		for (nsegs = 0; nsegs < txd->tx_ndesc; nsegs++) {
+			ld->sge_tx_ring[cons].sge_cmdsts = 0;
+			SGE_INC(cons, SGE_TX_RING_CNT);
+		}
+		/* Reclaim transmitted mbuf. */
+		KASSERT(txd->tx_m != NULL,
+		    ("%s: freeing NULL mbuf\n", __func__));
+		bus_dmamap_sync(cd->sge_txmbuf_tag, txd->tx_dmamap,
+		    BUS_DMASYNC_POSTWRITE);
+		bus_dmamap_unload(cd->sge_txmbuf_tag, txd->tx_dmamap);
+		m_freem(txd->tx_m);
+		txd->tx_m = NULL;
+		cd->sge_tx_cnt -= txd->tx_ndesc;
+		KASSERT(cd->sge_tx_cnt >= 0,
+		    ("%s: Active Tx desc counter was garbled\n", __func__));
+		txd->tx_ndesc = 0;
+		/* Ring space was freed, so transmission may resume. */
+		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
+	}
+	cd->sge_tx_cons = cons;
+	/* Everything reclaimed: disarm the watchdog countdown. */
+	if (cd->sge_tx_cnt == 0)
+		sc->sge_timer = 0;
+}
+
+/*
+ * Periodic callout; re-arms itself hz ticks ahead.  Drives the MII
+ * tick, re-evaluates the link while SGE_FLAG_LINK is clear (restarting
+ * transmission if the link came up and frames are queued), reclaims
+ * completed transmissions and runs the watchdog.
+ */
+static void
+sge_tick(void *arg)
+{
+	struct sge_softc *sc;
+	struct mii_data *mii;
+	struct ifnet *ifp;
+
+	sc = arg;
+	SGE_LOCK_ASSERT(sc);
+
+	ifp = sc->sge_ifp;
+	mii = device_get_softc(sc->sge_miibus);
+	mii_tick(mii);
+	if ((sc->sge_flags & SGE_FLAG_LINK) == 0) {
+		sge_miibus_statchg(sc->sge_dev);
+		if ((sc->sge_flags & SGE_FLAG_LINK) != 0 &&
+		    !IFQ_DRV_IS_EMPTY(&ifp->if_snd))
+			sge_start_locked(ifp);
+	}
+	/*
+	 * Reclaim transmitted frames here as we do not request
+	 * Tx completion interrupt for every queued frames to
+	 * reduce excessive interrupts.
+	 */
+	sge_txeof(sc);
+	sge_watchdog(sc);
+	callout_reset(&sc->sge_stat_ch, hz, sge_tick, sc);
+}
+
+/*
+ * Interrupt handler.  Returns early when IntrStatus reads back as all
+ * ones or has none of the SGE_INTRS bits set.  Otherwise the interrupt
+ * mask is cleared while RX/TX completions are processed in a loop, and
+ * restored afterwards if the interface is still running.
+ */
+static void
+sge_intr(void *arg)
+{
+	struct sge_softc *sc;
+	struct ifnet *ifp;
+	uint32_t status;
+
+	sc = arg;
+	SGE_LOCK(sc);
+	ifp = sc->sge_ifp;
+
+	status = CSR_READ_4(sc, IntrStatus);
+	if (status == 0xFFFFFFFF || (status & SGE_INTRS) == 0) {
+		/* Not ours. */
+		SGE_UNLOCK(sc);
+		return;
+	}
+	/* Acknowledge interrupts. */
+	CSR_WRITE_4(sc, IntrStatus, status);
+	/* Disable further interrupts. */
+	CSR_WRITE_4(sc, IntrMask, 0);
+	/*
+	 * It seems the controller supports some kind of interrupt
+	 * moderation mechanism but we still don't know how to
+	 * enable that.  To reduce the number of generated interrupts
+	 * under load we check pending interrupts in a loop.  This
+	 * will increase the number of register accesses and is not the
+	 * correct way to handle interrupt moderation but there seems to
+	 * be no other way at this time.
+	 */
+	for (;;) {
+		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
+			break;
+		if ((status & (INTR_RX_DONE | INTR_RX_IDLE)) != 0) {
+			sge_rxeof(sc);
+			/* Wakeup Rx MAC. */
+			if ((status & INTR_RX_IDLE) != 0)
+				CSR_WRITE_4(sc, RX_CTL,
+				    0x1a00 | 0x000c | RX_CTL_POLL | RX_CTL_ENB);
+		}
+		if ((status & (INTR_TX_DONE | INTR_TX_IDLE)) != 0)
+			sge_txeof(sc);
+		/* Pick up anything that arrived while we were working. */
+		status = CSR_READ_4(sc, IntrStatus);
+		if ((status & SGE_INTRS) == 0)
+			break;
+		/* Acknowledge interrupts. */
+		CSR_WRITE_4(sc, IntrStatus, status);
+	}
+	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) {
+		/* Re-enable interrupts */
+		CSR_WRITE_4(sc, IntrMask, SGE_INTRS);
+		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
+			sge_start_locked(ifp);
+	}
+	SGE_UNLOCK(sc);
+}
+
+/*
+ * Encapsulate an mbuf chain in a descriptor by coupling the mbuf data
+ * pointers to the fragment pointers.
+ */
+static int
+sge_encap(struct sge_softc *sc, struct mbuf **m_head)
+{
+	struct mbuf *m;
+	struct sge_desc *desc;
+	struct sge_txdesc *txd;
+	bus_dma_segment_t txsegs[SGE_MAXTXSEGS];
+	uint32_t cflags;
+	int error, i, nsegs, prod, si;
+
+	SGE_LOCK_ASSERT(sc);
+
+	si = prod = sc->sge_cdata.sge_tx_prod;
+	txd = &sc->sge_cdata.sge_txdesc[prod];
+	error = bus_dmamap_load_mbuf_sg(sc->sge_cdata.sge_txmbuf_tag,
+	    txd->tx_dmamap, *m_head, txsegs, &nsegs, 0);
+	if (error == EFBIG) {
+		/*
+		 * Too many segments: collapse the chain and retry once.
+		 * On both failure paths below the mbuf is freed and
+		 * *m_head is set to NULL, so the caller must not requeue it.
+		 */
+		m = m_collapse(*m_head, M_DONTWAIT, SGE_MAXTXSEGS);
+		if (m == NULL) {
+			m_freem(*m_head);
+			*m_head = NULL;
+			return (ENOBUFS);
+		}
+		*m_head = m;
+		error = bus_dmamap_load_mbuf_sg(sc->sge_cdata.sge_txmbuf_tag,
+		    txd->tx_dmamap, *m_head, txsegs, &nsegs, 0);
+		if (error != 0) {
+			m_freem(*m_head);
+			*m_head = NULL;
+			return (error);
+		}
+	} else if (error != 0)
+		return (error);		/* *m_head is still owned by the caller. */
+
+	KASSERT(nsegs != 0, ("zero segment returned"));
+	/* Check descriptor overrun. */
+	if (sc->sge_cdata.sge_tx_cnt + nsegs >= SGE_TX_RING_CNT) {
+		bus_dmamap_unload(sc->sge_cdata.sge_txmbuf_tag, txd->tx_dmamap);
+		return (ENOBUFS);
+	}
+	bus_dmamap_sync(sc->sge_cdata.sge_txmbuf_tag, txd->tx_dmamap,
+	    BUS_DMASYNC_PREWRITE);
+
+	m = *m_head;
+	cflags = 0;
+	if (m->m_pkthdr.csum_flags & CSUM_IP)
+		cflags |= TDC_IP_CSUM;
+	if (m->m_pkthdr.csum_flags & CSUM_TCP)
+		cflags |= TDC_TCP_CSUM;
+	if (m->m_pkthdr.csum_flags & CSUM_UDP)
+		cflags |= TDC_UDP_CSUM;
+	/*
+	 * Fill one descriptor per segment.  The first descriptor carries
+	 * the frame length and is left without TDC_OWN until every other
+	 * descriptor of the frame has been written.
+	 */
+	for (i = 0; i < nsegs; i++) {
+		desc = &sc->sge_ldata.sge_tx_ring[prod];
+		if (i == 0) {
+			desc->sge_sts_size = htole32(m->m_pkthdr.len);
+			desc->sge_cmdsts = 0;
+		} else {
+			desc->sge_sts_size = 0;
+			desc->sge_cmdsts = htole32(TDC_OWN);
+		}
+		desc->sge_ptr = htole32(SGE_ADDR_LO(txsegs[i].ds_addr));
+		desc->sge_flags = htole32(txsegs[i].ds_len);
+		if (prod == SGE_TX_RING_CNT - 1)
+			desc->sge_flags |= htole32(RING_END);
+		sc->sge_cdata.sge_tx_cnt++;
+		SGE_INC(prod, SGE_TX_RING_CNT);
+	}
+	/* Update producer index. */
+	sc->sge_cdata.sge_tx_prod = prod;
+
+	/* Finish the frame's first descriptor. */
+	desc = &sc->sge_ldata.sge_tx_ring[si];
+	/* Configure VLAN. */
+	if ((m->m_flags & M_VLANTAG) != 0) {
+		cflags |= m->m_pkthdr.ether_vtag;
+		desc->sge_sts_size |= htole32(TDS_INS_VLAN);
+	}
+	desc->sge_cmdsts |= htole32(TDC_DEF | TDC_CRC | TDC_PAD | cflags);
+#if 1
+	if ((sc->sge_flags & SGE_FLAG_SPEED_1000) != 0)
+		desc->sge_cmdsts |= htole32(TDC_BST);
+#else
+	if ((sc->sge_flags & SGE_FLAG_FDX) == 0) {
+		desc->sge_cmdsts |= htole32(TDC_COL | TDC_CRS | TDC_BKF);
+		if ((sc->sge_flags & SGE_FLAG_SPEED_1000) != 0)
+			desc->sge_cmdsts |= htole32(TDC_EXT | TDC_BST);
+	}
+#endif
+	/* Request interrupt and give ownership to controller. */
+	desc->sge_cmdsts |= htole32(TDC_OWN | TDC_INTR);
+	txd->tx_m = m;
+	txd->tx_ndesc = nsegs;
+	return (0);
+}
+
+/*
+ * if_start entry point: take the softc lock and run the locked variant.
+ */
+static void
+sge_start(struct ifnet *ifp)
+{
+	struct sge_softc *sc;
+
+	sc = ifp->if_softc;
+	SGE_LOCK(sc);
+	sge_start_locked(ifp);
+	SGE_UNLOCK(sc);
+}
+
+/*
+ * Move frames from the interface send queue onto the TX ring.  Does
+ * nothing unless the link is up and the interface is running and not
+ * marked IFF_DRV_OACTIVE.  Must be called with the softc lock held.
+ *
+ * When sge_encap() fails but leaves the mbuf intact, the frame is put
+ * back at the head of the queue and IFF_DRV_OACTIVE is set.  When
+ * sge_encap() has already freed the mbuf (*m_head == NULL) there is
+ * nothing to requeue and the loop simply stops.
+ */
+static void
+sge_start_locked(struct ifnet *ifp)
+{
+	struct sge_softc *sc;
+	struct mbuf *m_head;
+	int queued = 0;
+
+	sc = ifp->if_softc;
+	SGE_LOCK_ASSERT(sc);
+
+	if ((sc->sge_flags & SGE_FLAG_LINK) == 0 ||
+	    (ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
+	    IFF_DRV_RUNNING)
+		return;
+
+	for (queued = 0; !IFQ_DRV_IS_EMPTY(&ifp->if_snd); ) {
+		/* Keep room for a worst-case (SGE_MAXTXSEGS) frame. */
+		if (sc->sge_cdata.sge_tx_cnt > (SGE_TX_RING_CNT -
+		    SGE_MAXTXSEGS)) {
+			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
+			break;
+		}
+		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
+		if (m_head == NULL)
+			break;
+		if (sge_encap(sc, &m_head)) {
+			/*
+			 * sge_encap() may have freed the chain and cleared
+			 * m_head; prepending NULL would corrupt the queue.
+			 */
+			if (m_head == NULL)
+				break;
+			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
+			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
+			break;
+		}
+		queued++;
+		/*
+		 * If there's a BPF listener, bounce a copy of this frame
+		 * to him.
+		 */
+		BPF_MTAP(ifp, m_head);
+	}
+
+	if (queued > 0) {
+		bus_dmamap_sync(sc->sge_cdata.sge_tx_tag,
+		    sc->sge_cdata.sge_tx_dmamap,
+		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
+		CSR_WRITE_4(sc, TX_CTL, 0x1a00 | TX_CTL_ENB | TX_CTL_POLL);
+		/* Arm the watchdog; sge_watchdog() counts this down. */
+		sc->sge_timer = 5;
+	}
+}
+
+/*
+ * if_init entry point: take the softc lock and run the locked variant.
+ */
+static void
+sge_init(void *arg)
+{
+	struct sge_softc *sc;
+
+	sc = arg;
+	SGE_LOCK(sc);
+	sge_init_locked(sc);
+	SGE_UNLOCK(sc);
+}
+
+/*
+ * Bring the interface up.  Returns immediately if IFF_DRV_RUNNING is
+ * already set.  Otherwise the chip is stopped and reset, the RX and TX
+ * rings are rebuilt, the registers are programmed, interrupts and both
+ * MACs are enabled, the interface is marked running, and a media change
+ * plus the periodic sge_tick() callout are started.  If the RX ring
+ * cannot be populated the adapter is stopped again and left down.
+ */
+static void
+sge_init_locked(struct sge_softc *sc)
+{
+	struct ifnet *ifp;
+	struct mii_data *mii;
+	uint16_t rxfilt;
+	int i;
+
+	SGE_LOCK_ASSERT(sc);
+	ifp = sc->sge_ifp;
+	mii = device_get_softc(sc->sge_miibus);
+	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0)
+		return;
+	/*
+	 * Cancel pending I/O and free all RX/TX buffers.
+	 */
+	sge_stop(sc);
+	sge_reset(sc);
+
+	/* Init circular RX list. */
+	if (sge_list_rx_init(sc) == ENOBUFS) {
+		device_printf(sc->sge_dev, "no memory for Rx buffers\n");
+		sge_stop(sc);
+		return;
+	}
+	/* Init TX descriptors. */
+	sge_list_tx_init(sc);
+	/*
+	 * Load the address of the RX and TX lists.
+	 */
+	CSR_WRITE_4(sc, TX_DESC, SGE_ADDR_LO(sc->sge_ldata.sge_tx_paddr));
+	CSR_WRITE_4(sc, RX_DESC, SGE_ADDR_LO(sc->sge_ldata.sge_rx_paddr));
+
+	CSR_WRITE_4(sc, TxMacControl, 0x60);
+	CSR_WRITE_4(sc, RxWakeOnLan, 0);
+	CSR_WRITE_4(sc, RxWakeOnLanData, 0);
+	/* Allow receiving VLAN frames. */
+	if ((sc->sge_flags & SGE_FLAG_SIS190) == 0)
+		CSR_WRITE_2(sc, RxMPSControl,
+		    ETHER_MAX_LEN + ETHER_VLAN_ENCAP_LEN + SGE_RX_PAD_BYTES);
+	else
+		CSR_WRITE_2(sc, RxMPSControl, ETHER_MAX_LEN + ETHER_VLAN_ENCAP_LEN);
+
+	/* Program the station address one byte at a time. */
+	for (i = 0; i < ETHER_ADDR_LEN; i++)
+		CSR_WRITE_1(sc, RxMacAddr + i, IF_LLADDR(ifp)[i]);
+	/* Configure RX MAC. */
+	rxfilt = 0;
+	if ((sc->sge_flags & SGE_FLAG_SIS190) == 0)
+		rxfilt |= RXMAC_STRIP_FCS | RXMAC_PAD_ENB;
+	CSR_WRITE_2(sc, RxMacControl, rxfilt);
+	sge_rxfilter(sc);
+	sge_setvlan(sc);
+
+	/* Initialize default speed/duplex information. */
+	if ((sc->sge_flags & SGE_FLAG_FASTETHER) == 0)
+		sc->sge_flags |= SGE_FLAG_SPEED_1000;
+	sc->sge_flags |= SGE_FLAG_FDX;
+	if ((sc->sge_flags & SGE_FLAG_RGMII) != 0)
+		CSR_WRITE_4(sc, StationControl, 0x04008001);
+	else
+		CSR_WRITE_4(sc, StationControl, 0x04000001);
+	/*
+	 * XXX Try to mitigate interrupts.
+	 */
+	CSR_WRITE_4(sc, IntrControl, 0x08880000);
+#ifdef notyet
+	if (sc->sge_intrcontrol != 0)
+		CSR_WRITE_4(sc, IntrControl, sc->sge_intrcontrol);
+	if (sc->sge_intrtimer != 0)
+		CSR_WRITE_4(sc, IntrTimer, sc->sge_intrtimer);
+#endif
+
+	/*
+	 * Clear and enable interrupts.
+	 */
+	CSR_WRITE_4(sc, IntrStatus, 0xFFFFFFFF);
+	CSR_WRITE_4(sc, IntrMask, SGE_INTRS);
+
+	/* Enable receiver and transmitter. */
+	CSR_WRITE_4(sc, TX_CTL, 0x1a00 | TX_CTL_ENB);
+	CSR_WRITE_4(sc, RX_CTL, 0x1a00 | 0x000c | RX_CTL_POLL | RX_CTL_ENB);
+
+	ifp->if_drv_flags |= IFF_DRV_RUNNING;
+	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
+
+	/* The link flag is cleared here and re-evaluated from sge_tick(). */
+	sc->sge_flags &= ~SGE_FLAG_LINK;
+	mii_mediachg(mii);
+	callout_reset(&sc->sge_stat_ch, hz, sge_tick, sc);
+}
+
+/*
+ * Set media options.  When mii_instance is non-zero every attached PHY
+ * is reset before the media change is requested.  Returns the result
+ * of mii_mediachg().
+ */
+static int
+sge_ifmedia_upd(struct ifnet *ifp)
+{
+	struct sge_softc *sc;
+	struct mii_data *mii;
+	int error;
+
+	sc = ifp->if_softc;
+	SGE_LOCK(sc);
+	mii = device_get_softc(sc->sge_miibus);
+	if (mii->mii_instance) {
+		struct mii_softc *miisc;
+		LIST_FOREACH(miisc, &mii->mii_phys, mii_list)
+			mii_phy_reset(miisc);
+	}
+	error = mii_mediachg(mii);
+	SGE_UNLOCK(sc);
+
+	return (error);
+}
+
+/*
+ * Report current media status.
+ */
+static void
+sge_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr)
+{
+	struct sge_softc *sc;
+	struct mii_data *mii;
+
+	sc = ifp->if_softc;
+	SGE_LOCK(sc);
+	mii = device_get_softc(sc->sge_miibus);
+	/* Interface is down: leave *ifmr untouched. */
+	if ((ifp->if_flags & IFF_UP) == 0) {
+		SGE_UNLOCK(sc);
+		return;
+	}
+	mii_pollstat(mii);
+	/*
+	 * Copy the results while still holding the softc lock so they
+	 * cannot change between the poll and the copy.
+	 */
+	ifmr->ifm_active = mii->mii_media_active;
+	ifmr->ifm_status = mii->mii_media_status;
+	SGE_UNLOCK(sc);
+}
+
+/*
+ * Interface ioctl handler.  Handles interface flag changes, capability
+ * toggles, multicast list changes and media requests; everything else
+ * is passed to ether_ioctl().  Returns 0 or the error from
+ * ifmedia_ioctl()/ether_ioctl().
+ */
+static int
+sge_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
+{
+	struct sge_softc *sc;
+	struct ifreq *ifr;
+	struct mii_data *mii;
+	int error = 0, mask, reinit;
+
+	sc = ifp->if_softc;
+	ifr = (struct ifreq *)data;
+
+	switch (command) {
+	case SIOCSIFFLAGS:
+		SGE_LOCK(sc);
+		if ((ifp->if_flags & IFF_UP) != 0) {
+			/*
+			 * Already running and only PROMISC/ALLMULTI changed:
+			 * reprogram the RX filter instead of reinitializing.
+			 */
+			if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0 &&
+			    ((ifp->if_flags ^ sc->sge_if_flags) &
+			    (IFF_PROMISC | IFF_ALLMULTI)) != 0)
+				sge_rxfilter(sc);
+			else
+				sge_init_locked(sc);
+		} else if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0)
+			sge_stop(sc);
+		/* Remember the flags for the next comparison. */
+		sc->sge_if_flags = ifp->if_flags;
+		SGE_UNLOCK(sc);
+		break;
+	case SIOCSIFCAP:
+		SGE_LOCK(sc);
+		reinit = 0;
+		/* Bits that differ between the request and the current state. */
+		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
+		if ((mask & IFCAP_TXCSUM) != 0 &&
+		    (ifp->if_capabilities & IFCAP_TXCSUM) != 0) {
+			ifp->if_capenable ^= IFCAP_TXCSUM;
+			if ((ifp->if_capenable & IFCAP_TXCSUM) != 0)
+				ifp->if_hwassist |= SGE_CSUM_FEATURES;
+			else
+				ifp->if_hwassist &= ~SGE_CSUM_FEATURES;
+		}
+		if ((mask & IFCAP_RXCSUM) != 0 &&
+		    (ifp->if_capabilities & IFCAP_RXCSUM) != 0)
+			ifp->if_capenable ^= IFCAP_RXCSUM;
+		if ((mask & IFCAP_VLAN_HWCSUM) != 0 &&
+		    (ifp->if_capabilities & IFCAP_VLAN_HWCSUM) != 0)
+			ifp->if_capenable ^= IFCAP_VLAN_HWCSUM;
+		if ((mask & IFCAP_VLAN_HWTAGGING) != 0 &&
+		    (ifp->if_capabilities & IFCAP_VLAN_HWTAGGING) != 0) {
+			/*
+			 * For an unknown reason, toggling VLAN hardware
+			 * tagging requires interface reinitialization.
+			 */
+			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
+			reinit = 1;
+		}
+		if (reinit > 0 && (ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) {
+			/* sge_init_locked() is a no-op while RUNNING is set. */
+			ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
+			sge_init_locked(sc);
+		}
+		SGE_UNLOCK(sc);
+		VLAN_CAPABILITIES(ifp);
+		break;
+	case SIOCADDMULTI:
+	case SIOCDELMULTI:
+		SGE_LOCK(sc);
+		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0)
+			sge_rxfilter(sc);
+		SGE_UNLOCK(sc);
+		break;
+	case SIOCGIFMEDIA:
+	case SIOCSIFMEDIA:
+		mii = device_get_softc(sc->sge_miibus);
+		error = ifmedia_ioctl(ifp, ifr, &mii->mii_media, command);
+		break;
+	default:
+		error = ether_ioctl(ifp, command, data);
+		break;
+	}
+
+	return (error);
+}
+
+/*
+ * Transmit watchdog, run from sge_tick().  sge_timer is armed by
+ * sge_start_locked() and cleared by sge_txeof() and sge_stop(); a value
+ * of 0 means "not armed".  When the countdown reaches zero the event is
+ * logged, counted as an output error and the interface is
+ * reinitialized.
+ */
+static void
+sge_watchdog(struct sge_softc *sc)
+{
+	struct ifnet *ifp;
+
+	SGE_LOCK_ASSERT(sc);
+	if (sc->sge_timer == 0 || --sc->sge_timer > 0)
+		return;
+
+	ifp = sc->sge_ifp;
+	if ((sc->sge_flags & SGE_FLAG_LINK) == 0) {
+		/*
+		 * NOTE(review): the constant 1 makes this message
+		 * unconditional; looks like a debugging leftover.
+		 */
+		if (1 || bootverbose)
+			device_printf(sc->sge_dev,
+			    "watchdog timeout (lost link)\n");
+		ifp->if_oerrors++;
+		ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
+		sge_init_locked(sc);
+		return;
+	}
+	device_printf(sc->sge_dev, "watchdog timeout\n");
+	ifp->if_oerrors++;
+
+	/* Clear RUNNING first so that sge_init_locked() really reinitializes. */
+	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
+	sge_init_locked(sc);
+	if (!IFQ_DRV_IS_EMPTY(&sc->sge_ifp->if_snd))
+		sge_start_locked(ifp);
+}
+
+/*
+ * Stop the adapter and free any mbufs allocated to the
+ * RX and TX lists.
+ */
+static void
+sge_stop(struct sge_softc *sc)
+{
+	struct ifnet *ifp;
+
+	ifp = sc->sge_ifp;
+
+	SGE_LOCK_ASSERT(sc);
+
+	/* Disarm the watchdog and the periodic tick. */
+	sc->sge_timer = 0;
+	callout_stop(&sc->sge_stat_ch);
+	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
+
+	/* Mask interrupts, read the mask back, then clear pending status. */
+	CSR_WRITE_4(sc, IntrMask, 0);
+	CSR_READ_4(sc, IntrMask);
+	CSR_WRITE_4(sc, IntrStatus, 0xffffffff);
+	/* Stop TX/RX MAC. */
+	CSR_WRITE_4(sc, TX_CTL, 0x1a00);
+	CSR_WRITE_4(sc, RX_CTL, 0x1a00);
+	/* XXX Can we assume active DMA cycles gone? */
+	DELAY(2000);
+	CSR_WRITE_4(sc, IntrMask, 0);
+	CSR_WRITE_4(sc, IntrStatus, 0xffffffff);
+
+	sc->sge_flags &= ~SGE_FLAG_LINK;
+	sge_list_rx_free(sc);
+	sge_list_tx_free(sc);
+}
diff --git a/sys/dev/sge/if_sgereg.h b/sys/dev/sge/if_sgereg.h
new file mode 100644
index 00000000000..c06072cdeb9
--- /dev/null
+++ b/sys/dev/sge/if_sgereg.h
@@ -0,0 +1,368 @@
+/*-
+ * Copyright (c) 2008, 2009, 2010 Nikolay Denev
+ * Copyright (c) 2007, 2008 Alexander Pohoyda
+ * Copyright (c) 1997, 1998, 1999
+ *	Bill Paul . All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by Bill Paul.
+ * 4. Neither the name of the author nor the names of any co-contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Bill Paul AND CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ * PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL AUTHORS OR + * THE VOICES IN THEIR HEADS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _IF_SGEREG_H +#define _IF_SGEREG_H + +/* + * SiS PCI vendor ID. + */ +#define SIS_VENDORID 0x1039 + +/* + * SiS PCI device IDs + */ +#define SIS_DEVICEID_190 0x0190 +#define SIS_DEVICEID_191 0x0191 + +#define TX_CTL 0x00 +#define TX_DESC 0x04 +#define Reserved0 0x08 +#define TX_NEXT 0x0c + +#define RX_CTL 0x10 +#define RX_DESC 0x14 +#define Reserved1 0x18 +#define RX_NEXT 0x1c + +#define IntrStatus 0x20 +#define IntrMask 0x24 +#define IntrControl 0x28 +#define IntrTimer 0x2c + +#define PMControl 0x30 +#define Reserved2 0x34 +#define ROMControl 0x38 +#define ROMInterface 0x3c +#define StationControl 0x40 +#define GMIIControl 0x44 +#define GMacIOCR 0x48 +#define GMacIOCTL 0x4c +#define TxMacControl 0x50 +#define TxMacTimeLimit 0x54 +#define RGMIIDelay 0x58 +#define Reserved3 0x5c +#define RxMacControl 0x60 /* 1 WORD */ +#define RxMacAddr 0x62 /* 6x BYTE */ +#define RxHashTable 0x68 /* 1 LONG */ +#define RxHashTable2 0x6c /* 1 LONG */ +#define RxWakeOnLan 0x70 +#define RxWakeOnLanData 0x74 +#define RxMPSControl 0x78 +#define Reserved4 0x7c + +/* + * IntrStatus Register Content + */ +#define INTR_SOFT 0x40000000 +#define INTR_TIMER 0x20000000 +#define INTR_PAUSE_FRAME 0x00080000 +#define INTR_MAGIC_FRAME 0x00040000 +#define INTR_WAKE_FRAME 0x00020000 +#define INTR_LINK 0x00010000 +#define INTR_RX_IDLE 0x00000080 +#define INTR_RX_DONE 0x00000040 +#define INTR_TXQ1_IDLE 0x00000020 +#define 
INTR_TXQ1_DONE 0x00000010 +#define INTR_TX_IDLE 0x00000008 +#define INTR_TX_DONE 0x00000004 +#define INTR_RX_HALT 0x00000002 +#define INTR_TX_HALT 0x00000001 + +#define SGE_INTRS \ + (INTR_RX_IDLE | INTR_RX_DONE | INTR_TXQ1_IDLE | \ + INTR_TXQ1_DONE |INTR_TX_IDLE | INTR_TX_DONE | \ + INTR_TX_HALT | INTR_RX_HALT) + +/* + * RxStatusDesc Register Content + */ +#define RxRES 0x00200000 +#define RxCRC 0x00080000 +#define RxRUNT 0x00100000 +#define RxRWT 0x00400000 + +/* + * RX_CTL Register Content + */ +#define RX_CTL_POLL 0x00000010 +#define RX_CTL_ENB 0x00000001 + +/* + * TX_CTL Register Content + */ +#define TX_CTL_POLL 0x00000010 +#define TX_CTL_ENB 0x00000001 + +/* + * RxMacControl Register Content + */ +#define AcceptBroadcast 0x0800 +#define AcceptMulticast 0x0400 +#define AcceptMyPhys 0x0200 +#define AcceptAllPhys 0x0100 +#define AcceptErr 0x0020 +#define AcceptRunt 0x0010 +#define RXMAC_STRIP_VLAN 0x0020 +#define RXMAC_STRIP_FCS 0x0010 +#define RXMAC_PAD_ENB 0x0004 + +#define SGE_RX_PAD_BYTES 10 + +/* Station control register. */ +#define SC_LOOPBACK 0x80000000 +#define SC_RGMII 0x00008000 +#define SC_FDX 0x00001000 +#define SC_SPEED_MASK 0x00000c00 +#define SC_SPEED_10 0x00000400 +#define SC_SPEED_100 0x00000800 +#define SC_SPEED_1000 0x00000c00 + +/* + * Gigabit Media Independent Interface CTL register + */ +#define GMI_DATA 0xffff0000 +#define GMI_DATA_SHIFT 16 +#define GMI_REG 0x0000f800 +#define GMI_REG_SHIFT 11 +#define GMI_PHY 0x000007c0 +#define GMI_PHY_SHIFT 6 +#define GMI_OP_WR 0x00000020 +#define GMI_OP_RD 0x00000000 +#define GMI_REQ 0x00000010 +#define GMI_MDIO 0x00000008 +#define GMI_MDDIR 0x00000004 +#define GMI_MDC 0x00000002 +#define GMI_MDEN 0x00000001 + +/* Tx descriptor command bits. 
*/ +#define TDC_OWN 0x80000000 +#define TDC_INTR 0x40000000 +#define TDC_THOL3 0x30000000 +#define TDC_THOL2 0x20000000 +#define TDC_THOL1 0x10000000 +#define TDC_THOL0 0x00000000 +#define TDC_LS 0x08000000 +#define TDC_IP_CSUM 0x04000000 +#define TDC_TCP_CSUM 0x02000000 +#define TDC_UDP_CSUM 0x01000000 +#define TDC_BST 0x00800000 +#define TDC_EXT 0x00400000 +#define TDC_DEF 0x00200000 +#define TDC_BKF 0x00100000 +#define TDC_CRS 0x00080000 +#define TDC_COL 0x00040000 +#define TDC_CRC 0x00020000 +#define TDC_PAD 0x00010000 +#define TDC_VLAN_MASK 0x0000FFFF + +#define SGE_TX_INTR_FRAMES 32 + +/* + * TX descriptor status bits. + */ +#define TDS_INS_VLAN 0x80000000 +#define TDS_OWC 0x00080000 +#define TDS_ABT 0x00040000 +#define TDS_FIFO 0x00020000 +#define TDS_CRS 0x00010000 +#define TDS_COLLS 0x0000ffff +#define SGE_TX_ERROR(x) ((x) & (TDS_OWC | TDS_ABT | TDS_FIFO | TDS_CRS)) +#define TX_ERR_BITS "\20" \ + "\21CRS\22FIFO\23ABT\24OWC" + +/* Rx descriptor command bits. */ +#define RDC_OWN 0x80000000 +#define RDC_INTR 0x40000000 +#define RDC_IP_CSUM 0x20000000 +#define RDC_TCP_CSUM 0x10000000 +#define RDC_UDP_CSUM 0x08000000 +#define RDC_IP_CSUM_OK 0x04000000 +#define RDC_TCP_CSUM_OK 0x02000000 +#define RDC_UDP_CSUM_OK 0x01000000 +#define RDC_WAKEUP 0x00400000 +#define RDC_MAGIC 0x00200000 +#define RDC_PAUSE 0x00100000 +#define RDC_BCAST 0x000c0000 +#define RDC_MCAST 0x00080000 +#define RDC_UCAST 0x00040000 +#define RDC_CRCOFF 0x00020000 +#define RDC_PREADD 0x00010000 +#define RDC_VLAN_MASK 0x0000FFFF + +/* + * RX descriptor status bits + */ +#define RDS_VLAN 0x80000000 +#define RDS_DESCS 0x3f000000 +#define RDS_ABORT 0x00800000 +#define RDS_SHORT 0x00400000 +#define RDS_LIMIT 0x00200000 +#define RDS_MIIER 0x00100000 +#define RDS_OVRUN 0x00080000 +#define RDS_NIBON 0x00040000 +#define RDS_COLON 0x00020000 +#define RDS_CRCOK 0x00010000 +#define SGE_RX_ERROR(x) \ + ((x) & (RDS_COLON | RDS_NIBON | RDS_OVRUN | RDS_MIIER | \ + RDS_LIMIT | RDS_SHORT | RDS_ABORT)) +#define 
SGE_RX_NSEGS(x) (((x) & RDS_DESCS) >> 24) +#define RX_ERR_BITS "\20" \ + "\21CRCOK\22COLON\23NIBON\24OVRUN" \ + "\25MIIER\26LIMIT\27SHORT\30ABORT" \ + "\40VLAN" + +#define RING_END 0x80000000 +#define SGE_RX_BYTES(x) ((x) & 0xFFFF) +#define SGE_INC(x, y) (x) = (((x) + 1) % y) + +/* Taken from Solaris driver */ +#define EI_DATA 0xffff0000 +#define EI_DATA_SHIFT 16 +#define EI_OFFSET 0x0000fc00 +#define EI_OFFSET_SHIFT 10 +#define EI_OP 0x00000300 +#define EI_OP_SHIFT 8 +#define EI_OP_RD (2 << EI_OP_SHIFT) +#define EI_OP_WR (1 << EI_OP_SHIFT) +#define EI_REQ 0x00000080 +#define EI_DO 0x00000008 +#define EI_DI 0x00000004 +#define EI_CLK 0x00000002 +#define EI_CS 0x00000001 + +/* + * EEPROM Addresses + */ +#define EEPROMSignature 0x00 +#define EEPROMCLK 0x01 +#define EEPROMInfo 0x02 +#define EEPROMMACAddr 0x03 + +struct sge_desc { + uint32_t sge_sts_size; + uint32_t sge_cmdsts; + uint32_t sge_ptr; + uint32_t sge_flags; +}; + +#define SGE_RX_RING_CNT 256 /* [8, 1024] */ +#define SGE_TX_RING_CNT 256 /* [8, 8192] */ +#define SGE_DESC_ALIGN 16 +#define SGE_MAXTXSEGS 16 +#define SGE_RX_BUF_ALIGN sizeof(uint64_t) + +#define SGE_RX_RING_SZ (SGE_RX_RING_CNT * sizeof(struct sge_desc)) +#define SGE_TX_RING_SZ (SGE_TX_RING_CNT * sizeof(struct sge_desc)) +#define SGE_ADDR_LO(x) ((uint64_t) (x) & 0xFFFFFFFF) + +struct sge_list_data { + struct sge_desc *sge_rx_ring; + struct sge_desc *sge_tx_ring; + /* physical bus addresses of sge_rx_ring/sge_tx_ring */ + bus_addr_t sge_rx_paddr; + bus_addr_t sge_tx_paddr; +}; + +struct sge_txdesc { + struct mbuf *tx_m; + bus_dmamap_t tx_dmamap; + int tx_ndesc; +}; + +struct sge_rxdesc { + struct mbuf *rx_m; + bus_dmamap_t rx_dmamap; +}; + +struct sge_chain_data { + bus_dma_tag_t sge_tag; + bus_dma_tag_t sge_rx_tag; + bus_dma_tag_t sge_tx_tag; + bus_dmamap_t sge_rx_dmamap; + bus_dmamap_t sge_tx_dmamap; + bus_dma_tag_t sge_txmbuf_tag; + bus_dma_tag_t sge_rxmbuf_tag; + struct sge_txdesc sge_txdesc[SGE_TX_RING_CNT]; + struct sge_rxdesc 
sge_rxdesc[SGE_RX_RING_CNT]; + bus_dmamap_t sge_rx_spare_map; + int sge_rx_cons; + int sge_tx_prod; + int sge_tx_cons; + int sge_tx_cnt; +}; + +struct sge_type { + uint16_t sge_vid; + uint16_t sge_did; + char *sge_name; +}; + +struct sge_softc { + struct ifnet *sge_ifp; /* interface info */ + struct resource *sge_res; + int sge_res_id; + int sge_res_type; + struct resource *sge_irq; + void *sge_intrhand; + device_t sge_dev; + device_t sge_miibus; + uint8_t sge_rev; + struct sge_list_data sge_ldata; + struct sge_chain_data sge_cdata; + struct callout sge_stat_ch; + int sge_timer; + int sge_flags; +#define SGE_FLAG_FASTETHER 0x0001 +#define SGE_FLAG_SIS190 0x0002 +#define SGE_FLAG_RGMII 0x0010 +#define SGE_FLAG_SPEED_1000 0x2000 +#define SGE_FLAG_FDX 0x4000 +#define SGE_FLAG_LINK 0x8000 + int sge_if_flags; + int sge_intrcontrol; + int sge_intrtimer; + struct mtx sge_mtx; +}; + +#define SGE_LOCK(_sc) mtx_lock(&(_sc)->sge_mtx) +#define SGE_UNLOCK(_sc) mtx_unlock(&(_sc)->sge_mtx) +#define SGE_LOCK_ASSERT(_sc) mtx_assert(&(_sc)->sge_mtx, MA_OWNED) + +#define SGE_TIMEOUT 1000 + +#endif /* _IF_SGEREG_H */ diff --git a/sys/dev/siis/siis.c b/sys/dev/siis/siis.c index f03fba98027..937d04cfa1d 100644 --- a/sys/dev/siis/siis.c +++ b/sys/dev/siis/siis.c @@ -164,6 +164,7 @@ siis_attach(device_t dev) rman_fini(&ctlr->sc_iomem); return (error); } + pci_enable_busmaster(dev); /* Reset controller */ siis_resume(dev); /* Number of HW channels */ @@ -447,6 +448,8 @@ siis_ch_attach(device_t dev) ch->user[i].bytecount = 8192; ch->user[i].tags = SIIS_MAX_SLOTS; ch->curr[i] = ch->user[i]; + if (ch->pm_level) + ch->user[i].caps = CTS_SATA_CAPS_H_PMREQ; } mtx_init(&ch->mtx, "SIIS channel lock", NULL, MTX_DEF); rid = ch->unit; @@ -1609,6 +1612,23 @@ siis_sata_connect(struct siis_channel *ch) return (1); } +static int +siis_check_ids(device_t dev, union ccb *ccb) +{ + + if (ccb->ccb_h.target_id > 15) { + ccb->ccb_h.status = CAM_TID_INVALID; + xpt_done(ccb); + return (-1); + } + if 
(ccb->ccb_h.target_lun != 0) { + ccb->ccb_h.status = CAM_LUN_INVALID; + xpt_done(ccb); + return (-1); + } + return (0); +} + static void siisaction(struct cam_sim *sim, union ccb *ccb) { @@ -1625,9 +1645,12 @@ siisaction(struct cam_sim *sim, union ccb *ccb) /* Common cases first */ case XPT_ATA_IO: /* Execute the requested I/O operation */ case XPT_SCSI_IO: - if (ch->devices == 0) { + if (siis_check_ids(dev, ccb)) + return; + if (ch->devices == 0 || + (ch->pm_present == 0 && + ccb->ccb_h.target_id > 0 && ccb->ccb_h.target_id < 15)) { ccb->ccb_h.status = CAM_SEL_TIMEOUT; - xpt_done(ccb); break; } /* Check for command collision. */ @@ -1639,7 +1662,7 @@ siisaction(struct cam_sim *sim, union ccb *ccb) return; } siis_begin_transaction(dev, ccb); - break; + return; case XPT_EN_LUN: /* Enable LUN as a target */ case XPT_TARGET_IO: /* Execute target I/O request */ case XPT_ACCEPT_TARGET_IO: /* Accept Host Target Mode CDB */ @@ -1647,13 +1670,14 @@ siisaction(struct cam_sim *sim, union ccb *ccb) case XPT_ABORT: /* Abort the specified CCB */ /* XXX Implement */ ccb->ccb_h.status = CAM_REQ_INVALID; - xpt_done(ccb); break; case XPT_SET_TRAN_SETTINGS: { struct ccb_trans_settings *cts = &ccb->cts; struct siis_device *d; + if (siis_check_ids(dev, ccb)) + return; if (cts->type == CTS_TYPE_CURRENT_SETTINGS) d = &ch->curr[ccb->ccb_h.target_id]; else @@ -1675,8 +1699,9 @@ siisaction(struct cam_sim *sim, union ccb *ccb) } if (cts->xport_specific.sata.valid & CTS_SATA_VALID_TAGS) d->atapi = cts->xport_specific.sata.atapi; + if (cts->xport_specific.sata.valid & CTS_SATA_VALID_CAPS) + d->caps = cts->xport_specific.sata.caps; ccb->ccb_h.status = CAM_REQ_CMP; - xpt_done(ccb); break; } case XPT_GET_TRAN_SETTINGS: @@ -1686,6 +1711,8 @@ siisaction(struct cam_sim *sim, union ccb *ccb) struct siis_device *d; uint32_t status; + if (siis_check_ids(dev, ccb)) + return; if (cts->type == CTS_TYPE_CURRENT_SETTINGS) d = &ch->curr[ccb->ccb_h.target_id]; else @@ -1706,9 +1733,17 @@ siisaction(struct 
cam_sim *sim, union ccb *ccb) cts->xport_specific.sata.valid |= CTS_SATA_VALID_REVISION; } + cts->xport_specific.sata.caps = d->caps & CTS_SATA_CAPS_D; + if (ch->pm_level) + cts->xport_specific.sata.caps |= CTS_SATA_CAPS_H_PMREQ; + cts->xport_specific.sata.caps &= + ch->user[ccb->ccb_h.target_id].caps; + cts->xport_specific.sata.valid |= CTS_SATA_VALID_CAPS; } else { cts->xport_specific.sata.revision = d->revision; cts->xport_specific.sata.valid |= CTS_SATA_VALID_REVISION; + cts->xport_specific.sata.caps = d->caps; + cts->xport_specific.sata.valid |= CTS_SATA_VALID_CAPS; } cts->xport_specific.sata.mode = d->mode; cts->xport_specific.sata.valid |= CTS_SATA_VALID_MODE; @@ -1721,48 +1756,16 @@ siisaction(struct cam_sim *sim, union ccb *ccb) cts->xport_specific.sata.atapi = d->atapi; cts->xport_specific.sata.valid |= CTS_SATA_VALID_ATAPI; ccb->ccb_h.status = CAM_REQ_CMP; - xpt_done(ccb); break; } -#if 0 - case XPT_CALC_GEOMETRY: - { - struct ccb_calc_geometry *ccg; - uint32_t size_mb; - uint32_t secs_per_cylinder; - - ccg = &ccb->ccg; - size_mb = ccg->volume_size - / ((1024L * 1024L) / ccg->block_size); - if (size_mb >= 1024 && (aha->extended_trans != 0)) { - if (size_mb >= 2048) { - ccg->heads = 255; - ccg->secs_per_track = 63; - } else { - ccg->heads = 128; - ccg->secs_per_track = 32; - } - } else { - ccg->heads = 64; - ccg->secs_per_track = 32; - } - secs_per_cylinder = ccg->heads * ccg->secs_per_track; - ccg->cylinders = ccg->volume_size / secs_per_cylinder; - ccb->ccb_h.status = CAM_REQ_CMP; - xpt_done(ccb); - break; - } -#endif case XPT_RESET_BUS: /* Reset the specified SCSI bus */ case XPT_RESET_DEV: /* Bus Device Reset the specified SCSI device */ siis_reset(dev); ccb->ccb_h.status = CAM_REQ_CMP; - xpt_done(ccb); break; case XPT_TERM_IO: /* Terminate the I/O process */ /* XXX Implement */ ccb->ccb_h.status = CAM_REQ_INVALID; - xpt_done(ccb); break; case XPT_PATH_INQ: /* Path routing inquiry */ { @@ -1789,14 +1792,13 @@ siisaction(struct cam_sim *sim, union ccb 
*ccb) cpi->protocol_version = PROTO_VERSION_UNSPECIFIED; cpi->ccb_h.status = CAM_REQ_CMP; cpi->maxio = MAXPHYS; - xpt_done(ccb); break; } default: ccb->ccb_h.status = CAM_REQ_INVALID; - xpt_done(ccb); break; } + xpt_done(ccb); } static void diff --git a/sys/dev/siis/siis.h b/sys/dev/siis/siis.h index 92b4e26d13a..53176287d31 100644 --- a/sys/dev/siis/siis.h +++ b/sys/dev/siis/siis.h @@ -358,6 +358,7 @@ struct siis_device { u_int bytecount; u_int atapi; u_int tags; + u_int caps; }; /* structure describing an ATA channel */ diff --git a/sys/dev/sis/if_sis.c b/sys/dev/sis/if_sis.c index aac46f7bda0..19833423c82 100644 --- a/sys/dev/sis/if_sis.c +++ b/sys/dev/sis/if_sis.c @@ -1483,15 +1483,6 @@ sis_rxeof(struct sis_softc *sc) return (rx_npkts); } -static void -sis_rxeoc(struct sis_softc *sc) -{ - - SIS_LOCK_ASSERT(sc); - sis_rxeof(sc); - sis_initl(sc); -} - /* * A frame was downloaded to the chip. It's safe for us to clean up * the list buffers. @@ -1614,7 +1605,7 @@ sis_poll(struct ifnet *ifp, enum poll_cmd cmd, int count) status = CSR_READ_4(sc, SIS_ISR); if (status & (SIS_ISR_RX_ERR|SIS_ISR_RX_OFLOW)) - sis_rxeoc(sc); + ifp->if_ierrors++; if (status & (SIS_ISR_RX_IDLE)) SIS_SETBIT(sc, SIS_CSR, SIS_CSR_RX_ENABLE); @@ -1672,7 +1663,7 @@ sis_intr(void *arg) sis_rxeof(sc); if (status & SIS_ISR_RX_OFLOW) - sis_rxeoc(sc); + ifp->if_ierrors++; if (status & (SIS_ISR_RX_IDLE)) SIS_SETBIT(sc, SIS_CSR, SIS_CSR_RX_ENABLE); @@ -2017,7 +2008,7 @@ sis_initl(struct sis_softc *sc) CSR_WRITE_4(sc, NS_PHY_PAGE, 0x0001); reg = CSR_READ_4(sc, NS_PHY_DSPCFG) & 0xfff; CSR_WRITE_4(sc, NS_PHY_DSPCFG, reg | 0x1000); - DELAY(100000); + DELAY(100); reg = CSR_READ_4(sc, NS_PHY_TDATA) & 0xff; if ((reg & 0x0080) == 0 || (reg > 0xd8 && reg <= 0xff)) { device_printf(sc->sis_dev, diff --git a/sys/dev/smc/if_smc.c b/sys/dev/smc/if_smc.c index f7f54076a96..6d9ba592679 100644 --- a/sys/dev/smc/if_smc.c +++ b/sys/dev/smc/if_smc.c @@ -347,7 +347,7 @@ smc_attach(device_t dev) ifp->if_init = smc_init; 
ifp->if_ioctl = smc_ioctl; ifp->if_start = smc_start; - IFQ_SET_MAXLEN(&ifp->if_snd, IFQ_MAXLEN); + IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen); IFQ_SET_READY(&ifp->if_snd); ifp->if_capabilities = ifp->if_capenable = 0; diff --git a/sys/dev/sn/if_sn.c b/sys/dev/sn/if_sn.c index 217d811cb0a..0f86f92e76d 100644 --- a/sys/dev/sn/if_sn.c +++ b/sys/dev/sn/if_sn.c @@ -207,8 +207,8 @@ sn_attach(device_t dev) ifp->if_ioctl = snioctl; ifp->if_init = sninit; ifp->if_baudrate = 10000000; - IFQ_SET_MAXLEN(&ifp->if_snd, IFQ_MAXLEN); - ifp->if_snd.ifq_maxlen = IFQ_MAXLEN; + IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen); + ifp->if_snd.ifq_maxlen = ifqmaxlen; IFQ_SET_READY(&ifp->if_snd); ether_ifattach(ifp, eaddr); diff --git a/sys/dev/snc/dp83932.c b/sys/dev/snc/dp83932.c index c659c82fd2c..0b62053e2fe 100644 --- a/sys/dev/snc/dp83932.c +++ b/sys/dev/snc/dp83932.c @@ -177,7 +177,7 @@ sncconfig(sc, media, nmedia, defmedia, myea) ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_init = sncinit; ifp->if_mtu = ETHERMTU; - IFQ_SET_MAXLEN(&ifp->if_snd, IFQ_MAXLEN); + IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen); /* Initialize media goo. 
*/ ifmedia_init(&sc->sc_media, 0, snc_mediachange, diff --git a/sys/dev/sound/pcm/buffer.c b/sys/dev/sound/pcm/buffer.c index 6e8259202dc..687542e4a84 100644 --- a/sys/dev/sound/pcm/buffer.c +++ b/sys/dev/sound/pcm/buffer.c @@ -565,30 +565,6 @@ sndbuf_updateprevtotal(struct snd_dbuf *b) b->prev_total = b->total; } -unsigned int -snd_xbytes(unsigned int v, unsigned int from, unsigned int to) -{ - unsigned int w, x, y; - - if (from == to) - return v; - - if (from == 0 || to == 0 || v == 0) - return 0; - - x = from; - y = to; - while (y != 0) { - w = x % y; - x = y; - y = w; - } - from /= x; - to /= x; - - return (unsigned int)(((u_int64_t)v * to) / from); -} - unsigned int sndbuf_xbytes(unsigned int v, struct snd_dbuf *from, struct snd_dbuf *to) { diff --git a/sys/dev/sound/pcm/buffer.h b/sys/dev/sound/pcm/buffer.h index 91a63af0760..d079cdbad4f 100644 --- a/sys/dev/sound/pcm/buffer.h +++ b/sys/dev/sound/pcm/buffer.h @@ -111,7 +111,6 @@ u_int64_t sndbuf_getblocks(struct snd_dbuf *b); u_int64_t sndbuf_getprevblocks(struct snd_dbuf *b); u_int64_t sndbuf_gettotal(struct snd_dbuf *b); u_int64_t sndbuf_getprevtotal(struct snd_dbuf *b); -unsigned int snd_xbytes(unsigned int v, unsigned int from, unsigned int to); unsigned int sndbuf_xbytes(unsigned int v, struct snd_dbuf *from, struct snd_dbuf *to); u_int8_t sndbuf_zerodata(u_int32_t fmt); void sndbuf_updateprevtotal(struct snd_dbuf *b); @@ -132,3 +131,14 @@ void sndbuf_dmabounce(struct snd_dbuf *b); #ifdef OSSV4_EXPERIMENT void sndbuf_getpeaks(struct snd_dbuf *b, int *lp, int *rp); #endif + +static inline u_int32_t +snd_xbytes(u_int32_t v, u_int32_t from, u_int32_t to) +{ + + if (from == to) + return (v); + if (from == 0) + return (0); + return ((u_int64_t)v * to / from); +} diff --git a/sys/dev/sound/usb/uaudio.c b/sys/dev/sound/usb/uaudio.c index 069a8c04887..16132667d5a 100644 --- a/sys/dev/sound/usb/uaudio.c +++ b/sys/dev/sound/usb/uaudio.c @@ -91,7 +91,7 @@ static int uaudio_default_rate = 0; /* use rate list */ 
static int uaudio_default_bits = 32; static int uaudio_default_channels = 0; /* use default */ -#if USB_DEBUG +#ifdef USB_DEBUG static int uaudio_debug = 0; SYSCTL_NODE(_hw_usb, OID_AUTO, uaudio, CTLFLAG_RW, 0, "USB uaudio"); @@ -321,7 +321,7 @@ static const struct uaudio_format uaudio_formats[] = { #define UAC_RECORD 3 #define UAC_NCLASSES 4 -#if USB_DEBUG +#ifdef USB_DEBUG static const char *uac_names[] = { "outputs", "inputs", "equalization", "record" }; @@ -406,7 +406,7 @@ static void umidi_init(device_t dev); static int32_t umidi_probe(device_t dev); static int32_t umidi_detach(device_t dev); -#if USB_DEBUG +#ifdef USB_DEBUG static void uaudio_chan_dump_ep_desc( const usb_endpoint_descriptor_audio_t *); static void uaudio_mixer_dump_cluster(uint8_t, @@ -780,7 +780,7 @@ uaudio_detach(device_t dev) * AS - Audio Stream - routines *========================================================================*/ -#if USB_DEBUG +#ifdef USB_DEBUG static void uaudio_chan_dump_ep_desc(const usb_endpoint_descriptor_audio_t *ed) { @@ -1019,7 +1019,7 @@ uaudio_chan_fill_info_sub(struct uaudio_softc *sc, struct usb_device *udev, if ((chan->valid == 0) && usbd_get_iface(udev, curidx)) { chan->valid = 1; -#if USB_DEBUG +#ifdef USB_DEBUG uaudio_chan_dump_ep_desc(ed1); uaudio_chan_dump_ep_desc(ed2); @@ -1689,7 +1689,7 @@ uaudio_mixer_add_ctl(struct uaudio_softc *sc, struct uaudio_mixer_node *mc) uaudio_mixer_add_ctl_sub(sc, mc); -#if USB_DEBUG +#ifdef USB_DEBUG if (uaudio_debug > 2) { uint8_t i; @@ -1708,7 +1708,7 @@ static void uaudio_mixer_add_input(struct uaudio_softc *sc, const struct uaudio_terminal_node *iot, int id) { -#if USB_DEBUG +#ifdef USB_DEBUG const struct usb_audio_input_terminal *d = iot[id].u.it; DPRINTFN(3, "bTerminalId=%d wTerminalType=0x%04x " @@ -1724,7 +1724,7 @@ static void uaudio_mixer_add_output(struct uaudio_softc *sc, const struct uaudio_terminal_node *iot, int id) { -#if USB_DEBUG +#ifdef USB_DEBUG const struct usb_audio_output_terminal *d = iot[id].u.ot; 
DPRINTFN(3, "bTerminalId=%d wTerminalType=0x%04x " @@ -2257,7 +2257,7 @@ error: return (NULL); } -#if USB_DEBUG +#ifdef USB_DEBUG static void uaudio_mixer_dump_cluster(uint8_t id, const struct uaudio_terminal_node *iot) { @@ -2350,7 +2350,7 @@ done: return (r); } -#if USB_DEBUG +#ifdef USB_DEBUG struct uaudio_tt_to_string { uint16_t terminal_type; @@ -2856,7 +2856,7 @@ uaudio_mixer_fill_info(struct uaudio_softc *sc, struct usb_device *udev, (iot + i)->root = iot; } while (i--); -#if USB_DEBUG +#ifdef USB_DEBUG i = ID_max; do { uint8_t j; diff --git a/sys/dev/sym/sym_hipd.c b/sys/dev/sym/sym_hipd.c index a43c816c65f..5050a5f9a9b 100644 --- a/sys/dev/sym/sym_hipd.c +++ b/sys/dev/sym/sym_hipd.c @@ -87,6 +87,12 @@ __FBSDID("$FreeBSD$"); #include #include + +#ifdef __sparc64__ +#include +#include +#endif + #include #include @@ -98,10 +104,6 @@ __FBSDID("$FreeBSD$"); #include #include -#include -#include -#include - /* Short and quite clear integer types */ typedef int8_t s8; typedef int16_t s16; @@ -2682,6 +2684,9 @@ static int sym_prepare_setting(hcb_p np, struct sym_nvram *nvram) */ np->myaddr = 255; sym_nvram_setup_host (np, nvram); +#ifdef __sparc64__ + np->myaddr = OF_getscsinitid(np->device); +#endif /* * Get SCSI addr of host adapter (set by bios?). 
diff --git a/sys/dev/ti/if_ti.c b/sys/dev/ti/if_ti.c index 20130bcd15f..7eeee36fe27 100644 --- a/sys/dev/ti/if_ti.c +++ b/sys/dev/ti/if_ti.c @@ -1488,10 +1488,8 @@ ti_newbuf_jumbo(sc, idx, m_old) } sf[i] = sf_buf_alloc(frame, SFB_NOWAIT); if (sf[i] == NULL) { - vm_page_lock_queues(); vm_page_unwire(frame, 0); vm_page_free(frame); - vm_page_unlock_queues(); device_printf(sc->ti_dev, "buffer allocation " "failed -- packet dropped!\n"); printf(" index %d page %d\n", idx, i); diff --git a/sys/dev/uart/uart_dev_ns8250.c b/sys/dev/uart/uart_dev_ns8250.c index c01fd615009..b93e254d519 100644 --- a/sys/dev/uart/uart_dev_ns8250.c +++ b/sys/dev/uart/uart_dev_ns8250.c @@ -604,7 +604,7 @@ ns8250_bus_ipend(struct uart_softc *sc) if (ipend == 0) ns8250_clrint(bas); uart_unlock(sc->sc_hwmtx); - return ((sc->sc_leaving) ? 0 : ipend); + return (ipend); } static int diff --git a/sys/dev/usb/controller/ehci.c b/sys/dev/usb/controller/ehci.c index 28ad987b31d..12cfe53680c 100644 --- a/sys/dev/usb/controller/ehci.c +++ b/sys/dev/usb/controller/ehci.c @@ -89,7 +89,7 @@ __FBSDID("$FreeBSD$"); ((ehci_softc_t *)(((uint8_t *)(bus)) - \ ((uint8_t *)&(((ehci_softc_t *)0)->sc_bus)))) -#if USB_DEBUG +#ifdef USB_DEBUG static int ehcidebug = 0; static int ehcinohighspeed = 0; static int ehciiaadbug = 0; @@ -258,7 +258,7 @@ ehci_init(ehci_softc_t *sc) usb_callout_init_mtx(&sc->sc_tmo_pcd, &sc->sc_bus.bus_mtx, 0); usb_callout_init_mtx(&sc->sc_tmo_poll, &sc->sc_bus.bus_mtx, 0); -#if USB_DEBUG +#ifdef USB_DEBUG if (ehciiaadbug) sc->sc_flags |= EHCI_SCFLG_IAADBUG; if (ehcilostintrbug) @@ -486,7 +486,7 @@ ehci_init(ehci_softc_t *sc) usb_bus_mem_flush_all(&sc->sc_bus, &ehci_iterate_hw_softc); -#if USB_DEBUG +#ifdef USB_DEBUG if (ehcidebug) { ehci_dump_sqh(sc, sc->sc_async_p_last); } @@ -685,7 +685,7 @@ ehci_shutdown(ehci_softc_t *sc) } } -#if USB_DEBUG +#ifdef USB_DEBUG static void ehci_dump_regs(ehci_softc_t *sc) { @@ -1229,7 +1229,7 @@ ehci_non_isoc_done_sub(struct usb_xfer *xfer) 
xfer->td_transfer_cache = td; -#if USB_DEBUG +#ifdef USB_DEBUG if (status & EHCI_QTD_STATERRS) { DPRINTFN(11, "error, addr=%d, endpt=0x%02x, frame=0x%02x" "status=%s%s%s%s%s%s%s%s\n", @@ -1260,7 +1260,7 @@ ehci_non_isoc_done(struct usb_xfer *xfer) DPRINTFN(13, "xfer=%p endpoint=%p transfer done\n", xfer, xfer->endpoint); -#if USB_DEBUG +#ifdef USB_DEBUG if (ehcidebug > 10) { ehci_softc_t *sc = EHCI_BUS2SC(xfer->xroot->bus); @@ -1527,7 +1527,7 @@ ehci_interrupt(ehci_softc_t *sc) DPRINTFN(16, "real interrupt\n"); -#if USB_DEBUG +#ifdef USB_DEBUG if (ehcidebug > 15) { ehci_dump_regs(sc); } @@ -1548,7 +1548,7 @@ ehci_interrupt(ehci_softc_t *sc) if (status & EHCI_STS_HSE) { printf("%s: unrecoverable error, " "controller halted\n", __FUNCTION__); -#if USB_DEBUG +#ifdef USB_DEBUG ehci_dump_regs(sc); ehci_dump_isoc(sc); #endif @@ -1978,7 +1978,7 @@ ehci_setup_standard_chain(struct usb_xfer *xfer, ehci_qh_t **qh_last) xfer->td_transfer_last = td; -#if USB_DEBUG +#ifdef USB_DEBUG if (ehcidebug > 8) { DPRINTF("nexttog=%d; data before transfer:\n", xfer->endpoint->toggle_next); @@ -2106,7 +2106,7 @@ ehci_isoc_fs_done(ehci_softc_t *sc, struct usb_xfer *xfer) if (pp_last >= &sc->sc_isoc_fs_p_last[EHCI_VIRTUAL_FRAMELIST_COUNT]) { pp_last = &sc->sc_isoc_fs_p_last[0]; } -#if USB_DEBUG +#ifdef USB_DEBUG if (ehcidebug > 15) { DPRINTF("isoc FS-TD\n"); ehci_dump_sitd(sc, td); @@ -2160,7 +2160,7 @@ ehci_isoc_hs_done(ehci_softc_t *sc, struct usb_xfer *xfer) if (pp_last >= &sc->sc_isoc_hs_p_last[EHCI_VIRTUAL_FRAMELIST_COUNT]) { pp_last = &sc->sc_isoc_hs_p_last[0]; } -#if USB_DEBUG +#ifdef USB_DEBUG if (ehcidebug > 15) { DPRINTF("isoc HS-TD\n"); ehci_dump_itd(sc, td); @@ -2224,7 +2224,7 @@ ehci_device_done(struct usb_xfer *xfer, usb_error_t error) if ((methods == &ehci_device_bulk_methods) || (methods == &ehci_device_ctrl_methods)) { -#if USB_DEBUG +#ifdef USB_DEBUG if (ehcidebug > 8) { DPRINTF("nexttog=%d; data after transfer:\n", xfer->endpoint->toggle_next); @@ -2509,7 +2509,7 @@ 
ehci_device_isoc_fs_enter(struct usb_xfer *xfer) uint8_t sb; uint8_t error; -#if USB_DEBUG +#ifdef USB_DEBUG uint8_t once = 1; #endif @@ -2593,7 +2593,7 @@ ehci_device_isoc_fs_enter(struct usb_xfer *xfer) /* reuse sitd_portaddr and sitd_back from last transfer */ if (*plen > xfer->max_frame_size) { -#if USB_DEBUG +#ifdef USB_DEBUG if (once) { once = 0; printf("%s: frame length(%d) exceeds %d " @@ -2683,7 +2683,7 @@ ehci_device_isoc_fs_enter(struct usb_xfer *xfer) } usb_pc_cpu_flush(td->page_cache); -#if USB_DEBUG +#ifdef USB_DEBUG if (ehcidebug > 15) { DPRINTF("FS-TD %d\n", nframes); ehci_dump_sitd(sc, td); @@ -2800,7 +2800,7 @@ ehci_device_isoc_hs_enter(struct usb_xfer *xfer) uint8_t td_no; uint8_t page_no; -#if USB_DEBUG +#ifdef USB_DEBUG uint8_t once = 1; #endif @@ -2878,7 +2878,7 @@ ehci_device_isoc_hs_enter(struct usb_xfer *xfer) } /* range check */ if (*plen > xfer->max_frame_size) { -#if USB_DEBUG +#ifdef USB_DEBUG if (once) { once = 0; printf("%s: frame length(%d) exceeds %d bytes " @@ -2962,7 +2962,7 @@ ehci_device_isoc_hs_enter(struct usb_xfer *xfer) td->itd_status[td_no - 1] |= htohc32(sc, EHCI_ITD_IOC); } usb_pc_cpu_flush(td->page_cache); -#if USB_DEBUG +#ifdef USB_DEBUG if (ehcidebug > 15) { DPRINTF("HS-TD %d\n", nframes); ehci_dump_itd(sc, td); @@ -3398,7 +3398,7 @@ ehci_roothub_exec(struct usb_device *udev, break; case UHF_PORT_RESET: DPRINTFN(6, "reset port %d\n", index); -#if USB_DEBUG +#ifdef USB_DEBUG if (ehcinohighspeed) { /* * Connect USB device to companion diff --git a/sys/dev/usb/controller/uhci.c b/sys/dev/usb/controller/uhci.c index 837a26f16a1..f87907c4925 100644 --- a/sys/dev/usb/controller/uhci.c +++ b/sys/dev/usb/controller/uhci.c @@ -82,7 +82,7 @@ __FBSDID("$FreeBSD$"); ((uhci_softc_t *)(((uint8_t *)(bus)) - \ ((uint8_t *)&(((uhci_softc_t *)0)->sc_bus)))) -#if USB_DEBUG +#ifdef USB_DEBUG static int uhcidebug = 0; static int uhcinoloop = 0; @@ -459,7 +459,7 @@ uhci_init(uhci_softc_t *sc) usb_callout_init_mtx(&sc->sc_root_intr, 
&sc->sc_bus.bus_mtx, 0); -#if USB_DEBUG +#ifdef USB_DEBUG if (uhcidebug > 2) { uhci_dumpregs(sc); } @@ -668,7 +668,7 @@ uhci_suspend(uhci_softc_t *sc) { USB_BUS_LOCK(&sc->sc_bus); -#if USB_DEBUG +#ifdef USB_DEBUG if (uhcidebug > 2) { uhci_dumpregs(sc); } @@ -712,7 +712,7 @@ uhci_resume(uhci_softc_t *sc) uhci_start(sc); -#if USB_DEBUG +#ifdef USB_DEBUG if (uhcidebug > 2) { uhci_dumpregs(sc); } @@ -724,7 +724,7 @@ uhci_resume(uhci_softc_t *sc) uhci_do_poll(&sc->sc_bus); } -#if USB_DEBUG +#ifdef USB_DEBUG static void uhci_dumpregs(uhci_softc_t *sc) { @@ -855,7 +855,7 @@ uhci_add_loop(uhci_softc_t *sc) struct uhci_qh *qh_lst; struct uhci_qh *qh_rec; -#if USB_DEBUG +#ifdef USB_DEBUG if (uhcinoloop) { return; } @@ -878,7 +878,7 @@ uhci_rem_loop(uhci_softc_t *sc) { struct uhci_qh *qh_lst; -#if USB_DEBUG +#ifdef USB_DEBUG if (uhcinoloop) { return; } @@ -1046,7 +1046,7 @@ uhci_isoc_done(uhci_softc_t *sc, struct usb_xfer *xfer) if (pp_last >= &sc->sc_isoc_p_last[UHCI_VFRAMELIST_COUNT]) { pp_last = &sc->sc_isoc_p_last[0]; } -#if USB_DEBUG +#ifdef USB_DEBUG if (uhcidebug > 5) { DPRINTF("isoc TD\n"); uhci_dump_td(td); @@ -1177,7 +1177,7 @@ uhci_non_isoc_done_sub(struct usb_xfer *xfer) xfer->endpoint->toggle_next = (token & UHCI_TD_SET_DT(1)) ? 
0 : 1; -#if USB_DEBUG +#ifdef USB_DEBUG if (status & UHCI_TD_ERROR) { DPRINTFN(11, "error, addr=%d, endpt=0x%02x, frame=0x%02x " "status=%s%s%s%s%s%s%s%s%s%s%s\n", @@ -1207,7 +1207,7 @@ uhci_non_isoc_done(struct usb_xfer *xfer) DPRINTFN(13, "xfer=%p endpoint=%p transfer done\n", xfer, xfer->endpoint); -#if USB_DEBUG +#ifdef USB_DEBUG if (uhcidebug > 10) { uhci_dump_tds(xfer->td_transfer_first); } @@ -1446,7 +1446,7 @@ uhci_interrupt(uhci_softc_t *sc) DPRINTFN(16, "real interrupt\n"); -#if USB_DEBUG +#ifdef USB_DEBUG if (uhcidebug > 15) { uhci_dumpregs(sc); } @@ -1460,7 +1460,7 @@ uhci_interrupt(uhci_softc_t *sc) UHCI_STS_HCPE | UHCI_STS_HCH)) { if (status & UHCI_STS_RD) { -#if USB_DEBUG +#ifdef USB_DEBUG printf("%s: resume detect\n", __FUNCTION__); #endif @@ -1477,7 +1477,7 @@ uhci_interrupt(uhci_softc_t *sc) /* no acknowledge needed */ DPRINTF("%s: host controller halted\n", __FUNCTION__); -#if USB_DEBUG +#ifdef USB_DEBUG if (uhcidebug > 0) { uhci_dump_all(sc); } @@ -1839,7 +1839,7 @@ uhci_setup_standard_chain(struct usb_xfer *xfer) xfer->td_transfer_last = td; -#if USB_DEBUG +#ifdef USB_DEBUG if (uhcidebug > 8) { DPRINTF("nexttog=%d; data before transfer:\n", xfer->endpoint->toggle_next); @@ -2155,7 +2155,7 @@ uhci_device_isoc_enter(struct usb_xfer *xfer) uint32_t temp; uint32_t *plen; -#if USB_DEBUG +#ifdef USB_DEBUG uint8_t once = 1; #endif @@ -2227,7 +2227,7 @@ uhci_device_isoc_enter(struct usb_xfer *xfer) pp_last = &sc->sc_isoc_p_last[0]; } if (*plen > xfer->max_frame_size) { -#if USB_DEBUG +#ifdef USB_DEBUG if (once) { once = 0; printf("%s: frame length(%d) exceeds %d " @@ -2279,7 +2279,7 @@ uhci_device_isoc_enter(struct usb_xfer *xfer) usb_pc_cpu_flush(td->page_cache); -#if USB_DEBUG +#ifdef USB_DEBUG if (uhcidebug > 5) { DPRINTF("TD %d\n", nframes); uhci_dump_td(td); diff --git a/sys/dev/usb/controller/uss820dci.c b/sys/dev/usb/controller/uss820dci.c index c910c1f21dd..74221695f84 100644 --- a/sys/dev/usb/controller/uss820dci.c +++ 
b/sys/dev/usb/controller/uss820dci.c @@ -77,7 +77,7 @@ #define USS820_DCI_PC2SC(pc) \ USS820_DCI_BUS2SC(USB_DMATAG_TO_XROOT((pc)->tag_parent)->bus) -#if USB_DEBUG +#ifdef USB_DEBUG static int uss820dcidebug = 0; SYSCTL_NODE(_hw_usb, OID_AUTO, uss820dci, CTLFLAG_RW, 0, "USB uss820dci"); diff --git a/sys/dev/usb/input/atp.c b/sys/dev/usb/input/atp.c index 6c0ce2c7036..c0fe6d4b386 100644 --- a/sys/dev/usb/input/atp.c +++ b/sys/dev/usb/input/atp.c @@ -116,7 +116,7 @@ __FBSDID("$FreeBSD$"); /* Tunables */ SYSCTL_NODE(_hw_usb, OID_AUTO, atp, CTLFLAG_RW, 0, "USB atp"); -#if USB_DEBUG +#ifdef USB_DEBUG enum atp_log_level { ATP_LLEVEL_DISABLED = 0, ATP_LLEVEL_ERROR, @@ -126,7 +126,7 @@ enum atp_log_level { static int atp_debug = ATP_LLEVEL_ERROR; /* the default is to only log errors */ SYSCTL_INT(_hw_usb_atp, OID_AUTO, debug, CTLFLAG_RW, &atp_debug, ATP_LLEVEL_ERROR, "ATP debug level"); -#endif /* #if USB_DEBUG */ +#endif /* USB_DEBUG */ static u_int atp_touch_timeout = ATP_TOUCH_TIMEOUT; SYSCTL_INT(_hw_usb_atp, OID_AUTO, touch_timeout, CTLFLAG_RW, &atp_touch_timeout, @@ -1055,7 +1055,7 @@ atp_update_strokes(struct atp_softc *sc, atp_pspan *pspans_x, if (pspans_y[j].matched == FALSE) break; } if ((i < n_xpspans) && (j < n_ypspans)) { -#if USB_DEBUG +#ifdef USB_DEBUG if (atp_debug >= ATP_LLEVEL_INFO) { printf("unmatched pspans:"); for (; i < n_xpspans; i++) { @@ -1072,7 +1072,7 @@ atp_update_strokes(struct atp_softc *sc, atp_pspan *pspans_x, } printf("\n"); } -#endif /* #if USB_DEBUG */ +#endif /* USB_DEBUG */ if ((n_xpspans == 1) && (n_ypspans == 1)) /* The common case of a single pair of new pspans. 
*/ atp_add_stroke(sc, &pspans_x[0], &pspans_y[0]); @@ -1082,7 +1082,7 @@ atp_update_strokes(struct atp_softc *sc, atp_pspan *pspans_x, pspans_y, n_ypspans); } -#if USB_DEBUG +#ifdef USB_DEBUG if (atp_debug >= ATP_LLEVEL_INFO) { for (i = 0; i < sc->sc_n_strokes; i++) { atp_stroke *stroke = &sc->sc_strokes[i]; @@ -1110,7 +1110,7 @@ atp_update_strokes(struct atp_softc *sc, atp_pspan *pspans_x, if (sc->sc_n_strokes) printf("\n"); } -#endif /* #if USB_DEBUG */ +#endif /* USB_DEBUG */ return (movement); } diff --git a/sys/dev/usb/input/uhid.c b/sys/dev/usb/input/uhid.c index a471991f62e..d36ecf36240 100644 --- a/sys/dev/usb/input/uhid.c +++ b/sys/dev/usb/input/uhid.c @@ -76,7 +76,7 @@ __FBSDID("$FreeBSD$"); #include #include -#if USB_DEBUG +#ifdef USB_DEBUG static int uhid_debug = 0; SYSCTL_NODE(_hw_usb, OID_AUTO, uhid, CTLFLAG_RW, 0, "USB uhid"); diff --git a/sys/dev/usb/input/ukbd.c b/sys/dev/usb/input/ukbd.c index 8584f8d505d..034a781b2fe 100644 --- a/sys/dev/usb/input/ukbd.c +++ b/sys/dev/usb/input/ukbd.c @@ -88,7 +88,7 @@ __FBSDID("$FreeBSD$"); /* the following file must be included after "ukbdmap.h" */ #include -#if USB_DEBUG +#ifdef USB_DEBUG static int ukbd_debug = 0; static int ukbd_no_leds = 0; @@ -102,8 +102,6 @@ TUNABLE_INT("hw.usb.ukbd.debug", &ukbd_debug); TUNABLE_INT("hw.usb.ukbd.no_leds", &ukbd_no_leds); #endif -#define UPROTO_BOOT_KEYBOARD 1 - #define UKBD_EMULATE_ATSCANCODE 1 #define UKBD_DRIVER_NAME "ukbd" #define UKBD_NMOD 8 /* units */ @@ -614,7 +612,7 @@ ukbd_intr_callback(struct usb_xfer *xfer, usb_error_t error) apple_fn = 1; else apple_fn = 0; -#if USB_DEBUG +#ifdef USB_DEBUG DPRINTF("apple_eject=%u apple_fn=%u\n", apple_eject, apple_fn); @@ -680,7 +678,7 @@ ukbd_set_leds_callback(struct usb_xfer *xfer, usb_error_t error) uint8_t buf[2]; struct ukbd_softc *sc = usbd_xfer_softc(xfer); -#if USB_DEBUG +#ifdef USB_DEBUG if (ukbd_no_leds) return; #endif @@ -770,7 +768,7 @@ ukbd_probe(device_t dev) return (ENXIO); if ((uaa->info.bInterfaceSubClass == 
UISUBCLASS_BOOT) && - (uaa->info.bInterfaceProtocol == UPROTO_BOOT_KEYBOARD)) { + (uaa->info.bInterfaceProtocol == UIPROTO_BOOT_KEYBOARD)) { if (usb_test_quirk(uaa, UQ_KBD_IGNORE)) return (ENXIO); else diff --git a/sys/dev/usb/input/ums.c b/sys/dev/usb/input/ums.c index a4385b95d11..e1e37becb81 100644 --- a/sys/dev/usb/input/ums.c +++ b/sys/dev/usb/input/ums.c @@ -74,7 +74,7 @@ __FBSDID("$FreeBSD$"); #include #include -#if USB_DEBUG +#ifdef USB_DEBUG static int ums_debug = 0; SYSCTL_NODE(_hw_usb, OID_AUTO, ums, CTLFLAG_RW, 0, "USB ums"); @@ -494,7 +494,9 @@ ums_attach(device_t dev) int err; uint16_t d_len; uint8_t i; +#ifdef USB_DEBUG uint8_t j; +#endif DPRINTFN(11, "sc=%p\n", sc); @@ -588,7 +590,7 @@ ums_attach(device_t dev) free(d_ptr, M_TEMP); d_ptr = NULL; -#if USB_DEBUG +#ifdef USB_DEBUG for (j = 0; j < UMS_INFO_MAX; j++) { info = &sc->sc_info[j]; diff --git a/sys/dev/usb/misc/udbp.c b/sys/dev/usb/misc/udbp.c index ada6b3c0a29..5eef3103c31 100644 --- a/sys/dev/usb/misc/udbp.c +++ b/sys/dev/usb/misc/udbp.c @@ -94,7 +94,7 @@ __FBSDID("$FreeBSD$"); #include -#if USB_DEBUG +#ifdef USB_DEBUG static int udbp_debug = 0; SYSCTL_NODE(_hw_usb, OID_AUTO, udbp, CTLFLAG_RW, 0, "USB udbp"); diff --git a/sys/dev/usb/net/if_aue.c b/sys/dev/usb/net/if_aue.c index b508474e468..a8c0a548b51 100644 --- a/sys/dev/usb/net/if_aue.c +++ b/sys/dev/usb/net/if_aue.c @@ -100,7 +100,7 @@ __FBSDID("$FreeBSD$"); #include #include -#if USB_DEBUG +#ifdef USB_DEBUG static int aue_debug = 0; SYSCTL_NODE(_hw_usb, OID_AUTO, aue, CTLFLAG_RW, 0, "USB aue"); diff --git a/sys/dev/usb/net/if_axe.c b/sys/dev/usb/net/if_axe.c index 9772f401b68..e255c855c94 100644 --- a/sys/dev/usb/net/if_axe.c +++ b/sys/dev/usb/net/if_axe.c @@ -123,7 +123,7 @@ __FBSDID("$FreeBSD$"); */ #define AXE_178_MAX_FRAME_BURST 1 -#if USB_DEBUG +#ifdef USB_DEBUG static int axe_debug = 0; SYSCTL_NODE(_hw_usb, OID_AUTO, axe, CTLFLAG_RW, 0, "USB axe"); diff --git a/sys/dev/usb/net/if_cdce.c b/sys/dev/usb/net/if_cdce.c index 
2fcb0ff0b5b..b5e7fd464bd 100644 --- a/sys/dev/usb/net/if_cdce.c +++ b/sys/dev/usb/net/if_cdce.c @@ -108,7 +108,7 @@ static uether_fn_t cdce_setpromisc; static uint32_t cdce_m_crc32(struct mbuf *, uint32_t, uint32_t); -#if USB_DEBUG +#ifdef USB_DEBUG static int cdce_debug = 0; SYSCTL_NODE(_hw_usb, OID_AUTO, cdce, CTLFLAG_RW, 0, "USB CDC-Ethernet"); diff --git a/sys/dev/usb/net/if_cue.c b/sys/dev/usb/net/if_cue.c index 05ff1a54a88..e26b29f6fd2 100644 --- a/sys/dev/usb/net/if_cue.c +++ b/sys/dev/usb/net/if_cue.c @@ -122,7 +122,7 @@ static int cue_getmac(struct cue_softc *, void *); static uint32_t cue_mchash(const uint8_t *); static void cue_reset(struct cue_softc *); -#if USB_DEBUG +#ifdef USB_DEBUG static int cue_debug = 0; SYSCTL_NODE(_hw_usb, OID_AUTO, cue, CTLFLAG_RW, 0, "USB cue"); diff --git a/sys/dev/usb/net/if_kue.c b/sys/dev/usb/net/if_kue.c index 5d35da40bd5..4eee0945d9f 100644 --- a/sys/dev/usb/net/if_kue.c +++ b/sys/dev/usb/net/if_kue.c @@ -163,7 +163,7 @@ static int kue_ctl(struct kue_softc *, uint8_t, uint8_t, uint16_t, static int kue_load_fw(struct kue_softc *); static void kue_reset(struct kue_softc *); -#if USB_DEBUG +#ifdef USB_DEBUG static int kue_debug = 0; SYSCTL_NODE(_hw_usb, OID_AUTO, kue, CTLFLAG_RW, 0, "USB kue"); diff --git a/sys/dev/usb/net/if_rue.c b/sys/dev/usb/net/if_rue.c index 3e77305aa97..f0d16086b20 100644 --- a/sys/dev/usb/net/if_rue.c +++ b/sys/dev/usb/net/if_rue.c @@ -97,7 +97,7 @@ __FBSDID("$FreeBSD$"); #include #include -#if USB_DEBUG +#ifdef USB_DEBUG static int rue_debug = 0; SYSCTL_NODE(_hw_usb, OID_AUTO, rue, CTLFLAG_RW, 0, "USB rue"); diff --git a/sys/dev/usb/net/if_udav.c b/sys/dev/usb/net/if_udav.c index ae30b142da6..f56e9b03829 100644 --- a/sys/dev/usb/net/if_udav.c +++ b/sys/dev/usb/net/if_udav.c @@ -185,7 +185,7 @@ static const struct usb_ether_methods udav_ue_methods = { .ue_mii_sts = udav_ifmedia_status, }; -#if USB_DEBUG +#ifdef USB_DEBUG static int udav_debug = 0; SYSCTL_NODE(_hw_usb, OID_AUTO, udav, CTLFLAG_RW, 0, 
"USB udav"); diff --git a/sys/dev/usb/net/uhso.c b/sys/dev/usb/net/uhso.c index a810ec48c5d..21a23c9f6fc 100644 --- a/sys/dev/usb/net/uhso.c +++ b/sys/dev/usb/net/uhso.c @@ -1471,8 +1471,8 @@ static int uhso_attach_ifnet(struct uhso_softc *sc, struct usb_interface *iface, ifp->if_output = uhso_if_output; ifp->if_flags = 0; ifp->if_softc = sc; - IFQ_SET_MAXLEN(&ifp->if_snd, IFQ_MAXLEN); - ifp->if_snd.ifq_drv_maxlen = IFQ_MAXLEN; + IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen); + ifp->if_snd.ifq_drv_maxlen = ifqmaxlen; IFQ_SET_READY(&ifp->if_snd); if_attach(ifp); diff --git a/sys/dev/usb/net/usb_ethernet.c b/sys/dev/usb/net/usb_ethernet.c index 6cf446016e7..bd75cef9657 100644 --- a/sys/dev/usb/net/usb_ethernet.c +++ b/sys/dev/usb/net/usb_ethernet.c @@ -214,8 +214,8 @@ ue_attach_post_task(struct usb_proc_msg *_task) ifp->if_ioctl = uether_ioctl; ifp->if_start = ue_start; ifp->if_init = ue_init; - IFQ_SET_MAXLEN(&ifp->if_snd, IFQ_MAXLEN); - ifp->if_snd.ifq_drv_maxlen = IFQ_MAXLEN; + IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen); + ifp->if_snd.ifq_drv_maxlen = ifqmaxlen; IFQ_SET_READY(&ifp->if_snd); ue->ue_ifp = ifp; diff --git a/sys/dev/usb/serial/u3g.c b/sys/dev/usb/serial/u3g.c index 7ac1bd3310d..ffe88126dfa 100644 --- a/sys/dev/usb/serial/u3g.c +++ b/sys/dev/usb/serial/u3g.c @@ -63,7 +63,7 @@ #include -#if USB_DEBUG +#ifdef USB_DEBUG static int u3g_debug = 0; SYSCTL_NODE(_hw_usb, OID_AUTO, u3g, CTLFLAG_RW, 0, "USB 3g"); diff --git a/sys/dev/usb/serial/ubsa.c b/sys/dev/usb/serial/ubsa.c index 32639fcc403..58175094922 100644 --- a/sys/dev/usb/serial/ubsa.c +++ b/sys/dev/usb/serial/ubsa.c @@ -93,7 +93,7 @@ __FBSDID("$FreeBSD$"); #include -#if USB_DEBUG +#ifdef USB_DEBUG static int ubsa_debug = 0; SYSCTL_NODE(_hw_usb, OID_AUTO, ubsa, CTLFLAG_RW, 0, "USB ubsa"); @@ -405,9 +405,8 @@ ubsa_cfg_set_break(struct ucom_softc *ucom, uint8_t onoff) static int ubsa_pre_param(struct ucom_softc *ucom, struct termios *t) { - struct ubsa_softc *sc = ucom->sc_parent; - DPRINTF("sc = %p\n", sc); + 
DPRINTF("sc = %p\n", ucom->sc_parent); switch (t->c_ospeed) { case B0: diff --git a/sys/dev/usb/serial/ubser.c b/sys/dev/usb/serial/ubser.c index 06c96c026e5..3f2dc2d30c7 100644 --- a/sys/dev/usb/serial/ubser.c +++ b/sys/dev/usb/serial/ubser.c @@ -115,7 +115,7 @@ __FBSDID("$FreeBSD$"); #define VENDOR_SET_BREAK 0x02 #define VENDOR_CLEAR_BREAK 0x03 -#if USB_DEBUG +#ifdef USB_DEBUG static int ubser_debug = 0; SYSCTL_NODE(_hw_usb, OID_AUTO, ubser, CTLFLAG_RW, 0, "USB ubser"); diff --git a/sys/dev/usb/serial/uchcom.c b/sys/dev/usb/serial/uchcom.c index 9fea8492fca..92f3a922345 100644 --- a/sys/dev/usb/serial/uchcom.c +++ b/sys/dev/usb/serial/uchcom.c @@ -101,7 +101,7 @@ __FBSDID("$FreeBSD$"); #include -#if USB_DEBUG +#ifdef USB_DEBUG static int uchcom_debug = 0; SYSCTL_NODE(_hw_usb, OID_AUTO, uchcom, CTLFLAG_RW, 0, "USB uchcom"); diff --git a/sys/dev/usb/serial/uftdi.c b/sys/dev/usb/serial/uftdi.c index 76a08de30a0..b9d1d347f29 100644 --- a/sys/dev/usb/serial/uftdi.c +++ b/sys/dev/usb/serial/uftdi.c @@ -73,7 +73,7 @@ __FBSDID("$FreeBSD$"); #include #include -#if USB_DEBUG +#ifdef USB_DEBUG static int uftdi_debug = 0; SYSCTL_NODE(_hw_usb, OID_AUTO, uftdi, CTLFLAG_RW, 0, "USB uftdi"); diff --git a/sys/dev/usb/serial/ulpt.c b/sys/dev/usb/serial/ulpt.c index f053f6d3751..58dc367237b 100644 --- a/sys/dev/usb/serial/ulpt.c +++ b/sys/dev/usb/serial/ulpt.c @@ -72,7 +72,7 @@ __FBSDID("$FreeBSD$"); #include #include -#if USB_DEBUG +#ifdef USB_DEBUG static int ulpt_debug = 0; SYSCTL_NODE(_hw_usb, OID_AUTO, ulpt, CTLFLAG_RW, 0, "USB ulpt"); diff --git a/sys/dev/usb/serial/umodem.c b/sys/dev/usb/serial/umodem.c index 39afdad893f..a7d00c9d23a 100644 --- a/sys/dev/usb/serial/umodem.c +++ b/sys/dev/usb/serial/umodem.c @@ -116,7 +116,7 @@ __FBSDID("$FreeBSD$"); #include -#if USB_DEBUG +#ifdef USB_DEBUG static int umodem_debug = 0; SYSCTL_NODE(_hw_usb, OID_AUTO, umodem, CTLFLAG_RW, 0, "USB umodem"); diff --git a/sys/dev/usb/serial/umoscom.c b/sys/dev/usb/serial/umoscom.c index 
0481b192c8b..3a36a44f71c 100644 --- a/sys/dev/usb/serial/umoscom.c +++ b/sys/dev/usb/serial/umoscom.c @@ -48,7 +48,7 @@ #include -#if USB_DEBUG +#ifdef USB_DEBUG static int umoscom_debug = 0; SYSCTL_NODE(_hw_usb, OID_AUTO, umoscom, CTLFLAG_RW, 0, "USB umoscom"); diff --git a/sys/dev/usb/serial/uplcom.c b/sys/dev/usb/serial/uplcom.c index c5d58e46dcb..ae88805bbbb 100644 --- a/sys/dev/usb/serial/uplcom.c +++ b/sys/dev/usb/serial/uplcom.c @@ -116,7 +116,7 @@ __FBSDID("$FreeBSD$"); #include -#if USB_DEBUG +#ifdef USB_DEBUG static int uplcom_debug = 0; SYSCTL_NODE(_hw_usb, OID_AUTO, uplcom, CTLFLAG_RW, 0, "USB uplcom"); diff --git a/sys/dev/usb/serial/usb_serial.c b/sys/dev/usb/serial/usb_serial.c index 871ae54190b..6573d8e0c2e 100644 --- a/sys/dev/usb/serial/usb_serial.c +++ b/sys/dev/usb/serial/usb_serial.c @@ -104,7 +104,7 @@ __FBSDID("$FreeBSD$"); SYSCTL_NODE(_hw_usb, OID_AUTO, ucom, CTLFLAG_RW, 0, "USB ucom"); -#if USB_DEBUG +#ifdef USB_DEBUG static int ucom_debug = 0; SYSCTL_INT(_hw_usb_ucom, OID_AUTO, debug, CTLFLAG_RW, diff --git a/sys/dev/usb/serial/uslcom.c b/sys/dev/usb/serial/uslcom.c index d97cc2c542c..f20c12ac147 100644 --- a/sys/dev/usb/serial/uslcom.c +++ b/sys/dev/usb/serial/uslcom.c @@ -50,7 +50,7 @@ __FBSDID("$FreeBSD$"); #include -#if USB_DEBUG +#ifdef USB_DEBUG static int uslcom_debug = 0; SYSCTL_NODE(_hw_usb, OID_AUTO, uslcom, CTLFLAG_RW, 0, "USB uslcom"); diff --git a/sys/dev/usb/serial/uvisor.c b/sys/dev/usb/serial/uvisor.c index f0e4257a372..77ff31f5bd2 100644 --- a/sys/dev/usb/serial/uvisor.c +++ b/sys/dev/usb/serial/uvisor.c @@ -78,7 +78,7 @@ #include -#if USB_DEBUG +#ifdef USB_DEBUG static int uvisor_debug = 0; SYSCTL_NODE(_hw_usb, OID_AUTO, uvisor, CTLFLAG_RW, 0, "USB uvisor"); @@ -392,7 +392,7 @@ uvisor_init(struct uvisor_softc *sc, struct usb_device *udev, struct usb_config goto done; } } -#if USB_DEBUG +#ifdef USB_DEBUG if (sc->sc_flag & UVISOR_FLAG_VISOR) { uint16_t i, np; const char *desc; diff --git a/sys/dev/usb/serial/uvscom.c 
b/sys/dev/usb/serial/uvscom.c index 4e3ff576422..f220587da12 100644 --- a/sys/dev/usb/serial/uvscom.c +++ b/sys/dev/usb/serial/uvscom.c @@ -68,7 +68,7 @@ __FBSDID("$FreeBSD$"); #include -#if USB_DEBUG +#ifdef USB_DEBUG static int uvscom_debug = 0; SYSCTL_NODE(_hw_usb, OID_AUTO, uvscom, CTLFLAG_RW, 0, "USB uvscom"); diff --git a/sys/dev/usb/storage/umass.c b/sys/dev/usb/storage/umass.c index b14bb47e8cb..a66357b5db5 100644 --- a/sys/dev/usb/storage/umass.c +++ b/sys/dev/usb/storage/umass.c @@ -146,7 +146,7 @@ __FBSDID("$FreeBSD$"); #define UMASS_USB_FLAGS #endif -#if USB_DEBUG +#ifdef USB_DEBUG #define DIF(m, x) \ do { \ if (umass_debug & (m)) { x ; } \ @@ -488,7 +488,7 @@ static uint8_t umass_no_transform(struct umass_softc *, uint8_t *, uint8_t); static uint8_t umass_std_transform(struct umass_softc *, union ccb *, uint8_t *, uint8_t); -#if USB_DEBUG +#ifdef USB_DEBUG static void umass_bbb_dump_cbw(struct umass_softc *, umass_bbb_cbw_t *); static void umass_bbb_dump_csw(struct umass_softc *, umass_bbb_csw_t *); static void umass_cbi_dump_cmd(struct umass_softc *, void *, uint8_t); @@ -917,7 +917,7 @@ umass_attach(device_t dev) } sc->sc_iface_no = id->bInterfaceNumber; -#if USB_DEBUG +#ifdef USB_DEBUG device_printf(dev, " "); switch (sc->sc_proto & UMASS_PROTO_COMMAND) { @@ -3012,7 +3012,7 @@ umass_std_transform(struct umass_softc *sc, union ccb *ccb, return (1); } -#if USB_DEBUG +#ifdef USB_DEBUG static void umass_bbb_dump_cbw(struct umass_softc *sc, umass_bbb_cbw_t *cbw) { diff --git a/sys/dev/usb/storage/urio.c b/sys/dev/usb/storage/urio.c index 403c4c2b216..1aef8469f43 100644 --- a/sys/dev/usb/storage/urio.c +++ b/sys/dev/usb/storage/urio.c @@ -78,7 +78,7 @@ __FBSDID("$FreeBSD$"); #include -#if USB_DEBUG +#ifdef USB_DEBUG static int urio_debug = 0; SYSCTL_NODE(_hw_usb, OID_AUTO, urio, CTLFLAG_RW, 0, "USB urio"); diff --git a/sys/dev/usb/storage/ustorage_fs.c b/sys/dev/usb/storage/ustorage_fs.c index 10047e11c22..52cfd6eb286 100644 --- 
a/sys/dev/usb/storage/ustorage_fs.c +++ b/sys/dev/usb/storage/ustorage_fs.c @@ -64,7 +64,7 @@ #define USB_DEBUG_VAR ustorage_fs_debug #include -#if USB_DEBUG +#ifdef USB_DEBUG static int ustorage_fs_debug = 0; SYSCTL_NODE(_hw_usb, OID_AUTO, ustorage_fs, CTLFLAG_RW, 0, "USB ustorage_fs"); diff --git a/sys/dev/usb/usb_compat_linux.c b/sys/dev/usb/usb_compat_linux.c index fc28a1c5060..89aaa8f7aac 100644 --- a/sys/dev/usb/usb_compat_linux.c +++ b/sys/dev/usb/usb_compat_linux.c @@ -971,7 +971,7 @@ usb_linux_create_usb_device(struct usb_device *udev, device_t dev) udev->devnum = device_get_unit(dev); bcopy(&udev->ddesc, &udev->descriptor, sizeof(udev->descriptor)); - bcopy(udev->default_ep.edesc, &udev->ep0.desc, + bcopy(udev->ctrl_ep.edesc, &udev->ep0.desc, sizeof(udev->ep0.desc)); } } diff --git a/sys/dev/usb/usb_debug.h b/sys/dev/usb/usb_debug.h index b6bfbcfca1a..8718c89bef9 100644 --- a/sys/dev/usb/usb_debug.h +++ b/sys/dev/usb/usb_debug.h @@ -34,7 +34,7 @@ extern int usb_debug; /* Check if USB debugging is enabled. */ #ifdef USB_DEBUG_VAR -#if (USB_DEBUG != 0) +#ifdef USB_DEBUG #define DPRINTFN(n,fmt,...) 
do { \ if ((USB_DEBUG_VAR) >= (n)) { \ printf("%s: " fmt, \ diff --git a/sys/dev/usb/usb_dev.c b/sys/dev/usb/usb_dev.c index dffabad29b9..2ac5b2e4edf 100644 --- a/sys/dev/usb/usb_dev.c +++ b/sys/dev/usb/usb_dev.c @@ -284,7 +284,7 @@ error: usbd_enum_unlock(cpd->udev); if (--(cpd->udev->refcount) == 0) { - cv_signal(cpd->udev->default_cv + 1); + cv_signal(&cpd->udev->ref_cv); } } mtx_unlock(&usb_ref_lock); @@ -352,7 +352,7 @@ usb_unref_device(struct usb_cdev_privdata *cpd, } if (crd->is_uref) { if (--(cpd->udev->refcount) == 0) { - cv_signal(cpd->udev->default_cv + 1); + cv_signal(&cpd->udev->ref_cv); } crd->is_uref = 0; } @@ -500,7 +500,7 @@ usb_fifo_create(struct usb_cdev_privdata *cpd, /* update some fields */ f->fifo_index = n + USB_FIFO_TX; f->dev_ep_index = e; - f->priv_mtx = udev->default_mtx; + f->priv_mtx = &udev->device_mtx; f->priv_sc0 = ep; f->methods = &usb_ugen_methods; f->iface_index = ep->iface_index; @@ -527,7 +527,7 @@ usb_fifo_create(struct usb_cdev_privdata *cpd, /* update some fields */ f->fifo_index = n + USB_FIFO_RX; f->dev_ep_index = e; - f->priv_mtx = udev->default_mtx; + f->priv_mtx = &udev->device_mtx; f->priv_sc0 = ep; f->methods = &usb_ugen_methods; f->iface_index = ep->iface_index; @@ -615,7 +615,7 @@ usb_dev_get_ep(struct usb_device *udev, uint8_t ep_index, uint8_t dir) uint8_t ep_dir; if (ep_index == 0) { - ep = &udev->default_ep; + ep = &udev->ctrl_ep; } else { if (dir == USB_FIFO_RX) { if (udev->flags.usb_mode == USB_MODE_HOST) { diff --git a/sys/dev/usb/usb_device.c b/sys/dev/usb/usb_device.c index d7f0c31c8d3..95020de0ac3 100644 --- a/sys/dev/usb/usb_device.c +++ b/sys/dev/usb/usb_device.c @@ -45,6 +45,7 @@ #include #include #include +#include #include #include @@ -178,9 +179,9 @@ usbd_get_ep_by_addr(struct usb_device *udev, uint8_t ea_val) /* * The default endpoint is always present and is checked separately: */ - if ((udev->default_ep.edesc) && - ((udev->default_ep.edesc->bEndpointAddress & EA_MASK) == ea_val)) { - ep = 
&udev->default_ep; + if ((udev->ctrl_ep.edesc) && + ((udev->ctrl_ep.edesc->bEndpointAddress & EA_MASK) == ea_val)) { + ep = &udev->ctrl_ep; goto found; } return (NULL); @@ -296,11 +297,11 @@ usbd_get_endpoint(struct usb_device *udev, uint8_t iface_index, * address" and "any direction" returns the first endpoint of the * interface. "iface_index" and "direction" is ignored: */ - if ((udev->default_ep.edesc) && - ((udev->default_ep.edesc->bEndpointAddress & ea_mask) == ea_val) && - ((udev->default_ep.edesc->bmAttributes & type_mask) == type_val) && + if ((udev->ctrl_ep.edesc) && + ((udev->ctrl_ep.edesc->bEndpointAddress & ea_mask) == ea_val) && + ((udev->ctrl_ep.edesc->bmAttributes & type_mask) == type_val) && (!index)) { - ep = &udev->default_ep; + ep = &udev->ctrl_ep; goto found; } return (NULL); @@ -654,7 +655,7 @@ usb_config_parse(struct usb_device *udev, uint8_t iface_index, uint8_t cmd) goto cleanup; if (cmd == USB_CFG_INIT) { - sx_assert(udev->default_sx + 1, SA_LOCKED); + sx_assert(&udev->enum_sx, SA_LOCKED); /* check for in-use endpoints */ @@ -1061,7 +1062,7 @@ usb_detach_device(struct usb_device *udev, uint8_t iface_index, } DPRINTFN(4, "udev=%p\n", udev); - sx_assert(udev->default_sx + 1, SA_LOCKED); + sx_assert(&udev->enum_sx, SA_LOCKED); /* * First detach the child to give the child's detach routine a @@ -1379,7 +1380,7 @@ usb_suspend_resume(struct usb_device *udev, uint8_t do_suspend) } DPRINTFN(4, "udev=%p do_suspend=%d\n", udev, do_suspend); - sx_assert(udev->default_sx + 1, SA_LOCKED); + sx_assert(&udev->enum_sx, SA_LOCKED); USB_BUS_LOCK(udev->bus); /* filter the suspend events */ @@ -1418,13 +1419,13 @@ usbd_clear_stall_proc(struct usb_proc_msg *_pm) /* Change lock */ USB_BUS_UNLOCK(udev->bus); - mtx_lock(udev->default_mtx); + mtx_lock(&udev->device_mtx); /* Start clear stall callback */ - usbd_transfer_start(udev->default_xfer[1]); + usbd_transfer_start(udev->ctrl_xfer[1]); /* Change lock */ - mtx_unlock(udev->default_mtx); + 
mtx_unlock(&udev->device_mtx); USB_BUS_LOCK(udev->bus); } @@ -1490,16 +1491,16 @@ usb_alloc_device(device_t parent_dev, struct usb_bus *bus, return (NULL); } /* initialise our SX-lock */ - sx_init(udev->default_sx, "0123456789ABCDEF - USB device SX lock" + depth); + sx_init_flags(&udev->ctrl_sx, "USB device SX lock", SX_DUPOK); /* initialise our SX-lock */ - sx_init(udev->default_sx + 1, "0123456789ABCDEF - USB config SX lock" + depth); + sx_init_flags(&udev->enum_sx, "USB config SX lock", SX_DUPOK); - cv_init(udev->default_cv, "WCTRL"); - cv_init(udev->default_cv + 1, "UGONE"); + cv_init(&udev->ctrlreq_cv, "WCTRL"); + cv_init(&udev->ref_cv, "UGONE"); /* initialise our mutex */ - mtx_init(udev->default_mtx, "USB device mutex", NULL, MTX_DEF); + mtx_init(&udev->device_mtx, "USB device mutex", NULL, MTX_DEF); /* initialise generic clear stall */ udev->cs_msg[0].hdr.pm_callback = &usbd_clear_stall_proc; @@ -1528,13 +1529,13 @@ usb_alloc_device(device_t parent_dev, struct usb_bus *bus, udev->refcount = 1; /* set up default endpoint descriptor */ - udev->default_ep_desc.bLength = sizeof(udev->default_ep_desc); - udev->default_ep_desc.bDescriptorType = UDESC_ENDPOINT; - udev->default_ep_desc.bEndpointAddress = USB_CONTROL_ENDPOINT; - udev->default_ep_desc.bmAttributes = UE_CONTROL; - udev->default_ep_desc.wMaxPacketSize[0] = USB_MAX_IPACKET; - udev->default_ep_desc.wMaxPacketSize[1] = 0; - udev->default_ep_desc.bInterval = 0; + udev->ctrl_ep_desc.bLength = sizeof(udev->ctrl_ep_desc); + udev->ctrl_ep_desc.bDescriptorType = UDESC_ENDPOINT; + udev->ctrl_ep_desc.bEndpointAddress = USB_CONTROL_ENDPOINT; + udev->ctrl_ep_desc.bmAttributes = UE_CONTROL; + udev->ctrl_ep_desc.wMaxPacketSize[0] = USB_MAX_IPACKET; + udev->ctrl_ep_desc.wMaxPacketSize[1] = 0; + udev->ctrl_ep_desc.bInterval = 0; udev->ddesc.bMaxPacketSize = USB_MAX_IPACKET; udev->speed = speed; @@ -1558,8 +1559,8 @@ usb_alloc_device(device_t parent_dev, struct usb_bus *bus, /* init the default endpoint */ 
usb_init_endpoint(udev, 0, - &udev->default_ep_desc, - &udev->default_ep); + &udev->ctrl_ep_desc, + &udev->ctrl_ep); /* set device index */ udev->device_index = device_index; @@ -1572,10 +1573,10 @@ usb_alloc_device(device_t parent_dev, struct usb_bus *bus, LIST_INIT(&udev->pd_list); /* Create the control endpoint device */ - udev->default_dev = usb_make_dev(udev, 0, FREAD|FWRITE); + udev->ctrl_dev = usb_make_dev(udev, 0, FREAD|FWRITE); /* Create a link from /dev/ugenX.X to the default endpoint */ - make_dev_alias(udev->default_dev, udev->ugen_name); + make_dev_alias(udev->ctrl_dev, udev->ugen_name); #endif if (udev->flags.usb_mode == USB_MODE_HOST) { @@ -1834,7 +1835,7 @@ config_done: printf("%s: <%s> at %s\n", udev->ugen_name, udev->manufacturer, device_get_nameunit(udev->bus->bdev)); - usb_notify_addq("+", udev); + usb_notify_addq("ATTACH", udev); #endif done: if (err) { @@ -1980,7 +1981,7 @@ usb_free_device(struct usb_device *udev, uint8_t flag) usb_set_device_state(udev, USB_STATE_DETACHED); #if USB_HAVE_UGEN - usb_notify_addq("-", udev); + usb_notify_addq("DETACH", udev); printf("%s: <%s> at %s (disconnected)\n", udev->ugen_name, udev->manufacturer, device_get_nameunit(bus->bdev)); @@ -2004,24 +2005,24 @@ usb_free_device(struct usb_device *udev, uint8_t flag) mtx_lock(&usb_ref_lock); udev->refcount--; while (udev->refcount != 0) { - cv_wait(udev->default_cv + 1, &usb_ref_lock); + cv_wait(&udev->ref_cv, &usb_ref_lock); } mtx_unlock(&usb_ref_lock); - destroy_dev_sched_cb(udev->default_dev, usb_cdev_cleanup, - udev->default_dev->si_drv1); + destroy_dev_sched_cb(udev->ctrl_dev, usb_cdev_cleanup, + udev->ctrl_dev->si_drv1); #endif if (udev->flags.usb_mode == USB_MODE_DEVICE) { /* stop receiving any control transfers (Device Side Mode) */ - usbd_transfer_unsetup(udev->default_xfer, USB_DEFAULT_XFER_MAX); + usbd_transfer_unsetup(udev->ctrl_xfer, USB_CTRL_XFER_MAX); } /* the following will get the device unconfigured in software */ usb_unconfigure(udev, 
USB_UNCFG_FLAG_FREE_EP0); /* unsetup any leftover default USB transfers */ - usbd_transfer_unsetup(udev->default_xfer, USB_DEFAULT_XFER_MAX); + usbd_transfer_unsetup(udev->ctrl_xfer, USB_CTRL_XFER_MAX); /* template unsetup, if any */ (usb_temp_unsetup_p) (udev); @@ -2035,13 +2036,13 @@ usb_free_device(struct usb_device *udev, uint8_t flag) &udev->cs_msg[0], &udev->cs_msg[1]); USB_BUS_UNLOCK(udev->bus); - sx_destroy(udev->default_sx); - sx_destroy(udev->default_sx + 1); + sx_destroy(&udev->ctrl_sx); + sx_destroy(&udev->enum_sx); - cv_destroy(udev->default_cv); - cv_destroy(udev->default_cv + 1); + cv_destroy(&udev->ctrlreq_cv); + cv_destroy(&udev->ref_cv); - mtx_destroy(udev->default_mtx); + mtx_destroy(&udev->device_mtx); #if USB_HAVE_UGEN KASSERT(LIST_FIRST(&udev->pd_list) == NULL, ("leaked cdev entries")); #endif @@ -2347,13 +2348,23 @@ usbd_get_device_index(struct usb_device *udev) * * This function will generate events for dev. *------------------------------------------------------------------------*/ +#ifndef BURN_BRIDGES static void -usb_notify_addq(const char *type, struct usb_device *udev) +usb_notify_addq_compat(const char *type, struct usb_device *udev) { char *data = NULL; + const char *ntype; struct malloc_type *mt; const size_t buf_size = 512; + /* Convert notify type */ + if (strcmp(type, "ATTACH") == 0) + ntype = "+"; + else if (strcmp(type, "DETACH") == 0) + ntype = "-"; + else + return; + mtx_lock(&malloc_mtx); mt = malloc_desc2type("bus"); /* XXX M_BUS */ mtx_unlock(&malloc_mtx); @@ -2378,7 +2389,7 @@ usb_notify_addq(const char *type, struct usb_device *udev) "port=%u " "on " "%s\n", - type, + ntype, udev->ugen_name, UGETW(udev->ddesc.idVendor), UGETW(udev->ddesc.idProduct), @@ -2393,6 +2404,89 @@ usb_notify_addq(const char *type, struct usb_device *udev) devctl_queue_data(data); } +#endif + +static void +usb_notify_addq(const char *type, struct usb_device *udev) +{ + struct usb_interface *iface; + struct sbuf *sb; + int i; + +#ifndef 
BURN_BRIDGES + usb_notify_addq_compat(type, udev); +#endif + + /* announce the device */ + sb = sbuf_new_auto(); + sbuf_printf(sb, + "cdev=%s " + "vendor=0x%04x " + "product=0x%04x " + "devclass=0x%02x " + "devsubclass=0x%02x " + "sernum=\"%s\" " + "release=0x%04x " + "mode=%s " + "port=%u " + "parent=%s\n", + udev->ugen_name, + UGETW(udev->ddesc.idVendor), + UGETW(udev->ddesc.idProduct), + udev->ddesc.bDeviceClass, + udev->ddesc.bDeviceSubClass, + udev->serial, + UGETW(udev->ddesc.bcdDevice), + (udev->flags.usb_mode == USB_MODE_HOST) ? "host" : "device", + udev->port_no, + udev->parent_hub != NULL ? + udev->parent_hub->ugen_name : + device_get_nameunit(device_get_parent(udev->bus->bdev))); + sbuf_finish(sb); + devctl_notify("USB", "DEVICE", type, sbuf_data(sb)); + sbuf_delete(sb); + + /* announce each interface */ + for (i = 0; i < USB_IFACE_MAX; i++) { + iface = usbd_get_iface(udev, i); + if (iface == NULL) + break; /* end of interfaces */ + if (iface->idesc == NULL) + continue; /* no interface descriptor */ + + sb = sbuf_new_auto(); + sbuf_printf(sb, + "cdev=%s " + "vendor=0x%04x " + "product=0x%04x " + "devclass=0x%02x " + "devsubclass=0x%02x " + "sernum=\"%s\" " + "release=0x%04x " + "mode=%s " + "interface=%d " + "endpoints=%d " + "intclass=0x%02x " + "intsubclass=0x%02x " + "intprotocol=0x%02x\n", + udev->ugen_name, + UGETW(udev->ddesc.idVendor), + UGETW(udev->ddesc.idProduct), + udev->ddesc.bDeviceClass, + udev->ddesc.bDeviceSubClass, + udev->serial, + UGETW(udev->ddesc.bcdDevice), + (udev->flags.usb_mode == USB_MODE_HOST) ? 
"host" : "device", + iface->idesc->bInterfaceNumber, + iface->idesc->bNumEndpoints, + iface->idesc->bInterfaceClass, + iface->idesc->bInterfaceSubClass, + iface->idesc->bInterfaceProtocol); + sbuf_finish(sb); + devctl_notify("USB", "INTERFACE", type, sbuf_data(sb)); + sbuf_delete(sb); + } +} /*------------------------------------------------------------------------* * usb_fifo_free_wrap @@ -2494,7 +2588,7 @@ usbd_device_attached(struct usb_device *udev) void usbd_enum_lock(struct usb_device *udev) { - sx_xlock(udev->default_sx + 1); + sx_xlock(&udev->enum_sx); /* * NEWBUS LOCK NOTE: We should check if any parent SX locks * are locked before locking Giant. Else the lock can be @@ -2509,7 +2603,7 @@ void usbd_enum_unlock(struct usb_device *udev) { mtx_unlock(&Giant); - sx_xunlock(udev->default_sx + 1); + sx_xunlock(&udev->enum_sx); } /* @@ -2520,5 +2614,5 @@ usbd_enum_unlock(struct usb_device *udev) uint8_t usbd_enum_is_locked(struct usb_device *udev) { - return (sx_xlocked(udev->default_sx + 1)); + return (sx_xlocked(&udev->enum_sx)); } diff --git a/sys/dev/usb/usb_device.h b/sys/dev/usb/usb_device.h index 3afdecf3ae9..08b9fd7ef4f 100644 --- a/sys/dev/usb/usb_device.h +++ b/sys/dev/usb/usb_device.h @@ -30,7 +30,7 @@ struct usb_symlink; /* UGEN */ struct usb_device; /* linux compat */ -#define USB_DEFAULT_XFER_MAX 2 +#define USB_CTRL_XFER_MAX 2 /* "usb_parse_config()" commands */ @@ -113,11 +113,13 @@ struct usb_power_save { struct usb_device { struct usb_clear_stall_msg cs_msg[2]; /* generic clear stall * messages */ - struct sx default_sx[2]; - struct mtx default_mtx[1]; - struct cv default_cv[2]; + struct sx ctrl_sx; + struct sx enum_sx; + struct mtx device_mtx; + struct cv ctrlreq_cv; + struct cv ref_cv; struct usb_interface *ifaces; - struct usb_endpoint default_ep; /* Control Endpoint 0 */ + struct usb_endpoint ctrl_ep; /* Control Endpoint 0 */ struct usb_endpoint *endpoints; struct usb_power_save pwr_save;/* power save data */ struct usb_bus *bus; /* our USB 
BUS */ @@ -126,13 +128,13 @@ struct usb_device { struct usb_device *parent_hs_hub; /* high-speed parent HUB */ struct usb_config_descriptor *cdesc; /* full config descr */ struct usb_hub *hub; /* only if this is a hub */ - struct usb_xfer *default_xfer[USB_DEFAULT_XFER_MAX]; + struct usb_xfer *ctrl_xfer[USB_CTRL_XFER_MAX]; struct usb_temp_data *usb_template_ptr; struct usb_endpoint *ep_curr; /* current clear stall endpoint */ #if USB_HAVE_UGEN struct usb_fifo *fifo[USB_FIFO_MAX]; struct usb_symlink *ugen_symlink; /* our generic symlink */ - struct cdev *default_dev; /* Control Endpoint 0 device node */ + struct cdev *ctrl_dev; /* Control Endpoint 0 device node */ LIST_HEAD(,usb_fs_privdata) pd_list; char ugen_name[20]; /* name of ugenX.X device */ #endif @@ -164,7 +166,7 @@ struct usb_device { struct usb_device_flags flags; - struct usb_endpoint_descriptor default_ep_desc; /* for endpoint 0 */ + struct usb_endpoint_descriptor ctrl_ep_desc; /* for endpoint 0 */ struct usb_device_descriptor ddesc; /* device descriptor */ char *serial; /* serial number */ diff --git a/sys/dev/usb/usb_freebsd.h b/sys/dev/usb/usb_freebsd.h index 1f34317d667..8a008cd47dd 100644 --- a/sys/dev/usb/usb_freebsd.h +++ b/sys/dev/usb/usb_freebsd.h @@ -57,10 +57,6 @@ #define USB_HUB_MAX_DEPTH 5 #define USB_EP0_BUFSIZE 1024 /* bytes */ -#ifndef USB_DEBUG -#define USB_DEBUG 1 -#endif - typedef uint32_t usb_timeout_t; /* milliseconds */ typedef uint32_t usb_frlength_t; /* bytes */ typedef uint32_t usb_frcount_t; /* units */ diff --git a/sys/dev/usb/usb_generic.c b/sys/dev/usb/usb_generic.c index fb7d5df0fcb..db274683440 100644 --- a/sys/dev/usb/usb_generic.c +++ b/sys/dev/usb/usb_generic.c @@ -81,11 +81,11 @@ static usb_callback_t ugen_read_clear_stall_callback; static usb_callback_t ugen_write_clear_stall_callback; -static usb_callback_t ugen_default_read_callback; -static usb_callback_t ugen_default_write_callback; +static usb_callback_t ugen_ctrl_read_callback; +static usb_callback_t 
ugen_ctrl_write_callback; static usb_callback_t ugen_isoc_read_callback; static usb_callback_t ugen_isoc_write_callback; -static usb_callback_t ugen_default_fs_callback; +static usb_callback_t ugen_ctrl_fs_callback; static usb_fifo_open_t ugen_open; static usb_fifo_close_t ugen_close; @@ -265,7 +265,7 @@ ugen_open_pipe_write(struct usb_fifo *f) if (f->flag_short) { usb_config[0].flags.force_short_xfer = 1; } - usb_config[0].callback = &ugen_default_write_callback; + usb_config[0].callback = &ugen_ctrl_write_callback; usb_config[0].timeout = f->timeout; usb_config[0].frames = 1; usb_config[0].bufsize = f->bufsize; @@ -335,7 +335,7 @@ ugen_open_pipe_read(struct usb_fifo *f) } usb_config[0].timeout = f->timeout; usb_config[0].frames = 1; - usb_config[0].callback = &ugen_default_read_callback; + usb_config[0].callback = &ugen_ctrl_read_callback; usb_config[0].bufsize = f->bufsize; if (ugen_transfer_setup(f, usb_config, 2)) { @@ -401,7 +401,7 @@ ugen_stop_io(struct usb_fifo *f) } static void -ugen_default_read_callback(struct usb_xfer *xfer, usb_error_t error) +ugen_ctrl_read_callback(struct usb_xfer *xfer, usb_error_t error) { struct usb_fifo *f = usbd_xfer_softc(xfer); struct usb_mbuf *m; @@ -453,7 +453,7 @@ ugen_default_read_callback(struct usb_xfer *xfer, usb_error_t error) } static void -ugen_default_write_callback(struct usb_xfer *xfer, usb_error_t error) +ugen_ctrl_write_callback(struct usb_xfer *xfer, usb_error_t error) { struct usb_fifo *f = usbd_xfer_softc(xfer); usb_frlength_t actlen; @@ -1480,7 +1480,7 @@ ugen_ioctl(struct usb_fifo *f, u_long cmd, void *addr, int fflags) usb_config[0].direction = ed->bEndpointAddress & (UE_DIR_OUT | UE_DIR_IN); usb_config[0].interval = USB_DEFAULT_INTERVAL; usb_config[0].flags.proxy_buffer = 1; - usb_config[0].callback = &ugen_default_fs_callback; + usb_config[0].callback = &ugen_ctrl_fs_callback; usb_config[0].timeout = 0; /* no timeout */ usb_config[0].frames = u.popen->max_frames; usb_config[0].bufsize = 
u.popen->max_bufsize; @@ -2201,7 +2201,7 @@ ugen_ioctl_post(struct usb_fifo *f, u_long cmd, void *addr, int fflags) } static void -ugen_default_fs_callback(struct usb_xfer *xfer, usb_error_t error) +ugen_ctrl_fs_callback(struct usb_xfer *xfer, usb_error_t error) { ; /* workaround for a bug in "indent" */ diff --git a/sys/dev/usb/usb_hub.c b/sys/dev/usb/usb_hub.c index 97797963269..c31f8fa449c 100644 --- a/sys/dev/usb/usb_hub.c +++ b/sys/dev/usb/usb_hub.c @@ -246,7 +246,7 @@ uhub_explore_sub(struct uhub_softc *sc, struct usb_port *up) /* start control transfer, if device mode */ if (child->flags.usb_mode == USB_MODE_DEVICE) { - usbd_default_transfer_setup(child); + usbd_ctrl_transfer_setup(child); } /* if a HUB becomes present, do a recursive HUB explore */ diff --git a/sys/dev/usb/usb_request.c b/sys/dev/usb/usb_request.c index 03745faa7a5..6150cb1fa14 100644 --- a/sys/dev/usb/usb_request.c +++ b/sys/dev/usb/usb_request.c @@ -68,7 +68,7 @@ #include #include -#if USB_DEBUG +#ifdef USB_DEBUG static int usb_pr_poll_delay = USB_PORT_RESET_DELAY; static int usb_pr_recovery_delay = USB_PORT_RESET_RECOVERY; static int usb_ss_delay = 0; @@ -99,7 +99,7 @@ usbd_do_request_callback(struct usb_xfer *xfer, usb_error_t error) usbd_transfer_submit(xfer); break; default: - cv_signal(xfer->xroot->udev->default_cv); + cv_signal(&xfer->xroot->udev->ctrlreq_cv); break; } } @@ -319,7 +319,7 @@ usbd_do_request_flags(struct usb_device *udev, struct mtx *mtx, * is achieved when multiple threads are involved: */ - sx_xlock(udev->default_sx); + sx_xlock(&udev->ctrl_sx); hr_func = usbd_get_hr_func(udev); @@ -374,9 +374,9 @@ usbd_do_request_flags(struct usb_device *udev, struct mtx *mtx, /* * Setup a new USB transfer or use the existing one, if any: */ - usbd_default_transfer_setup(udev); + usbd_ctrl_transfer_setup(udev); - xfer = udev->default_xfer[0]; + xfer = udev->ctrl_xfer[0]; if (xfer == NULL) { /* most likely out of memory */ err = USB_ERR_NOMEM; @@ -433,7 +433,7 @@ 
usbd_do_request_flags(struct usb_device *udev, struct mtx *mtx, } else { if (xfer->frlengths[0] == 0) { if (xfer->flags.manual_status) { -#if USB_DEBUG +#ifdef USB_DEBUG int temp; temp = usb_ss_delay; @@ -457,7 +457,7 @@ usbd_do_request_flags(struct usb_device *udev, struct mtx *mtx, usbd_transfer_start(xfer); while (usbd_transfer_pending(xfer)) { - cv_wait(udev->default_cv, + cv_wait(&udev->ctrlreq_cv, xfer->xroot->xfer_mtx); } @@ -534,7 +534,7 @@ usbd_do_request_flags(struct usb_device *udev, struct mtx *mtx, USB_XFER_UNLOCK(xfer); done: - sx_xunlock(udev->default_sx); + sx_xunlock(&udev->ctrl_sx); if (mtx) { mtx_lock(mtx); @@ -603,7 +603,7 @@ usbd_req_reset_port(struct usb_device *udev, struct mtx *mtx, uint8_t port) usb_error_t err; uint16_t n; -#if USB_DEBUG +#ifdef USB_DEBUG uint16_t pr_poll_delay; uint16_t pr_recovery_delay; @@ -612,7 +612,7 @@ usbd_req_reset_port(struct usb_device *udev, struct mtx *mtx, uint8_t port) if (err) { goto done; } -#if USB_DEBUG +#ifdef USB_DEBUG /* range check input parameters */ pr_poll_delay = usb_pr_poll_delay; if (pr_poll_delay < 1) { @@ -627,7 +627,7 @@ usbd_req_reset_port(struct usb_device *udev, struct mtx *mtx, uint8_t port) #endif n = 0; while (1) { -#if USB_DEBUG +#ifdef USB_DEBUG /* wait for the device to recover from reset */ usb_pause_mtx(mtx, USB_MS_TO_TICKS(pr_poll_delay)); n += pr_poll_delay; @@ -666,7 +666,7 @@ usbd_req_reset_port(struct usb_device *udev, struct mtx *mtx, uint8_t port) err = USB_ERR_TIMEOUT; goto done; } -#if USB_DEBUG +#ifdef USB_DEBUG /* wait for the device to recover from reset */ usb_pause_mtx(mtx, USB_MS_TO_TICKS(pr_recovery_delay)); #else diff --git a/sys/dev/usb/usb_transfer.c b/sys/dev/usb/usb_transfer.c index 9be1c0f4bde..535d12bfdef 100644 --- a/sys/dev/usb/usb_transfer.c +++ b/sys/dev/usb/usb_transfer.c @@ -72,7 +72,7 @@ struct usb_std_packet_size { static usb_callback_t usb_request_callback; -static const struct usb_config usb_control_ep_cfg[USB_DEFAULT_XFER_MAX] = { +static const 
struct usb_config usb_control_ep_cfg[USB_CTRL_XFER_MAX] = { /* This transfer is used for generic control endpoint transfers */ @@ -1418,7 +1418,7 @@ usbd_transfer_submit(struct usb_xfer *xfer) xfer, xfer->endpoint, xfer->nframes, USB_GET_DATA_ISREAD(xfer) ? "read" : "write"); -#if USB_DEBUG +#ifdef USB_DEBUG if (USB_DEBUG_VAR > 0) { USB_BUS_LOCK(bus); @@ -2433,8 +2433,8 @@ usbd_pipe_start(struct usb_xfer_queue *pq) if (udev->flags.usb_mode == USB_MODE_DEVICE) { (udev->bus->methods->set_stall) ( udev, NULL, ep, &did_stall); - } else if (udev->default_xfer[1]) { - info = udev->default_xfer[1]->xroot; + } else if (udev->ctrl_xfer[1]) { + info = udev->ctrl_xfer[1]->xroot; usb_proc_msignal( &info->bus->non_giant_callback_proc, &udev->cs_msg[0], &udev->cs_msg[1]); @@ -2757,13 +2757,13 @@ usb_command_wrapper(struct usb_xfer_queue *pq, struct usb_xfer *xfer) } /*------------------------------------------------------------------------* - * usbd_default_transfer_setup + * usbd_ctrl_transfer_setup * * This function is used to setup the default USB control endpoint * transfer. 
*------------------------------------------------------------------------*/ void -usbd_default_transfer_setup(struct usb_device *udev) +usbd_ctrl_transfer_setup(struct usb_device *udev) { struct usb_xfer *xfer; uint8_t no_resetup; @@ -2774,12 +2774,12 @@ usbd_default_transfer_setup(struct usb_device *udev) return; repeat: - xfer = udev->default_xfer[0]; + xfer = udev->ctrl_xfer[0]; if (xfer) { USB_XFER_LOCK(xfer); no_resetup = ((xfer->address == udev->address) && - (udev->default_ep_desc.wMaxPacketSize[0] == + (udev->ctrl_ep_desc.wMaxPacketSize[0] == udev->ddesc.bMaxPacketSize)); if (udev->flags.usb_mode == USB_MODE_DEVICE) { if (no_resetup) { @@ -2806,13 +2806,13 @@ repeat: /* * Update wMaxPacketSize for the default control endpoint: */ - udev->default_ep_desc.wMaxPacketSize[0] = + udev->ctrl_ep_desc.wMaxPacketSize[0] = udev->ddesc.bMaxPacketSize; /* * Unsetup any existing USB transfer: */ - usbd_transfer_unsetup(udev->default_xfer, USB_DEFAULT_XFER_MAX); + usbd_transfer_unsetup(udev->ctrl_xfer, USB_CTRL_XFER_MAX); /* * Try to setup a new USB transfer for the @@ -2820,8 +2820,8 @@ repeat: */ iface_index = 0; if (usbd_transfer_setup(udev, &iface_index, - udev->default_xfer, usb_control_ep_cfg, USB_DEFAULT_XFER_MAX, NULL, - udev->default_mtx)) { + udev->ctrl_xfer, usb_control_ep_cfg, USB_CTRL_XFER_MAX, NULL, + &udev->device_mtx)) { DPRINTFN(0, "could not setup default " "USB transfer\n"); } else { @@ -3001,13 +3001,13 @@ usbd_transfer_poll(struct usb_xfer **ppxfer, uint16_t max) USB_BUS_LOCK(xroot->bus); /* check for clear stall */ - if (udev->default_xfer[1] != NULL) { + if (udev->ctrl_xfer[1] != NULL) { /* poll clear stall start */ pm = &udev->cs_msg[0].hdr; (pm->pm_callback) (pm); /* poll clear stall done thread */ - pm = &udev->default_xfer[1]-> + pm = &udev->ctrl_xfer[1]-> xroot->done_m[0].hdr; (pm->pm_callback) (pm); } diff --git a/sys/dev/usb/usb_transfer.h b/sys/dev/usb/usb_transfer.h index 27f3afffa63..6e08df09ef8 100644 --- a/sys/dev/usb/usb_transfer.h +++ 
b/sys/dev/usb/usb_transfer.h @@ -123,7 +123,7 @@ void usbd_transfer_done(struct usb_xfer *xfer, usb_error_t error); void usbd_transfer_enqueue(struct usb_xfer_queue *pq, struct usb_xfer *xfer); void usbd_transfer_setup_sub(struct usb_setup_params *parm); -void usbd_default_transfer_setup(struct usb_device *udev); +void usbd_ctrl_transfer_setup(struct usb_device *udev); void usbd_clear_data_toggle(struct usb_device *udev, struct usb_endpoint *ep); usb_callback_t usbd_do_request_callback; diff --git a/sys/dev/usb/usbdevs b/sys/dev/usb/usbdevs index 4cf2891af76..a7bac4405e8 100644 --- a/sys/dev/usb/usbdevs +++ b/sys/dev/usb/usbdevs @@ -2515,7 +2515,6 @@ product QUALCOMMINC E0086 0x0086 3G modem product QUALCOMMINC E2002 0x2002 3G modem product QUALCOMMINC E2003 0x2003 3G modem -/* Quanta products */ /* Quanta products */ product QUANTA RW6815_1 0x00ce HP iPAQ rw6815 product QUANTA RT3070 0x0304 RT3070 diff --git a/sys/dev/usb/wlan/if_rum.c b/sys/dev/usb/wlan/if_rum.c index 5b3441b0118..ebb22fe8257 100644 --- a/sys/dev/usb/wlan/if_rum.c +++ b/sys/dev/usb/wlan/if_rum.c @@ -77,7 +77,7 @@ __FBSDID("$FreeBSD$"); #include #include -#if USB_DEBUG +#ifdef USB_DEBUG static int rum_debug = 0; SYSCTL_NODE(_hw_usb, OID_AUTO, rum, CTLFLAG_RW, 0, "USB rum"); @@ -197,6 +197,7 @@ static void rum_enable_tsf(struct rum_softc *); static void rum_update_slot(struct ifnet *); static void rum_set_bssid(struct rum_softc *, const uint8_t *); static void rum_set_macaddr(struct rum_softc *, const uint8_t *); +static void rum_update_mcast(struct ifnet *); static void rum_update_promisc(struct ifnet *); static void rum_setpromisc(struct rum_softc *); static const char *rum_get_rf(int); @@ -478,8 +479,8 @@ rum_attach(device_t self) ifp->if_init = rum_init; ifp->if_ioctl = rum_ioctl; ifp->if_start = rum_start; - IFQ_SET_MAXLEN(&ifp->if_snd, IFQ_MAXLEN); - ifp->if_snd.ifq_drv_maxlen = IFQ_MAXLEN; + IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen); + ifp->if_snd.ifq_drv_maxlen = ifqmaxlen; 
IFQ_SET_READY(&ifp->if_snd); ic->ic_ifp = ifp; @@ -514,6 +515,7 @@ rum_attach(device_t self) ic->ic_vap_create = rum_vap_create; ic->ic_vap_delete = rum_vap_delete; + ic->ic_update_mcast = rum_update_mcast; ieee80211_radiotap_attach(ic, &sc->sc_txtap.wt_ihdr, sizeof(sc->sc_txtap), @@ -1815,6 +1817,13 @@ rum_update_promisc(struct ifnet *ifp) RUM_UNLOCK(sc); } +static void +rum_update_mcast(struct ifnet *ifp) +{ + + /* XXX do nothing? */ +} + static const char * rum_get_rf(int rev) { diff --git a/sys/dev/usb/wlan/if_run.c b/sys/dev/usb/wlan/if_run.c index 06d3764ab54..c798c32b823 100644 --- a/sys/dev/usb/wlan/if_run.c +++ b/sys/dev/usb/wlan/if_run.c @@ -79,7 +79,7 @@ __FBSDID("$FreeBSD$"); #define nitems(_a) (sizeof((_a)) / sizeof((_a)[0])) -#if USB_DEBUG +#ifdef USB_DEBUG #define RUN_DEBUG #endif @@ -609,8 +609,8 @@ run_attach(device_t self) ifp->if_init = run_init; ifp->if_ioctl = run_ioctl; ifp->if_start = run_start; - IFQ_SET_MAXLEN(&ifp->if_snd, IFQ_MAXLEN); - ifp->if_snd.ifq_drv_maxlen = IFQ_MAXLEN; + IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen); + ifp->if_snd.ifq_drv_maxlen = ifqmaxlen; IFQ_SET_READY(&ifp->if_snd); ic->ic_ifp = ifp; diff --git a/sys/dev/usb/wlan/if_uath.c b/sys/dev/usb/wlan/if_uath.c index 7838d5ad353..40b7c83da10 100644 --- a/sys/dev/usb/wlan/if_uath.c +++ b/sys/dev/usb/wlan/if_uath.c @@ -438,8 +438,8 @@ uath_attach(device_t dev) ifp->if_ioctl = uath_ioctl; ifp->if_start = uath_start; /* XXX UATH_TX_DATA_LIST_COUNT */ - IFQ_SET_MAXLEN(&ifp->if_snd, IFQ_MAXLEN); - ifp->if_snd.ifq_drv_maxlen = IFQ_MAXLEN; + IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen); + ifp->if_snd.ifq_drv_maxlen = ifqmaxlen; IFQ_SET_READY(&ifp->if_snd); ic = ifp->if_l2com; diff --git a/sys/dev/usb/wlan/if_upgt.c b/sys/dev/usb/wlan/if_upgt.c index 20f06c6ddea..091b9dfe45b 100644 --- a/sys/dev/usb/wlan/if_upgt.c +++ b/sys/dev/usb/wlan/if_upgt.c @@ -331,7 +331,7 @@ upgt_attach(device_t dev) ifp->if_init = upgt_init; ifp->if_ioctl = upgt_ioctl; ifp->if_start = upgt_start; - 
IFQ_SET_MAXLEN(&ifp->if_snd, IFQ_MAXLEN); + IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen); IFQ_SET_READY(&ifp->if_snd); ic = ifp->if_l2com; diff --git a/sys/dev/usb/wlan/if_ural.c b/sys/dev/usb/wlan/if_ural.c index fc907bcb346..6839bcff1b1 100644 --- a/sys/dev/usb/wlan/if_ural.c +++ b/sys/dev/usb/wlan/if_ural.c @@ -78,7 +78,7 @@ __FBSDID("$FreeBSD$"); #include #include -#if USB_DEBUG +#ifdef USB_DEBUG static int ural_debug = 0; SYSCTL_NODE(_hw_usb, OID_AUTO, ural, CTLFLAG_RW, 0, "USB ural"); @@ -467,8 +467,8 @@ ural_attach(device_t self) ifp->if_init = ural_init; ifp->if_ioctl = ural_ioctl; ifp->if_start = ural_start; - IFQ_SET_MAXLEN(&ifp->if_snd, IFQ_MAXLEN); - ifp->if_snd.ifq_drv_maxlen = IFQ_MAXLEN; + IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen); + ifp->if_snd.ifq_drv_maxlen = ifqmaxlen; IFQ_SET_READY(&ifp->if_snd); ic->ic_ifp = ifp; diff --git a/sys/dev/usb/wlan/if_urtw.c b/sys/dev/usb/wlan/if_urtw.c index 6604268e009..9ff16f3b137 100644 --- a/sys/dev/usb/wlan/if_urtw.c +++ b/sys/dev/usb/wlan/if_urtw.c @@ -867,8 +867,8 @@ urtw_attach(device_t dev) ifp->if_ioctl = urtw_ioctl; ifp->if_start = urtw_start; /* XXX URTW_TX_DATA_LIST_COUNT */ - IFQ_SET_MAXLEN(&ifp->if_snd, IFQ_MAXLEN); - ifp->if_snd.ifq_drv_maxlen = IFQ_MAXLEN; + IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen); + ifp->if_snd.ifq_drv_maxlen = ifqmaxlen; IFQ_SET_READY(&ifp->if_snd); ic = ifp->if_l2com; diff --git a/sys/dev/usb/wlan/if_zyd.c b/sys/dev/usb/wlan/if_zyd.c index ee143d632ad..1a298f8ff58 100644 --- a/sys/dev/usb/wlan/if_zyd.c +++ b/sys/dev/usb/wlan/if_zyd.c @@ -75,7 +75,7 @@ __FBSDID("$FreeBSD$"); #include #include -#if USB_DEBUG +#ifdef USB_DEBUG static int zyd_debug = 0; SYSCTL_NODE(_hw_usb, OID_AUTO, zyd, CTLFLAG_RW, 0, "USB zyd"); @@ -381,7 +381,7 @@ zyd_attach(device_t dev) ifp->if_init = zyd_init; ifp->if_ioctl = zyd_ioctl; ifp->if_start = zyd_start; - IFQ_SET_MAXLEN(&ifp->if_snd, IFQ_MAXLEN); + IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen); IFQ_SET_READY(&ifp->if_snd); ic = ifp->if_l2com; diff --git 
a/sys/dev/vx/if_vx.c b/sys/dev/vx/if_vx.c index 0dea9bf9093..1ea0faf2b11 100644 --- a/sys/dev/vx/if_vx.c +++ b/sys/dev/vx/if_vx.c @@ -189,7 +189,7 @@ vx_attach(device_t dev) } ifp->if_mtu = ETHERMTU; - ifp->if_snd.ifq_maxlen = IFQ_MAXLEN; + ifp->if_snd.ifq_maxlen = ifqmaxlen; ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_start = vx_start; ifp->if_ioctl = vx_ioctl; diff --git a/sys/dev/wi/if_wi.c b/sys/dev/wi/if_wi.c index 616c5d7b330..fe0d903d173 100644 --- a/sys/dev/wi/if_wi.c +++ b/sys/dev/wi/if_wi.c @@ -333,8 +333,8 @@ wi_attach(device_t dev) ifp->if_ioctl = wi_ioctl; ifp->if_start = wi_start; ifp->if_init = wi_init; - IFQ_SET_MAXLEN(&ifp->if_snd, IFQ_MAXLEN); - ifp->if_snd.ifq_drv_maxlen = IFQ_MAXLEN; + IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen); + ifp->if_snd.ifq_drv_maxlen = ifqmaxlen; IFQ_SET_READY(&ifp->if_snd); ic->ic_ifp = ifp; diff --git a/sys/dev/wl/if_wl.c b/sys/dev/wl/if_wl.c index ed291ed8dbd..a95997e4225 100644 --- a/sys/dev/wl/if_wl.c +++ b/sys/dev/wl/if_wl.c @@ -559,7 +559,7 @@ wlattach(device_t device) ifp->if_init = wlinit; ifp->if_start = wlstart; ifp->if_ioctl = wlioctl; - ifp->if_snd.ifq_maxlen = IFQ_MAXLEN; + ifp->if_snd.ifq_maxlen = ifqmaxlen; /* no entries ifp->if_done ifp->if_reset diff --git a/sys/dev/wpi/if_wpi.c b/sys/dev/wpi/if_wpi.c index 9bf9342140a..f6edc916569 100644 --- a/sys/dev/wpi/if_wpi.c +++ b/sys/dev/wpi/if_wpi.c @@ -661,8 +661,8 @@ wpi_attach(device_t dev) ifp->if_init = wpi_init; ifp->if_ioctl = wpi_ioctl; ifp->if_start = wpi_start; - IFQ_SET_MAXLEN(&ifp->if_snd, IFQ_MAXLEN); - ifp->if_snd.ifq_drv_maxlen = IFQ_MAXLEN; + IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen); + ifp->if_snd.ifq_drv_maxlen = ifqmaxlen; IFQ_SET_READY(&ifp->if_snd); ieee80211_ifattach(ic, macaddr); diff --git a/sys/dev/xe/if_xe.c b/sys/dev/xe/if_xe.c index f18fd9842c9..77b953ac886 100644 --- a/sys/dev/xe/if_xe.c +++ b/sys/dev/xe/if_xe.c @@ -254,7 +254,7 @@ xe_attach(device_t dev) scp->ifp->if_ioctl = xe_ioctl; scp->ifp->if_init = xe_init; 
scp->ifp->if_baudrate = 100000000; - IFQ_SET_MAXLEN(&scp->ifp->if_snd, IFQ_MAXLEN); + IFQ_SET_MAXLEN(&scp->ifp->if_snd, ifqmaxlen); /* Initialise the ifmedia structure */ ifmedia_init(scp->ifm, 0, xe_media_change, xe_media_status); diff --git a/sys/dev/xen/netfront/netfront.c b/sys/dev/xen/netfront/netfront.c index d67d3544384..15cf455bff7 100644 --- a/sys/dev/xen/netfront/netfront.c +++ b/sys/dev/xen/netfront/netfront.c @@ -1,19 +1,27 @@ -/* - * +/*- * Copyright (c) 2004-2006 Kip Macy * All rights reserved. * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. */ diff --git a/sys/fs/devfs/devfs_devs.c b/sys/fs/devfs/devfs_devs.c index 79037ba6313..4bd672867cc 100644 --- a/sys/fs/devfs/devfs_devs.c +++ b/sys/fs/devfs/devfs_devs.c @@ -115,17 +115,21 @@ SYSCTL_INT(_debug_sizeof, OID_AUTO, cdev_priv, CTLFLAG_RD, 0, sizeof(struct cdev_priv), "sizeof(struct cdev_priv)"); struct cdev * -devfs_alloc(void) +devfs_alloc(int flags) { struct cdev_priv *cdp; struct cdev *cdev; struct timespec ts; - cdp = malloc(sizeof *cdp, M_CDEVP, M_USE_RESERVE | M_ZERO | M_WAITOK); + cdp = malloc(sizeof *cdp, M_CDEVP, M_USE_RESERVE | M_ZERO | + ((flags & MAKEDEV_NOWAIT) ? 
M_NOWAIT : M_WAITOK)); + if (cdp == NULL) + return (NULL); cdp->cdp_dirents = &cdp->cdp_dirent0; cdp->cdp_dirent0 = NULL; cdp->cdp_maxdirent = 0; + cdp->cdp_inode = 0; cdev = &cdp->cdp_c; @@ -133,6 +137,7 @@ devfs_alloc(void) LIST_INIT(&cdev->si_children); vfs_timestamp(&ts); cdev->si_atime = cdev->si_mtime = cdev->si_ctime = ts; + cdev->si_cred = NULL; return (cdev); } @@ -408,9 +413,6 @@ devfs_populate_loop(struct devfs_mount *dm, int cleanup) continue; KASSERT((cdp->cdp_flags & CDP_ACTIVE), ("Bogons, I tell ya'!")); - if (cdp->cdp_flags & CDP_INVALID) - continue; - if (dm->dm_idx <= cdp->cdp_maxdirent && cdp->cdp_dirents[dm->dm_idx] != NULL) { de = cdp->cdp_dirents[dm->dm_idx]; @@ -428,8 +430,6 @@ devfs_populate_loop(struct devfs_mount *dm, int cleanup) dd = dm->dm_rootdir; s = cdp->cdp_c.si_name; for (;;) { - while (*s == '/') - s++; for (q = s; *q != '/' && *q != '\0'; q++) continue; if (*q != '/') @@ -439,24 +439,6 @@ devfs_populate_loop(struct devfs_mount *dm, int cleanup) de = devfs_vmkdir(dm, s, q - s, dd, 0); s = q + 1; dd = de; - if (dd->de_flags & (DE_DOT | DE_DOTDOT)) - break; - } - - /* - * XXX: Ignore duplicate and empty device names. - * XXX: Currently there is no way to report the error to - * XXX: the make_dev(9) caller. 
- */ - if (dd->de_dirent->d_type != DT_DIR || - dd->de_flags & (DE_DOT | DE_DOTDOT) || q - s < 1 || - devfs_find(dd, s, q - s) != NULL) { - dev_lock(); - cdp->cdp_flags |= CDP_INVALID; - dev_unlock(); - printf("%s: %s: invalid or duplicate device name\n", - __func__, cdp->cdp_c.si_name); - return (1); } de = devfs_newdirent(s, q - s); diff --git a/sys/fs/devfs/devfs_int.h b/sys/fs/devfs/devfs_int.h index a998061513f..f5612e1580f 100644 --- a/sys/fs/devfs/devfs_int.h +++ b/sys/fs/devfs/devfs_int.h @@ -55,7 +55,6 @@ struct cdev_priv { u_int cdp_flags; #define CDP_ACTIVE (1 << 0) #define CDP_SCHED_DTR (1 << 1) -#define CDP_INVALID (1 << 2) u_int cdp_inuse; u_int cdp_maxdirent; @@ -71,7 +70,7 @@ struct cdev_priv { #define cdev2priv(c) member2struct(cdev_priv, cdp_c, c) -struct cdev *devfs_alloc(void); +struct cdev *devfs_alloc(int); void devfs_free(struct cdev *); void devfs_create(struct cdev *dev); void devfs_destroy(struct cdev *dev); diff --git a/sys/fs/ext2fs/ext2_readwrite.c b/sys/fs/ext2fs/ext2_readwrite.c index 9c9749a21d7..1a713ca5949 100644 --- a/sys/fs/ext2fs/ext2_readwrite.c +++ b/sys/fs/ext2fs/ext2_readwrite.c @@ -168,7 +168,6 @@ WRITE(ap) struct inode *ip; FS *fs; struct buf *bp; - struct thread *td; daddr_t lbn; off_t osize; int blkoffset, error, flags, ioflag, resid, size, seqcount, xfersize; @@ -213,17 +212,8 @@ WRITE(ap) * Maybe this should be above the vnode op call, but so long as * file servers have no limits, I don't think it matters. 
*/ - td = uio->uio_td; - if (vp->v_type == VREG && td != NULL) { - PROC_LOCK(td->td_proc); - if (uio->uio_offset + uio->uio_resid > - lim_cur(td->td_proc, RLIMIT_FSIZE)) { - psignal(td->td_proc, SIGXFSZ); - PROC_UNLOCK(td->td_proc); - return (EFBIG); - } - PROC_UNLOCK(td->td_proc); - } + if (vn_rlimit_fsize(vp, uio, uio->uio_td)) + return (EFBIG); resid = uio->uio_resid; osize = ip->i_size; diff --git a/sys/fs/ext2fs/ext2_vnops.c b/sys/fs/ext2fs/ext2_vnops.c index 2b2f8c547c7..2d302c56e86 100644 --- a/sys/fs/ext2fs/ext2_vnops.c +++ b/sys/fs/ext2fs/ext2_vnops.c @@ -46,7 +46,6 @@ #include #include -#include #include #include #include @@ -54,7 +53,6 @@ #include #include #include -#include #include #include #include @@ -71,7 +69,6 @@ #include -#include #include #include diff --git a/sys/fs/msdosfs/msdosfs_vnops.c b/sys/fs/msdosfs/msdosfs_vnops.c index e9c593bdc0c..7a194124d79 100644 --- a/sys/fs/msdosfs/msdosfs_vnops.c +++ b/sys/fs/msdosfs/msdosfs_vnops.c @@ -61,9 +61,6 @@ #include #include #include -#include -#include -#include #include #include #include @@ -655,7 +652,6 @@ msdosfs_write(ap) struct buf *bp; int ioflag = ap->a_ioflag; struct uio *uio = ap->a_uio; - struct thread *td = uio->uio_td; struct vnode *vp = ap->a_vp; struct vnode *thisvp; struct denode *dep = VTODE(vp); @@ -699,16 +695,8 @@ msdosfs_write(ap) /* * If they've exceeded their filesize limit, tell them about it. 
*/ - if (td != NULL) { - PROC_LOCK(td->td_proc); - if ((uoff_t)uio->uio_offset + uio->uio_resid > - lim_cur(td->td_proc, RLIMIT_FSIZE)) { - psignal(td->td_proc, SIGXFSZ); - PROC_UNLOCK(td->td_proc); - return (EFBIG); - } - PROC_UNLOCK(td->td_proc); - } + if (vn_rlimit_fsize(vp, uio, uio->uio_td)) + return (EFBIG); /* * If the offset we are starting the write at is beyond the end of diff --git a/sys/fs/nfs/nfs_commonkrpc.c b/sys/fs/nfs/nfs_commonkrpc.c index 8b6ada3865f..b0756e08d50 100644 --- a/sys/fs/nfs/nfs_commonkrpc.c +++ b/sys/fs/nfs/nfs_commonkrpc.c @@ -97,14 +97,37 @@ static void nfs_up(struct nfsmount *, struct thread *, const char *, int, int); static int nfs_msg(struct thread *, const char *, const char *, int); -extern int nfsv2_procid[]; - struct nfs_cached_auth { int ca_refs; /* refcount, including 1 from the cache */ uid_t ca_uid; /* uid that corresponds to this auth */ AUTH *ca_auth; /* RPC auth handle */ }; +static int nfsv2_procid[NFS_V3NPROCS] = { + NFSV2PROC_NULL, + NFSV2PROC_GETATTR, + NFSV2PROC_SETATTR, + NFSV2PROC_LOOKUP, + NFSV2PROC_NOOP, + NFSV2PROC_READLINK, + NFSV2PROC_READ, + NFSV2PROC_WRITE, + NFSV2PROC_CREATE, + NFSV2PROC_MKDIR, + NFSV2PROC_SYMLINK, + NFSV2PROC_CREATE, + NFSV2PROC_REMOVE, + NFSV2PROC_RMDIR, + NFSV2PROC_RENAME, + NFSV2PROC_LINK, + NFSV2PROC_READDIR, + NFSV2PROC_NOOP, + NFSV2PROC_STATFS, + NFSV2PROC_NOOP, + NFSV2PROC_NOOP, + NFSV2PROC_NOOP, +}; + /* * Initialize sockets and congestion for a new NFS connection. * We do not free the sockaddr if error. @@ -533,6 +556,15 @@ newnfs_request(struct nfsrv_descript *nd, struct nfsmount *nmp, if (nmp != NULL) { NFSINCRGLOBAL(newnfsstats.rpcrequests); + + /* Map the procnum to the old NFSv2 one, as required. 
*/ + if ((nd->nd_flag & ND_NFSV2) != 0) { + if (nd->nd_procnum < NFS_V3NPROCS) + procnum = nfsv2_procid[nd->nd_procnum]; + else + procnum = NFSV2PROC_NOOP; + } + /* * Now only used for the R_DONTRECOVER case, but until that is * supported within the krpc code, I need to keep a queue of @@ -650,7 +682,7 @@ tryagain: trylater_delay = NFS_TRYLATERDEL; waituntil = NFSD_MONOSEC + trylater_delay; while (NFSD_MONOSEC < waituntil) - (void) nfs_catnap(PZERO, "nfstry"); + (void) nfs_catnap(PZERO, 0, "nfstry"); trylater_delay *= 2; goto tryagain; } diff --git a/sys/fs/nfs/nfs_commonport.c b/sys/fs/nfs/nfs_commonport.c index a65ebde881c..26765668d24 100644 --- a/sys/fs/nfs/nfs_commonport.c +++ b/sys/fs/nfs/nfs_commonport.c @@ -225,6 +225,8 @@ void newnfs_copycred(struct nfscred *nfscr, struct ucred *cr) { + KASSERT(nfscr->nfsc_ngroups >= 0, + ("newnfs_copycred: negative nfsc_ngroups")); cr->cr_uid = nfscr->nfsc_uid; crsetgroups(cr, nfscr->nfsc_ngroups, nfscr->nfsc_groups); } @@ -343,17 +345,21 @@ newnfs_timer(void *arg) /* - * sleep for a short period of time. + * Sleep for a short period of time unless errval == NFSERR_GRACE, where + * the sleep should be for 5 seconds. * Since lbolt doesn't exist in FreeBSD-CURRENT, just use a timeout on * an event that never gets a wakeup. Only return EINTR or 0. 
*/ int -nfs_catnap(int prio, const char *wmesg) +nfs_catnap(int prio, int errval, const char *wmesg) { static int non_event; int ret; - ret = tsleep(&non_event, prio, wmesg, 1); + if (errval == NFSERR_GRACE) + ret = tsleep(&non_event, prio, wmesg, 5 * hz); + else + ret = tsleep(&non_event, prio, wmesg, 1); if (ret != EINTR) ret = 0; return (ret); diff --git a/sys/fs/nfs/nfs_var.h b/sys/fs/nfs/nfs_var.h index 498511c5939..d6ecda21731 100644 --- a/sys/fs/nfs/nfs_var.h +++ b/sys/fs/nfs/nfs_var.h @@ -322,7 +322,7 @@ int nfsvno_v4rootexport(struct nfsrv_descript *); void newnfs_portinit(void); struct ucred *newnfs_getcred(void); void newnfs_setroot(struct ucred *); -int nfs_catnap(int, const char *); +int nfs_catnap(int, int, const char *); struct nfsreferral *nfsv4root_getreferral(vnode_t, vnode_t, u_int32_t); int nfsrv_atroot(vnode_t, long *); void newnfs_timer(void *); @@ -369,7 +369,7 @@ int nfsrpc_readlink(vnode_t, struct uio *, struct ucred *, int nfsrpc_read(vnode_t, struct uio *, struct ucred *, NFSPROC_T *, struct nfsvattr *, int *, void *); int nfsrpc_write(vnode_t, struct uio *, int *, u_char *, - struct ucred *, NFSPROC_T *, struct nfsvattr *, int *, void *); + struct ucred *, NFSPROC_T *, struct nfsvattr *, int *, void *, int); int nfsrpc_mknod(vnode_t, char *, int, struct vattr *, u_int32_t, enum vtype, struct ucred *, NFSPROC_T *, struct nfsvattr *, struct nfsvattr *, struct nfsfh **, int *, int *, void *); @@ -502,7 +502,7 @@ int nfscl_maperr(NFSPROC_T *, int, uid_t, gid_t); void nfscl_init(void); /* nfs_clbio.c */ -int ncl_flush(vnode_t, int, struct ucred *, NFSPROC_T *, int); +int ncl_flush(vnode_t, int, struct ucred *, NFSPROC_T *, int, int); /* nfs_clnode.c */ void ncl_invalcaches(vnode_t); diff --git a/sys/fs/nfs/nfsclstate.h b/sys/fs/nfs/nfsclstate.h index 10747af5a64..72d8eebfc39 100644 --- a/sys/fs/nfs/nfsclstate.h +++ b/sys/fs/nfs/nfsclstate.h @@ -74,6 +74,7 @@ struct nfsclclient { #define NFSCLFLAGS_EXPIREIT 0x0040 #define NFSCLFLAGS_FIRSTDELEG 
0x0080 #define NFSCLFLAGS_GOTDELEG 0x0100 +#define NFSCLFLAGS_RECVRINPROG 0x0200 struct nfsclowner { LIST_ENTRY(nfsclowner) nfsow_list; @@ -140,6 +141,7 @@ struct nfsclopen { #define NFSCLOPEN_OK 0 #define NFSCLOPEN_DOOPEN 1 #define NFSCLOPEN_DOOPENDOWNGRADE 2 +#define NFSCLOPEN_SETCRED 3 struct nfscllockowner { LIST_ENTRY(nfscllockowner) nfsl_list; diff --git a/sys/fs/nfs/nfskpiport.h b/sys/fs/nfs/nfskpiport.h index 1e1fb9c7a08..bd26a8e8f06 100644 --- a/sys/fs/nfs/nfskpiport.h +++ b/sys/fs/nfs/nfskpiport.h @@ -27,7 +27,7 @@ */ #ifndef _NFS_NFSKPIPORT_H_ -#define _NFSKPIPORT_NFS_H_ +#define _NFS_NFSKPIPORT_H_ /* * These definitions are needed since the generic code is now using Darwin8 * KPI stuff. (I know, seems a bit silly, but I want the code to build on @@ -70,4 +70,4 @@ typedef struct mbuf * mbuf_t; #define uio_iov_len(p) ((p)->uio_iov->iov_len) #define uio_iov_len_add(p, v) ((p)->uio_iov->iov_len += (v)) -#endif /* _NFSKPIPORT_NFS_H */ +#endif /* _NFS_NFSKPIPORT_H */ diff --git a/sys/fs/nfs/nfsport.h b/sys/fs/nfs/nfsport.h index 3d370040b44..40dfe6cf1dc 100644 --- a/sys/fs/nfs/nfsport.h +++ b/sys/fs/nfs/nfsport.h @@ -33,7 +33,7 @@ */ #ifndef _NFS_NFSPORT_H_ -#define _NFSPORT_NFS_H_ +#define _NFS_NFSPORT_H_ /* * In general, I'm not fond of #includes in .h files, but this seems @@ -143,21 +143,21 @@ #define NFSMGET(m) do { \ MGET((m), M_TRYWAIT, MT_DATA); \ while ((m) == NULL ) { \ - (void) nfs_catnap(PZERO, "nfsmget"); \ + (void) nfs_catnap(PZERO, 0, "nfsmget"); \ MGET((m), M_TRYWAIT, MT_DATA); \ } \ } while (0) #define NFSMGETHDR(m) do { \ MGETHDR((m), M_TRYWAIT, MT_DATA); \ while ((m) == NULL ) { \ - (void) nfs_catnap(PZERO, "nfsmget"); \ + (void) nfs_catnap(PZERO, 0, "nfsmget"); \ MGETHDR((m), M_TRYWAIT, MT_DATA); \ } \ } while (0) #define NFSMCLGET(m, w) do { \ MGET((m), M_TRYWAIT, MT_DATA); \ while ((m) == NULL ) { \ - (void) nfs_catnap(PZERO, "nfsmget"); \ + (void) nfs_catnap(PZERO, 0, "nfsmget"); \ MGET((m), M_TRYWAIT, MT_DATA); \ } \ MCLGET((m), (w)); 
\ @@ -165,7 +165,7 @@ #define NFSMCLGETHDR(m, w) do { \ MGETHDR((m), M_TRYWAIT, MT_DATA); \ while ((m) == NULL ) { \ - (void) nfs_catnap(PZERO, "nfsmget"); \ + (void) nfs_catnap(PZERO, 0, "nfsmget"); \ MGETHDR((m), M_TRYWAIT, MT_DATA); \ } \ } while (0) @@ -918,4 +918,4 @@ struct nfsreq { #endif /* _KERNEL */ -#endif /* _NFSPORT_NFS_H */ +#endif /* _NFS_NFSPORT_H */ diff --git a/sys/fs/nfsclient/nfs.h b/sys/fs/nfsclient/nfs.h index 4b542869d62..c6071aface4 100644 --- a/sys/fs/nfsclient/nfs.h +++ b/sys/fs/nfsclient/nfs.h @@ -79,14 +79,16 @@ int ncl_biowrite(struct vnode *, struct uio *, int, struct ucred *); int ncl_vinvalbuf(struct vnode *, int, struct thread *, int); int ncl_asyncio(struct nfsmount *, struct buf *, struct ucred *, struct thread *); -int ncl_doio(struct vnode *, struct buf *, struct ucred *, struct thread *); +int ncl_doio(struct vnode *, struct buf *, struct ucred *, struct thread *, + int); void ncl_nhinit(void); void ncl_nhuninit(void); void ncl_nodelock(struct nfsnode *); void ncl_nodeunlock(struct nfsnode *); int ncl_getattrcache(struct vnode *, struct vattr *); int ncl_readrpc(struct vnode *, struct uio *, struct ucred *); -int ncl_writerpc(struct vnode *, struct uio *, struct ucred *, int *, int *); +int ncl_writerpc(struct vnode *, struct uio *, struct ucred *, int *, int *, + int); int ncl_readlinkrpc(struct vnode *, struct uio *, struct ucred *); int ncl_readdirrpc(struct vnode *, struct uio *, struct ucred *, struct thread *); diff --git a/sys/fs/nfsclient/nfs_clbio.c b/sys/fs/nfsclient/nfs_clbio.c index d0dd2cc2e9f..7d6fcab6809 100644 --- a/sys/fs/nfsclient/nfs_clbio.c +++ b/sys/fs/nfsclient/nfs_clbio.c @@ -41,9 +41,6 @@ __FBSDID("$FreeBSD$"); #include #include #include -#include -#include -#include #include #include @@ -134,12 +131,13 @@ ncl_getpages(struct vop_getpages_args *ap) */ VM_OBJECT_LOCK(object); if (pages[ap->a_reqpage]->valid != 0) { - vm_page_lock_queues(); for (i = 0; i < npages; ++i) { - if (i != ap->a_reqpage) + if (i 
!= ap->a_reqpage) { + vm_page_lock(pages[i]); vm_page_free(pages[i]); + vm_page_unlock(pages[i]); + } } - vm_page_unlock_queues(); VM_OBJECT_UNLOCK(object); return (0); } @@ -174,12 +172,13 @@ ncl_getpages(struct vop_getpages_args *ap) if (error && (uio.uio_resid == count)) { ncl_printf("nfs_getpages: error %d\n", error); VM_OBJECT_LOCK(object); - vm_page_lock_queues(); for (i = 0; i < npages; ++i) { - if (i != ap->a_reqpage) + if (i != ap->a_reqpage) { + vm_page_lock(pages[i]); vm_page_free(pages[i]); + vm_page_unlock(pages[i]); + } } - vm_page_unlock_queues(); VM_OBJECT_UNLOCK(object); return (VM_PAGER_ERROR); } @@ -192,7 +191,6 @@ ncl_getpages(struct vop_getpages_args *ap) size = count - uio.uio_resid; VM_OBJECT_LOCK(object); - vm_page_lock_queues(); for (i = 0, toff = 0; i < npages; i++, toff = nextoff) { vm_page_t m; nextoff = toff + PAGE_SIZE; @@ -235,17 +233,23 @@ ncl_getpages(struct vop_getpages_args *ap) * now tell them that it is ok to use. */ if (!error) { - if (m->oflags & VPO_WANTED) + if (m->oflags & VPO_WANTED) { + vm_page_lock(m); vm_page_activate(m); - else + vm_page_unlock(m); + } else { + vm_page_lock(m); vm_page_deactivate(m); + vm_page_unlock(m); + } vm_page_wakeup(m); } else { + vm_page_lock(m); vm_page_free(m); + vm_page_unlock(m); } } } - vm_page_unlock_queues(); VM_OBJECT_UNLOCK(object); return (0); } @@ -336,7 +340,7 @@ ncl_putpages(struct vop_putpages_args *ap) else iomode = NFSWRITE_FILESYNC; - error = ncl_writerpc(vp, &uio, cred, &iomode, &must_commit); + error = ncl_writerpc(vp, &uio, cred, &iomode, &must_commit, 0); pmap_qremove(kva, npages); relpbuf(bp, &ncl_pbuf_freecnt); @@ -554,7 +558,7 @@ ncl_bioread(struct vnode *vp, struct uio *uio, int ioflag, struct ucred *cred) if ((bp->b_flags & B_CACHE) == 0) { bp->b_iocmd = BIO_READ; vfs_busy_pages(bp, 0); - error = ncl_doio(vp, bp, cred, td); + error = ncl_doio(vp, bp, cred, td, 0); if (error) { brelse(bp); return (error); @@ -583,7 +587,7 @@ ncl_bioread(struct vnode *vp, struct uio 
*uio, int ioflag, struct ucred *cred) if ((bp->b_flags & B_CACHE) == 0) { bp->b_iocmd = BIO_READ; vfs_busy_pages(bp, 0); - error = ncl_doio(vp, bp, cred, td); + error = ncl_doio(vp, bp, cred, td, 0); if (error) { bp->b_ioflags |= BIO_ERROR; brelse(bp); @@ -609,7 +613,7 @@ ncl_bioread(struct vnode *vp, struct uio *uio, int ioflag, struct ucred *cred) if ((bp->b_flags & B_CACHE) == 0) { bp->b_iocmd = BIO_READ; vfs_busy_pages(bp, 0); - error = ncl_doio(vp, bp, cred, td); + error = ncl_doio(vp, bp, cred, td, 0); if (error) { brelse(bp); } @@ -638,7 +642,7 @@ ncl_bioread(struct vnode *vp, struct uio *uio, int ioflag, struct ucred *cred) if ((bp->b_flags & B_CACHE) == 0) { bp->b_iocmd = BIO_READ; vfs_busy_pages(bp, 0); - error = ncl_doio(vp, bp, cred, td); + error = ncl_doio(vp, bp, cred, td, 0); /* * no error + B_INVAL == directory EOF, * use the block. @@ -771,7 +775,7 @@ do_sync: uio.uio_td = td; iomode = NFSWRITE_FILESYNC; error = ncl_writerpc(vp, &uio, cred, &iomode, - &must_commit); + &must_commit, 0); KASSERT((must_commit == 0), ("ncl_directio_write: Did not commit write")); if (error) @@ -876,7 +880,6 @@ ncl_write(struct vop_write_args *ap) daddr_t lbn; int bcount; int n, on, error = 0; - struct proc *p = td?td->td_proc:NULL; #ifdef DIAGNOSTIC if (uio->uio_rw != UIO_WRITE) @@ -958,16 +961,8 @@ flush_and_restart: * Maybe this should be above the vnode op call, but so long as * file servers have no limits, i don't think it matters */ - if (p != NULL) { - PROC_LOCK(p); - if (uio->uio_offset + uio->uio_resid > - lim_cur(p, RLIMIT_FSIZE)) { - psignal(p, SIGXFSZ); - PROC_UNLOCK(p); - return (EFBIG); - } - PROC_UNLOCK(p); - } + if (vn_rlimit_fsize(vp, uio, td)) + return (EFBIG); biosize = vp->v_mount->mnt_stat.f_iosize; /* @@ -1122,7 +1117,7 @@ again: if ((bp->b_flags & B_CACHE) == 0) { bp->b_iocmd = BIO_READ; vfs_busy_pages(bp, 0); - error = ncl_doio(vp, bp, cred, td); + error = ncl_doio(vp, bp, cred, td, 0); if (error) { brelse(bp); break; @@ -1523,7 +1518,7 @@ 
ncl_doio_directwrite(struct buf *bp) iomode = NFSWRITE_FILESYNC; uiop->uio_td = NULL; /* NULL since we're in nfsiod */ - ncl_writerpc(bp->b_vp, uiop, bp->b_wcred, &iomode, &must_commit); + ncl_writerpc(bp->b_vp, uiop, bp->b_wcred, &iomode, &must_commit, 0); KASSERT((must_commit == 0), ("ncl_doio_directwrite: Did not commit write")); free(iov_base, M_NFSDIRECTIO); free(uiop->uio_iov, M_NFSDIRECTIO); @@ -1550,7 +1545,8 @@ ncl_doio_directwrite(struct buf *bp) * synchronously or from an nfsiod. */ int -ncl_doio(struct vnode *vp, struct buf *bp, struct ucred *cr, struct thread *td) +ncl_doio(struct vnode *vp, struct buf *bp, struct ucred *cr, struct thread *td, + int called_from_strategy) { struct uio *uiop; struct nfsnode *np; @@ -1695,7 +1691,8 @@ ncl_doio(struct vnode *vp, struct buf *bp, struct ucred *cr, struct thread *td) else iomode = NFSWRITE_FILESYNC; - error = ncl_writerpc(vp, uiop, cr, &iomode, &must_commit); + error = ncl_writerpc(vp, uiop, cr, &iomode, &must_commit, + called_from_strategy); /* * When setting B_NEEDCOMMIT also set B_CLUSTEROK to try @@ -1732,6 +1729,12 @@ ncl_doio(struct vnode *vp, struct buf *bp, struct ucred *cr, struct thread *td) * the block is reused. This is indicated by setting * the B_DELWRI and B_NEEDCOMMIT flags. * + * EIO is returned by ncl_writerpc() to indicate a recoverable + * write error and is handled as above, except that + * B_EINTR isn't set. One cause of this is a stale stateid + * error for the RPC that indicates recovery is required, + * when called with called_from_strategy != 0. + * * If the buffer is marked B_PAGING, it does not reside on * the vp's paging queues so we cannot call bdirty(). 
The * bp in this case is not an NFS cache block so we should @@ -1760,7 +1763,8 @@ ncl_doio(struct vnode *vp, struct buf *bp, struct ucred *cr, struct thread *td) bdirty(bp); bp->b_flags &= ~B_DONE; } - if (error && (bp->b_flags & B_ASYNC) == 0) + if ((error == EINTR || error == ETIMEDOUT) && + (bp->b_flags & B_ASYNC) == 0) bp->b_flags |= B_EINTR; splx(s); } else { diff --git a/sys/fs/nfsclient/nfs_clnfsiod.c b/sys/fs/nfsclient/nfs_clnfsiod.c index 6649fc032a3..62ea4f8fd4e 100644 --- a/sys/fs/nfsclient/nfs_clnfsiod.c +++ b/sys/fs/nfsclient/nfs_clnfsiod.c @@ -278,9 +278,11 @@ nfssvc_iod(void *instance) (void)ncl_doio_directwrite(bp); } else { if (bp->b_iocmd == BIO_READ) - (void) ncl_doio(bp->b_vp, bp, bp->b_rcred, NULL); + (void) ncl_doio(bp->b_vp, bp, bp->b_rcred, + NULL, 0); else - (void) ncl_doio(bp->b_vp, bp, bp->b_wcred, NULL); + (void) ncl_doio(bp->b_vp, bp, bp->b_wcred, + NULL, 0); } mtx_lock(&ncl_iod_mutex); /* diff --git a/sys/fs/nfsclient/nfs_clnode.c b/sys/fs/nfsclient/nfs_clnode.c index 6b2aa7a5f2a..e36431fa0ec 100644 --- a/sys/fs/nfsclient/nfs_clnode.c +++ b/sys/fs/nfsclient/nfs_clnode.c @@ -198,8 +198,7 @@ ncl_inactive(struct vop_inactive_args *ap) * must be flushed before the close, so that the stateid is * available for the writes. 
*/ - if (nfscl_mustflush(vp)) - (void) ncl_flush(vp, MNT_WAIT, NULL, ap->a_td, 1); + (void) ncl_flush(vp, MNT_WAIT, NULL, ap->a_td, 1, 0); (void) nfsrpc_close(vp, 1, ap->a_td); } diff --git a/sys/fs/nfsclient/nfs_clport.c b/sys/fs/nfsclient/nfs_clport.c index e81c3bf805b..f39666db68e 100644 --- a/sys/fs/nfsclient/nfs_clport.c +++ b/sys/fs/nfsclient/nfs_clport.c @@ -978,6 +978,8 @@ newnfs_copyincred(struct ucred *cr, struct nfscred *nfscr) { int i; + KASSERT(cr->cr_ngroups >= 0, + ("newnfs_copyincred: negative cr_ngroups")); nfscr->nfsc_uid = cr->cr_uid; nfscr->nfsc_ngroups = MIN(cr->cr_ngroups, NFS_MAXGRPS + 1); for (i = 0; i < nfscr->nfsc_ngroups; i++) diff --git a/sys/fs/nfsclient/nfs_clrpcops.c b/sys/fs/nfsclient/nfs_clrpcops.c index 95943a9660b..5d00102c970 100644 --- a/sys/fs/nfsclient/nfs_clrpcops.c +++ b/sys/fs/nfsclient/nfs_clrpcops.c @@ -278,7 +278,13 @@ else printf(" fhl=0\n"); error = EIO; } newnfs_copyincred(cred, &op->nfso_cred); - } + } else if (ret == NFSCLOPEN_SETCRED) + /* + * This is a new local open on a delegation. It needs + * to have credentials so that an open can be done + * against the server during recovery. 
+ */ + newnfs_copyincred(cred, &op->nfso_cred); /* * nfso_opencnt is the count of how many VOP_OPEN()s have @@ -292,7 +298,7 @@ else printf(" fhl=0\n"); nfscl_openrelease(op, error, newone); if (error == NFSERR_GRACE || error == NFSERR_STALECLIENTID || error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY) { - (void) nfs_catnap(PZERO, "nfs_open"); + (void) nfs_catnap(PZERO, error, "nfs_open"); } else if ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) && clidrev != 0) { expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p); @@ -454,7 +460,7 @@ nfsrpc_openrpc(struct nfsmount *nmp, vnode_t vp, u_int8_t *nfhp, int fhlen, ret = nfsrpc_openconfirm(vp, newfhp, newfhlen, op, cred, p); if (ret == NFSERR_DELAY) - (void) nfs_catnap(PZERO, "nfs_open"); + (void) nfs_catnap(PZERO, ret, "nfs_open"); } while (ret == NFSERR_DELAY); error = ret; } @@ -478,7 +484,7 @@ nfsrpc_openrpc(struct nfsmount *nmp, vnode_t vp, u_int8_t *nfhp, int fhlen, newfhlen, mode, op, name, namelen, &ndp, 0, 0x0, cred, p, syscred, 1); if (ret == NFSERR_DELAY) - (void) nfs_catnap(PZERO, "nfs_open2"); + (void) nfs_catnap(PZERO, ret, "nfs_open2"); } while (ret == NFSERR_DELAY); if (ret) { if (ndp != NULL) @@ -618,6 +624,7 @@ nfsrpc_doclose(struct nfsmount *nmp, struct nfsclopen *op, NFSPROC_T *p) nd->nd_repstat == NFSERR_DELAY) && error == 0) (void) nfs_catnap(PZERO, + (int)nd->nd_repstat, "nfs_close"); } while ((nd->nd_repstat == NFSERR_GRACE || nd->nd_repstat == NFSERR_DELAY) && @@ -639,7 +646,7 @@ nfsrpc_doclose(struct nfsmount *nmp, struct nfsclopen *op, NFSPROC_T *p) do { error = nfscl_tryclose(op, tcred, nmp, p); if (error == NFSERR_GRACE) - (void) nfs_catnap(PZERO, "nfs_close"); + (void) nfs_catnap(PZERO, error, "nfs_close"); } while (error == NFSERR_GRACE); NFSLOCKCLSTATE(); nfscl_lockunlock(&op->nfso_own->nfsow_rwlock); @@ -993,7 +1000,7 @@ nfsrpc_setattr(vnode_t vp, struct vattr *vap, NFSACL_T *aclp, if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID || error == 
NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY || error == NFSERR_OLDSTATEID) { - (void) nfs_catnap(PZERO, "nfs_setattr"); + (void) nfs_catnap(PZERO, error, "nfs_setattr"); } else if ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) && clidrev != 0) { expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p); @@ -1238,7 +1245,7 @@ nfsrpc_read(vnode_t vp, struct uio *uiop, struct ucred *cred, if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID || error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY || error == NFSERR_OLDSTATEID) { - (void) nfs_catnap(PZERO, "nfs_read"); + (void) nfs_catnap(PZERO, error, "nfs_read"); } else if ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) && clidrev != 0) { expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p); @@ -1340,11 +1347,16 @@ nfsmout: /* * nfs write operation + * When called_from_strategy != 0, it should return EIO for an error that + * indicates recovery is in progress, so that the buffer will be left + * dirty and be written back to the server later. If it loops around, + * the recovery thread could get stuck waiting for the buffer and recovery + * will then deadlock. 
*/ APPLESTATIC int nfsrpc_write(vnode_t vp, struct uio *uiop, int *iomode, u_char *verfp, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, - void *stuff) + void *stuff, int called_from_strategy) { int error, expireret = 0, retrycnt, nostateid; u_int32_t clidrev = 0; @@ -1387,10 +1399,6 @@ nfsrpc_write(vnode_t vp, struct uio *uiop, int *iomode, u_char *verfp, else error = nfsrpc_writerpc(vp, uiop, iomode, verfp, newcred, &stateid, p, nap, attrflagp, stuff); -if (error == NFSERR_BADSTATEID) { -printf("st=0x%x 0x%x 0x%x\n",stateid.other[0],stateid.other[1],stateid.other[2]); -nfscl_dumpstate(nmp, 1, 1, 0, 0); -} if (error == NFSERR_STALESTATEID) nfscl_initiate_recovery(nmp->nm_clp); if (lckp != NULL) @@ -1398,18 +1406,21 @@ nfscl_dumpstate(nmp, 1, 1, 0, 0); if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID || error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY || error == NFSERR_OLDSTATEID) { - (void) nfs_catnap(PZERO, "nfs_write"); + (void) nfs_catnap(PZERO, error, "nfs_write"); } else if ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) && clidrev != 0) { expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p); } retrycnt++; - } while (error == NFSERR_GRACE || error == NFSERR_STALESTATEID || - error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY || + } while (error == NFSERR_GRACE || error == NFSERR_DELAY || + ((error == NFSERR_STALESTATEID || + error == NFSERR_STALEDONTRECOVER) && called_from_strategy == 0) || (error == NFSERR_OLDSTATEID && retrycnt < 20) || ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) && expireret == 0 && clidrev != 0 && retrycnt < 4)); - if (error && retrycnt >= 4) + if (error != 0 && (retrycnt >= 4 || + ((error == NFSERR_STALESTATEID || + error == NFSERR_STALEDONTRECOVER) && called_from_strategy != 0))) error = EIO; if (NFSHASNFSV4(nmp) && p == NULL) NFSFREECRED(newcred); @@ -1722,7 +1733,7 @@ nfsrpc_create(vnode_t dvp, char *name, int namelen, struct vattr *vap, nfscl_ownerrelease(owp, 
error, newone, unlocked); if (error == NFSERR_GRACE || error == NFSERR_STALECLIENTID || error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY) { - (void) nfs_catnap(PZERO, "nfs_open"); + (void) nfs_catnap(PZERO, error, "nfs_open"); } else if ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) && clidrev != 0) { expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p); @@ -1955,7 +1966,7 @@ nfsrpc_createv4(vnode_t dvp, char *name, int namelen, struct vattr *vap, ret = nfsrpc_openconfirm(dvp, nfhp->nfh_fh, nfhp->nfh_len, op, cred, p); if (ret == NFSERR_DELAY) - (void) nfs_catnap(PZERO, "nfs_create"); + (void) nfs_catnap(PZERO, ret, "nfs_create"); } while (ret == NFSERR_DELAY); error = ret; } @@ -1977,7 +1988,7 @@ nfsrpc_createv4(vnode_t dvp, char *name, int namelen, struct vattr *vap, (NFSV4OPEN_ACCESSWRITE | NFSV4OPEN_ACCESSREAD), op, name, namelen, &dp, 0, 0x0, cred, p, 0, 1); if (ret == NFSERR_DELAY) - (void) nfs_catnap(PZERO, "nfs_crt2"); + (void) nfs_catnap(PZERO, ret, "nfs_crt2"); } while (ret == NFSERR_DELAY); if (ret) { if (dp != NULL) @@ -3519,7 +3530,8 @@ nfsrpc_advlock(vnode_t vp, off_t size, int op, struct flock *fl, if ((nd->nd_repstat == NFSERR_GRACE || nd->nd_repstat == NFSERR_DELAY) && error == 0) - (void) nfs_catnap(PZERO, "nfs_advlock"); + (void) nfs_catnap(PZERO, (int)nd->nd_repstat, + "nfs_advlock"); } while ((nd->nd_repstat == NFSERR_GRACE || nd->nd_repstat == NFSERR_DELAY) && error == 0); } @@ -3556,7 +3568,7 @@ nfsrpc_advlock(vnode_t vp, off_t size, int op, struct flock *fl, if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID || error == NFSERR_STALEDONTRECOVER || error == NFSERR_STALECLIENTID || error == NFSERR_DELAY) { - (void) nfs_catnap(PZERO, "nfs_advlock"); + (void) nfs_catnap(PZERO, error, "nfs_advlock"); } else if ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) && clidrev != 0) { expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p); diff --git a/sys/fs/nfsclient/nfs_clstate.c b/sys/fs/nfsclient/nfs_clstate.c index 
568c5de640a..b6fad20d0f5 100644 --- a/sys/fs/nfsclient/nfs_clstate.c +++ b/sys/fs/nfsclient/nfs_clstate.c @@ -139,7 +139,7 @@ static void nfscl_freedeleg(struct nfscldeleghead *, struct nfscldeleg *); static int nfscl_errmap(struct nfsrv_descript *); static void nfscl_cleanup_common(struct nfsclclient *, u_int8_t *); static int nfscl_recalldeleg(struct nfsclclient *, struct nfsmount *, - struct nfscldeleg *, vnode_t, struct ucred *, NFSPROC_T *); + struct nfscldeleg *, vnode_t, struct ucred *, NFSPROC_T *, int); static void nfscl_freeopenowner(struct nfsclowner *, int); static void nfscl_cleandeleg(struct nfscldeleg *); static int nfscl_trydelegreturn(struct nfscldeleg *, struct ucred *, @@ -274,8 +274,13 @@ nfscl_open(vnode_t vp, u_int8_t *nfhp, int fhlen, u_int32_t amode, int usedeleg, *owpp = owp; if (opp != NULL) *opp = op; - if (retp != NULL) - *retp = NFSCLOPEN_OK; + if (retp != NULL) { + if (nfhp != NULL && dp != NULL && nop == NULL) + /* new local open on delegation */ + *retp = NFSCLOPEN_SETCRED; + else + *retp = NFSCLOPEN_OK; + } /* * Now, check the mode on the open and return the appropriate @@ -475,6 +480,13 @@ nfscl_getstateid(vnode_t vp, u_int8_t *nfhp, int fhlen, u_int32_t mode, return (EACCES); } + /* + * Wait for recovery to complete. + */ + while ((clp->nfsc_flags & NFSCLFLAGS_RECVRINPROG)) + (void) nfsmsleep(&clp->nfsc_flags, NFSCLSTATEMUTEXPTR, + PZERO, "nfsrecvr", NULL); + /* * First, look for a delegation. */ @@ -772,7 +784,7 @@ nfscl_getcl(vnode_t vp, struct ucred *cred, NFSPROC_T *p, if (error == NFSERR_STALECLIENTID || error == NFSERR_STALEDONTRECOVER || error == NFSERR_CLIDINUSE) { - (void) nfs_catnap(PZERO, "nfs_setcl"); + (void) nfs_catnap(PZERO, error, "nfs_setcl"); } } while (((error == NFSERR_STALECLIENTID || error == NFSERR_STALEDONTRECOVER) && --trystalecnt > 0) || @@ -1773,6 +1785,7 @@ nfscl_recover(struct nfsclclient *clp, struct ucred *cred, NFSPROC_T *p) * block when trying to use state. 
*/ NFSLOCKCLSTATE(); + clp->nfsc_flags |= NFSCLFLAGS_RECVRINPROG; do { igotlock = nfsv4_lock(&clp->nfsc_lock, 1, NULL, NFSCLSTATEMUTEXPTR); @@ -1789,9 +1802,10 @@ nfscl_recover(struct nfsclclient *clp, struct ucred *cred, NFSPROC_T *p) error == NFSERR_STALEDONTRECOVER) && --trycnt > 0); if (error) { nfscl_cleanclient(clp); - clp->nfsc_flags &= ~(NFSCLFLAGS_HASCLIENTID | - NFSCLFLAGS_RECOVER); NFSLOCKCLSTATE(); + clp->nfsc_flags &= ~(NFSCLFLAGS_HASCLIENTID | + NFSCLFLAGS_RECOVER | NFSCLFLAGS_RECVRINPROG); + wakeup(&clp->nfsc_flags); nfsv4_unlock(&clp->nfsc_lock, 0); NFSUNLOCKCLSTATE(); return; @@ -2032,7 +2046,7 @@ nfscl_recover(struct nfsclclient *clp, struct ucred *cred, NFSPROC_T *p) newnfs_copycred(&op->nfso_cred, tcred); error = nfscl_tryclose(op, tcred, nmp, p); if (error == NFSERR_GRACE) - (void) nfs_catnap(PZERO, "nfsexcls"); + (void) nfs_catnap(PZERO, error, "nfsexcls"); } while (error == NFSERR_GRACE); LIST_REMOVE(op, nfso_list); FREE((caddr_t)op, M_NFSCLOPEN); @@ -2045,13 +2059,15 @@ nfscl_recover(struct nfsclclient *clp, struct ucred *cred, NFSPROC_T *p) newnfs_copycred(&dp->nfsdl_cred, tcred); error = nfscl_trydelegreturn(dp, tcred, nmp, p); if (error == NFSERR_GRACE) - (void) nfs_catnap(PZERO, "nfsexdlg"); + (void) nfs_catnap(PZERO, error, "nfsexdlg"); } while (error == NFSERR_GRACE); TAILQ_REMOVE(&extra_deleg, dp, nfsdl_list); FREE((caddr_t)dp, M_NFSCLDELEG); } NFSLOCKCLSTATE(); + clp->nfsc_flags &= ~NFSCLFLAGS_RECVRINPROG; + wakeup(&clp->nfsc_flags); nfsv4_unlock(&clp->nfsc_lock, 0); NFSUNLOCKCLSTATE(); NFSFREECRED(tcred); @@ -2095,6 +2111,7 @@ nfscl_hasexpired(struct nfsclclient *clp, u_int32_t clidrev, NFSPROC_T *p) NFSUNLOCKCLSTATE(); return (0); } + clp->nfsc_flags |= NFSCLFLAGS_RECVRINPROG; NFSUNLOCKCLSTATE(); nmp = clp->nfsc_nmp; @@ -2111,6 +2128,7 @@ nfscl_hasexpired(struct nfsclclient *clp, u_int32_t clidrev, NFSPROC_T *p) * Clear out any state. 
*/ nfscl_cleanclient(clp); + NFSLOCKCLSTATE(); clp->nfsc_flags &= ~(NFSCLFLAGS_HASCLIENTID | NFSCLFLAGS_RECOVER); } else { @@ -2124,14 +2142,15 @@ nfscl_hasexpired(struct nfsclclient *clp, u_int32_t clidrev, NFSPROC_T *p) * Expire the state for the client. */ nfscl_expireclient(clp, nmp, cred, p); + NFSLOCKCLSTATE(); clp->nfsc_flags |= NFSCLFLAGS_HASCLIENTID; clp->nfsc_flags &= ~NFSCLFLAGS_RECOVER; } - NFSFREECRED(cred); - clp->nfsc_flags &= ~NFSCLFLAGS_EXPIREIT; - NFSLOCKCLSTATE(); + clp->nfsc_flags &= ~(NFSCLFLAGS_EXPIREIT | NFSCLFLAGS_RECVRINPROG); + wakeup(&clp->nfsc_flags); nfsv4_unlock(&clp->nfsc_lock, 0); NFSUNLOCKCLSTATE(); + NFSFREECRED(cred); return (error); } @@ -2311,14 +2330,30 @@ nfscl_renewthread(struct nfsclclient *clp, NFSPROC_T *p) struct ucred *cred; u_int32_t clidrev; int error, cbpathdown, islept, igotlock, ret, clearok; + uint32_t recover_done_time = 0; cred = newnfs_getcred(); + NFSLOCKCLSTATE(); clp->nfsc_flags |= NFSCLFLAGS_HASTHREAD; + NFSUNLOCKCLSTATE(); for(;;) { newnfs_setroot(cred); cbpathdown = 0; - if (clp->nfsc_flags & NFSCLFLAGS_RECOVER) - nfscl_recover(clp, cred, p); + if (clp->nfsc_flags & NFSCLFLAGS_RECOVER) { + /* + * Only allow one recover within 1/2 of the lease + * duration (nfsc_renew). 
+ */ + if (recover_done_time < NFSD_MONOSEC) { + recover_done_time = NFSD_MONOSEC + + clp->nfsc_renew; + nfscl_recover(clp, cred, p); + } else { + NFSLOCKCLSTATE(); + clp->nfsc_flags &= ~NFSCLFLAGS_RECOVER; + NFSUNLOCKCLSTATE(); + } + } if (clp->nfsc_expire <= NFSD_MONOSEC && (clp->nfsc_flags & NFSCLFLAGS_HASCLIENTID)) { clp->nfsc_expire = NFSD_MONOSEC + clp->nfsc_renew; @@ -2326,9 +2361,11 @@ nfscl_renewthread(struct nfsclclient *clp, NFSPROC_T *p) error = nfsrpc_renew(clp, cred, p); if (error == NFSERR_CBPATHDOWN) cbpathdown = 1; - else if (error == NFSERR_STALECLIENTID) + else if (error == NFSERR_STALECLIENTID) { + NFSLOCKCLSTATE(); clp->nfsc_flags |= NFSCLFLAGS_RECOVER; - else if (error == NFSERR_EXPIRED) + NFSUNLOCKCLSTATE(); + } else if (error == NFSERR_EXPIRED) (void) nfscl_hasexpired(clp, clidrev, p); } @@ -2432,7 +2469,7 @@ tryagain: NFSUNLOCKCLSTATE(); newnfs_copycred(&dp->nfsdl_cred, cred); ret = nfscl_recalldeleg(clp, clp->nfsc_nmp, dp, - NULL, cred, p); + NULL, cred, p, 1); if (!ret) { nfscl_cleandeleg(dp); TAILQ_REMOVE(&clp->nfsc_deleg, dp, @@ -3272,7 +3309,8 @@ nfscl_lockt(vnode_t vp, struct nfsclclient *clp, u_int64_t off, */ static int nfscl_recalldeleg(struct nfsclclient *clp, struct nfsmount *nmp, - struct nfscldeleg *dp, vnode_t vp, struct ucred *cred, NFSPROC_T *p) + struct nfscldeleg *dp, vnode_t vp, struct ucred *cred, NFSPROC_T *p, + int called_from_renewthread) { struct nfsclowner *owp, *lowp, *nowp; struct nfsclopen *op, *lop; @@ -3306,6 +3344,7 @@ nfscl_recalldeleg(struct nfsclclient *clp, struct nfsmount *nmp, * Ok, if it's a write delegation, flush data to the server, so * that close/open consistency is retained. 
*/ + ret = 0; NFSLOCKNODE(np); if ((dp->nfsdl_flags & NFSCLDL_WRITE) && (np->n_flag & NMODIFIED)) { #ifdef APPLE @@ -3314,7 +3353,8 @@ nfscl_recalldeleg(struct nfsclclient *clp, struct nfsmount *nmp, np->n_flag |= NDELEGRECALL; #endif NFSUNLOCKNODE(np); - (void) ncl_flush(vp, MNT_WAIT, cred, p, 1); + ret = ncl_flush(vp, MNT_WAIT, cred, p, 1, + called_from_renewthread); NFSLOCKNODE(np); #ifdef APPLE OSBitAndAtomic((int32_t)~(NMODIFIED | NDELEGRECALL), (UInt32 *)&np->n_flag); @@ -3323,6 +3363,16 @@ nfscl_recalldeleg(struct nfsclclient *clp, struct nfsmount *nmp, #endif } NFSUNLOCKNODE(np); + if (ret == EIO && called_from_renewthread != 0) { + /* + * If the flush failed with EIO for the renew thread, + * return now, so that the dirty buffer will be flushed + * later. + */ + if (gotvp != 0) + vrele(vp); + return (ret); + } /* * Now, for each openowner with opens issued locally, move them @@ -3569,7 +3619,7 @@ nfscl_tryopen(struct nfsmount *nmp, vnode_t vp, u_int8_t *fhp, int fhlen, mode, op, name, namelen, ndpp, reclaim, delegtype, cred, p, 0, 0); if (error == NFSERR_DELAY) - (void) nfs_catnap(PZERO, "nfstryop"); + (void) nfs_catnap(PZERO, error, "nfstryop"); } while (error == NFSERR_DELAY); if (error == EAUTH || error == EACCES) { /* Try again using system credentials */ @@ -3579,7 +3629,7 @@ nfscl_tryopen(struct nfsmount *nmp, vnode_t vp, u_int8_t *fhp, int fhlen, newfhlen, mode, op, name, namelen, ndpp, reclaim, delegtype, cred, p, 1, 0); if (error == NFSERR_DELAY) - (void) nfs_catnap(PZERO, "nfstryop"); + (void) nfs_catnap(PZERO, error, "nfstryop"); } while (error == NFSERR_DELAY); } return (error); @@ -3602,7 +3652,8 @@ nfscl_trylock(struct nfsmount *nmp, vnode_t vp, u_int8_t *fhp, error = nfsrpc_lock(nd, nmp, vp, fhp, fhlen, nlp, newone, reclaim, off, len, type, cred, p, 0); if (!error && nd->nd_repstat == NFSERR_DELAY) - (void) nfs_catnap(PZERO, "nfstrylck"); + (void) nfs_catnap(PZERO, (int)nd->nd_repstat, + "nfstrylck"); } while (!error && nd->nd_repstat == 
NFSERR_DELAY); if (!error) error = nd->nd_repstat; @@ -3613,7 +3664,8 @@ nfscl_trylock(struct nfsmount *nmp, vnode_t vp, u_int8_t *fhp, error = nfsrpc_lock(nd, nmp, vp, fhp, fhlen, nlp, newone, reclaim, off, len, type, cred, p, 1); if (!error && nd->nd_repstat == NFSERR_DELAY) - (void) nfs_catnap(PZERO, "nfstrylck"); + (void) nfs_catnap(PZERO, (int)nd->nd_repstat, + "nfstrylck"); } while (!error && nd->nd_repstat == NFSERR_DELAY); if (!error) error = nd->nd_repstat; @@ -3635,7 +3687,7 @@ nfscl_trydelegreturn(struct nfscldeleg *dp, struct ucred *cred, do { error = nfsrpc_delegreturn(dp, cred, nmp, p, 0); if (error == NFSERR_DELAY) - (void) nfs_catnap(PZERO, "nfstrydp"); + (void) nfs_catnap(PZERO, error, "nfstrydp"); } while (error == NFSERR_DELAY); if (error == EAUTH || error == EACCES) { /* Try again using system credentials */ @@ -3643,7 +3695,7 @@ nfscl_trydelegreturn(struct nfscldeleg *dp, struct ucred *cred, do { error = nfsrpc_delegreturn(dp, cred, nmp, p, 1); if (error == NFSERR_DELAY) - (void) nfs_catnap(PZERO, "nfstrydp"); + (void) nfs_catnap(PZERO, error, "nfstrydp"); } while (error == NFSERR_DELAY); } return (error); @@ -3664,7 +3716,7 @@ nfscl_tryclose(struct nfsclopen *op, struct ucred *cred, do { error = nfsrpc_closerpc(nd, nmp, op, cred, p, 0); if (error == NFSERR_DELAY) - (void) nfs_catnap(PZERO, "nfstrycl"); + (void) nfs_catnap(PZERO, error, "nfstrycl"); } while (error == NFSERR_DELAY); if (error == EAUTH || error == EACCES) { /* Try again using system credentials */ @@ -3672,7 +3724,7 @@ nfscl_tryclose(struct nfsclopen *op, struct ucred *cred, do { error = nfsrpc_closerpc(nd, nmp, op, cred, p, 1); if (error == NFSERR_DELAY) - (void) nfs_catnap(PZERO, "nfstrycl"); + (void) nfs_catnap(PZERO, error, "nfstrycl"); } while (error == NFSERR_DELAY); } return (error); @@ -3820,7 +3872,7 @@ nfscl_removedeleg(vnode_t vp, NFSPROC_T *p, nfsv4stateid_t *stp) NFSUNLOCKCLSTATE(); cred = newnfs_getcred(); newnfs_copycred(&dp->nfsdl_cred, cred); - (void) 
nfscl_recalldeleg(clp, nmp, dp, vp, cred, p); + (void) nfscl_recalldeleg(clp, nmp, dp, vp, cred, p, 0); NFSFREECRED(cred); triedrecall = 1; NFSLOCKCLSTATE(); @@ -3918,7 +3970,7 @@ nfscl_renamedeleg(vnode_t fvp, nfsv4stateid_t *fstp, int *gotfdp, vnode_t tvp, NFSUNLOCKCLSTATE(); cred = newnfs_getcred(); newnfs_copycred(&dp->nfsdl_cred, cred); - (void) nfscl_recalldeleg(clp, nmp, dp, fvp, cred, p); + (void) nfscl_recalldeleg(clp, nmp, dp, fvp, cred, p, 0); NFSFREECRED(cred); triedrecall = 1; NFSLOCKCLSTATE(); diff --git a/sys/fs/nfsclient/nfs_clvfsops.c b/sys/fs/nfsclient/nfs_clvfsops.c index 0bf2bf4b4e5..76154c37180 100644 --- a/sys/fs/nfsclient/nfs_clvfsops.c +++ b/sys/fs/nfsclient/nfs_clvfsops.c @@ -652,7 +652,7 @@ nfs_decode_args(struct mount *mp, struct nfsmount *nmp, struct nfs_args *argp, while (newnfs_connect(nmp, &nmp->nm_sockreq, cred, td, 0)) { printf("newnfs_args: retrying connect\n"); - (void) nfs_catnap(PSOCK, "newnfscon"); + (void) nfs_catnap(PSOCK, 0, "newnfscon"); } } } else { @@ -1188,7 +1188,7 @@ mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam, error = nfsrpc_getdirpath(nmp, NFSMNT_DIRPATH(nmp), cred, td); if (error) - (void) nfs_catnap(PZERO, "nfsgetdirp"); + (void) nfs_catnap(PZERO, error, "nfsgetdirp"); } while (error && --trycnt > 0); if (error) { error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); @@ -1284,7 +1284,7 @@ nfs_unmount(struct mount *mp, int mntflags) do { error = vflush(mp, 1, flags, td); if ((mntflags & MNT_FORCE) && error != 0 && ++trycnt < 30) - (void) nfs_catnap(PSOCK, "newndm"); + (void) nfs_catnap(PSOCK, error, "newndm"); } while ((mntflags & MNT_FORCE) && error != 0 && trycnt < 30); if (error) goto out; diff --git a/sys/fs/nfsclient/nfs_clvnops.c b/sys/fs/nfsclient/nfs_clvnops.c index 3be823f1969..d53c8386242 100644 --- a/sys/fs/nfsclient/nfs_clvnops.c +++ b/sys/fs/nfsclient/nfs_clvnops.c @@ -670,13 +670,13 @@ nfs_close(struct vop_close_args *ap) * traditional vnode locking implemented for Vnode Ops. 
*/ int cm = newnfs_commit_on_close ? 1 : 0; - error = ncl_flush(vp, MNT_WAIT, cred, ap->a_td, cm); + error = ncl_flush(vp, MNT_WAIT, cred, ap->a_td, cm, 0); /* np->n_flag &= ~NMODIFIED; */ } else if (NFS_ISV4(vp)) { if (nfscl_mustflush(vp)) { int cm = newnfs_commit_on_close ? 1 : 0; error = ncl_flush(vp, MNT_WAIT, cred, ap->a_td, - cm); + cm, 0); /* * as above w.r.t races when clearing * NMODIFIED. @@ -1306,7 +1306,7 @@ ncl_readrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred) */ int ncl_writerpc(struct vnode *vp, struct uio *uiop, struct ucred *cred, - int *iomode, int *must_commit) + int *iomode, int *must_commit, int called_from_strategy) { struct nfsvattr nfsva; int error = 0, attrflag, ret; @@ -1315,7 +1315,7 @@ ncl_writerpc(struct vnode *vp, struct uio *uiop, struct ucred *cred, *must_commit = 0; error = nfsrpc_write(vp, uiop, iomode, verf, cred, - uiop->uio_td, &nfsva, &attrflag, NULL); + uiop->uio_td, &nfsva, &attrflag, NULL, called_from_strategy); NFSLOCKMNT(nmp); if (!error && NFSHASWRITEVERF(nmp) && NFSBCMP(verf, nmp->nm_verf, NFSX_VERF)) { @@ -2473,7 +2473,7 @@ nfs_strategy(struct vop_strategy_args *ap) */ if ((bp->b_flags & B_ASYNC) == 0 || ncl_asyncio(VFSTONFS(ap->a_vp->v_mount), bp, NOCRED, curthread)) - (void)ncl_doio(ap->a_vp, bp, cr, curthread); + (void) ncl_doio(ap->a_vp, bp, cr, curthread, 1); return (0); } @@ -2484,17 +2484,20 @@ nfs_strategy(struct vop_strategy_args *ap) static int nfs_fsync(struct vop_fsync_args *ap) { - return (ncl_flush(ap->a_vp, ap->a_waitfor, NULL, ap->a_td, 1)); + return (ncl_flush(ap->a_vp, ap->a_waitfor, NULL, ap->a_td, 1, 0)); } /* * Flush all the blocks associated with a vnode. * Walk through the buffer pool and push any dirty pages * associated with the vnode. + * If the called_from_renewthread argument is TRUE, it has been called + * from the NFSv4 renew thread and, as such, cannot block indefinitely + * waiting for a buffer write to complete. 
*/ int ncl_flush(struct vnode *vp, int waitfor, struct ucred *cred, struct thread *td, - int commit) + int commit, int called_from_renewthread) { struct nfsnode *np = VTONFS(vp); struct buf *bp; @@ -2513,6 +2516,8 @@ ncl_flush(struct vnode *vp, int waitfor, struct ucred *cred, struct thread *td, struct buf *bvec_on_stack[NFS_COMMITBVECSIZ]; int bvecsize = 0, bveccount; + if (called_from_renewthread != 0) + slptimeo = hz; if (nmp->nm_flag & NFSMNT_INT) slpflag = NFS_PCATCH; if (!commit) @@ -2708,6 +2713,14 @@ loop: error = 0; goto loop; } + if (called_from_renewthread != 0) { + /* + * Return EIO so the flush will be retried + * later. + */ + error = EIO; + goto done; + } if (newnfs_sigintr(nmp, td)) { error = EINTR; goto done; @@ -2747,6 +2760,14 @@ loop: error = bufobj_wwait(bo, slpflag, slptimeo); if (error) { BO_UNLOCK(bo); + if (called_from_renewthread != 0) { + /* + * Return EIO so that the flush will be + * retried later. + */ + error = EIO; + goto done; + } error = newnfs_sigintr(nmp, td); if (error) goto done; @@ -2838,7 +2859,7 @@ nfs_advlock(struct vop_advlock_args *ap) */ if (ap->a_op == F_UNLCK && nfscl_checkwritelocked(vp, ap->a_fl, cred, td)) - (void) ncl_flush(vp, MNT_WAIT, cred, td, 1); + (void) ncl_flush(vp, MNT_WAIT, cred, td, 1, 0); /* * Loop around doing the lock op, while a blocking lock @@ -2850,7 +2871,8 @@ nfs_advlock(struct vop_advlock_args *ap) if (ret == NFSERR_DENIED && (ap->a_flags & F_WAIT) && ap->a_op == F_SETLK) { VOP_UNLOCK(vp, 0); - error = nfs_catnap(PZERO | PCATCH, "ncladvl"); + error = nfs_catnap(PZERO | PCATCH, ret, + "ncladvl"); if (error) return (EINTR); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); diff --git a/sys/fs/nfsserver/nfs_nfsdstate.c b/sys/fs/nfsserver/nfs_nfsdstate.c index a4df1ed9c33..e475cb77826 100644 --- a/sys/fs/nfsserver/nfs_nfsdstate.c +++ b/sys/fs/nfsserver/nfs_nfsdstate.c @@ -4578,7 +4578,7 @@ nfsd_recalldelegation(vnode_t vp, NFSPROC_T *p) 100000) return; /* Sleep for a short period of time */ - (void) 
nfs_catnap(PZERO, "nfsremove"); + (void) nfs_catnap(PZERO, 0, "nfsremove"); } } while (error == NFSERR_DELAY); } diff --git a/sys/fs/nwfs/nwfs_io.c b/sys/fs/nwfs/nwfs_io.c index 75b1c188a18..0bd7dc355ff 100644 --- a/sys/fs/nwfs/nwfs_io.c +++ b/sys/fs/nwfs/nwfs_io.c @@ -28,16 +28,13 @@ */ #include #include -#include /* defines plimit structure in proc struct */ #include #include #include -#include #include #include #include #include -#include #include #include @@ -229,16 +226,10 @@ nwfs_writevnode(vp, uiop, cred, ioflag) } } if (uiop->uio_resid == 0) return 0; - if (td != NULL) { - PROC_LOCK(td->td_proc); - if (uiop->uio_offset + uiop->uio_resid > - lim_cur(td->td_proc, RLIMIT_FSIZE)) { - psignal(td->td_proc, SIGXFSZ); - PROC_UNLOCK(td->td_proc); - return (EFBIG); - } - PROC_UNLOCK(td->td_proc); - } + + if (vn_rlimit_fsize(vp, uiop, td)) + return (EFBIG); + error = ncp_write(NWFSTOCONN(nmp), &np->n_fh, uiop, cred); NCPVNDEBUG("after: ofs=%d,resid=%d\n",(int)uiop->uio_offset, uiop->uio_resid); if (!error) { @@ -437,19 +428,19 @@ nwfs_getpages(ap) VM_OBJECT_LOCK(object); if (error && (uio.uio_resid == count)) { printf("nwfs_getpages: error %d\n",error); - vm_page_lock_queues(); for (i = 0; i < npages; i++) { - if (ap->a_reqpage != i) + if (ap->a_reqpage != i) { + vm_page_lock(pages[i]); vm_page_free(pages[i]); + vm_page_unlock(pages[i]); + } } - vm_page_unlock_queues(); VM_OBJECT_UNLOCK(object); return VM_PAGER_ERROR; } size = count - uio.uio_resid; - vm_page_lock_queues(); for (i = 0, toff = 0; i < npages; i++, toff = nextoff) { vm_page_t m; nextoff = toff + PAGE_SIZE; @@ -480,17 +471,23 @@ nwfs_getpages(ap) * now tell them that it is ok to use. 
*/ if (!error) { - if (m->oflags & VPO_WANTED) + if (m->oflags & VPO_WANTED) { + vm_page_lock(m); vm_page_activate(m); - else + vm_page_unlock(m); + } else { + vm_page_lock(m); vm_page_deactivate(m); + vm_page_unlock(m); + } vm_page_wakeup(m); } else { + vm_page_lock(m); vm_page_free(m); + vm_page_unlock(m); } } } - vm_page_unlock_queues(); VM_OBJECT_UNLOCK(object); return 0; #endif /* NWFS_RWCACHE */ diff --git a/sys/fs/pseudofs/pseudofs_vnops.c b/sys/fs/pseudofs/pseudofs_vnops.c index 5854378336c..f8343a97401 100644 --- a/sys/fs/pseudofs/pseudofs_vnops.c +++ b/sys/fs/pseudofs/pseudofs_vnops.c @@ -542,7 +542,7 @@ pfs_lookup(struct vop_cachedlookup_args *va) if (cnp->cn_flags & ISDOTDOT) vn_lock(vn, LK_EXCLUSIVE|LK_RETRY); - if (cnp->cn_flags & MAKEENTRY) + if (cnp->cn_flags & MAKEENTRY && !(vn->v_iflag & VI_DOOMED)) cache_enter(vn, *vpp, cnp); PFS_RETURN (0); failed: diff --git a/sys/fs/smbfs/smbfs_io.c b/sys/fs/smbfs/smbfs_io.c index df779a6151b..9becfdda13e 100644 --- a/sys/fs/smbfs/smbfs_io.c +++ b/sys/fs/smbfs/smbfs_io.c @@ -28,9 +28,7 @@ */ #include #include -#include /* defines plimit structure in proc struct */ #include -#include #include #include #include @@ -235,7 +233,6 @@ smbfs_writevnode(struct vnode *vp, struct uio *uiop, struct smbmount *smp = VTOSMBFS(vp); struct smbnode *np = VTOSMB(vp); struct smb_cred scred; - struct proc *p; struct thread *td; int error = 0; @@ -249,7 +246,6 @@ smbfs_writevnode(struct vnode *vp, struct uio *uiop, /* if (uiop->uio_offset + uiop->uio_resid > smp->nm_maxfilesize) return (EFBIG);*/ td = uiop->uio_td; - p = td->td_proc; if (ioflag & (IO_APPEND | IO_SYNC)) { if (np->n_flag & NMODIFIED) { smbfs_attr_cacheremove(vp); @@ -271,16 +267,10 @@ smbfs_writevnode(struct vnode *vp, struct uio *uiop, } if (uiop->uio_resid == 0) return 0; - if (p != NULL) { - PROC_LOCK(p); - if (uiop->uio_offset + uiop->uio_resid > - lim_cur(p, RLIMIT_FSIZE)) { - psignal(p, SIGXFSZ); - PROC_UNLOCK(p); - return EFBIG; - } - PROC_UNLOCK(p); - } + + 
if (vn_rlimit_fsize(vp, uiop, td)) + return (EFBIG); + smb_makescred(&scred, td, cred); error = smb_write(smp->sm_share, np->n_fid, uiop, &scred); SMBVDEBUG("after: ofs=%d,resid=%d\n",(int)uiop->uio_offset, uiop->uio_resid); @@ -450,12 +440,13 @@ smbfs_getpages(ap) VM_OBJECT_LOCK(object); if (m->valid != 0) { - vm_page_lock_queues(); for (i = 0; i < npages; ++i) { - if (i != reqpage) + if (i != reqpage) { + vm_page_lock(pages[i]); vm_page_free(pages[i]); + vm_page_unlock(pages[i]); + } } - vm_page_unlock_queues(); VM_OBJECT_UNLOCK(object); return 0; } @@ -488,19 +479,19 @@ smbfs_getpages(ap) VM_OBJECT_LOCK(object); if (error && (uio.uio_resid == count)) { printf("smbfs_getpages: error %d\n",error); - vm_page_lock_queues(); for (i = 0; i < npages; i++) { - if (reqpage != i) + if (reqpage != i) { + vm_page_lock(pages[i]); vm_page_free(pages[i]); + vm_page_unlock(pages[i]); + } } - vm_page_unlock_queues(); VM_OBJECT_UNLOCK(object); return VM_PAGER_ERROR; } size = count - uio.uio_resid; - vm_page_lock_queues(); for (i = 0, toff = 0; i < npages; i++, toff = nextoff) { vm_page_t m; nextoff = toff + PAGE_SIZE; @@ -544,17 +535,23 @@ smbfs_getpages(ap) * now tell them that it is ok to use. 
*/ if (!error) { - if (m->oflags & VPO_WANTED) + if (m->oflags & VPO_WANTED) { + vm_page_lock(m); vm_page_activate(m); - else + vm_page_unlock(m); + } else { + vm_page_lock(m); vm_page_deactivate(m); + vm_page_unlock(m); + } vm_page_wakeup(m); } else { + vm_page_lock(m); vm_page_free(m); + vm_page_unlock(m); } } } - vm_page_unlock_queues(); VM_OBJECT_UNLOCK(object); return 0; #endif /* SMBFS_RWGENERIC */ diff --git a/sys/fs/tmpfs/tmpfs_vnops.c b/sys/fs/tmpfs/tmpfs_vnops.c index c17efff9d5f..330eea57538 100644 --- a/sys/fs/tmpfs/tmpfs_vnops.c +++ b/sys/fs/tmpfs/tmpfs_vnops.c @@ -42,7 +42,6 @@ __FBSDID("$FreeBSD$"); #include #include #include -#include #include #include #include @@ -460,9 +459,9 @@ tmpfs_nocacheread(vm_object_t tobj, vm_pindex_t idx, error = uiomove_fromphys(&m, offset, tlen, uio); VM_OBJECT_LOCK(tobj); out: - vm_page_lock_queues(); + vm_page_lock(m); vm_page_unwire(m, TRUE); - vm_page_unlock_queues(); + vm_page_unlock(m); vm_page_wakeup(m); vm_object_pip_subtract(tobj, 1); VM_OBJECT_UNLOCK(tobj); @@ -516,8 +515,16 @@ tmpfs_mappedread(vm_object_t vobj, vm_object_t tobj, size_t len, struct uio *uio lookupvpg: if (((m = vm_page_lookup(vobj, idx)) != NULL) && vm_page_is_valid(m, offset, tlen)) { - if (vm_page_sleep_if_busy(m, FALSE, "tmfsmr")) + if ((m->oflags & VPO_BUSY) != 0) { + /* + * Reference the page before unlocking and sleeping so + * that the page daemon is less likely to reclaim it. + */ + vm_page_lock_queues(); + vm_page_flag_set(m, PG_REFERENCED); + vm_page_sleep(m, "tmfsmr"); goto lookupvpg; + } vm_page_busy(m); VM_OBJECT_UNLOCK(vobj); error = uiomove_fromphys(&m, offset, tlen, uio); @@ -526,8 +533,16 @@ lookupvpg: VM_OBJECT_UNLOCK(vobj); return (error); } else if (m != NULL && uio->uio_segflg == UIO_NOCOPY) { - if (vm_page_sleep_if_busy(m, FALSE, "tmfsmr")) + if ((m->oflags & VPO_BUSY) != 0) { + /* + * Reference the page before unlocking and sleeping so + * that the page daemon is less likely to reclaim it. 
+ */ + vm_page_lock_queues(); + vm_page_flag_set(m, PG_REFERENCED); + vm_page_sleep(m, "tmfsmr"); goto lookupvpg; + } vm_page_busy(m); VM_OBJECT_UNLOCK(vobj); sched_pin(); @@ -627,8 +642,16 @@ tmpfs_mappedwrite(vm_object_t vobj, vm_object_t tobj, size_t len, struct uio *ui lookupvpg: if (((vpg = vm_page_lookup(vobj, idx)) != NULL) && vm_page_is_valid(vpg, offset, tlen)) { - if (vm_page_sleep_if_busy(vpg, FALSE, "tmfsmw")) + if ((vpg->oflags & VPO_BUSY) != 0) { + /* + * Reference the page before unlocking and sleeping so + * that the page daemon is less likely to reclaim it. + */ + vm_page_lock_queues(); + vm_page_flag_set(vpg, PG_REFERENCED); + vm_page_sleep(vpg, "tmfsmw"); goto lookupvpg; + } vm_page_busy(vpg); vm_page_lock_queues(); vm_page_undirty(vpg); @@ -667,6 +690,7 @@ nocache: out: if (vobj != NULL) VM_OBJECT_LOCK(vobj); + vm_page_lock(tpg); vm_page_lock_queues(); if (error == 0) { KASSERT(tpg->valid == VM_PAGE_BITS_ALL, @@ -675,6 +699,7 @@ out: } vm_page_unwire(tpg, TRUE); vm_page_unlock_queues(); + vm_page_unlock(tpg); vm_page_wakeup(tpg); if (vpg != NULL) vm_page_wakeup(vpg); @@ -692,7 +717,6 @@ tmpfs_write(struct vop_write_args *v) struct vnode *vp = v->a_vp; struct uio *uio = v->a_uio; int ioflag = v->a_ioflag; - struct thread *td = uio->uio_td; boolean_t extended; int error = 0; @@ -722,16 +746,8 @@ tmpfs_write(struct vop_write_args *v) VFS_TO_TMPFS(vp->v_mount)->tm_maxfilesize) return (EFBIG); - if (vp->v_type == VREG && td != NULL) { - PROC_LOCK(td->td_proc); - if (uio->uio_offset + uio->uio_resid > - lim_cur(td->td_proc, RLIMIT_FSIZE)) { - psignal(td->td_proc, SIGXFSZ); - PROC_UNLOCK(td->td_proc); - return (EFBIG); - } - PROC_UNLOCK(td->td_proc); - } + if (vn_rlimit_fsize(vp, uio, uio->uio_td)) + return (EFBIG); extended = uio->uio_offset + uio->uio_resid > node->tn_size; if (extended) { diff --git a/sys/geom/eli/g_eli.c b/sys/geom/eli/g_eli.c index 2a405c5b502..a3746c09e1a 100644 --- a/sys/geom/eli/g_eli.c +++ b/sys/geom/eli/g_eli.c @@ -340,7 
+340,7 @@ g_eli_worker(void *arg) } #endif thread_lock(curthread); - sched_prio(curthread, PRIBIO); + sched_prio(curthread, PUSER); if (sc->sc_crypto == G_ELI_CRYPTO_SW && g_eli_threads == 0) sched_bind(curthread, wr->w_number); thread_unlock(curthread); @@ -361,8 +361,7 @@ g_eli_worker(void *arg) mtx_unlock(&sc->sc_queue_mtx); kproc_exit(0); } - msleep(sc, &sc->sc_queue_mtx, PRIBIO | PDROP, - "geli:w", 0); + msleep(sc, &sc->sc_queue_mtx, PDROP, "geli:w", 0); continue; } mtx_unlock(&sc->sc_queue_mtx); diff --git a/sys/geom/geom.h b/sys/geom/geom.h index 82d04568aee..fe18a15e8c6 100644 --- a/sys/geom/geom.h +++ b/sys/geom/geom.h @@ -353,6 +353,9 @@ g_free(void *ptr) sx_assert(&topology_lock, SX_UNLOCKED); \ } while (0) +#define g_topology_sleep(chan, timo) \ + sx_sleep(chan, &topology_lock, 0, "gtopol", timo) + #define DECLARE_GEOM_CLASS(class, name) \ static moduledata_t name##_mod = { \ #name, g_modevent, &class \ diff --git a/sys/geom/geom_io.c b/sys/geom/geom_io.c index 931b7c30338..1090c0bbf66 100644 --- a/sys/geom/geom_io.c +++ b/sys/geom/geom_io.c @@ -309,8 +309,8 @@ g_io_check(struct bio *bp) case BIO_READ: case BIO_WRITE: case BIO_DELETE: - /* Zero sectorsize is a probably lack of media */ - if (pp->sectorsize == 0) + /* Zero sectorsize or mediasize is probably a lack of media. 
*/ + if (pp->sectorsize == 0 || pp->mediasize == 0) return (ENXIO); /* Reject I/O not on sector boundary */ if (bp->bio_offset % pp->sectorsize) diff --git a/sys/geom/geom_subr.c b/sys/geom/geom_subr.c index 9bef0e3638f..b001c5fcaed 100644 --- a/sys/geom/geom_subr.c +++ b/sys/geom/geom_subr.c @@ -59,6 +59,10 @@ __FBSDID("$FreeBSD$"); #include #endif +#ifdef KDB +#include +#endif + struct class_list_head g_classes = LIST_HEAD_INITIALIZER(g_classes); static struct g_tailq_head geoms = TAILQ_HEAD_INITIALIZER(geoms); char *g_wait_event, *g_wait_up, *g_wait_down, *g_wait_sim; @@ -130,65 +134,73 @@ g_load_class(void *arg, int flag) } } -static void -g_unload_class(void *arg, int flag) +static int +g_unload_class(struct g_class *mp) { - struct g_hh00 *hh; - struct g_class *mp; struct g_geom *gp; struct g_provider *pp; struct g_consumer *cp; int error; - g_topology_assert(); - hh = arg; - mp = hh->mp; - G_VALID_CLASS(mp); + g_topology_lock(); g_trace(G_T_TOPOLOGY, "g_unload_class(%s)", mp->name); +retry: + G_VALID_CLASS(mp); + LIST_FOREACH(gp, &mp->geom, geom) { + /* We refuse to unload if anything is open */ + LIST_FOREACH(pp, &gp->provider, provider) + if (pp->acr || pp->acw || pp->ace) { + g_topology_unlock(); + return (EBUSY); + } + LIST_FOREACH(cp, &gp->consumer, consumer) + if (cp->acr || cp->acw || cp->ace) { + g_topology_unlock(); + return (EBUSY); + } + /* If the geom is withering, wait for it to finish. */ + if (gp->flags & G_GEOM_WITHER) { + g_topology_sleep(mp, 1); + goto retry; + } + } /* * We allow unloading if we have no geoms, or a class * method we can use to get rid of them. 
*/ if (!LIST_EMPTY(&mp->geom) && mp->destroy_geom == NULL) { - hh->error = EOPNOTSUPP; - return; - } - - /* We refuse to unload if anything is open */ - LIST_FOREACH(gp, &mp->geom, geom) { - LIST_FOREACH(pp, &gp->provider, provider) - if (pp->acr || pp->acw || pp->ace) { - hh->error = EBUSY; - return; - } - LIST_FOREACH(cp, &gp->consumer, consumer) - if (cp->acr || cp->acw || cp->ace) { - hh->error = EBUSY; - return; - } + g_topology_unlock(); + return (EOPNOTSUPP); } /* Bar new entries */ mp->taste = NULL; mp->config = NULL; - error = 0; + LIST_FOREACH(gp, &mp->geom, geom) { + error = mp->destroy_geom(NULL, mp, gp); + if (error != 0) { + g_topology_unlock(); + return (error); + } + } + /* Wait for withering to finish. */ for (;;) { gp = LIST_FIRST(&mp->geom); if (gp == NULL) break; - error = mp->destroy_geom(NULL, mp, gp); - if (error != 0) - break; + KASSERT(gp->flags & G_GEOM_WITHER, + ("Non-withering geom in class %s", mp->name)); + g_topology_sleep(mp, 1); } - if (error == 0) { - if (mp->fini != NULL) - mp->fini(mp); - LIST_REMOVE(mp, class); - } - hh->error = error; - return; + G_VALID_CLASS(mp); + if (mp->fini != NULL) + mp->fini(mp); + LIST_REMOVE(mp, class); + g_topology_unlock(); + + return (0); } int @@ -209,12 +221,12 @@ g_modevent(module_t mod, int type, void *data) g_ignition++; g_init(); } - hh = g_malloc(sizeof *hh, M_WAITOK | M_ZERO); - hh->mp = data; error = EOPNOTSUPP; switch (type) { case MOD_LOAD: - g_trace(G_T_TOPOLOGY, "g_modevent(%s, LOAD)", hh->mp->name); + g_trace(G_T_TOPOLOGY, "g_modevent(%s, LOAD)", mp->name); + hh = g_malloc(sizeof *hh, M_WAITOK | M_ZERO); + hh->mp = mp; /* * Once the system is not cold, MOD_LOAD calls will be * from the userland and the g_event thread will be able @@ -232,18 +244,14 @@ g_modevent(module_t mod, int type, void *data) } break; case MOD_UNLOAD: - g_trace(G_T_TOPOLOGY, "g_modevent(%s, UNLOAD)", hh->mp->name); - error = g_waitfor_event(g_unload_class, hh, M_WAITOK, NULL); - if (error == 0) - error = 
hh->error; + g_trace(G_T_TOPOLOGY, "g_modevent(%s, UNLOAD)", mp->name); + DROP_GIANT(); + error = g_unload_class(mp); + PICKUP_GIANT(); if (error == 0) { - KASSERT(LIST_EMPTY(&hh->mp->geom), - ("Unloaded class (%s) still has geom", hh->mp->name)); + KASSERT(LIST_EMPTY(&mp->geom), + ("Unloaded class (%s) still has geom", mp->name)); } - g_free(hh); - break; - default: - g_free(hh); break; } return (error); @@ -1011,12 +1019,11 @@ g_getattr__(const char *attr, struct g_consumer *cp, void *var, int len) #if defined(DIAGNOSTIC) || defined(DDB) /* - * This function walks (topologically unsafely) the mesh and return a - * non-zero integer if it finds the argument pointer is an object. - * The return value indicates which type of object it is belived to be. - * If topology is not locked, this function is potentially dangerous, - * but since it is for debugging purposes and can be useful for instance - * from DDB, we do not assert topology lock is held. + * This function walks the mesh and returns a non-zero integer if it + * finds the argument pointer is an object. The return value indicates + * which type of object it is believed to be. If topology is not locked, + * this function is potentially dangerous, but we don't assert that the + * topology lock is held when called from debugger. 
*/ int g_valid_obj(void const *ptr) @@ -1026,7 +1033,10 @@ g_valid_obj(void const *ptr) struct g_consumer *cp; struct g_provider *pp; - g_topology_assert(); +#ifdef KDB + if (kdb_active == 0) +#endif + g_topology_assert(); LIST_FOREACH(mp, &g_classes, class) { if (ptr == mp) diff --git a/sys/geom/part/g_part.c b/sys/geom/part/g_part.c index 0434ace0cfe..f47ac08ffe3 100644 --- a/sys/geom/part/g_part.c +++ b/sys/geom/part/g_part.c @@ -855,7 +855,9 @@ g_part_ctl_delete(struct gctl_req *req, struct g_part_parms *gpp) static int g_part_ctl_destroy(struct gctl_req *req, struct g_part_parms *gpp) { + struct g_consumer *cp; struct g_geom *gp; + struct g_provider *pp; struct g_part_entry *entry; struct g_part_table *null, *table; struct sbuf *sb; @@ -885,6 +887,11 @@ g_part_ctl_destroy(struct gctl_req *req, struct g_part_parms *gpp) null->gpt_gp = gp; null->gpt_scheme = &g_part_null_scheme; LIST_INIT(&null->gpt_entry); + + cp = LIST_FIRST(&gp->consumer); + pp = cp->provider; + null->gpt_last = pp->mediasize / pp->sectorsize - 1; + null->gpt_depth = table->gpt_depth; null->gpt_opened = table->gpt_opened; null->gpt_smhead = table->gpt_smhead; @@ -971,9 +978,85 @@ g_part_ctl_recover(struct gctl_req *req, struct g_part_parms *gpp) static int g_part_ctl_resize(struct gctl_req *req, struct g_part_parms *gpp) { - gctl_error(req, "%d verb 'resize'", ENOSYS); - return (ENOSYS); -} + struct g_geom *gp; + struct g_provider *pp; + struct g_part_entry *pe, *entry; + struct g_part_table *table; + struct sbuf *sb; + quad_t end; + int error; + + gp = gpp->gpp_geom; + G_PART_TRACE((G_T_TOPOLOGY, "%s(%s)", __func__, gp->name)); + g_topology_assert(); + table = gp->softc; + + /* check gpp_index */ + LIST_FOREACH(entry, &table->gpt_entry, gpe_entry) { + if (entry->gpe_deleted || entry->gpe_internal) + continue; + if (entry->gpe_index == gpp->gpp_index) + break; + } + if (entry == NULL) { + gctl_error(req, "%d index '%d'", ENOENT, gpp->gpp_index); + return (ENOENT); + } + + /* check gpp_size */ 
+ end = entry->gpe_start + gpp->gpp_size - 1; + if (gpp->gpp_size < 1 || end > table->gpt_last) { + gctl_error(req, "%d size '%jd'", EINVAL, + (intmax_t)gpp->gpp_size); + return (EINVAL); + } + + LIST_FOREACH(pe, &table->gpt_entry, gpe_entry) { + if (pe->gpe_deleted || pe->gpe_internal || pe == entry) + continue; + if (end >= pe->gpe_start && end <= pe->gpe_end) { + gctl_error(req, "%d end '%jd'", ENOSPC, + (intmax_t)end); + return (ENOSPC); + } + if (entry->gpe_start < pe->gpe_start && end > pe->gpe_end) { + gctl_error(req, "%d size '%jd'", ENOSPC, + (intmax_t)gpp->gpp_size); + return (ENOSPC); + } + } + + pp = entry->gpe_pp; + if ((g_debugflags & 16) == 0 && + (pp->acr > 0 || pp->acw > 0 || pp->ace > 0)) { + gctl_error(req, "%d", EBUSY); + return (EBUSY); + } + + error = G_PART_RESIZE(table, entry, gpp); + if (error) { + gctl_error(req, "%d", error); + return (error); + } + + if (!entry->gpe_created) + entry->gpe_modified = 1; + + /* update mediasize of changed provider */ + pp->mediasize = (entry->gpe_end - entry->gpe_start + 1) * + pp->sectorsize; + + /* Provide feedback if so requested. */ + if (gpp->gpp_parms & G_PART_PARM_OUTPUT) { + sb = sbuf_new_auto(); + G_PART_FULLNAME(table, entry, sb, gp->name); + sbuf_cat(sb, " resized\n"); + sbuf_finish(sb); + gctl_set_param(req, "output", sbuf_data(sb), sbuf_len(sb) + 1); + sbuf_delete(sb); + } + return (0); +} static int g_part_ctl_setunset(struct gctl_req *req, struct g_part_parms *gpp, @@ -1065,10 +1148,16 @@ g_part_ctl_undo(struct gctl_req *req, struct g_part_parms *gpp) table->gpt_created) ? 
1 : 0; if (reprobe) { - if (!LIST_EMPTY(&table->gpt_entry)) { + LIST_FOREACH(entry, &table->gpt_entry, gpe_entry) { + if (entry->gpe_internal) + continue; error = EBUSY; goto fail; } + while ((entry = LIST_FIRST(&table->gpt_entry)) != NULL) { + LIST_REMOVE(entry, gpe_entry); + g_free(entry); + } error = g_part_probe(gp, cp, table->gpt_depth); if (error) { g_topology_lock(); @@ -1077,6 +1166,15 @@ g_part_ctl_undo(struct gctl_req *req, struct g_part_parms *gpp) return (0); } table = gp->softc; + + /* + * Synthesize a disk geometry. Some partitioning schemes + * depend on it and since some file systems need it even + * when the partitition scheme doesn't, we do it here in + * scheme-independent code. + */ + pp = cp->provider; + g_part_geometry(table, cp, pp->mediasize / pp->sectorsize); } error = G_PART_READ(table, cp); @@ -1194,7 +1292,8 @@ g_part_ctlreq(struct gctl_req *req, struct g_class *mp, const char *verb) mparms |= G_PART_PARM_GEOM; } else if (!strcmp(verb, "resize")) { ctlreq = G_PART_CTL_RESIZE; - mparms |= G_PART_PARM_GEOM | G_PART_PARM_INDEX; + mparms |= G_PART_PARM_GEOM | G_PART_PARM_INDEX | + G_PART_PARM_SIZE; } break; case 's': diff --git a/sys/geom/part/g_part_apm.c b/sys/geom/part/g_part_apm.c index 030a0e69c86..c80925a4f53 100644 --- a/sys/geom/part/g_part_apm.c +++ b/sys/geom/part/g_part_apm.c @@ -74,6 +74,8 @@ static int g_part_apm_read(struct g_part_table *, struct g_consumer *); static const char *g_part_apm_type(struct g_part_table *, struct g_part_entry *, char *, size_t); static int g_part_apm_write(struct g_part_table *, struct g_consumer *); +static int g_part_apm_resize(struct g_part_table *, struct g_part_entry *, + struct g_part_parms *); static kobj_method_t g_part_apm_methods[] = { KOBJMETHOD(g_part_add, g_part_apm_add), @@ -82,6 +84,7 @@ static kobj_method_t g_part_apm_methods[] = { KOBJMETHOD(g_part_dumpconf, g_part_apm_dumpconf), KOBJMETHOD(g_part_dumpto, g_part_apm_dumpto), KOBJMETHOD(g_part_modify, g_part_apm_modify), + 
KOBJMETHOD(g_part_resize, g_part_apm_resize), KOBJMETHOD(g_part_name, g_part_apm_name), KOBJMETHOD(g_part_probe, g_part_apm_probe), KOBJMETHOD(g_part_read, g_part_apm_read), @@ -318,6 +321,19 @@ g_part_apm_modify(struct g_part_table *basetable, return (0); } +static int +g_part_apm_resize(struct g_part_table *basetable, + struct g_part_entry *baseentry, struct g_part_parms *gpp) +{ + struct g_part_apm_entry *entry; + + entry = (struct g_part_apm_entry *)baseentry; + baseentry->gpe_end = baseentry->gpe_start + gpp->gpp_size - 1; + entry->ent.ent_size = gpp->gpp_size; + + return (0); +} + static const char * g_part_apm_name(struct g_part_table *table, struct g_part_entry *baseentry, char *buf, size_t bufsz) diff --git a/sys/geom/part/g_part_bsd.c b/sys/geom/part/g_part_bsd.c index 18a74aa0c79..e99f0bbf821 100644 --- a/sys/geom/part/g_part_bsd.c +++ b/sys/geom/part/g_part_bsd.c @@ -73,6 +73,8 @@ static int g_part_bsd_read(struct g_part_table *, struct g_consumer *); static const char *g_part_bsd_type(struct g_part_table *, struct g_part_entry *, char *, size_t); static int g_part_bsd_write(struct g_part_table *, struct g_consumer *); +static int g_part_bsd_resize(struct g_part_table *, struct g_part_entry *, + struct g_part_parms *); static kobj_method_t g_part_bsd_methods[] = { KOBJMETHOD(g_part_add, g_part_bsd_add), @@ -82,6 +84,7 @@ static kobj_method_t g_part_bsd_methods[] = { KOBJMETHOD(g_part_dumpconf, g_part_bsd_dumpconf), KOBJMETHOD(g_part_dumpto, g_part_bsd_dumpto), KOBJMETHOD(g_part_modify, g_part_bsd_modify), + KOBJMETHOD(g_part_resize, g_part_bsd_resize), KOBJMETHOD(g_part_name, g_part_bsd_name), KOBJMETHOD(g_part_probe, g_part_bsd_probe), KOBJMETHOD(g_part_read, g_part_bsd_read), @@ -288,6 +291,19 @@ g_part_bsd_modify(struct g_part_table *basetable, return (0); } +static int +g_part_bsd_resize(struct g_part_table *basetable, + struct g_part_entry *baseentry, struct g_part_parms *gpp) +{ + struct g_part_bsd_entry *entry; + + entry = (struct 
g_part_bsd_entry *)baseentry; + baseentry->gpe_end = baseentry->gpe_start + gpp->gpp_size - 1; + entry->part.p_size = gpp->gpp_size; + + return (0); +} + static const char * g_part_bsd_name(struct g_part_table *table, struct g_part_entry *baseentry, char *buf, size_t bufsz) diff --git a/sys/geom/part/g_part_gpt.c b/sys/geom/part/g_part_gpt.c index b0c959852b6..a6f74907102 100644 --- a/sys/geom/part/g_part_gpt.c +++ b/sys/geom/part/g_part_gpt.c @@ -103,6 +103,8 @@ static int g_part_gpt_read(struct g_part_table *, struct g_consumer *); static const char *g_part_gpt_type(struct g_part_table *, struct g_part_entry *, char *, size_t); static int g_part_gpt_write(struct g_part_table *, struct g_consumer *); +static int g_part_gpt_resize(struct g_part_table *, struct g_part_entry *, + struct g_part_parms *); static kobj_method_t g_part_gpt_methods[] = { KOBJMETHOD(g_part_add, g_part_gpt_add), @@ -112,6 +114,7 @@ static kobj_method_t g_part_gpt_methods[] = { KOBJMETHOD(g_part_dumpconf, g_part_gpt_dumpconf), KOBJMETHOD(g_part_dumpto, g_part_gpt_dumpto), KOBJMETHOD(g_part_modify, g_part_gpt_modify), + KOBJMETHOD(g_part_resize, g_part_gpt_resize), KOBJMETHOD(g_part_name, g_part_gpt_name), KOBJMETHOD(g_part_probe, g_part_gpt_probe), KOBJMETHOD(g_part_read, g_part_gpt_read), @@ -550,6 +553,19 @@ g_part_gpt_modify(struct g_part_table *basetable, return (0); } +static int +g_part_gpt_resize(struct g_part_table *basetable, + struct g_part_entry *baseentry, struct g_part_parms *gpp) +{ + struct g_part_gpt_entry *entry; + entry = (struct g_part_gpt_entry *)baseentry; + + baseentry->gpe_end = baseentry->gpe_start + gpp->gpp_size - 1; + entry->ent.ent_lba_end = baseentry->gpe_end; + + return (0); +} + static const char * g_part_gpt_name(struct g_part_table *table, struct g_part_entry *baseentry, char *buf, size_t bufsz) diff --git a/sys/geom/part/g_part_if.m b/sys/geom/part/g_part_if.m index 8252f0cadc3..04440fef122 100644 --- a/sys/geom/part/g_part_if.m +++ 
b/sys/geom/part/g_part_if.m @@ -58,6 +58,13 @@ CODE { { return (0); } + + static int + default_resize(struct g_part_table *t __unused, + struct g_part_entry *e __unused, struct g_part_parms *p __unused) + { + return (ENOSYS); + } }; # add() - scheme specific processing for the add verb. @@ -114,6 +121,13 @@ METHOD int modify { struct g_part_parms *gpp; }; +# resize() - scheme specific processing for the resize verb. +METHOD int resize { + struct g_part_table *table; + struct g_part_entry *entry; + struct g_part_parms *gpp; +} DEFAULT default_resize; + # name() - return the name of the given partition entry. # Typical names are "p1", "s0" or "c". METHOD const char * name { diff --git a/sys/geom/part/g_part_mbr.c b/sys/geom/part/g_part_mbr.c index 72d0ecbfe10..8b5ba2779a6 100644 --- a/sys/geom/part/g_part_mbr.c +++ b/sys/geom/part/g_part_mbr.c @@ -76,6 +76,8 @@ static int g_part_mbr_setunset(struct g_part_table *, struct g_part_entry *, static const char *g_part_mbr_type(struct g_part_table *, struct g_part_entry *, char *, size_t); static int g_part_mbr_write(struct g_part_table *, struct g_consumer *); +static int g_part_mbr_resize(struct g_part_table *, struct g_part_entry *, + struct g_part_parms *); static kobj_method_t g_part_mbr_methods[] = { KOBJMETHOD(g_part_add, g_part_mbr_add), @@ -85,6 +87,7 @@ static kobj_method_t g_part_mbr_methods[] = { KOBJMETHOD(g_part_dumpconf, g_part_mbr_dumpconf), KOBJMETHOD(g_part_dumpto, g_part_mbr_dumpto), KOBJMETHOD(g_part_modify, g_part_mbr_modify), + KOBJMETHOD(g_part_resize, g_part_mbr_resize), KOBJMETHOD(g_part_name, g_part_mbr_name), KOBJMETHOD(g_part_probe, g_part_mbr_probe), KOBJMETHOD(g_part_read, g_part_mbr_read), @@ -302,6 +305,31 @@ g_part_mbr_modify(struct g_part_table *basetable, return (0); } +static int +g_part_mbr_resize(struct g_part_table *basetable, + struct g_part_entry *baseentry, struct g_part_parms *gpp) +{ + struct g_part_mbr_entry *entry; + uint32_t size, sectors; + + sectors = basetable->gpt_sectors; 
+ size = gpp->gpp_size; + + if (size < sectors) + return (EINVAL); + if (size % sectors) + size = size - (size % sectors); + if (size < sectors) + return (EINVAL); + + entry = (struct g_part_mbr_entry *)baseentry; + baseentry->gpe_end = baseentry->gpe_start + size - 1; + entry->ent.dp_size = size; + mbr_set_chs(basetable, baseentry->gpe_end, &entry->ent.dp_ecyl, + &entry->ent.dp_ehd, &entry->ent.dp_esect); + return (0); +} + static const char * g_part_mbr_name(struct g_part_table *table, struct g_part_entry *baseentry, char *buf, size_t bufsz) diff --git a/sys/geom/part/g_part_pc98.c b/sys/geom/part/g_part_pc98.c index 8857f9b98f0..ab83662a4f9 100644 --- a/sys/geom/part/g_part_pc98.c +++ b/sys/geom/part/g_part_pc98.c @@ -77,6 +77,8 @@ static int g_part_pc98_setunset(struct g_part_table *, struct g_part_entry *, static const char *g_part_pc98_type(struct g_part_table *, struct g_part_entry *, char *, size_t); static int g_part_pc98_write(struct g_part_table *, struct g_consumer *); +static int g_part_pc98_resize(struct g_part_table *, struct g_part_entry *, + struct g_part_parms *); static kobj_method_t g_part_pc98_methods[] = { KOBJMETHOD(g_part_add, g_part_pc98_add), @@ -86,6 +88,7 @@ static kobj_method_t g_part_pc98_methods[] = { KOBJMETHOD(g_part_dumpconf, g_part_pc98_dumpconf), KOBJMETHOD(g_part_dumpto, g_part_pc98_dumpto), KOBJMETHOD(g_part_modify, g_part_pc98_modify), + KOBJMETHOD(g_part_resize, g_part_pc98_resize), KOBJMETHOD(g_part_name, g_part_pc98_name), KOBJMETHOD(g_part_probe, g_part_pc98_probe), KOBJMETHOD(g_part_read, g_part_pc98_read), @@ -308,6 +311,31 @@ g_part_pc98_modify(struct g_part_table *basetable, return (0); } +static int +g_part_pc98_resize(struct g_part_table *basetable, + struct g_part_entry *baseentry, struct g_part_parms *gpp) +{ + struct g_part_pc98_entry *entry; + uint32_t size, cyl; + + cyl = basetable->gpt_heads * basetable->gpt_sectors; + size = gpp->gpp_size; + + if (size < cyl) + return (EINVAL); + if (size % cyl) + size = size 
- (size % cyl); + if (size < cyl) + return (EINVAL); + + entry = (struct g_part_pc98_entry *)baseentry; + baseentry->gpe_end = baseentry->gpe_start + size - 1; + pc98_set_chs(basetable, baseentry->gpe_end, &entry->ent.dp_ecyl, + &entry->ent.dp_ehd, &entry->ent.dp_esect); + + return (0); +} + static const char * g_part_pc98_name(struct g_part_table *table, struct g_part_entry *baseentry, char *buf, size_t bufsz) diff --git a/sys/geom/part/g_part_vtoc8.c b/sys/geom/part/g_part_vtoc8.c index 0108e97a093..2cabcfdfc2c 100644 --- a/sys/geom/part/g_part_vtoc8.c +++ b/sys/geom/part/g_part_vtoc8.c @@ -67,6 +67,8 @@ static int g_part_vtoc8_read(struct g_part_table *, struct g_consumer *); static const char *g_part_vtoc8_type(struct g_part_table *, struct g_part_entry *, char *, size_t); static int g_part_vtoc8_write(struct g_part_table *, struct g_consumer *); +static int g_part_vtoc8_resize(struct g_part_table *, struct g_part_entry *, + struct g_part_parms *); static kobj_method_t g_part_vtoc8_methods[] = { KOBJMETHOD(g_part_add, g_part_vtoc8_add), @@ -75,6 +77,7 @@ static kobj_method_t g_part_vtoc8_methods[] = { KOBJMETHOD(g_part_dumpconf, g_part_vtoc8_dumpconf), KOBJMETHOD(g_part_dumpto, g_part_vtoc8_dumpto), KOBJMETHOD(g_part_modify, g_part_vtoc8_modify), + KOBJMETHOD(g_part_resize, g_part_vtoc8_resize), KOBJMETHOD(g_part_name, g_part_vtoc8_name), KOBJMETHOD(g_part_probe, g_part_vtoc8_probe), KOBJMETHOD(g_part_read, g_part_vtoc8_read), @@ -294,6 +297,26 @@ g_part_vtoc8_modify(struct g_part_table *basetable, return (0); } +static int +g_part_vtoc8_resize(struct g_part_table *basetable, + struct g_part_entry *entry, struct g_part_parms *gpp) +{ + struct g_part_vtoc8_table *table; + uint64_t size; + + table = (struct g_part_vtoc8_table *)basetable; + size = gpp->gpp_size; + if (size % table->secpercyl) + size = size - (size % table->secpercyl); + if (size < table->secpercyl) + return (EINVAL); + + entry->gpe_end = entry->gpe_start + size - 1; + 
be32enc(&table->vtoc.map[entry->gpe_index - 1].nblks, size); + + return (0); +} + static const char * g_part_vtoc8_name(struct g_part_table *table, struct g_part_entry *baseentry, char *buf, size_t bufsz) diff --git a/sys/geom/vinum/geom_vinum_var.h b/sys/geom/vinum/geom_vinum_var.h index c57fef71a8e..eac3c2c60e9 100644 --- a/sys/geom/vinum/geom_vinum_var.h +++ b/sys/geom/vinum/geom_vinum_var.h @@ -335,9 +335,6 @@ struct gv_plex { int flags; #define GV_PLEX_ADDED 0x01 /* Added to an existing volume. */ #define GV_PLEX_SYNCING 0x02 /* Plex is syncing from another plex. */ -#define GV_PLEX_THREAD_ACTIVE 0x04 /* Plex has an active RAID5 thread. */ -#define GV_PLEX_THREAD_DIE 0x08 /* Signal the RAID5 thread to die. */ -#define GV_PLEX_THREAD_DEAD 0x10 /* The RAID5 thread has died. */ #define GV_PLEX_NEWBORN 0x20 /* The plex was just created. */ #define GV_PLEX_REBUILDING 0x40 /* The plex is rebuilding. */ #define GV_PLEX_GROWING 0x80 /* The plex is growing. */ @@ -371,9 +368,6 @@ struct gv_volume { #define GV_VOL_UP 1 int flags; -#define GV_VOL_THREAD_ACTIVE 0x01 /* Volume has an active thread. */ -#define GV_VOL_THREAD_DIE 0x02 /* Signal the thread to die. */ -#define GV_VOL_THREAD_DEAD 0x04 /* The thread has died. */ #define GV_VOL_NEWBORN 0x08 /* The volume was just created. */ LIST_HEAD(,gv_plex) plexes; /* List of attached plexes. 
*/ diff --git a/sys/gnu/fs/xfs/FreeBSD/xfs_vnops.c b/sys/gnu/fs/xfs/FreeBSD/xfs_vnops.c index 20be557d771..b935b6468b6 100644 --- a/sys/gnu/fs/xfs/FreeBSD/xfs_vnops.c +++ b/sys/gnu/fs/xfs/FreeBSD/xfs_vnops.c @@ -598,16 +598,8 @@ xfs_write_file(xfs_inode_t *xip, struct uio *uio, int ioflag) */ #if 0 td = uio->uio_td; - if (vp->v_type == VREG && td != NULL) { - PROC_LOCK(td->td_proc); - if (uio->uio_offset + uio->uio_resid > - lim_cur(td->td_proc, RLIMIT_FSIZE)) { - psignal(td->td_proc, SIGXFSZ); - PROC_UNLOCK(td->td_proc); - return (EFBIG); - } - PROC_UNLOCK(td->td_proc); - } + if (vn_rlimit_fsize(vp, uio, uio->uio_td)) + return (EFBIG); #endif resid = uio->uio_resid; diff --git a/sys/i386/conf/GENERIC b/sys/i386/conf/GENERIC index d40d7c0fa9f..24f5aabeb9f 100644 --- a/sys/i386/conf/GENERIC +++ b/sys/i386/conf/GENERIC @@ -235,6 +235,7 @@ device pcn # AMD Am79C97x PCI 10/100 (precedence over 'le') device re # RealTek 8139C+/8169/8169S/8110S device rl # RealTek 8129/8139 device sf # Adaptec AIC-6915 (``Starfire'') +device sge # Silicon Integrated Systems SiS190/191 device sis # Silicon Integrated Systems SiS 900/SiS 7016 device sk # SysKonnect SK-984x & SK-982x gigabit Ethernet device ste # Sundance ST201 (D-Link DFE-550TX) @@ -293,6 +294,7 @@ device firmware # firmware assist module device bpf # Berkeley packet filter # USB support +options USB_DEBUG # enable debug msgs device uhci # UHCI PCI->USB interface device ohci # OHCI PCI->USB interface device ehci # EHCI PCI->USB interface (USB 2.0) diff --git a/sys/i386/conf/XBOX b/sys/i386/conf/XBOX index 96719659ed1..2ba5738c008 100644 --- a/sys/i386/conf/XBOX +++ b/sys/i386/conf/XBOX @@ -82,6 +82,7 @@ device pty # BSD-style compatibility pseudo ttys device bpf # Berkeley packet filter # USB support +options USB_DEBUG # enable debug msgs #device uhci # UHCI PCI->USB interface device ohci # OHCI PCI->USB interface device usb # USB Bus (required) diff --git a/sys/i386/i386/bpf_jit_machdep.c b/sys/i386/i386/bpf_jit_machdep.c 
index e9d9ecabe40..4c2946f9f6b 100644 --- a/sys/i386/i386/bpf_jit_machdep.c +++ b/sys/i386/i386/bpf_jit_machdep.c @@ -440,62 +440,77 @@ bpf_jit_compile(struct bpf_insn *prog, u_int nins, size_t *size) break; case BPF_JMP|BPF_JA: - JMP(stream.refs[stream.bpf_pc + ins->k] - - stream.refs[stream.bpf_pc]); + JUMP(ins->k); break; case BPF_JMP|BPF_JGT|BPF_K: - if (ins->jt == 0 && ins->jf == 0) + if (ins->jt == ins->jf) { + JUMP(ins->jt); break; + } CMPid(ins->k, EAX); JCC(JA, JBE); break; case BPF_JMP|BPF_JGE|BPF_K: - if (ins->jt == 0 && ins->jf == 0) + if (ins->jt == ins->jf) { + JUMP(ins->jt); break; + } CMPid(ins->k, EAX); JCC(JAE, JB); break; case BPF_JMP|BPF_JEQ|BPF_K: - if (ins->jt == 0 && ins->jf == 0) + if (ins->jt == ins->jf) { + JUMP(ins->jt); break; + } CMPid(ins->k, EAX); JCC(JE, JNE); break; case BPF_JMP|BPF_JSET|BPF_K: - if (ins->jt == 0 && ins->jf == 0) + if (ins->jt == ins->jf) { + JUMP(ins->jt); break; + } TESTid(ins->k, EAX); JCC(JNE, JE); break; case BPF_JMP|BPF_JGT|BPF_X: - if (ins->jt == 0 && ins->jf == 0) + if (ins->jt == ins->jf) { + JUMP(ins->jt); break; + } CMPrd(EDX, EAX); JCC(JA, JBE); break; case BPF_JMP|BPF_JGE|BPF_X: - if (ins->jt == 0 && ins->jf == 0) + if (ins->jt == ins->jf) { + JUMP(ins->jt); break; + } CMPrd(EDX, EAX); JCC(JAE, JB); break; case BPF_JMP|BPF_JEQ|BPF_X: - if (ins->jt == 0 && ins->jf == 0) + if (ins->jt == ins->jf) { + JUMP(ins->jt); break; + } CMPrd(EDX, EAX); JCC(JE, JNE); break; case BPF_JMP|BPF_JSET|BPF_X: - if (ins->jt == 0 && ins->jf == 0) + if (ins->jt == ins->jf) { + JUMP(ins->jt); break; + } TESTrd(EDX, EAX); JCC(JNE, JE); break; diff --git a/sys/i386/i386/bpf_jit_machdep.h b/sys/i386/i386/bpf_jit_machdep.h index e82f68a2ec7..4ae5494a72a 100644 --- a/sys/i386/i386/bpf_jit_machdep.h +++ b/sys/i386/i386/bpf_jit_machdep.h @@ -418,4 +418,10 @@ typedef void (*emit_func)(bpf_bin_stream *stream, u_int value, u_int n); } \ } while (0) +#define JUMP(off) do { \ + if ((off) != 0) \ + JMP(stream.refs[stream.bpf_pc + (off)] - 
\ + stream.refs[stream.bpf_pc]); \ +} while (0) + #endif /* _BPF_JIT_MACHDEP_H_ */ diff --git a/sys/i386/i386/identcpu.c b/sys/i386/i386/identcpu.c index 931bfaf6777..6f2e4a40fd1 100644 --- a/sys/i386/i386/identcpu.c +++ b/sys/i386/i386/identcpu.c @@ -727,7 +727,7 @@ printcpuinfo(void) printf("\n Features2=0x%b", cpu_feature2, "\020" "\001SSE3" /* SSE3 */ - "\002" + "\002PCLMULQDQ" /* Carry-Less Mul Quadword */ "\003DTES64" /* 64-bit Debug Trace */ "\004MON" /* MONITOR/MWAIT Instructions */ "\005DS_CPL" /* CPL Qualified Debug Store */ @@ -751,7 +751,7 @@ printcpuinfo(void) "\027MOVBE" "\030POPCNT" "\031" - "\032" + "\032AESNI" /* AES Crypto*/ "\033XSAVE" "\034OSXSAVE" "\035" diff --git a/sys/i386/i386/io.c b/sys/i386/i386/io.c index c392af5f38d..152f6b1dc5e 100644 --- a/sys/i386/i386/io.c +++ b/sys/i386/i386/io.c @@ -28,60 +28,32 @@ __FBSDID("$FreeBSD$"); #include -#include -#include -#include -#include -#include -#include #include -#include -#include -#include #include -#include -#include - -#include -#include - #include +#include -/* ARGSUSED */ int -ioopen(struct cdev *dev __unused, int flags __unused, int fmt __unused, - struct thread *td) +iodev_open(struct thread *td) { - int error; - - error = priv_check(td, PRIV_IO); - if (error != 0) - return (error); - error = securelevel_gt(td->td_ucred, 0); - if (error != 0) - return (error); td->td_frame->tf_eflags |= PSL_IOPL; - return (0); } -/* ARGSUSED */ int -ioclose(struct cdev *dev __unused, int flags __unused, int fmt __unused, - struct thread *td) +iodev_close(struct thread *td) { + td->td_frame->tf_eflags &= ~PSL_IOPL; - return (0); } /* ARGSUSED */ int -ioioctl(struct cdev *dev __unused, u_long cmd __unused, caddr_t data __unused, - int fflag __unused, struct thread *td __unused) +iodev_ioctl(u_long cmd __unused, caddr_t data __unused) { - return (ENXIO); + return (ENOIOCTL); } diff --git a/sys/i386/i386/local_apic.c b/sys/i386/i386/local_apic.c index 302535f27f9..d894d3271d0 100644 --- 
a/sys/i386/i386/local_apic.c +++ b/sys/i386/i386/local_apic.c @@ -71,7 +71,7 @@ __FBSDID("$FreeBSD$"); #ifdef KDTRACE_HOOKS #include -cyclic_clock_func_t lapic_cyclic_clock_func[MAXCPU]; +cyclic_clock_func_t cyclic_clock_func[MAXCPU]; #endif /* Sanity checks on IDT vectors. */ @@ -779,8 +779,8 @@ lapic_handle_timer(struct trapframe *frame) * timers. */ int cpu = PCPU_GET(cpuid); - if (lapic_cyclic_clock_func[cpu] != NULL) - (*lapic_cyclic_clock_func[cpu])(frame); + if (cyclic_clock_func[cpu] != NULL) + (*cyclic_clock_func[cpu])(frame); #endif /* Fire hardclock at hz. */ diff --git a/sys/i386/i386/pmap.c b/sys/i386/i386/pmap.c index 7c74f00bb2c..b342a675ef5 100644 --- a/sys/i386/i386/pmap.c +++ b/sys/i386/i386/pmap.c @@ -296,6 +296,7 @@ static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, static void pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte); static void pmap_fill_ptp(pt_entry_t *firstpte, pt_entry_t newpte); static boolean_t pmap_is_modified_pvh(struct md_page *pvh); +static boolean_t pmap_is_referenced_pvh(struct md_page *pvh); static void pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode); static void pmap_kenter_pde(vm_offset_t va, pd_entry_t newpde); static vm_page_t pmap_lookup_pt_page(pmap_t pmap, vm_offset_t va); @@ -1345,14 +1346,19 @@ pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot) pd_entry_t pde; pt_entry_t pte; vm_page_t m; + vm_paddr_t pa; + pa = 0; m = NULL; - vm_page_lock_queues(); PMAP_LOCK(pmap); +retry: pde = *pmap_pde(pmap, va); if (pde != 0) { if (pde & PG_PS) { if ((pde & PG_RW) || (prot & VM_PROT_WRITE) == 0) { + if (vm_page_pa_tryrelock(pmap, (pde & PG_PS_FRAME) | + (va & PDRMASK), &pa)) + goto retry; m = PHYS_TO_VM_PAGE((pde & PG_PS_FRAME) | (va & PDRMASK)); vm_page_hold(m); @@ -1362,13 +1368,15 @@ pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot) pte = *pmap_pte_quick(pmap, va); if (pte != 0 && ((pte & PG_RW) || (prot & VM_PROT_WRITE) == 0)) { + if (vm_page_pa_tryrelock(pmap, 
pte & PG_FRAME, &pa)) + goto retry; m = PHYS_TO_VM_PAGE(pte & PG_FRAME); vm_page_hold(m); } sched_unpin(); } } - vm_page_unlock_queues(); + PA_UNLOCK_COND(pa); PMAP_UNLOCK(pmap); return (m); } @@ -1606,9 +1614,9 @@ pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m, vm_page_t *free) --m->wire_count; if (m->wire_count == 0) - return _pmap_unwire_pte_hold(pmap, m, free); + return (_pmap_unwire_pte_hold(pmap, m, free)); else - return 0; + return (0); } static int @@ -1642,7 +1650,7 @@ _pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m, vm_page_t *free) */ pmap_add_delayed_free_list(m, free, TRUE); - return 1; + return (1); } /* @@ -1656,10 +1664,10 @@ pmap_unuse_pt(pmap_t pmap, vm_offset_t va, vm_page_t *free) vm_page_t mpte; if (va >= VM_MAXUSER_ADDRESS) - return 0; + return (0); ptepde = *pmap_pde(pmap, va); mpte = PHYS_TO_VM_PAGE(ptepde & PG_FRAME); - return pmap_unwire_pte_hold(pmap, mpte, free); + return (pmap_unwire_pte_hold(pmap, mpte, free)); } void @@ -1810,7 +1818,7 @@ _pmap_allocpte(pmap_t pmap, unsigned ptepindex, int flags) pmap->pm_pdir[ptepindex] = (pd_entry_t) (ptepa | PG_U | PG_RW | PG_V | PG_A | PG_M); - return m; + return (m); } static vm_page_t @@ -2008,7 +2016,7 @@ kvm_size(SYSCTL_HANDLER_ARGS) { unsigned long ksize = VM_MAX_KERNEL_ADDRESS - KERNBASE; - return sysctl_handle_long(oidp, &ksize, 0, req); + return (sysctl_handle_long(oidp, &ksize, 0, req)); } SYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_LONG|CTLFLAG_RD, 0, 0, kvm_size, "IU", "Size of KVM"); @@ -2018,7 +2026,7 @@ kvm_free(SYSCTL_HANDLER_ARGS) { unsigned long kfree = VM_MAX_KERNEL_ADDRESS - kernel_vm_end; - return sysctl_handle_long(oidp, &kfree, 0, req); + return (sysctl_handle_long(oidp, &kfree, 0, req)); } SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG|CTLFLAG_RD, 0, 0, kvm_free, "IU", "Amount of KVM free"); @@ -2082,7 +2090,7 @@ static __inline struct pv_chunk * pv_to_chunk(pv_entry_t pv) { - return (struct pv_chunk *)((uintptr_t)pv & ~(uintptr_t)PAGE_MASK); + return ((struct pv_chunk 
*)((uintptr_t)pv & ~(uintptr_t)PAGE_MASK)); } #define PV_PMAP(pv) (pv_to_chunk(pv)->pc_pmap) @@ -2954,18 +2962,9 @@ retry: if (oldpde & PG_MANAGED) { eva = sva + NBPDR; for (va = sva, m = PHYS_TO_VM_PAGE(oldpde & PG_PS_FRAME); - va < eva; va += PAGE_SIZE, m++) { - /* - * In contrast to the analogous operation on a 4KB page - * mapping, the mapping's PG_A flag is not cleared and - * the page's PG_REFERENCED flag is not set. The - * reason is that pmap_demote_pde() expects that a 2/4MB - * page mapping with a stored page table page has PG_A - * set. - */ + va < eva; va += PAGE_SIZE, m++) if ((oldpde & (PG_M | PG_RW)) == (PG_M | PG_RW)) vm_page_dirty(m); - } } if ((prot & VM_PROT_WRITE) == 0) newpde &= ~(PG_RW | PG_M); @@ -3073,22 +3072,15 @@ retry: obits = pbits = *pte; if ((pbits & PG_V) == 0) continue; - if (pbits & PG_MANAGED) { - m = NULL; - if (pbits & PG_A) { + + if ((prot & VM_PROT_WRITE) == 0) { + if ((pbits & (PG_MANAGED | PG_M | PG_RW)) == + (PG_MANAGED | PG_M | PG_RW)) { m = PHYS_TO_VM_PAGE(pbits & PG_FRAME); - vm_page_flag_set(m, PG_REFERENCED); - pbits &= ~PG_A; - } - if ((pbits & (PG_M | PG_RW)) == (PG_M | PG_RW)) { - if (m == NULL) - m = PHYS_TO_VM_PAGE(pbits & PG_FRAME); vm_page_dirty(m); } - } - - if ((prot & VM_PROT_WRITE) == 0) pbits &= ~(PG_RW | PG_M); + } #ifdef PAE if ((prot & VM_PROT_EXECUTE) == 0) pbits |= pg_nx; @@ -3661,7 +3653,7 @@ pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, pte_store(pte, pa | PG_V | PG_U); else pte_store(pte, pa | PG_V | PG_U | PG_MANAGED); - return mpte; + return (mpte); } /* @@ -4058,12 +4050,12 @@ pmap_page_exists_quick(pmap_t pmap, vm_page_t m) int loops = 0; if (m->flags & PG_FICTITIOUS) - return FALSE; + return (FALSE); mtx_assert(&vm_page_queue_mtx, MA_OWNED); TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { if (PV_PMAP(pv) == pmap) { - return TRUE; + return (TRUE); } loops++; if (loops >= 16) @@ -4133,16 +4125,15 @@ pmap_pvh_wired_mappings(struct md_page *pvh, int count) boolean_t 
pmap_page_is_mapped(vm_page_t m) { - struct md_page *pvh; + boolean_t rv; if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0) return (FALSE); - mtx_assert(&vm_page_queue_mtx, MA_OWNED); - if (TAILQ_EMPTY(&m->md.pv_list)) { - pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); - return (!TAILQ_EMPTY(&pvh->pv_list)); - } else - return (TRUE); + vm_page_lock_queues(); + rv = !TAILQ_EMPTY(&m->md.pv_list) || + !TAILQ_EMPTY(&pa_to_pvh(VM_PAGE_TO_PHYS(m))->pv_list); + vm_page_unlock_queues(); + return (rv); } /* @@ -4355,6 +4346,51 @@ pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr) return (rv); } +/* + * pmap_is_referenced: + * + * Return whether or not the specified physical page was referenced + * in any physical maps. + */ +boolean_t +pmap_is_referenced(vm_page_t m) +{ + + if (m->flags & PG_FICTITIOUS) + return (FALSE); + if (pmap_is_referenced_pvh(&m->md)) + return (TRUE); + return (pmap_is_referenced_pvh(pa_to_pvh(VM_PAGE_TO_PHYS(m)))); +} + +/* + * Returns TRUE if any of the given mappings were referenced and FALSE + * otherwise. Both page and 4mpage mappings are supported. + */ +static boolean_t +pmap_is_referenced_pvh(struct md_page *pvh) +{ + pv_entry_t pv; + pt_entry_t *pte; + pmap_t pmap; + boolean_t rv; + + mtx_assert(&vm_page_queue_mtx, MA_OWNED); + rv = FALSE; + sched_pin(); + TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) { + pmap = PV_PMAP(pv); + PMAP_LOCK(pmap); + pte = pmap_pte_quick(pmap, pv->pv_va); + rv = (*pte & (PG_A | PG_V)) == (PG_A | PG_V); + PMAP_UNLOCK(pmap); + if (rv) + break; + } + sched_unpin(); + return (rv); +} + /* * Clear the write and modified bits in each of the given page's mappings. 
*/ @@ -4932,7 +4968,7 @@ pmap_mincore(pmap_t pmap, vm_offset_t addr) if (pte != 0) { val |= MINCORE_INCORE; if ((pte & PG_MANAGED) == 0) - return val; + return (val); m = PHYS_TO_VM_PAGE(pa); @@ -4961,14 +4997,12 @@ pmap_mincore(pmap_t pmap, vm_offset_t addr) */ vm_page_lock_queues(); if ((m->flags & PG_REFERENCED) || - pmap_ts_referenced(m)) { + pmap_is_referenced(m)) val |= MINCORE_REFERENCED_OTHER; - vm_page_flag_set(m, PG_REFERENCED); - } vm_page_unlock_queues(); } } - return val; + return (val); } void @@ -5063,7 +5097,7 @@ pmap_pid_dump(int pid) printf("\n"); } sx_sunlock(&allproc_lock); - return npte; + return (npte); } pte = pmap_pte(pmap, va); if (pte && pmap_pte_v(pte)) { @@ -5088,7 +5122,7 @@ pmap_pid_dump(int pid) } } sx_sunlock(&allproc_lock); - return npte; + return (npte); } #endif diff --git a/sys/i386/include/iodev.h b/sys/i386/include/iodev.h index 1a0a17a9561..9f53cacff00 100644 --- a/sys/i386/include/iodev.h +++ b/sys/i386/include/iodev.h @@ -25,7 +25,22 @@ * * $FreeBSD$ */ +#ifndef _MACHINE_IODEV_H_ +#define _MACHINE_IODEV_H_ -d_open_t ioopen; -d_close_t ioclose; -d_ioctl_t ioioctl; +#ifdef _KERNEL +#include + +#define iodev_read_1 inb +#define iodev_read_2 inw +#define iodev_read_4 inl +#define iodev_write_1 outb +#define iodev_write_2 outw +#define iodev_write_4 outl + +int iodev_open(struct thread *td); +int iodev_close(struct thread *td); +int iodev_ioctl(u_long cmd, caddr_t data); + +#endif /* _KERNEL */ +#endif /* _MACHINE_IODEV_H_ */ diff --git a/sys/i386/include/pmap.h b/sys/i386/include/pmap.h index ae7d79d83fb..f45a9dfd679 100644 --- a/sys/i386/include/pmap.h +++ b/sys/i386/include/pmap.h @@ -420,11 +420,14 @@ struct pmap { u_int pm_active; /* active on cpus */ struct pmap_statistics pm_stats; /* pmap statistics */ LIST_ENTRY(pmap) pm_list; /* List of all pmaps */ + uint32_t pm_gen_count; /* generation count (pmap lock dropped) */ + u_int pm_retries; #ifdef PAE pdpt_entry_t *pm_pdpt; /* KVA of page director pointer table */ #endif 
vm_page_t pm_root; /* spare page table pages */ + }; typedef struct pmap *pmap_t; diff --git a/sys/i386/include/proc.h b/sys/i386/include/proc.h index 1e0b9f9db10..86be8c81721 100644 --- a/sys/i386/include/proc.h +++ b/sys/i386/include/proc.h @@ -57,6 +57,8 @@ struct mdproc { struct proc_ldt *md_ldt; /* (t) per-process ldt */ }; +#define KINFO_PROC_SIZE 768 + #ifdef _KERNEL /* Get the current kernel thread stack usage. */ diff --git a/sys/i386/include/specialreg.h b/sys/i386/include/specialreg.h index d2494c72100..efcf9244869 100644 --- a/sys/i386/include/specialreg.h +++ b/sys/i386/include/specialreg.h @@ -110,6 +110,7 @@ #define CPUID_PBE 0x80000000 #define CPUID2_SSE3 0x00000001 +#define CPUID2_PCLMULQDQ 0x00000002 #define CPUID2_DTES64 0x00000004 #define CPUID2_MON 0x00000008 #define CPUID2_DS_CPL 0x00000010 @@ -128,6 +129,7 @@ #define CPUID2_X2APIC 0x00200000 #define CPUID2_MOVBE 0x00400000 #define CPUID2_POPCNT 0x00800000 +#define CPUID2_AESNI 0x02000000 /* * Important bits in the AMD extended cpuid flags diff --git a/sys/i386/include/xen/xenfunc.h b/sys/i386/include/xen/xenfunc.h index 2851709cb95..47f04057aa6 100644 --- a/sys/i386/include/xen/xenfunc.h +++ b/sys/i386/include/xen/xenfunc.h @@ -1,6 +1,5 @@ -/* - * - * Copyright (c) 2004,2005 Kip Macy +/*- + * Copyright (c) 2004, 2005 Kip Macy * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -11,24 +10,22 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. The name of the author may not be used to endorse or promote products - * derived from this software without specific prior written permission. 
* - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. * * $FreeBSD$ */ - #ifndef _XEN_XENFUNC_H_ #define _XEN_XENFUNC_H_ diff --git a/sys/i386/include/xen/xenvar.h b/sys/i386/include/xen/xenvar.h index cefbb058c1c..1110f8506ae 100644 --- a/sys/i386/include/xen/xenvar.h +++ b/sys/i386/include/xen/xenvar.h @@ -1,32 +1,31 @@ -/* +/*- * Copyright (c) 2008 Kip Macy * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: - * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. - * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. * * $FreeBSD$ */ + #ifndef XENVAR_H_ #define XENVAR_H_ #define XBOOTUP 0x1 diff --git a/sys/i386/xen/pmap.c b/sys/i386/xen/pmap.c index ae4d4aa2add..1bd081f7fe2 100644 --- a/sys/i386/xen/pmap.c +++ b/sys/i386/xen/pmap.c @@ -289,6 +289,12 @@ SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_max, CTLFLAG_RD, &pv_entry_max, 0, "Max number of PV entries"); SYSCTL_INT(_vm_pmap, OID_AUTO, shpgperproc, CTLFLAG_RD, &shpgperproc, 0, "Page share factor per proc"); +SYSCTL_NODE(_vm_pmap, OID_AUTO, pde, CTLFLAG_RD, 0, + "2/4MB page mapping counters"); + +static u_long pmap_pde_mappings; +SYSCTL_ULONG(_vm_pmap_pde, OID_AUTO, mappings, CTLFLAG_RD, + &pmap_pde_mappings, 0, "2/4MB page mappings"); static void free_pv_entry(pmap_t pmap, pv_entry_t pv); static pv_entry_t get_pv_entry(pmap_t locked_pmap, int try); @@ -1219,14 +1225,19 @@ pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot) pd_entry_t pde; pt_entry_t pte; vm_page_t m; + vm_paddr_t pa; + pa = 0; m = NULL; - vm_page_lock_queues(); PMAP_LOCK(pmap); +retry: pde = PT_GET(pmap_pde(pmap, va)); if (pde != 0) { if (pde & PG_PS) { if ((pde & PG_RW) || (prot & VM_PROT_WRITE) == 0) { + if (vm_page_pa_tryrelock(pmap, (pde & PG_PS_FRAME) | + (va & PDRMASK), &pa)) + goto retry; m = PHYS_TO_VM_PAGE((pde & PG_PS_FRAME) | (va & PDRMASK)); vm_page_hold(m); @@ -1238,13 +1249,15 @@ pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot) PT_SET_MA(PADDR1, 0); if ((pte & PG_V) && ((pte & PG_RW) || (prot 
& VM_PROT_WRITE) == 0)) { + if (vm_page_pa_tryrelock(pmap, pte & PG_FRAME, &pa)) + goto retry; m = PHYS_TO_VM_PAGE(pte & PG_FRAME); vm_page_hold(m); } sched_unpin(); } } - vm_page_unlock_queues(); + PA_UNLOCK_COND(pa); PMAP_UNLOCK(pmap); return (m); } @@ -2600,22 +2613,16 @@ retry: obits = pbits = *pte; if ((pbits & PG_V) == 0) continue; - if (pbits & PG_MANAGED) { - m = NULL; - if (pbits & PG_A) { - m = PHYS_TO_VM_PAGE(xpmap_mtop(pbits) & PG_FRAME); - vm_page_flag_set(m, PG_REFERENCED); - pbits &= ~PG_A; - } - if ((pbits & PG_M) != 0) { - if (m == NULL) - m = PHYS_TO_VM_PAGE(xpmap_mtop(pbits) & PG_FRAME); + + if ((prot & VM_PROT_WRITE) == 0) { + if ((pbits & (PG_MANAGED | PG_M | PG_RW)) == + (PG_MANAGED | PG_M | PG_RW)) { + m = PHYS_TO_VM_PAGE(xpmap_mtop(pbits) & + PG_FRAME); vm_page_dirty(m); } - } - - if ((prot & VM_PROT_WRITE) == 0) pbits &= ~(PG_RW | PG_M); + } #ifdef PAE if ((prot & VM_PROT_EXECUTE) == 0) pbits |= pg_nx; @@ -3129,64 +3136,59 @@ pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object, vm_pindex_t pindex, vm_size_t size) { + pd_entry_t *pde; + vm_paddr_t pa, ptepa; vm_page_t p; + int pat_mode; VM_OBJECT_LOCK_ASSERT(object, MA_OWNED); KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG, ("pmap_object_init_pt: non-device object")); if (pseflag && - ((addr & (NBPDR - 1)) == 0) && ((size & (NBPDR - 1)) == 0)) { - int i; - vm_page_t m[1]; - unsigned int ptepindex; - int npdes; - pd_entry_t ptepa; - - PMAP_LOCK(pmap); - if (pmap->pm_pdir[ptepindex = (addr >> PDRSHIFT)]) - goto out; - PMAP_UNLOCK(pmap); -retry: + (addr & (NBPDR - 1)) == 0 && (size & (NBPDR - 1)) == 0) { + if (!vm_object_populate(object, pindex, pindex + atop(size))) + return; p = vm_page_lookup(object, pindex); - if (p != NULL) { - if (vm_page_sleep_if_busy(p, FALSE, "init4p")) - goto retry; - } else { - p = vm_page_alloc(object, pindex, VM_ALLOC_NORMAL); - if (p == NULL) - return; - m[0] = p; - - if (vm_pager_get_pages(object, m, 1, 0) != VM_PAGER_OK) { - 
vm_page_lock_queues(); - vm_page_free(p); - vm_page_unlock_queues(); - return; - } - - p = vm_page_lookup(object, pindex); - vm_page_wakeup(p); - } - + KASSERT(p->valid == VM_PAGE_BITS_ALL, + ("pmap_object_init_pt: invalid page %p", p)); + pat_mode = p->md.pat_mode; + /* + * Abort the mapping if the first page is not physically + * aligned to a 2/4MB page boundary. + */ ptepa = VM_PAGE_TO_PHYS(p); if (ptepa & (NBPDR - 1)) return; - - p->valid = VM_PAGE_BITS_ALL; - - PMAP_LOCK(pmap); - pmap->pm_stats.resident_count += size >> PAGE_SHIFT; - npdes = size >> PDRSHIFT; - critical_enter(); - for(i = 0; i < npdes; i++) { - PD_SET_VA(pmap, ptepindex, - ptepa | PG_U | PG_M | PG_RW | PG_V | PG_PS, FALSE); - ptepa += NBPDR; - ptepindex += 1; + /* + * Skip the first page. Abort the mapping if the rest of + * the pages are not physically contiguous or have differing + * memory attributes. + */ + p = TAILQ_NEXT(p, listq); + for (pa = ptepa + PAGE_SIZE; pa < ptepa + size; + pa += PAGE_SIZE) { + KASSERT(p->valid == VM_PAGE_BITS_ALL, + ("pmap_object_init_pt: invalid page %p", p)); + if (pa != VM_PAGE_TO_PHYS(p) || + pat_mode != p->md.pat_mode) + return; + p = TAILQ_NEXT(p, listq); + } + /* Map using 2/4MB pages. */ + PMAP_LOCK(pmap); + for (pa = ptepa | pmap_cache_bits(pat_mode, 1); pa < ptepa + + size; pa += NBPDR) { + pde = pmap_pde(pmap, addr); + if (*pde == 0) { + pde_store(pde, pa | PG_PS | PG_M | PG_A | + PG_U | PG_RW | PG_V); + pmap->pm_stats.resident_count += NBPDR / + PAGE_SIZE; + pmap_pde_mappings++; + } + /* Else continue on if the PDE is already valid. 
*/ + addr += NBPDR; } - pmap_invalidate_all(pmap); - critical_exit(); -out: PMAP_UNLOCK(pmap); } } @@ -3718,6 +3720,34 @@ pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr) return (rv); } +boolean_t +pmap_is_referenced(vm_page_t m) +{ + pv_entry_t pv; + pt_entry_t *pte; + pmap_t pmap; + boolean_t rv; + + rv = FALSE; + if (m->flags & PG_FICTITIOUS) + return (rv); + sched_pin(); + mtx_assert(&vm_page_queue_mtx, MA_OWNED); + TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { + pmap = PV_PMAP(pv); + PMAP_LOCK(pmap); + pte = pmap_pte_quick(pmap, pv->pv_va); + rv = (*pte & (PG_A | PG_V)) == (PG_A | PG_V); + PMAP_UNLOCK(pmap); + if (rv) + break; + } + if (*PMAP1) + PT_SET_MA(PADDR1, 0); + sched_unpin(); + return (rv); +} + void pmap_map_readonly(pmap_t pmap, vm_offset_t va, int len) { @@ -4145,10 +4175,8 @@ pmap_mincore(pmap_t pmap, vm_offset_t addr) */ vm_page_lock_queues(); if ((m->flags & PG_REFERENCED) || - pmap_ts_referenced(m)) { + pmap_is_referenced(m)) val |= MINCORE_REFERENCED_OTHER; - vm_page_flag_set(m, PG_REFERENCED); - } vm_page_unlock_queues(); } } diff --git a/sys/ia64/conf/GENERIC b/sys/ia64/conf/GENERIC index 9b53bf3f1f7..cf06a295860 100644 --- a/sys/ia64/conf/GENERIC +++ b/sys/ia64/conf/GENERIC @@ -116,6 +116,7 @@ device ida # Compaq Smart RAID device mlx # Mylex DAC960 family # USB host controllers and peripherals +options USB_DEBUG # enable debug msgs device ehci # EHCI host controller device ohci # OHCI PCI->USB interface device uhci # UHCI PCI->USB interface diff --git a/sys/ia64/ia64/iodev_machdep.c b/sys/ia64/ia64/iodev_machdep.c index d255aae127b..9d9057dc76e 100644 --- a/sys/ia64/ia64/iodev_machdep.c +++ b/sys/ia64/ia64/iodev_machdep.c @@ -40,31 +40,13 @@ __FBSDID("$FreeBSD$"); #include #include -static int iodev_pio_read(struct iodev_pio_req *req); -static int iodev_pio_write(struct iodev_pio_req *req); - static int iodev_efivar_getvar(struct iodev_efivar_req *req); static int iodev_efivar_nextname(struct iodev_efivar_req *req); static int 
iodev_efivar_setvar(struct iodev_efivar_req *req); /* ARGSUSED */ int -ioopen(struct cdev *dev __unused, int flags __unused, int fmt __unused, - struct thread *td) -{ - int error; - - error = priv_check(td, PRIV_IO); - if (error == 0) - error = securelevel_gt(td->td_ucred, 0); - - return (error); -} - -/* ARGSUSED */ -int -ioclose(struct cdev *dev __unused, int flags __unused, int fmt __unused, - struct thread *td __unused) +iodev_open(struct thread *td __unused) { return (0); @@ -72,29 +54,19 @@ ioclose(struct cdev *dev __unused, int flags __unused, int fmt __unused, /* ARGSUSED */ int -ioioctl(struct cdev *dev __unused, u_long cmd, caddr_t data, - int fflag __unused, struct thread *td __unused) +iodev_close(struct thread *td __unused) +{ + + return (0); +} + +int +iodev_ioctl(u_long cmd, caddr_t data) { struct iodev_efivar_req *efivar_req; - struct iodev_pio_req *pio_req; int error; - error = ENOIOCTL; switch (cmd) { - case IODEV_PIO: - pio_req = (struct iodev_pio_req *)data; - switch (pio_req->access) { - case IODEV_PIO_READ: - error = iodev_pio_read(pio_req); - break; - case IODEV_PIO_WRITE: - error = iodev_pio_write(pio_req); - break; - default: - error = EINVAL; - break; - } - break; case IODEV_EFIVAR: efivar_req = (struct iodev_efivar_req *)data; efivar_req->result = 0; /* So it's well-defined */ @@ -113,77 +85,13 @@ ioioctl(struct cdev *dev __unused, u_long cmd, caddr_t data, break; } break; + default: + error = ENOIOCTL; } return (error); } -static int -iodev_pio_read(struct iodev_pio_req *req) -{ - - switch (req->width) { - case 1: - req->val = bus_space_read_io_1(req->port); - break; - case 2: - if (req->port & 1) { - req->val = bus_space_read_io_1(req->port); - req->val |= bus_space_read_io_1(req->port + 1) << 8; - } else - req->val = bus_space_read_io_2(req->port); - break; - case 4: - if (req->port & 1) { - req->val = bus_space_read_io_1(req->port); - req->val |= bus_space_read_io_2(req->port + 1) << 8; - req->val |= bus_space_read_io_1(req->port + 3) 
<< 24; - } else if (req->port & 2) { - req->val = bus_space_read_io_2(req->port); - req->val |= bus_space_read_io_2(req->port + 2) << 16; - } else - req->val = bus_space_read_io_4(req->port); - break; - default: - return (EINVAL); - } - - return (0); -} - -static int -iodev_pio_write(struct iodev_pio_req *req) -{ - - switch (req->width) { - case 1: - bus_space_write_io_1(req->port, req->val); - break; - case 2: - if (req->port & 1) { - bus_space_write_io_1(req->port, req->val); - bus_space_write_io_1(req->port + 1, req->val >> 8); - } else - bus_space_write_io_2(req->port, req->val); - break; - case 4: - if (req->port & 1) { - bus_space_write_io_1(req->port, req->val); - bus_space_write_io_2(req->port + 1, req->val >> 8); - bus_space_write_io_1(req->port + 3, req->val >> 24); - } else if (req->port & 2) { - bus_space_write_io_2(req->port, req->val); - bus_space_write_io_2(req->port + 2, req->val >> 16); - } else - bus_space_write_io_4(req->port, req->val); - break; - default: - return (EINVAL); - } - - return (0); -} - static int iodev_efivar_getvar(struct iodev_efivar_req *req) { diff --git a/sys/ia64/ia64/pmap.c b/sys/ia64/ia64/pmap.c index 6fe4cdf987e..7cc18c15289 100644 --- a/sys/ia64/ia64/pmap.c +++ b/sys/ia64/ia64/pmap.c @@ -1028,18 +1028,22 @@ pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot) struct ia64_lpte *pte; pmap_t oldpmap; vm_page_t m; + vm_paddr_t pa; + pa = 0; m = NULL; - vm_page_lock_queues(); PMAP_LOCK(pmap); oldpmap = pmap_switch(pmap); +retry: pte = pmap_find_vhpt(va); if (pte != NULL && pmap_present(pte) && (pmap_prot(pte) & prot) == prot) { m = PHYS_TO_VM_PAGE(pmap_ppn(pte)); + if (vm_page_pa_tryrelock(pmap, pmap_ppn(pte), &pa)) + goto retry; vm_page_hold(m); } - vm_page_unlock_queues(); + PA_UNLOCK_COND(pa); pmap_switch(oldpmap); PMAP_UNLOCK(pmap); return (m); @@ -1450,19 +1454,13 @@ pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) if (pmap_prot(pte) == prot) continue; - if (pmap_managed(pte)) { - 
vm_offset_t pa = pmap_ppn(pte); + if ((prot & VM_PROT_WRITE) == 0 && + pmap_managed(pte) && pmap_dirty(pte)) { + vm_paddr_t pa = pmap_ppn(pte); vm_page_t m = PHYS_TO_VM_PAGE(pa); - if (pmap_dirty(pte)) { - vm_page_dirty(m); - pmap_clear_dirty(pte); - } - - if (pmap_accessed(pte)) { - vm_page_flag_set(m, PG_REFERENCED); - pmap_clear_accessed(pte); - } + vm_page_dirty(m); + pmap_clear_dirty(pte); } if (prot & VM_PROT_EXECUTE) @@ -2022,6 +2020,37 @@ pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr) return (TRUE); } +/* + * pmap_is_referenced: + * + * Return whether or not the specified physical page was referenced + * in any physical maps. + */ +boolean_t +pmap_is_referenced(vm_page_t m) +{ + struct ia64_lpte *pte; + pmap_t oldpmap; + pv_entry_t pv; + boolean_t rv; + + rv = FALSE; + if (m->flags & PG_FICTITIOUS) + return (rv); + TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { + PMAP_LOCK(pv->pv_pmap); + oldpmap = pmap_switch(pv->pv_pmap); + pte = pmap_find_vhpt(pv->pv_va); + pmap_switch(oldpmap); + KASSERT(pte != NULL, ("pte")); + rv = pmap_accessed(pte) ? TRUE : FALSE; + PMAP_UNLOCK(pv->pv_pmap); + if (rv) + break; + } + return (rv); +} + /* * Clear the modify bits on the specified physical page. 
*/ @@ -2197,10 +2226,8 @@ pmap_mincore(pmap_t pmap, vm_offset_t addr) * Referenced by someone */ vm_page_lock_queues(); - if (pmap_ts_referenced(m)) { + if (pmap_is_referenced(m)) val |= MINCORE_REFERENCED_OTHER; - vm_page_flag_set(m, PG_REFERENCED); - } vm_page_unlock_queues(); } } diff --git a/sys/ia64/include/iodev.h b/sys/ia64/include/iodev.h index 6d2ae19d36d..cf349d9b068 100644 --- a/sys/ia64/include/iodev.h +++ b/sys/ia64/include/iodev.h @@ -31,22 +31,16 @@ #include -struct iodev_pio_req { - u_int access; -#define IODEV_PIO_READ 0 -#define IODEV_PIO_WRITE 1 - u_int port; - u_int width; - u_int val; -}; +#ifdef _KERNEL +#include +#endif -#define IODEV_PIO _IOWR('I', 0, struct iodev_pio_req) - -struct iodev_efivar_req { - u_int access; #define IODEV_EFIVAR_GETVAR 0 #define IODEV_EFIVAR_NEXTNAME 1 #define IODEV_EFIVAR_SETVAR 2 + +struct iodev_efivar_req { + u_int access; u_int result; /* errno value */ size_t namesize; u_short *name; /* UCS-2 */ @@ -59,11 +53,16 @@ struct iodev_efivar_req { #define IODEV_EFIVAR _IOWR('I', 1, struct iodev_efivar_req) #ifdef _KERNEL +#define iodev_read_1 bus_space_read_io_1 +#define iodev_read_2 bus_space_read_io_2 +#define iodev_read_4 bus_space_read_io_4 +#define iodev_write_1 bus_space_write_io_1 +#define iodev_write_2 bus_space_write_io_2 +#define iodev_write_4 bus_space_write_io_4 -d_open_t ioopen; -d_close_t ioclose; -d_ioctl_t ioioctl; - -#endif +int iodev_open(struct thread *td); +int iodev_close(struct thread *td); +int iodev_ioctl(u_long, caddr_t data); +#endif /* _KERNEL */ #endif /* _MACHINE_IODEV_H_ */ diff --git a/sys/ia64/include/pmap.h b/sys/ia64/include/pmap.h index 44079c88129..ff059fd5563 100644 --- a/sys/ia64/include/pmap.h +++ b/sys/ia64/include/pmap.h @@ -77,6 +77,8 @@ struct pmap { TAILQ_HEAD(,pv_entry) pm_pvlist; /* list of mappings in pmap */ u_int32_t pm_rid[5]; /* base RID for pmap */ struct pmap_statistics pm_stats; /* pmap statistics */ + uint32_t pm_gen_count; /* generation count (pmap lock dropped) 
*/ + u_int pm_retries; }; typedef struct pmap *pmap_t; diff --git a/sys/ia64/include/proc.h b/sys/ia64/include/proc.h index 6bf9c78c88b..e9f337cbb48 100644 --- a/sys/ia64/include/proc.h +++ b/sys/ia64/include/proc.h @@ -38,4 +38,7 @@ struct mdproc { int __dummy; /* Avoid having an empty struct. */ }; +#define KINFO_PROC_SIZE 1088 +#define KINFO_PROC32_SIZE 768 + #endif /* !_MACHINE_PROC_H_ */ diff --git a/sys/kern/imgact_elf.c b/sys/kern/imgact_elf.c index 236894f4b2d..c48e0f5f471 100644 --- a/sys/kern/imgact_elf.c +++ b/sys/kern/imgact_elf.c @@ -1088,8 +1088,10 @@ __elfN(coredump)(struct thread *td, struct vnode *vp, off_t limit, int flags) hdrsize = 0; __elfN(puthdr)(td, (void *)NULL, &hdrsize, seginfo.count); - if (hdrsize + seginfo.size >= limit) - return (EFAULT); + if (hdrsize + seginfo.size >= limit) { + error = EFAULT; + goto done; + } /* * Allocate memory for building the header, fill it up, @@ -1097,7 +1099,8 @@ __elfN(coredump)(struct thread *td, struct vnode *vp, off_t limit, int flags) */ hdr = malloc(hdrsize, M_TEMP, M_WAITOK); if (hdr == NULL) { - return (EINVAL); + error = EINVAL; + goto done; } error = __elfN(corehdr)(td, vp, cred, seginfo.count, hdr, hdrsize, gzfile); @@ -1125,8 +1128,8 @@ __elfN(coredump)(struct thread *td, struct vnode *vp, off_t limit, int flags) curproc->p_comm, error); } -#ifdef COMPRESS_USER_CORES done: +#ifdef COMPRESS_USER_CORES if (core_buf) free(core_buf, M_TEMP); if (gzfile) diff --git a/sys/kern/kern_alq.c b/sys/kern/kern_alq.c index 6722d9a4db1..98e6de885cf 100644 --- a/sys/kern/kern_alq.c +++ b/sys/kern/kern_alq.c @@ -55,16 +55,23 @@ __FBSDID("$FreeBSD$"); /* Async. 
Logging Queue */ struct alq { + char *aq_entbuf; /* Buffer for stored entries */ int aq_entmax; /* Max entries */ int aq_entlen; /* Entry length */ - char *aq_entbuf; /* Buffer for stored entries */ + int aq_freebytes; /* Bytes available in buffer */ + int aq_buflen; /* Total length of our buffer */ + int aq_writehead; /* Location for next write */ + int aq_writetail; /* Flush starts at this location */ + int aq_wrapearly; /* # bytes left blank at end of buf */ int aq_flags; /* Queue flags */ + int aq_waiters; /* Num threads waiting for resources + * NB: Used as a wait channel so must + * not be first field in the alq struct + */ + struct ale aq_getpost; /* ALE for use by get/post */ struct mtx aq_mtx; /* Queue lock */ struct vnode *aq_vp; /* Open vnode handle */ struct ucred *aq_cred; /* Credentials of the opening thread */ - struct ale *aq_first; /* First ent */ - struct ale *aq_entfree; /* First free ent */ - struct ale *aq_entvalid; /* First ent valid for writing */ LIST_ENTRY(alq) aq_act; /* List of active queues */ LIST_ENTRY(alq) aq_link; /* List of all queues */ }; @@ -73,10 +80,14 @@ struct alq { #define AQ_ACTIVE 0x0002 /* on the active list */ #define AQ_FLUSHING 0x0004 /* doing IO */ #define AQ_SHUTDOWN 0x0008 /* Queue no longer valid */ +#define AQ_ORDERED 0x0010 /* Queue enforces ordered writes */ +#define AQ_LEGACY 0x0020 /* Legacy queue (fixed length writes) */ #define ALQ_LOCK(alq) mtx_lock_spin(&(alq)->aq_mtx) #define ALQ_UNLOCK(alq) mtx_unlock_spin(&(alq)->aq_mtx) +#define HAS_PENDING_DATA(alq) ((alq)->aq_freebytes != (alq)->aq_buflen) + static MALLOC_DEFINE(M_ALD, "ALD", "ALD"); /* @@ -205,7 +216,7 @@ ald_daemon(void) needwakeup = alq_doio(alq); ALQ_UNLOCK(alq); if (needwakeup) - wakeup(alq); + wakeup_one(alq); ALD_LOCK(); } @@ -252,6 +263,20 @@ alq_shutdown(struct alq *alq) /* Stop any new writers. 
*/ alq->aq_flags |= AQ_SHUTDOWN; + /* + * If the ALQ isn't active but has unwritten data (possible if + * the ALQ_NOACTIVATE flag has been used), explicitly activate the + * ALQ here so that the pending data gets flushed by the ald_daemon. + */ + if (!(alq->aq_flags & AQ_ACTIVE) && HAS_PENDING_DATA(alq)) { + alq->aq_flags |= AQ_ACTIVE; + ALQ_UNLOCK(alq); + ALD_LOCK(); + ald_activate(alq); + ALD_UNLOCK(); + ALQ_LOCK(alq); + } + /* Drain IO */ while (alq->aq_flags & AQ_ACTIVE) { alq->aq_flags |= AQ_WANTED; @@ -271,7 +296,6 @@ alq_destroy(struct alq *alq) alq_shutdown(alq); mtx_destroy(&alq->aq_mtx); - free(alq->aq_first, M_ALD); free(alq->aq_entbuf, M_ALD); free(alq, M_ALD); } @@ -287,46 +311,54 @@ alq_doio(struct alq *alq) struct vnode *vp; struct uio auio; struct iovec aiov[2]; - struct ale *ale; - struct ale *alstart; int totlen; int iov; int vfslocked; + int wrapearly; + + KASSERT((HAS_PENDING_DATA(alq)), ("%s: queue empty!", __func__)); vp = alq->aq_vp; td = curthread; totlen = 0; - iov = 0; - - alstart = ale = alq->aq_entvalid; - alq->aq_entvalid = NULL; + iov = 1; + wrapearly = alq->aq_wrapearly; bzero(&aiov, sizeof(aiov)); bzero(&auio, sizeof(auio)); - do { - if (aiov[iov].iov_base == NULL) - aiov[iov].iov_base = ale->ae_data; - aiov[iov].iov_len += alq->aq_entlen; - totlen += alq->aq_entlen; - /* Check to see if we're wrapping the buffer */ - if (ale->ae_data + alq->aq_entlen != ale->ae_next->ae_data) - iov++; - ale->ae_flags &= ~AE_VALID; - ale = ale->ae_next; - } while (ale->ae_flags & AE_VALID); + /* Start the write from the location of our buffer tail pointer. */ + aiov[0].iov_base = alq->aq_entbuf + alq->aq_writetail; + + if (alq->aq_writetail < alq->aq_writehead) { + /* Buffer not wrapped. */ + totlen = aiov[0].iov_len = alq->aq_writehead - alq->aq_writetail; + } else if (alq->aq_writehead == 0) { + /* Buffer not wrapped (special case to avoid an empty iov). 
*/ + totlen = aiov[0].iov_len = alq->aq_buflen - alq->aq_writetail - + wrapearly; + } else { + /* + * Buffer wrapped, requires 2 aiov entries: + * - first is from writetail to end of buffer + * - second is from start of buffer to writehead + */ + aiov[0].iov_len = alq->aq_buflen - alq->aq_writetail - + wrapearly; + iov++; + aiov[1].iov_base = alq->aq_entbuf; + aiov[1].iov_len = alq->aq_writehead; + totlen = aiov[0].iov_len + aiov[1].iov_len; + } alq->aq_flags |= AQ_FLUSHING; ALQ_UNLOCK(alq); - if (iov == 2 || aiov[iov].iov_base == NULL) - iov--; - auio.uio_iov = &aiov[0]; auio.uio_offset = 0; auio.uio_segflg = UIO_SYSSPACE; auio.uio_rw = UIO_WRITE; - auio.uio_iovcnt = iov + 1; + auio.uio_iovcnt = iov; auio.uio_resid = totlen; auio.uio_td = td; @@ -350,8 +382,28 @@ alq_doio(struct alq *alq) ALQ_LOCK(alq); alq->aq_flags &= ~AQ_FLUSHING; - if (alq->aq_entfree == NULL) - alq->aq_entfree = alstart; + /* Adjust writetail as required, taking into account wrapping. */ + alq->aq_writetail = (alq->aq_writetail + totlen + wrapearly) % + alq->aq_buflen; + alq->aq_freebytes += totlen + wrapearly; + + /* + * If we just flushed part of the buffer which wrapped, reset the + * wrapearly indicator. + */ + if (wrapearly) + alq->aq_wrapearly = 0; + + /* + * If we just flushed the buffer completely, reset indexes to 0 to + * minimise buffer wraps. + * This is also required to ensure alq_getn() can't wedge itself. + */ + if (!HAS_PENDING_DATA(alq)) + alq->aq_writehead = alq->aq_writetail = 0; + + KASSERT((alq->aq_writetail >= 0 && alq->aq_writetail < alq->aq_buflen), + ("%s: aq_writetail < 0 || aq_writetail >= aq_buflen", __func__)); if (alq->aq_flags & AQ_WANTED) { alq->aq_flags &= ~AQ_WANTED; @@ -376,27 +428,27 @@ SYSINIT(ald, SI_SUB_LOCK, SI_ORDER_ANY, ald_startup, NULL); /* * Create the queue data structure, allocate the buffer, and open the file. 
*/ + int -alq_open(struct alq **alqp, const char *file, struct ucred *cred, int cmode, - int size, int count) +alq_open_flags(struct alq **alqp, const char *file, struct ucred *cred, int cmode, + int size, int flags) { struct thread *td; struct nameidata nd; - struct ale *ale; - struct ale *alp; struct alq *alq; - char *bufp; - int flags; + int oflags; int error; - int i, vfslocked; + int vfslocked; + + KASSERT((size > 0), ("%s: size <= 0", __func__)); *alqp = NULL; td = curthread; NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE, UIO_SYSSPACE, file, td); - flags = FWRITE | O_NOFOLLOW | O_CREAT; + oflags = FWRITE | O_NOFOLLOW | O_CREAT; - error = vn_open_cred(&nd, &flags, cmode, 0, cred, NULL); + error = vn_open_cred(&nd, &oflags, cmode, 0, cred, NULL); if (error) return (error); @@ -407,31 +459,20 @@ alq_open(struct alq **alqp, const char *file, struct ucred *cred, int cmode, VFS_UNLOCK_GIANT(vfslocked); alq = malloc(sizeof(*alq), M_ALD, M_WAITOK|M_ZERO); - alq->aq_entbuf = malloc(count * size, M_ALD, M_WAITOK|M_ZERO); - alq->aq_first = malloc(sizeof(*ale) * count, M_ALD, M_WAITOK|M_ZERO); alq->aq_vp = nd.ni_vp; alq->aq_cred = crhold(cred); - alq->aq_entmax = count; - alq->aq_entlen = size; - alq->aq_entfree = alq->aq_first; mtx_init(&alq->aq_mtx, "ALD Queue", NULL, MTX_SPIN|MTX_QUIET); - bufp = alq->aq_entbuf; - ale = alq->aq_first; - alp = NULL; + alq->aq_buflen = size; + alq->aq_entmax = 0; + alq->aq_entlen = 0; - /* Match up entries with buffers */ - for (i = 0; i < count; i++) { - if (alp) - alp->ae_next = ale; - ale->ae_data = bufp; - alp = ale; - ale++; - bufp += size; - } - - alp->ae_next = alq->aq_first; + alq->aq_freebytes = alq->aq_buflen; + alq->aq_entbuf = malloc(alq->aq_buflen, M_ALD, M_WAITOK|M_ZERO); + alq->aq_writehead = alq->aq_writetail = 0; + if (flags & ALQ_ORDERED) + alq->aq_flags |= AQ_ORDERED; if ((error = ald_add(alq)) != 0) { alq_destroy(alq); @@ -443,77 +484,405 @@ alq_open(struct alq **alqp, const char *file, struct ucred *cred, int cmode, return 
(0); } +int +alq_open(struct alq **alqp, const char *file, struct ucred *cred, int cmode, + int size, int count) +{ + int ret; + + KASSERT((count >= 0), ("%s: count < 0", __func__)); + + if (count > 0) { + ret = alq_open_flags(alqp, file, cred, cmode, size*count, 0); + (*alqp)->aq_flags |= AQ_LEGACY; + (*alqp)->aq_entmax = count; + (*alqp)->aq_entlen = size; + } else + ret = alq_open_flags(alqp, file, cred, cmode, size, 0); + + return (ret); +} + + /* * Copy a new entry into the queue. If the operation would block either * wait or return an error depending on the value of waitok. */ int -alq_write(struct alq *alq, void *data, int waitok) +alq_writen(struct alq *alq, void *data, int len, int flags) { - struct ale *ale; + int activate, copy, ret; + void *waitchan; - if ((ale = alq_get(alq, waitok)) == NULL) - return (EWOULDBLOCK); + KASSERT((len > 0 && len <= alq->aq_buflen), + ("%s: len <= 0 || len > aq_buflen", __func__)); - bcopy(data, ale->ae_data, alq->aq_entlen); - alq_post(alq, ale); - - return (0); -} - -struct ale * -alq_get(struct alq *alq, int waitok) -{ - struct ale *ale; - struct ale *aln; - - ale = NULL; + activate = ret = 0; + copy = len; + waitchan = NULL; ALQ_LOCK(alq); - /* Loop until we get an entry or we're shutting down */ - while ((alq->aq_flags & AQ_SHUTDOWN) == 0 && - (ale = alq->aq_entfree) == NULL && - (waitok & ALQ_WAITOK)) { - alq->aq_flags |= AQ_WANTED; - msleep_spin(alq, &alq->aq_mtx, "alqget", 0); + /* + * Fail to perform the write and return EWOULDBLOCK if: + * - The message is larger than our underlying buffer. + * - The ALQ is being shutdown. + * - There is insufficient free space in our underlying buffer + * to accept the message and the user can't wait for space. + * - There is insufficient free space in our underlying buffer + * to accept the message and the alq is inactive due to prior + * use of the ALQ_NOACTIVATE flag (which would lead to deadlock). 
+ */ + if (len > alq->aq_buflen || + alq->aq_flags & AQ_SHUTDOWN || + (((flags & ALQ_NOWAIT) || (!(alq->aq_flags & AQ_ACTIVE) && + HAS_PENDING_DATA(alq))) && alq->aq_freebytes < len)) { + ALQ_UNLOCK(alq); + return (EWOULDBLOCK); } - if (ale != NULL) { - aln = ale->ae_next; - if ((aln->ae_flags & AE_VALID) == 0) - alq->aq_entfree = aln; + /* + * If we want ordered writes and there is already at least one thread + * waiting for resources to become available, sleep until we're woken. + */ + if (alq->aq_flags & AQ_ORDERED && alq->aq_waiters > 0) { + KASSERT(!(flags & ALQ_NOWAIT), + ("%s: ALQ_NOWAIT set but incorrectly ignored!", __func__)); + alq->aq_waiters++; + msleep_spin(&alq->aq_waiters, &alq->aq_mtx, "alqwnord", 0); + alq->aq_waiters--; + } + + /* + * (ALQ_WAITOK && aq_freebytes < len) or aq_freebytes >= len, either + * enter while loop and sleep until we have enough free bytes (former) + * or skip (latter). If AQ_ORDERED is set, only 1 thread at a time will + * be in this loop. Otherwise, multiple threads may be sleeping here + * competing for ALQ resources. + */ + while (alq->aq_freebytes < len && !(alq->aq_flags & AQ_SHUTDOWN)) { + KASSERT(!(flags & ALQ_NOWAIT), + ("%s: ALQ_NOWAIT set but incorrectly ignored!", __func__)); + alq->aq_flags |= AQ_WANTED; + alq->aq_waiters++; + if (waitchan) + wakeup(waitchan); + msleep_spin(alq, &alq->aq_mtx, "alqwnres", 0); + alq->aq_waiters--; + + /* + * If we're the first thread to wake after an AQ_WANTED wakeup + * but there isn't enough free space for us, we're going to loop + * and sleep again. If there are other threads waiting in this + * loop, schedule a wakeup so that they can see if the space + * they require is available. + */ + if (alq->aq_waiters > 0 && !(alq->aq_flags & AQ_ORDERED) && + alq->aq_freebytes < len && !(alq->aq_flags & AQ_WANTED)) + waitchan = alq; else - alq->aq_entfree = NULL; + waitchan = NULL; + } + + /* + * If there are waiters, we need to signal the waiting threads after we + * complete our work. 
The alq ptr is used as a wait channel for threads + * requiring resources to be freed up. In the AQ_ORDERED case, threads + * are not allowed to concurrently compete for resources in the above + * while loop, so we use a different wait channel in this case. + */ + if (alq->aq_waiters > 0) { + if (alq->aq_flags & AQ_ORDERED) + waitchan = &alq->aq_waiters; + else + waitchan = alq; } else - ALQ_UNLOCK(alq); + waitchan = NULL; + /* Bail if we're shutting down. */ + if (alq->aq_flags & AQ_SHUTDOWN) { + ret = EWOULDBLOCK; + goto unlock; + } - return (ale); -} + /* + * If we need to wrap the buffer to accommodate the write, + * we'll need 2 calls to bcopy. + */ + if ((alq->aq_buflen - alq->aq_writehead) < len) + copy = alq->aq_buflen - alq->aq_writehead; -void -alq_post(struct alq *alq, struct ale *ale) -{ - int activate; + /* Copy message (or part thereof if wrap required) to the buffer. */ + bcopy(data, alq->aq_entbuf + alq->aq_writehead, copy); + alq->aq_writehead += copy; - ale->ae_flags |= AE_VALID; + if (alq->aq_writehead >= alq->aq_buflen) { + KASSERT((alq->aq_writehead == alq->aq_buflen), + ("%s: alq->aq_writehead (%d) > alq->aq_buflen (%d)", + __func__, + alq->aq_writehead, + alq->aq_buflen)); + alq->aq_writehead = 0; + } - if (alq->aq_entvalid == NULL) - alq->aq_entvalid = ale; + if (copy != len) { + /* + * Wrap the buffer by copying the remainder of our message + * to the start of the buffer and resetting aq_writehead. 
+ */ + bcopy(((uint8_t *)data)+copy, alq->aq_entbuf, len - copy); + alq->aq_writehead = len - copy; + } - if ((alq->aq_flags & AQ_ACTIVE) == 0) { + KASSERT((alq->aq_writehead >= 0 && alq->aq_writehead < alq->aq_buflen), + ("%s: aq_writehead < 0 || aq_writehead >= aq_buflen", __func__)); + + alq->aq_freebytes -= len; + + if (!(alq->aq_flags & AQ_ACTIVE) && !(flags & ALQ_NOACTIVATE)) { alq->aq_flags |= AQ_ACTIVE; activate = 1; - } else - activate = 0; + } + KASSERT((HAS_PENDING_DATA(alq)), ("%s: queue empty!", __func__)); + +unlock: ALQ_UNLOCK(alq); + if (activate) { ALD_LOCK(); ald_activate(alq); ALD_UNLOCK(); } + + /* NB: We rely on wakeup_one waking threads in a FIFO manner. */ + if (waitchan != NULL) + wakeup_one(waitchan); + + return (ret); +} + +int +alq_write(struct alq *alq, void *data, int flags) +{ + /* Should only be called in fixed length message (legacy) mode. */ + KASSERT((alq->aq_flags & AQ_LEGACY), + ("%s: fixed length write on variable length queue", __func__)); + return (alq_writen(alq, data, alq->aq_entlen, flags)); +} + +/* + * Retrieve a pointer for the ALQ to write directly into, avoiding bcopy. + */ +struct ale * +alq_getn(struct alq *alq, int len, int flags) +{ + int contigbytes; + void *waitchan; + + KASSERT((len > 0 && len <= alq->aq_buflen), + ("%s: len <= 0 || len > alq->aq_buflen", __func__)); + + waitchan = NULL; + + ALQ_LOCK(alq); + + /* + * Determine the number of free contiguous bytes. + * We ensure elsewhere that if aq_writehead == aq_writetail because + * the buffer is empty, they will both be set to 0 and therefore + * aq_freebytes == aq_buflen and is fully contiguous. + * If they are equal and the buffer is not empty, aq_freebytes will + * be 0 indicating the buffer is full. + */ + if (alq->aq_writehead <= alq->aq_writetail) + contigbytes = alq->aq_freebytes; + else { + contigbytes = alq->aq_buflen - alq->aq_writehead; + + if (contigbytes < len) { + /* + * Insufficient space at end of buffer to handle a + * contiguous write. 
Wrap early if there's space at + * the beginning. This will leave a hole at the end + * of the buffer which we will have to skip over when + * flushing the buffer to disk. + */ + if (alq->aq_writetail >= len || flags & ALQ_WAITOK) { + /* Keep track of # bytes left blank. */ + alq->aq_wrapearly = contigbytes; + /* Do the wrap and adjust counters. */ + contigbytes = alq->aq_freebytes = + alq->aq_writetail; + alq->aq_writehead = 0; + } + } + } + + /* + * Return a NULL ALE if: + * - The message is larger than our underlying buffer. + * - The ALQ is being shutdown. + * - There is insufficient free space in our underlying buffer + * to accept the message and the user can't wait for space. + * - There is insufficient free space in our underlying buffer + * to accept the message and the alq is inactive due to prior + * use of the ALQ_NOACTIVATE flag (which would lead to deadlock). + */ + if (len > alq->aq_buflen || + alq->aq_flags & AQ_SHUTDOWN || + (((flags & ALQ_NOWAIT) || (!(alq->aq_flags & AQ_ACTIVE) && + HAS_PENDING_DATA(alq))) && contigbytes < len)) { + ALQ_UNLOCK(alq); + return (NULL); + } + + /* + * If we want ordered writes and there is already at least one thread + * waiting for resources to become available, sleep until we're woken. + */ + if (alq->aq_flags & AQ_ORDERED && alq->aq_waiters > 0) { + KASSERT(!(flags & ALQ_NOWAIT), + ("%s: ALQ_NOWAIT set but incorrectly ignored!", __func__)); + alq->aq_waiters++; + msleep_spin(&alq->aq_waiters, &alq->aq_mtx, "alqgnord", 0); + alq->aq_waiters--; + } + + /* + * (ALQ_WAITOK && contigbytes < len) or contigbytes >= len, either enter + * while loop and sleep until we have enough contiguous free bytes + * (former) or skip (latter). If AQ_ORDERED is set, only 1 thread at a + * time will be in this loop. Otherwise, multiple threads may be + * sleeping here competing for ALQ resources. 
+ */ + while (contigbytes < len && !(alq->aq_flags & AQ_SHUTDOWN)) { + KASSERT(!(flags & ALQ_NOWAIT), + ("%s: ALQ_NOWAIT set but incorrectly ignored!", __func__)); + alq->aq_flags |= AQ_WANTED; + alq->aq_waiters++; + if (waitchan) + wakeup(waitchan); + msleep_spin(alq, &alq->aq_mtx, "alqgnres", 0); + alq->aq_waiters--; + + if (alq->aq_writehead <= alq->aq_writetail) + contigbytes = alq->aq_freebytes; + else + contigbytes = alq->aq_buflen - alq->aq_writehead; + + /* + * If we're the first thread to wake after an AQ_WANTED wakeup + * but there isn't enough free space for us, we're going to loop + * and sleep again. If there are other threads waiting in this + * loop, schedule a wakeup so that they can see if the space + * they require is available. + */ + if (alq->aq_waiters > 0 && !(alq->aq_flags & AQ_ORDERED) && + contigbytes < len && !(alq->aq_flags & AQ_WANTED)) + waitchan = alq; + else + waitchan = NULL; + } + + /* + * If there are waiters, we need to signal the waiting threads after we + * complete our work. The alq ptr is used as a wait channel for threads + * requiring resources to be freed up. In the AQ_ORDERED case, threads + * are not allowed to concurrently compete for resources in the above + * while loop, so we use a different wait channel in this case. + */ + if (alq->aq_waiters > 0) { + if (alq->aq_flags & AQ_ORDERED) + waitchan = &alq->aq_waiters; + else + waitchan = alq; + } else + waitchan = NULL; + + /* Bail if we're shutting down. */ + if (alq->aq_flags & AQ_SHUTDOWN) { + ALQ_UNLOCK(alq); + if (waitchan != NULL) + wakeup_one(waitchan); + return (NULL); + } + + /* + * If we are here, we have a contiguous number of bytes >= len + * available in our buffer starting at aq_writehead. + */ + alq->aq_getpost.ae_data = alq->aq_entbuf + alq->aq_writehead; + alq->aq_getpost.ae_bytesused = len; + + return (&alq->aq_getpost); +} + +struct ale * +alq_get(struct alq *alq, int flags) +{ + /* Should only be called in fixed length message (legacy) mode. 
*/ + KASSERT((alq->aq_flags & AQ_LEGACY), + ("%s: fixed length get on variable length queue", __func__)); + return (alq_getn(alq, alq->aq_entlen, flags)); +} + +void +alq_post_flags(struct alq *alq, struct ale *ale, int flags) +{ + int activate; + void *waitchan; + + activate = 0; + + if (ale->ae_bytesused > 0) { + if (!(alq->aq_flags & AQ_ACTIVE) && + !(flags & ALQ_NOACTIVATE)) { + alq->aq_flags |= AQ_ACTIVE; + activate = 1; + } + + alq->aq_writehead += ale->ae_bytesused; + alq->aq_freebytes -= ale->ae_bytesused; + + /* Wrap aq_writehead if we filled to the end of the buffer. */ + if (alq->aq_writehead == alq->aq_buflen) + alq->aq_writehead = 0; + + KASSERT((alq->aq_writehead >= 0 && + alq->aq_writehead < alq->aq_buflen), + ("%s: aq_writehead < 0 || aq_writehead >= aq_buflen", + __func__)); + + KASSERT((HAS_PENDING_DATA(alq)), ("%s: queue empty!", __func__)); + } + + /* + * If there are waiters, we need to signal the waiting threads after we + * complete our work. The alq ptr is used as a wait channel for threads + * requiring resources to be freed up. In the AQ_ORDERED case, threads + * are not allowed to concurrently compete for resources in the + * alq_getn() while loop, so we use a different wait channel in this case. + */ + if (alq->aq_waiters > 0) { + if (alq->aq_flags & AQ_ORDERED) + waitchan = &alq->aq_waiters; + else + waitchan = alq; + } else + waitchan = NULL; + + ALQ_UNLOCK(alq); + + if (activate) { + ALD_LOCK(); + ald_activate(alq); + ALD_UNLOCK(); + } + + /* NB: We rely on wakeup_one waking threads in a FIFO manner. */ + if (waitchan != NULL) + wakeup_one(waitchan); } void @@ -523,16 +892,24 @@ alq_flush(struct alq *alq) ALD_LOCK(); ALQ_LOCK(alq); - if (alq->aq_flags & AQ_ACTIVE) { - ald_deactivate(alq); + + /* + * Pull the lever iff there is data to flush and we're + * not already in the middle of a flush operation. 
+ */ + if (HAS_PENDING_DATA(alq) && !(alq->aq_flags & AQ_FLUSHING)) { + if (alq->aq_flags & AQ_ACTIVE) + ald_deactivate(alq); + ALD_UNLOCK(); needwakeup = alq_doio(alq); } else ALD_UNLOCK(); + ALQ_UNLOCK(alq); if (needwakeup) - wakeup(alq); + wakeup_one(alq); } /* diff --git a/sys/kern/kern_clock.c b/sys/kern/kern_clock.c index 98e276bb34a..da05cc16356 100644 --- a/sys/kern/kern_clock.c +++ b/sys/kern/kern_clock.c @@ -163,6 +163,7 @@ SYSCTL_PROC(_kern, OID_AUTO, cp_times, CTLTYPE_LONG|CTLFLAG_RD|CTLFLAG_MPSAFE, #ifdef DEADLKRES static const char *blessed[] = { + "getblk", "so_snd_sx", "so_rcv_sx", NULL diff --git a/sys/kern/kern_conf.c b/sys/kern/kern_conf.c index a2e292e40f0..17a6827254e 100644 --- a/sys/kern/kern_conf.c +++ b/sys/kern/kern_conf.c @@ -505,7 +505,7 @@ giant_mmap_single(struct cdev *dev, vm_ooffset_t *offset, vm_size_t size, } static void -notify(struct cdev *dev, const char *ev) +notify(struct cdev *dev, const char *ev, int flags) { static const char prefix[] = "cdev="; char *data; @@ -514,7 +514,8 @@ notify(struct cdev *dev, const char *ev) if (cold) return; namelen = strlen(dev->si_name); - data = malloc(namelen + sizeof(prefix), M_TEMP, M_NOWAIT); + data = malloc(namelen + sizeof(prefix), M_TEMP, + (flags & MAKEDEV_NOWAIT) ? 
M_NOWAIT : M_WAITOK); if (data == NULL) return; memcpy(data, prefix, sizeof(prefix) - 1); @@ -524,17 +525,17 @@ notify(struct cdev *dev, const char *ev) } static void -notify_create(struct cdev *dev) +notify_create(struct cdev *dev, int flags) { - notify(dev, "CREATE"); + notify(dev, "CREATE", flags); } static void notify_destroy(struct cdev *dev) { - notify(dev, "DESTROY"); + notify(dev, "DESTROY", MAKEDEV_WAITOK); } static struct cdev * @@ -572,24 +573,27 @@ fini_cdevsw(struct cdevsw *devsw) devsw->d_flags &= ~D_INIT; } -static void -prep_cdevsw(struct cdevsw *devsw) +static int +prep_cdevsw(struct cdevsw *devsw, int flags) { struct cdevsw *dsw2; mtx_assert(&devmtx, MA_OWNED); if (devsw->d_flags & D_INIT) - return; + return (1); if (devsw->d_flags & D_NEEDGIANT) { dev_unlock(); - dsw2 = malloc(sizeof *dsw2, M_DEVT, M_WAITOK); + dsw2 = malloc(sizeof *dsw2, M_DEVT, + (flags & MAKEDEV_NOWAIT) ? M_NOWAIT : M_WAITOK); dev_lock(); + if (dsw2 == NULL && !(devsw->d_flags & D_INIT)) + return (0); } else dsw2 = NULL; if (devsw->d_flags & D_INIT) { if (dsw2 != NULL) cdevsw_free_devlocked(dsw2); - return; + return (1); } if (devsw->d_version != D_VERSION_03) { @@ -647,6 +651,7 @@ prep_cdevsw(struct cdevsw *devsw) if (dsw2 != NULL) cdevsw_free_devlocked(dsw2); + return (1); } static struct cdev * @@ -657,9 +662,15 @@ make_dev_credv(int flags, struct cdevsw *devsw, int unit, struct cdev *dev; int i; - dev = devfs_alloc(); + dev = devfs_alloc(flags); + if (dev == NULL) + return (NULL); dev_lock(); - prep_cdevsw(devsw); + if (!prep_cdevsw(devsw, flags)) { + dev_unlock(); + devfs_free(dev); + return (NULL); + } dev = newdev(devsw, unit, dev); if (flags & MAKEDEV_REF) dev_refl(dev); @@ -686,8 +697,6 @@ make_dev_credv(int flags, struct cdevsw *devsw, int unit, dev->si_flags |= SI_NAMED; if (cr != NULL) dev->si_cred = crhold(cr); - else - dev->si_cred = NULL; dev->si_uid = uid; dev->si_gid = gid; dev->si_mode = mode; @@ -696,7 +705,7 @@ make_dev_credv(int flags, struct cdevsw 
*devsw, int unit, clean_unrhdrl(devfs_inos); dev_unlock_and_free(); - notify_create(dev); + notify_create(dev, flags); return (dev); } @@ -771,7 +780,7 @@ make_dev_alias(struct cdev *pdev, const char *fmt, ...) int i; KASSERT(pdev != NULL, ("NULL pdev")); - dev = devfs_alloc(); + dev = devfs_alloc(MAKEDEV_WAITOK); dev_lock(); dev->si_flags |= SI_ALIAS; dev->si_flags |= SI_NAMED; @@ -788,7 +797,7 @@ make_dev_alias(struct cdev *pdev, const char *fmt, ...) clean_unrhdrl(devfs_inos); dev_unlock(); - notify_create(dev); + notify_create(dev, MAKEDEV_WAITOK); return (dev); } @@ -973,9 +982,9 @@ clone_create(struct clonedevs **cdp, struct cdevsw *csw, int *up, * the end of the list. */ unit = *up; - ndev = devfs_alloc(); + ndev = devfs_alloc(MAKEDEV_WAITOK); dev_lock(); - prep_cdevsw(csw); + prep_cdevsw(csw, MAKEDEV_WAITOK); low = extra; de = dl = NULL; cd = *cdp; diff --git a/sys/kern/kern_descrip.c b/sys/kern/kern_descrip.c index 676de650524..302ca5eeaf4 100644 --- a/sys/kern/kern_descrip.c +++ b/sys/kern/kern_descrip.c @@ -2896,7 +2896,7 @@ sysctl_kern_proc_ofiledesc(SYSCTL_HANDLER_ARGS) free(sa, M_SONAME); } if (so->so_proto->pr_usrreqs->pru_peeraddr(so, &sa) - == 00 && sa->sa_len <= sizeof(kif->kf_sa_peer)) { + == 0 && sa->sa_len <= sizeof(kif->kf_sa_peer)) { bcopy(sa, &kif->kf_sa_peer, sa->sa_len); free(sa, M_SONAME); } @@ -3149,7 +3149,7 @@ sysctl_kern_proc_filedesc(SYSCTL_HANDLER_ARGS) free(sa, M_SONAME); } if (so->so_proto->pr_usrreqs->pru_peeraddr(so, &sa) - == 00 && sa->sa_len <= sizeof(kif->kf_sa_peer)) { + == 0 && sa->sa_len <= sizeof(kif->kf_sa_peer)) { bcopy(sa, &kif->kf_sa_peer, sa->sa_len); free(sa, M_SONAME); } diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c index ed22519f693..fc87d639404 100644 --- a/sys/kern/kern_exec.c +++ b/sys/kern/kern_exec.c @@ -948,18 +948,18 @@ exec_map_first_page(imgp) rv = vm_pager_get_pages(object, ma, initial_pagein, 0); ma[0] = vm_page_lookup(object, 0); if ((rv != VM_PAGER_OK) || (ma[0] == NULL)) { - if (ma[0]) { - 
vm_page_lock_queues(); + if (ma[0] != NULL) { + vm_page_lock(ma[0]); vm_page_free(ma[0]); - vm_page_unlock_queues(); + vm_page_unlock(ma[0]); } VM_OBJECT_UNLOCK(object); return (EIO); } } - vm_page_lock_queues(); + vm_page_lock(ma[0]); vm_page_hold(ma[0]); - vm_page_unlock_queues(); + vm_page_unlock(ma[0]); vm_page_wakeup(ma[0]); VM_OBJECT_UNLOCK(object); @@ -979,9 +979,9 @@ exec_unmap_first_page(imgp) m = sf_buf_page(imgp->firstpage); sf_buf_free(imgp->firstpage); imgp->firstpage = NULL; - vm_page_lock_queues(); + vm_page_lock(m); vm_page_unhold(m); - vm_page_unlock_queues(); + vm_page_unlock(m); } } diff --git a/sys/kern/kern_gzio.c b/sys/kern/kern_gzio.c index 1f9c387dffa..c1b2b3f7025 100644 --- a/sys/kern/kern_gzio.c +++ b/sys/kern/kern_gzio.c @@ -166,7 +166,7 @@ gzFile gz_open (path, mode, vp) 0 /*xflags*/, OS_CODE); if ((error = vn_rdwr(UIO_WRITE, s->file, buf, GZ_HEADER_LEN, s->outoff, - UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, curproc->p_ucred, + UIO_SYSSPACE, IO_UNIT, curproc->p_ucred, NOCRED, &resid, curthread))) { s->outoff += GZ_HEADER_LEN - resid; return destroy(s), (gzFile)Z_NULL; @@ -234,7 +234,7 @@ int ZEXPORT gzwrite (file, buf, len) s->stream.next_out = s->outbuf; vfslocked = VFS_LOCK_GIANT(s->file->v_mount); error = vn_rdwr_inchunks(UIO_WRITE, s->file, s->outbuf, Z_BUFSIZE, - curoff, UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, + curoff, UIO_SYSSPACE, IO_UNIT, curproc->p_ucred, NOCRED, &resid, curthread); VFS_UNLOCK_GIANT(vfslocked); if (error) { @@ -291,7 +291,7 @@ local int do_flush (file, flush) if (len != 0) { vfslocked = VFS_LOCK_GIANT(s->file->v_mount); error = vn_rdwr_inchunks(UIO_WRITE, s->file, s->outbuf, len, curoff, - UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, curproc->p_ucred, + UIO_SYSSPACE, IO_UNIT, curproc->p_ucred, NOCRED, &resid, curthread); VFS_UNLOCK_GIANT(vfslocked); if (error) { @@ -350,7 +350,7 @@ local void putU32 (s, x) xx = x; #endif vn_rdwr(UIO_WRITE, s->file, (caddr_t)&xx, sizeof(xx), curoff, - UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, 
curproc->p_ucred, + UIO_SYSSPACE, IO_UNIT, curproc->p_ucred, NOCRED, &resid, curthread); s->outoff += sizeof(xx) - resid; } diff --git a/sys/kern/kern_ktr.c b/sys/kern/kern_ktr.c index 6117cd6cbba..9586ae65278 100644 --- a/sys/kern/kern_ktr.c +++ b/sys/kern/kern_ktr.c @@ -199,9 +199,8 @@ ktr_tracepoint(u_int mask, const char *file, int line, const char *format, struct ktr_entry *entry; #ifdef KTR_ALQ struct ale *ale = NULL; -#else - int newindex, saveindex; #endif + int newindex, saveindex; #if defined(KTR_VERBOSE) || defined(KTR_ALQ) struct thread *td; #endif @@ -221,27 +220,30 @@ ktr_tracepoint(u_int mask, const char *file, int line, const char *format, td->td_pflags |= TDP_INKTR; #endif #ifdef KTR_ALQ - if (ktr_alq_enabled && - td->td_critnest == 0 && - (td->td_flags & TDF_IDLETD) == 0 && - td != ald_thread) { - if (ktr_alq_max && ktr_alq_cnt > ktr_alq_max) - goto done; - if ((ale = alq_get(ktr_alq, ALQ_NOWAIT)) == NULL) { - ktr_alq_failed++; + if (ktr_alq_enabled) { + if (td->td_critnest == 0 && + (td->td_flags & TDF_IDLETD) == 0 && + td != ald_thread) { + if (ktr_alq_max && ktr_alq_cnt > ktr_alq_max) + goto done; + if ((ale = alq_get(ktr_alq, ALQ_NOWAIT)) == NULL) { + ktr_alq_failed++; + goto done; + } + ktr_alq_cnt++; + entry = (struct ktr_entry *)ale->ae_data; + } else { goto done; } - ktr_alq_cnt++; - entry = (struct ktr_entry *)ale->ae_data; } else - goto done; -#else - do { - saveindex = ktr_idx; - newindex = (saveindex + 1) & (KTR_ENTRIES - 1); - } while (atomic_cmpset_rel_int(&ktr_idx, saveindex, newindex) == 0); - entry = &ktr_buf[saveindex]; #endif + { + do { + saveindex = ktr_idx; + newindex = (saveindex + 1) & (KTR_ENTRIES - 1); + } while (atomic_cmpset_rel_int(&ktr_idx, saveindex, newindex) == 0); + entry = &ktr_buf[saveindex]; + } entry->ktr_timestamp = KTR_TIME; entry->ktr_cpu = cpu; entry->ktr_thread = curthread; @@ -271,7 +273,7 @@ ktr_tracepoint(u_int mask, const char *file, int line, const char *format, entry->ktr_parms[4] = arg5; 
entry->ktr_parms[5] = arg6; #ifdef KTR_ALQ - if (ale) + if (ktr_alq_enabled && ale) alq_post(ktr_alq, ale); done: #endif @@ -295,7 +297,9 @@ DB_SHOW_COMMAND(ktr, db_ktr_all) tstate.cur = (ktr_idx - 1) & (KTR_ENTRIES - 1); tstate.first = -1; - db_ktr_verbose = index(modif, 'v') != NULL; + db_ktr_verbose = 0; + db_ktr_verbose |= (index(modif, 'v') != NULL) ? 2 : 0; + db_ktr_verbose |= (index(modif, 'V') != NULL) ? 1 : 0; /* just timestap please */ if (index(modif, 'a') != NULL) { db_disable_pager(); while (cncheckc() != -1) @@ -329,9 +333,11 @@ db_mach_vtrace(void) db_printf(":cpu%d", kp->ktr_cpu); #endif db_printf(")"); - if (db_ktr_verbose) { - db_printf(" %10.10lld %s.%d", (long long)kp->ktr_timestamp, - kp->ktr_file, kp->ktr_line); + if (db_ktr_verbose >= 1) { + db_printf(" %10.10lld", (long long)kp->ktr_timestamp); + } + if (db_ktr_verbose >= 2) { + db_printf(" %s.%d", kp->ktr_file, kp->ktr_line); } db_printf(": "); db_printf(kp->ktr_desc, kp->ktr_parms[0], kp->ktr_parms[1], diff --git a/sys/kern/kern_ntptime.c b/sys/kern/kern_ntptime.c index 2d877404cf5..c427ac45134 100644 --- a/sys/kern/kern_ntptime.c +++ b/sys/kern/kern_ntptime.c @@ -38,6 +38,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include @@ -198,22 +199,11 @@ static long pps_errcnt; /* calibration errors */ static void ntp_init(void); static void hardupdate(long offset); static void ntp_gettime1(struct ntptimeval *ntvp); +static int ntp_is_time_error(void); -static void -ntp_gettime1(struct ntptimeval *ntvp) +static int +ntp_is_time_error(void) { - struct timespec atv; /* nanosecond time */ - - GIANT_REQUIRED; - - nanotime(&atv); - ntvp->time.tv_sec = atv.tv_sec; - ntvp->time.tv_nsec = atv.tv_nsec; - ntvp->maxerror = time_maxerror; - ntvp->esterror = time_esterror; - ntvp->tai = time_tai; - ntvp->time_state = time_state; - /* * Status word error decode. If any of these conditions occur, * an error is returned, instead of the status word. 
Most @@ -243,6 +233,27 @@ ntp_gettime1(struct ntptimeval *ntvp) */ (time_status & STA_PPSFREQ && time_status & (STA_PPSWANDER | STA_PPSERROR))) + return (1); + + return (0); +} + +static void +ntp_gettime1(struct ntptimeval *ntvp) +{ + struct timespec atv; /* nanosecond time */ + + GIANT_REQUIRED; + + nanotime(&atv); + ntvp->time.tv_sec = atv.tv_sec; + ntvp->time.tv_nsec = atv.tv_nsec; + ntvp->maxerror = time_maxerror; + ntvp->esterror = time_esterror; + ntvp->tai = time_tai; + ntvp->time_state = time_state; + + if (ntp_is_time_error()) ntvp->time_state = TIME_ERROR; } @@ -442,21 +453,11 @@ ntp_adjtime(struct thread *td, struct ntp_adjtime_args *uap) if (error) goto done2; - /* - * Status word error decode. See comments in - * ntp_gettime() routine. - */ - if ((time_status & (STA_UNSYNC | STA_CLOCKERR)) || - (time_status & (STA_PPSFREQ | STA_PPSTIME) && - !(time_status & STA_PPSSIGNAL)) || - (time_status & STA_PPSTIME && - time_status & STA_PPSJITTER) || - (time_status & STA_PPSFREQ && - time_status & (STA_PPSWANDER | STA_PPSERROR))) { + if (ntp_is_time_error()) td->td_retval[0] = TIME_ERROR; - } else { + else td->td_retval[0] = time_state; - } + done2: mtx_unlock(&Giant); return (error); @@ -972,3 +973,67 @@ kern_adjtime(struct thread *td, struct timeval *delta, struct timeval *olddelta) return (0); } +static struct callout resettodr_callout; +static int resettodr_period = 1800; + +static void +periodic_resettodr(void *arg __unused) +{ + + if (!ntp_is_time_error()) { + mtx_lock(&Giant); + resettodr(); + mtx_unlock(&Giant); + } + if (resettodr_period > 0) + callout_schedule(&resettodr_callout, resettodr_period * hz); +} + +static void +shutdown_resettodr(void *arg __unused, int howto __unused) +{ + + callout_drain(&resettodr_callout); + if (resettodr_period > 0 && !ntp_is_time_error()) { + mtx_lock(&Giant); + resettodr(); + mtx_unlock(&Giant); + } +} + +static int +sysctl_resettodr_period(SYSCTL_HANDLER_ARGS) +{ + int error; + + error = sysctl_handle_int(oidp, 
oidp->oid_arg1, oidp->oid_arg2, req); + if (error || !req->newptr) + return (error); + if (resettodr_period == 0) + callout_stop(&resettodr_callout); + else + callout_reset(&resettodr_callout, resettodr_period * hz, + periodic_resettodr, NULL); + return (0); +} + +SYSCTL_PROC(_machdep, OID_AUTO, rtc_save_period, CTLTYPE_INT|CTLFLAG_RW, + &resettodr_period, 1800, sysctl_resettodr_period, "I", + "Save system time to RTC with this period (in seconds)"); +TUNABLE_INT("machdep.rtc_save_period", &resettodr_period); + +static void +start_periodic_resettodr(void *arg __unused) +{ + + EVENTHANDLER_REGISTER(shutdown_pre_sync, shutdown_resettodr, NULL, + SHUTDOWN_PRI_FIRST); + callout_init(&resettodr_callout, 1); + if (resettodr_period == 0) + return; + callout_reset(&resettodr_callout, resettodr_period * hz, + periodic_resettodr, NULL); +} + +SYSINIT(periodic_resettodr, SI_SUB_RUN_SCHEDULER, SI_ORDER_ANY - 1, + start_periodic_resettodr, NULL); diff --git a/sys/kern/kern_proc.c b/sys/kern/kern_proc.c index c41909d9dc5..055c844fcfe 100644 --- a/sys/kern/kern_proc.c +++ b/sys/kern/kern_proc.c @@ -42,6 +42,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include @@ -79,6 +80,11 @@ __FBSDID("$FreeBSD$"); #include #include +#ifdef COMPAT_FREEBSD32 +#include +#include +#endif + SDT_PROVIDER_DEFINE(proc); SDT_PROBE_DEFINE(proc, kernel, ctor, entry); SDT_PROBE_ARGTYPE(proc, kernel, ctor, entry, 0, "struct proc *"); @@ -145,6 +151,9 @@ int kstack_pages = KSTACK_PAGES; SYSCTL_INT(_kern, OID_AUTO, kstack_pages, CTLFLAG_RD, &kstack_pages, 0, ""); CTASSERT(sizeof(struct kinfo_proc) == KINFO_PROC_SIZE); +#ifdef COMPAT_FREEBSD32 +CTASSERT(sizeof(struct kinfo_proc32) == KINFO_PROC32_SIZE); +#endif /* * Initialize global process hashing structures. 
@@ -892,7 +901,7 @@ fill_kinfo_thread(struct thread *td, struct kinfo_proc *kp, int preferthread) kp->ki_pri.pri_user = td->td_user_pri; if (preferthread) { - kp->ki_runtime = cputick2usec(td->td_runtime); + kp->ki_runtime = cputick2usec(td->td_rux.rux_runtime); kp->ki_pctcpu = sched_pctcpu(td); kp->ki_estcpu = td->td_estcpu; } @@ -969,6 +978,126 @@ zpfind(pid_t pid) #define KERN_PROC_ZOMBMASK 0x3 #define KERN_PROC_NOTHREADS 0x4 +#ifdef COMPAT_FREEBSD32 + +/* + * This function is typically used to copy out the kernel address, so + * it can be replaced by assignment of zero. + */ +static inline uint32_t +ptr32_trim(void *ptr) +{ + uintptr_t uptr; + + uptr = (uintptr_t)ptr; + return ((uptr > UINT_MAX) ? 0 : uptr); +} + +#define PTRTRIM_CP(src,dst,fld) \ + do { (dst).fld = ptr32_trim((src).fld); } while (0) + +static void +freebsd32_kinfo_proc_out(const struct kinfo_proc *ki, struct kinfo_proc32 *ki32) +{ + int i; + + bzero(ki32, sizeof(struct kinfo_proc32)); + ki32->ki_structsize = sizeof(struct kinfo_proc32); + CP(*ki, *ki32, ki_layout); + PTRTRIM_CP(*ki, *ki32, ki_args); + PTRTRIM_CP(*ki, *ki32, ki_paddr); + PTRTRIM_CP(*ki, *ki32, ki_addr); + PTRTRIM_CP(*ki, *ki32, ki_tracep); + PTRTRIM_CP(*ki, *ki32, ki_textvp); + PTRTRIM_CP(*ki, *ki32, ki_fd); + PTRTRIM_CP(*ki, *ki32, ki_vmspace); + PTRTRIM_CP(*ki, *ki32, ki_wchan); + CP(*ki, *ki32, ki_pid); + CP(*ki, *ki32, ki_ppid); + CP(*ki, *ki32, ki_pgid); + CP(*ki, *ki32, ki_tpgid); + CP(*ki, *ki32, ki_sid); + CP(*ki, *ki32, ki_tsid); + CP(*ki, *ki32, ki_jobc); + CP(*ki, *ki32, ki_tdev); + CP(*ki, *ki32, ki_siglist); + CP(*ki, *ki32, ki_sigmask); + CP(*ki, *ki32, ki_sigignore); + CP(*ki, *ki32, ki_sigcatch); + CP(*ki, *ki32, ki_uid); + CP(*ki, *ki32, ki_ruid); + CP(*ki, *ki32, ki_svuid); + CP(*ki, *ki32, ki_rgid); + CP(*ki, *ki32, ki_svgid); + CP(*ki, *ki32, ki_ngroups); + for (i = 0; i < KI_NGROUPS; i++) + CP(*ki, *ki32, ki_groups[i]); + CP(*ki, *ki32, ki_size); + CP(*ki, *ki32, ki_rssize); + CP(*ki, *ki32, ki_swrss); + 
CP(*ki, *ki32, ki_tsize); + CP(*ki, *ki32, ki_dsize); + CP(*ki, *ki32, ki_ssize); + CP(*ki, *ki32, ki_xstat); + CP(*ki, *ki32, ki_acflag); + CP(*ki, *ki32, ki_pctcpu); + CP(*ki, *ki32, ki_estcpu); + CP(*ki, *ki32, ki_slptime); + CP(*ki, *ki32, ki_swtime); + CP(*ki, *ki32, ki_runtime); + TV_CP(*ki, *ki32, ki_start); + TV_CP(*ki, *ki32, ki_childtime); + CP(*ki, *ki32, ki_flag); + CP(*ki, *ki32, ki_kiflag); + CP(*ki, *ki32, ki_traceflag); + CP(*ki, *ki32, ki_stat); + CP(*ki, *ki32, ki_nice); + CP(*ki, *ki32, ki_lock); + CP(*ki, *ki32, ki_rqindex); + CP(*ki, *ki32, ki_oncpu); + CP(*ki, *ki32, ki_lastcpu); + bcopy(ki->ki_ocomm, ki32->ki_ocomm, OCOMMLEN + 1); + bcopy(ki->ki_wmesg, ki32->ki_wmesg, WMESGLEN + 1); + bcopy(ki->ki_login, ki32->ki_login, LOGNAMELEN + 1); + bcopy(ki->ki_lockname, ki32->ki_lockname, LOCKNAMELEN + 1); + bcopy(ki->ki_comm, ki32->ki_comm, COMMLEN + 1); + bcopy(ki->ki_emul, ki32->ki_emul, KI_EMULNAMELEN + 1); + CP(*ki, *ki32, ki_cr_flags); + CP(*ki, *ki32, ki_jid); + CP(*ki, *ki32, ki_numthreads); + CP(*ki, *ki32, ki_tid); + CP(*ki, *ki32, ki_pri); + freebsd32_rusage_out(&ki->ki_rusage, &ki32->ki_rusage); + freebsd32_rusage_out(&ki->ki_rusage_ch, &ki32->ki_rusage_ch); + PTRTRIM_CP(*ki, *ki32, ki_pcb); + PTRTRIM_CP(*ki, *ki32, ki_kstack); + PTRTRIM_CP(*ki, *ki32, ki_udata); + CP(*ki, *ki32, ki_sflag); + CP(*ki, *ki32, ki_tdflags); +} + +static int +sysctl_out_proc_copyout(struct kinfo_proc *ki, struct sysctl_req *req) +{ + struct kinfo_proc32 ki32; + int error; + + if (req->flags & SCTL_MASK32) { + freebsd32_kinfo_proc_out(ki, &ki32); + error = SYSCTL_OUT(req, &ki32, sizeof(struct kinfo_proc32)); + } else + error = SYSCTL_OUT(req, ki, sizeof(struct kinfo_proc)); + return (error); +} +#else +static int +sysctl_out_proc_copyout(struct kinfo_proc *ki, struct sysctl_req *req) +{ + + return (SYSCTL_OUT(req, ki, sizeof(struct kinfo_proc))); +} +#endif + /* * Must be called with the process locked and will return with it unlocked. 
*/ @@ -986,13 +1115,11 @@ sysctl_out_proc(struct proc *p, struct sysctl_req *req, int flags) fill_kinfo_proc(p, &kinfo_proc); if (flags & KERN_PROC_NOTHREADS) - error = SYSCTL_OUT(req, (caddr_t)&kinfo_proc, - sizeof(kinfo_proc)); + error = sysctl_out_proc_copyout(&kinfo_proc, req); else { FOREACH_THREAD_IN_PROC(p, td) { fill_kinfo_thread(td, &kinfo_proc, 1); - error = SYSCTL_OUT(req, (caddr_t)&kinfo_proc, - sizeof(kinfo_proc)); + error = sysctl_out_proc_copyout(&kinfo_proc, req); if (error) break; } diff --git a/sys/kern/kern_resource.c b/sys/kern/kern_resource.c index f8678395904..0bc78d0ae2e 100644 --- a/sys/kern/kern_resource.c +++ b/sys/kern/kern_resource.c @@ -76,6 +76,7 @@ static void calcru1(struct proc *p, struct rusage_ext *ruxp, struct timeval *up, struct timeval *sp); static int donice(struct thread *td, struct proc *chgp, int n); static struct uidinfo *uilookup(uid_t uid); +static void ruxagg(struct proc *p, struct thread *td); /* * Resource controls and accounting. @@ -629,9 +630,7 @@ lim_cb(void *arg) return; PROC_SLOCK(p); FOREACH_THREAD_IN_PROC(p, td) { - thread_lock(td); - ruxagg(&p->p_rux, td); - thread_unlock(td); + ruxagg(p, td); } PROC_SUNLOCK(p); if (p->p_rux.rux_runtime > p->p_cpulimit * cpu_tickrate()) { @@ -842,9 +841,7 @@ calcru(struct proc *p, struct timeval *up, struct timeval *sp) FOREACH_THREAD_IN_PROC(p, td) { if (td->td_incruntime == 0) continue; - thread_lock(td); - ruxagg(&p->p_rux, td); - thread_unlock(td); + ruxagg(p, td); } calcru1(p, &p->p_rux, up, sp); } @@ -945,10 +942,7 @@ getrusage(td, uap) } int -kern_getrusage(td, who, rup) - struct thread *td; - int who; - struct rusage *rup; +kern_getrusage(struct thread *td, int who, struct rusage *rup) { struct proc *p; int error; @@ -967,6 +961,16 @@ kern_getrusage(td, who, rup) calccru(p, &rup->ru_utime, &rup->ru_stime); break; + case RUSAGE_THREAD: + PROC_SLOCK(p); + ruxagg(p, td); + PROC_SUNLOCK(p); + thread_lock(td); + *rup = td->td_ru; + calcru1(p, &td->td_rux, &rup->ru_utime, 
&rup->ru_stime); + thread_unlock(td); + break; + default: error = EINVAL; } @@ -1007,7 +1011,7 @@ ruadd(struct rusage *ru, struct rusage_ext *rux, struct rusage *ru2, * Aggregate tick counts into the proc's rusage_ext. */ void -ruxagg(struct rusage_ext *rux, struct thread *td) +ruxagg_locked(struct rusage_ext *rux, struct thread *td) { THREAD_LOCK_ASSERT(td, MA_OWNED); @@ -1016,10 +1020,20 @@ ruxagg(struct rusage_ext *rux, struct thread *td) rux->rux_uticks += td->td_uticks; rux->rux_sticks += td->td_sticks; rux->rux_iticks += td->td_iticks; +} + +static void +ruxagg(struct proc *p, struct thread *td) +{ + + thread_lock(td); + ruxagg_locked(&p->p_rux, td); + ruxagg_locked(&td->td_rux, td); td->td_incruntime = 0; td->td_uticks = 0; td->td_iticks = 0; td->td_sticks = 0; + thread_unlock(td); } /* @@ -1036,9 +1050,7 @@ rufetch(struct proc *p, struct rusage *ru) *ru = p->p_ru; if (p->p_numthreads > 0) { FOREACH_THREAD_IN_PROC(p, td) { - thread_lock(td); - ruxagg(&p->p_rux, td); - thread_unlock(td); + ruxagg(p, td); rucollect(ru, &td->td_ru); } } diff --git a/sys/kern/kern_shutdown.c b/sys/kern/kern_shutdown.c index 9b28af40c28..bd3c7aa478c 100644 --- a/sys/kern/kern_shutdown.c +++ b/sys/kern/kern_shutdown.c @@ -62,7 +62,7 @@ __FBSDID("$FreeBSD$"); #include #include #include -#include /* smp_active */ +#include #include #include @@ -485,15 +485,26 @@ static void shutdown_reset(void *junk, int howto) { - /* - * Disable interrupts on CPU0 in order to avoid fast handlers - * to preempt the stopping process and to deadlock against other - * CPUs. 
- */ - spinlock_enter(); - printf("Rebooting...\n"); DELAY(1000000); /* wait 1 sec for printf's to complete and be read */ + + /* + * Acquiring smp_ipi_mtx here has a double effect: + * - it disables interrupts avoiding CPU0 preemption + * by fast handlers (thus deadlocking against other CPUs) + * - it avoids deadlocks against smp_rendezvous() or, more + * generally, threads busy-waiting, with this spinlock held, + * and waiting for responses by threads on other CPUs + * (ie. smp_tlb_shootdown()). + * + * For the !SMP case it just needs to handle the former problem. + */ +#ifdef SMP + mtx_lock_spin(&smp_ipi_mtx); +#else + spinlock_enter(); +#endif + /* cpu_boot(howto); */ /* doesn't do anything at the moment */ cpu_reset(); /* NOTREACHED */ /* assuming reset worked */ diff --git a/sys/kern/kern_sig.c b/sys/kern/kern_sig.c index 706433d3604..789bb612529 100644 --- a/sys/kern/kern_sig.c +++ b/sys/kern/kern_sig.c @@ -3004,8 +3004,9 @@ expand_name(const char *name, uid_t uid, pid_t pid, struct thread *td, char *temp; size_t i; int indexpos; - char hostname[MAXHOSTNAMELEN]; + char *hostname; + hostname = NULL; format = corefilename; temp = malloc(MAXPATHLEN, M_TEMP, M_NOWAIT | M_ZERO); if (temp == NULL) @@ -3021,8 +3022,21 @@ expand_name(const char *name, uid_t uid, pid_t pid, struct thread *td, sbuf_putc(&sb, '%'); break; case 'H': /* hostname */ + if (hostname == NULL) { + hostname = malloc(MAXHOSTNAMELEN, + M_TEMP, M_NOWAIT); + if (hostname == NULL) { + log(LOG_ERR, + "pid %ld (%s), uid (%lu): " + "unable to alloc memory " + "for corefile hostname\n", + (long)pid, name, + (u_long)uid); + goto nomem; + } + } getcredhostname(td->td_ucred, hostname, - sizeof(hostname)); + MAXHOSTNAMELEN); sbuf_printf(&sb, "%s", hostname); break; case 'I': /* autoincrementing index */ @@ -3048,15 +3062,17 @@ expand_name(const char *name, uid_t uid, pid_t pid, struct thread *td, sbuf_putc(&sb, format[i]); } } + free(hostname, M_TEMP); #ifdef COMPRESS_USER_CORES if (compress) { 
sbuf_printf(&sb, GZ_SUFFIX); } #endif if (sbuf_overflowed(&sb)) { - sbuf_delete(&sb); log(LOG_ERR, "pid %ld (%s), uid (%lu): corename is too " "long\n", (long)pid, name, (u_long)uid); +nomem: + sbuf_delete(&sb); free(temp, M_TEMP); return (NULL); } diff --git a/sys/kern/kern_thread.c b/sys/kern/kern_thread.c index 9be4c2f3eb3..d7a9199c3e5 100644 --- a/sys/kern/kern_thread.c +++ b/sys/kern/kern_thread.c @@ -398,7 +398,7 @@ thread_exit(void) /* * The test below is NOT true if we are the - * sole exiting thread. P_STOPPED_SNGL is unset + * sole exiting thread. P_STOPPED_SINGLE is unset * in exit1() after it is the only survivor. */ if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) { @@ -431,8 +431,7 @@ thread_exit(void) #endif PROC_UNLOCK(p); thread_lock(td); - /* Save our tick information with both the thread and proc locked */ - ruxagg(&p->p_rux, td); + ruxagg_locked(&p->p_rux, td); PROC_SUNLOCK(p); td->td_state = TDS_INACTIVE; #ifdef WITNESS diff --git a/sys/kern/subr_bufring.c b/sys/kern/subr_bufring.c index 63938eaca44..4cd39292622 100644 --- a/sys/kern/subr_bufring.c +++ b/sys/kern/subr_bufring.c @@ -1,32 +1,29 @@ -/************************************************************************** - * - * Copyright (c) 2007,2008 Kip Macy kmacy@freebsd.org +/*- + * Copyright (c) 2007, 2008 Kip Macy * All rights reserved. * * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. * - * 1. 
Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. The name of Kip Macy nor the names of other - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * - * - ***************************************************************************/ + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + #include __FBSDID("$FreeBSD$"); diff --git a/sys/kern/subr_bus.c b/sys/kern/subr_bus.c index 6e939c0ed77..9d3292aac45 100644 --- a/sys/kern/subr_bus.c +++ b/sys/kern/subr_bus.c @@ -545,15 +545,16 @@ devctl_queue_data(char *data) struct proc *p; if (strlen(data) == 0) - return; + goto out; if (devctl_queue_length == 0) - return; + goto out; n1 = malloc(sizeof(*n1), M_BUS, M_NOWAIT); if (n1 == NULL) - return; + goto out; n1->dei_data = data; mtx_lock(&devsoftc.mtx); if (devctl_queue_length == 0) { + mtx_unlock(&devsoftc.mtx); free(n1->dei_data, M_BUS); free(n1, M_BUS); return; @@ -577,6 +578,14 @@ devctl_queue_data(char *data) psignal(p, SIGIO); PROC_UNLOCK(p); } + return; +out: + /* + * We have to free data on all error paths since the caller + * assumes it will be free'd when this item is dequeued. + */ + free(data, M_BUS); + return; } /** diff --git a/sys/kern/subr_taskqueue.c b/sys/kern/subr_taskqueue.c index 8405b3d642b..3b1826984db 100644 --- a/sys/kern/subr_taskqueue.c +++ b/sys/kern/subr_taskqueue.c @@ -51,7 +51,6 @@ struct taskqueue { const char *tq_name; taskqueue_enqueue_fn tq_enqueue; void *tq_context; - struct task *tq_running; struct mtx tq_mutex; struct thread **tq_threads; int tq_tcount; @@ -233,13 +232,13 @@ taskqueue_run(struct taskqueue *queue) STAILQ_REMOVE_HEAD(&queue->tq_queue, ta_link); pending = task->ta_pending; task->ta_pending = 0; - queue->tq_running = task; + task->ta_flags |= TA_FLAGS_RUNNING; TQ_UNLOCK(queue); task->ta_func(task->ta_context, pending); TQ_LOCK(queue); - queue->tq_running = NULL; + task->ta_flags &= ~TA_FLAGS_RUNNING; wakeup(task); } @@ -256,14 +255,16 @@ taskqueue_drain(struct taskqueue *queue, struct task *task) { if (queue->tq_spin) { /* XXX */ mtx_lock_spin(&queue->tq_mutex); - while (task->ta_pending != 0 || task == queue->tq_running) + while (task->ta_pending != 0 || + (task->ta_flags & TA_FLAGS_RUNNING) != 0) msleep_spin(task, &queue->tq_mutex, "-", 0); mtx_unlock_spin(&queue->tq_mutex); } else { 
WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, __func__); mtx_lock(&queue->tq_mutex); - while (task->ta_pending != 0 || task == queue->tq_running) + while (task->ta_pending != 0 || + (task->ta_flags & TA_FLAGS_RUNNING) != 0) msleep(task, &queue->tq_mutex, PWAIT, "-", 0); mtx_unlock(&queue->tq_mutex); } diff --git a/sys/kern/subr_uio.c b/sys/kern/subr_uio.c index 725b1a83f9b..783a8d625f3 100644 --- a/sys/kern/subr_uio.c +++ b/sys/kern/subr_uio.c @@ -104,9 +104,11 @@ retry: if ((user_pg = vm_page_lookup(uobject, upindex)) != NULL) { if (vm_page_sleep_if_busy(user_pg, TRUE, "vm_pgmoveco")) goto retry; + vm_page_lock(user_pg); vm_page_lock_queues(); pmap_remove_all(user_pg); vm_page_free(user_pg); + vm_page_unlock(user_pg); } else { /* * Even if a physical page does not exist in the diff --git a/sys/kern/subr_witness.c b/sys/kern/subr_witness.c index 5b7d5651750..ef1bc398dfd 100644 --- a/sys/kern/subr_witness.c +++ b/sys/kern/subr_witness.c @@ -596,6 +596,15 @@ static struct witness_order_list_entry order_lists[] = { { "vnode interlock", &lock_class_mtx_sleep }, { "cdev", &lock_class_mtx_sleep }, { NULL, NULL }, + /* + * VM + * + */ + { "vm object", &lock_class_mtx_sleep }, + { "page lock", &lock_class_mtx_sleep }, + { "vm page queue mutex", &lock_class_mtx_sleep }, + { "pmap", &lock_class_mtx_sleep }, + { NULL, NULL }, /* * kqueue/VFS interaction */ diff --git a/sys/kern/sys_pipe.c b/sys/kern/sys_pipe.c index e098648a669..7130c263e48 100644 --- a/sys/kern/sys_pipe.c +++ b/sys/kern/sys_pipe.c @@ -773,10 +773,12 @@ pipe_build_write_buffer(wpipe, uio) */ race: if (vm_fault_quick((caddr_t)addr, VM_PROT_READ) < 0) { - vm_page_lock_queues(); - for (j = 0; j < i; j++) + + for (j = 0; j < i; j++) { + vm_page_lock(wpipe->pipe_map.ms[j]); vm_page_unhold(wpipe->pipe_map.ms[j]); - vm_page_unlock_queues(); + vm_page_unlock(wpipe->pipe_map.ms[j]); + } return (EFAULT); } wpipe->pipe_map.ms[i] = pmap_extract_and_hold(pmap, addr, @@ -816,11 +818,11 @@ pipe_destroy_write_buffer(wpipe) 
int i; PIPE_LOCK_ASSERT(wpipe, MA_OWNED); - vm_page_lock_queues(); for (i = 0; i < wpipe->pipe_map.npages; i++) { + vm_page_lock(wpipe->pipe_map.ms[i]); vm_page_unhold(wpipe->pipe_map.ms[i]); + vm_page_unlock(wpipe->pipe_map.ms[i]); } - vm_page_unlock_queues(); wpipe->pipe_map.npages = 0; } diff --git a/sys/kern/sys_process.c b/sys/kern/sys_process.c index f5671d9c608..d8cc4f01d2d 100644 --- a/sys/kern/sys_process.c +++ b/sys/kern/sys_process.c @@ -328,9 +328,9 @@ proc_rwmem(struct proc *p, struct uio *uio) /* * Hold the page in memory. */ - vm_page_lock_queues(); + vm_page_lock(m); vm_page_hold(m); - vm_page_unlock_queues(); + vm_page_unlock(m); /* * We're done with tmap now. @@ -349,9 +349,9 @@ proc_rwmem(struct proc *p, struct uio *uio) /* * Release the page. */ - vm_page_lock_queues(); + vm_page_lock(m); vm_page_unhold(m); - vm_page_unlock_queues(); + vm_page_unlock(m); } while (error == 0 && uio->uio_resid > 0); diff --git a/sys/kern/uipc_cow.c b/sys/kern/uipc_cow.c index 52988dd7e79..853b70b3bf0 100644 --- a/sys/kern/uipc_cow.c +++ b/sys/kern/uipc_cow.c @@ -80,7 +80,7 @@ socow_iodone(void *addr, void *args) pp = sf_buf_page(sf); sf_buf_free(sf); /* remove COW mapping */ - vm_page_lock_queues(); + vm_page_lock(pp); vm_page_cowclear(pp); vm_page_unwire(pp, 0); /* @@ -90,7 +90,7 @@ socow_iodone(void *addr, void *args) */ if (pp->wire_count == 0 && pp->object == NULL) vm_page_free(pp); - vm_page_unlock_queues(); + vm_page_unlock(pp); socow_stats.iodone++; } @@ -128,10 +128,10 @@ socow_setup(struct mbuf *m0, struct uio *uio) /* * set up COW */ - vm_page_lock_queues(); + vm_page_lock(pp); if (vm_page_cowsetup(pp) != 0) { vm_page_unhold(pp); - vm_page_unlock_queues(); + vm_page_unlock(pp); return (0); } @@ -140,14 +140,13 @@ socow_setup(struct mbuf *m0, struct uio *uio) */ vm_page_wire(pp); vm_page_unhold(pp); - vm_page_unlock_queues(); - + vm_page_unlock(pp); /* * Allocate an sf buf */ sf = sf_buf_alloc(pp, SFB_CATCH); - if (!sf) { - vm_page_lock_queues(); + if (sf 
== NULL) { + vm_page_lock(pp); vm_page_cowclear(pp); vm_page_unwire(pp, 0); /* @@ -157,7 +156,7 @@ socow_setup(struct mbuf *m0, struct uio *uio) */ if (pp->wire_count == 0 && pp->object == NULL) vm_page_free(pp); - vm_page_unlock_queues(); + vm_page_unlock(pp); socow_stats.fail_sf_buf++; return(0); } diff --git a/sys/kern/uipc_mbuf.c b/sys/kern/uipc_mbuf.c index bc0e88d0a47..85e0e395528 100644 --- a/sys/kern/uipc_mbuf.c +++ b/sys/kern/uipc_mbuf.c @@ -948,9 +948,8 @@ m_adj(struct mbuf *mp, int req_len) len = 0; } } - m = mp; if (mp->m_flags & M_PKTHDR) - m->m_pkthdr.len -= (req_len - len); + mp->m_pkthdr.len -= (req_len - len); } else { /* * Trim from tail. Scan the mbuf chain, diff --git a/sys/kern/uipc_syscalls.c b/sys/kern/uipc_syscalls.c index a14be72064e..adcb8528743 100644 --- a/sys/kern/uipc_syscalls.c +++ b/sys/kern/uipc_syscalls.c @@ -1715,7 +1715,7 @@ sf_buf_mext(void *addr, void *args) m = sf_buf_page(args); sf_buf_free(args); - vm_page_lock_queues(); + vm_page_lock(m); vm_page_unwire(m, 0); /* * Check for the object going away on us. This can @@ -1724,7 +1724,7 @@ sf_buf_mext(void *addr, void *args) */ if (m->wire_count == 0 && m->object == NULL) vm_page_free(m); - vm_page_unlock_queues(); + vm_page_unlock(m); if (addr == NULL) return; sfs = addr; @@ -2108,7 +2108,7 @@ retry_space: mbstat.sf_iocnt++; } if (error) { - vm_page_lock_queues(); + vm_page_lock(pg); vm_page_unwire(pg, 0); /* * See if anyone else might know about @@ -2117,10 +2117,9 @@ retry_space: */ if (pg->wire_count == 0 && pg->valid == 0 && pg->busy == 0 && !(pg->oflags & VPO_BUSY) && - pg->hold_count == 0) { + pg->hold_count == 0) vm_page_free(pg); - } - vm_page_unlock_queues(); + vm_page_unlock(pg); VM_OBJECT_UNLOCK(obj); if (error == EAGAIN) error = 0; /* not a real error */ @@ -2134,14 +2133,11 @@ retry_space: if ((sf = sf_buf_alloc(pg, (mnw ? 
SFB_NOWAIT : SFB_CATCH))) == NULL) { mbstat.sf_allocfail++; - vm_page_lock_queues(); + vm_page_lock(pg); vm_page_unwire(pg, 0); - /* - * XXX: Not same check as above!? - */ - if (pg->wire_count == 0 && pg->object == NULL) - vm_page_free(pg); - vm_page_unlock_queues(); + KASSERT(pg->object != NULL, + ("kern_sendfile: object disappeared")); + vm_page_unlock(pg); error = (mnw ? EAGAIN : EINTR); break; } diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c index 4e9cfc69957..ec8ad67fab3 100644 --- a/sys/kern/vfs_bio.c +++ b/sys/kern/vfs_bio.c @@ -215,6 +215,14 @@ SYSCTL_LONG(_vfs, OID_AUTO, notbufdflashes, CTLFLAG_RD, &notbufdflashes, 0, */ static int bd_request; +/* + * Request for the buf daemon to write more buffers than is indicated by + * lodirtybuf. This may be necessary to push out excess dependencies or + * defragment the address space where a simple count of the number of dirty + * buffers is insufficient to characterize the demand for flushing them. + */ +static int bd_speedupreq; + /* * This lock synchronizes access to bd_request. */ @@ -467,12 +475,20 @@ bd_wakeup(int dirtybuflevel) * bd_speedup - speedup the buffer cache flushing code */ -static __inline void bd_speedup(void) { + int needwake; - bd_wakeup(1); + mtx_lock(&bdlock); + needwake = 0; + if (bd_speedupreq == 0 || bd_request == 0) + needwake = 1; + bd_speedupreq = 1; + bd_request = 1; + if (needwake) + wakeup(&bd_request); + mtx_unlock(&bdlock); } /* @@ -1547,7 +1563,6 @@ vfs_vmio_release(struct buf *bp) vm_page_t m; VM_OBJECT_LOCK(bp->b_bufobj->bo_object); - vm_page_lock_queues(); for (i = 0; i < bp->b_npages; i++) { m = bp->b_pages[i]; bp->b_pages[i] = NULL; @@ -1555,16 +1570,16 @@ vfs_vmio_release(struct buf *bp) * In order to keep page LRU ordering consistent, put * everything on the inactive queue. */ + vm_page_lock(m); vm_page_unwire(m, 0); /* * We don't mess with busy pages, it is * the responsibility of the process that * busied the pages to deal with them.
*/ - if ((m->oflags & VPO_BUSY) || (m->busy != 0)) - continue; - - if (m->wire_count == 0) { + if ((m->oflags & VPO_BUSY) == 0 && m->busy == 0 && + m->wire_count == 0) { + vm_page_lock_queues(); /* * Might as well free the page if we can and it has * no valid data. We also free the page if the @@ -1578,9 +1593,10 @@ vfs_vmio_release(struct buf *bp) } else if (buf_vm_page_count_severe()) { vm_page_try_to_cache(m); } + vm_page_unlock_queues(); } + vm_page_unlock(m); } - vm_page_unlock_queues(); VM_OBJECT_UNLOCK(bp->b_bufobj->bo_object); pmap_qremove(trunc_page((vm_offset_t) bp->b_data), bp->b_npages); @@ -2120,6 +2136,7 @@ buf_do_flush(struct vnode *vp) static void buf_daemon() { + int lodirtysave; /* * This process needs to be suspended prior to shutdown sync. @@ -2137,7 +2154,11 @@ buf_daemon() mtx_unlock(&bdlock); kproc_suspend_check(bufdaemonproc); - + lodirtysave = lodirtybuffers; + if (bd_speedupreq) { + lodirtybuffers = numdirtybuffers / 2; + bd_speedupreq = 0; + } /* * Do the flush. 
Limit the amount of in-transit I/O we * allow to build up, otherwise we would completely saturate @@ -2149,6 +2170,7 @@ buf_daemon() break; uio_yield(); } + lodirtybuffers = lodirtysave; /* * Only clear bd_request if we have reached our low water @@ -2920,7 +2942,6 @@ allocbuf(struct buf *bp, int size) vm_page_t m; VM_OBJECT_LOCK(bp->b_bufobj->bo_object); - vm_page_lock_queues(); for (i = desiredpages; i < bp->b_npages; i++) { /* * the page is not freed here -- it @@ -2930,13 +2951,15 @@ allocbuf(struct buf *bp, int size) m = bp->b_pages[i]; KASSERT(m != bogus_page, ("allocbuf: bogus page found")); - while (vm_page_sleep_if_busy(m, TRUE, "biodep")) - vm_page_lock_queues(); + while (vm_page_sleep_if_busy(m, TRUE, + "biodep")) + continue; bp->b_pages[i] = NULL; + vm_page_lock(m); vm_page_unwire(m, 0); + vm_page_unlock(m); } - vm_page_unlock_queues(); VM_OBJECT_UNLOCK(bp->b_bufobj->bo_object); pmap_qremove((vm_offset_t) trunc_page((vm_offset_t)bp->b_data) + (desiredpages << PAGE_SHIFT), (bp->b_npages - desiredpages)); @@ -3002,15 +3025,24 @@ allocbuf(struct buf *bp, int size) * vm_fault->getpages->cluster_read->allocbuf * */ - if (vm_page_sleep_if_busy(m, FALSE, "pgtblk")) + if ((m->oflags & VPO_BUSY) != 0) { + /* + * Reference the page before unlocking + * and sleeping so that the page daemon + * is less likely to reclaim it. + */ + vm_page_lock_queues(); + vm_page_flag_set(m, PG_REFERENCED); + vm_page_sleep(m, "pgtblk"); continue; + } /* * We have a good page. */ - vm_page_lock_queues(); + vm_page_lock(m); vm_page_wire(m); - vm_page_unlock_queues(); + vm_page_unlock(m); bp->b_pages[bp->b_npages] = m; ++bp->b_npages; } @@ -3838,12 +3870,12 @@ vmapbuf(struct buf *bp) retry: if (vm_fault_quick(addr >= bp->b_data ? 
addr : bp->b_data, prot) < 0) { - vm_page_lock_queues(); for (i = 0; i < pidx; ++i) { + vm_page_lock(bp->b_pages[i]); vm_page_unhold(bp->b_pages[i]); + vm_page_unlock(bp->b_pages[i]); bp->b_pages[i] = NULL; } - vm_page_unlock_queues(); return(-1); } m = pmap_extract_and_hold(pmap, (vm_offset_t)addr, prot); @@ -3874,11 +3906,12 @@ vunmapbuf(struct buf *bp) npages = bp->b_npages; pmap_qremove(trunc_page((vm_offset_t)bp->b_data), npages); - vm_page_lock_queues(); - for (pidx = 0; pidx < npages; pidx++) + for (pidx = 0; pidx < npages; pidx++) { + vm_page_lock(bp->b_pages[pidx]); vm_page_unhold(bp->b_pages[pidx]); - vm_page_unlock_queues(); - + vm_page_unlock(bp->b_pages[pidx]); + } + bp->b_data = bp->b_saveaddr; } diff --git a/sys/kern/vfs_cache.c b/sys/kern/vfs_cache.c index 78548033ea2..a13a721850d 100644 --- a/sys/kern/vfs_cache.c +++ b/sys/kern/vfs_cache.c @@ -610,7 +610,9 @@ cache_enter(dvp, vp, cnp) CTR3(KTR_VFS, "cache_enter(%p, %p, %s)", dvp, vp, cnp->cn_nameptr); VNASSERT(vp == NULL || (vp->v_iflag & VI_DOOMED) == 0, vp, - ("cahe_enter: Adding a doomed vnode")); + ("cache_enter: Adding a doomed vnode")); + VNASSERT(dvp == NULL || (dvp->v_iflag & VI_DOOMED) == 0, dvp, + ("cache_enter: Doomed vnode used as src")); if (!doingcache) return; diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c index 4810e769b15..ae182e00839 100644 --- a/sys/kern/vfs_subr.c +++ b/sys/kern/vfs_subr.c @@ -2815,6 +2815,7 @@ DB_SHOW_COMMAND(mount, db_show_mount) MNT_FLAG(MNT_FORCE); MNT_FLAG(MNT_SNAPSHOT); MNT_FLAG(MNT_BYFSID); + MNT_FLAG(MNT_SOFTDEP); #undef MNT_FLAG if (flags != 0) { if (buf[0] != '\0') diff --git a/sys/kern/vfs_vnops.c b/sys/kern/vfs_vnops.c index 838f8f7be31..64f77dd46d0 100644 --- a/sys/kern/vfs_vnops.c +++ b/sys/kern/vfs_vnops.c @@ -54,6 +54,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include @@ -1338,3 +1339,21 @@ vn_vget_ino(struct vnode *vp, ino_t ino, int lkflags, struct vnode **rvp) } return (error); } + +int 
+vn_rlimit_fsize(const struct vnode *vp, const struct uio *uio, + const struct thread *td) +{ + + if (vp->v_type != VREG || td == NULL) + return (0); + PROC_LOCK(td->td_proc); + if ((uoff_t)uio->uio_offset + uio->uio_resid > + lim_cur(td->td_proc, RLIMIT_FSIZE)) { + psignal(td->td_proc, SIGXFSZ); + PROC_UNLOCK(td->td_proc); + return (EFBIG); + } + PROC_UNLOCK(td->td_proc); + return (0); +} diff --git a/sys/mips/adm5120/if_admsw.c b/sys/mips/adm5120/if_admsw.c index 164e84008a2..472a80414a7 100644 --- a/sys/mips/adm5120/if_admsw.c +++ b/sys/mips/adm5120/if_admsw.c @@ -540,8 +540,8 @@ admsw_attach(device_t dev) ifp->if_init = admsw_init; ifp->if_mtu = ETHERMTU; ifp->if_baudrate = IF_Mbps(100); - IFQ_SET_MAXLEN(&ifp->if_snd, max(ADMSW_NTXLDESC, IFQ_MAXLEN)); - ifp->if_snd.ifq_drv_maxlen = max(ADMSW_NTXLDESC, IFQ_MAXLEN); + IFQ_SET_MAXLEN(&ifp->if_snd, max(ADMSW_NTXLDESC, ifqmaxlen)); + ifp->if_snd.ifq_drv_maxlen = max(ADMSW_NTXLDESC, ifqmaxlen); IFQ_SET_READY(&ifp->if_snd); ifp->if_capabilities |= IFCAP_VLAN_MTU; diff --git a/sys/mips/atheros/if_arge.c b/sys/mips/atheros/if_arge.c index 4367bfb001d..eef1dcf037b 100644 --- a/sys/mips/atheros/if_arge.c +++ b/sys/mips/atheros/if_arge.c @@ -312,8 +312,8 @@ arge_attach(device_t dev) sc->arge_if_flags = ifp->if_flags; /* XXX: add real size */ - IFQ_SET_MAXLEN(&ifp->if_snd, IFQ_MAXLEN); - ifp->if_snd.ifq_maxlen = IFQ_MAXLEN; + IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen); + ifp->if_snd.ifq_maxlen = ifqmaxlen; IFQ_SET_READY(&ifp->if_snd); ifp->if_capenable = ifp->if_capabilities; diff --git a/sys/mips/cavium/asm_octeon.S b/sys/mips/cavium/asm_octeon.S index d9f79f134ff..94ac875a199 100644 --- a/sys/mips/cavium/asm_octeon.S +++ b/sys/mips/cavium/asm_octeon.S @@ -1,182 +1,66 @@ -/***********************license start*************** - * Copyright (c) 2003-2008 Cavium Networks (support@cavium.com). All rights - * reserved. +/*- + * Copyright (c) 2004-2010 Juli Mallett + * All rights reserved. 
* + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * * Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials provided - * with the distribution. 
- * - * * Neither the name of Cavium Networks nor the names of - * its contributors may be used to endorse or promote products - * derived from this software without specific prior written - * permission. - * - * TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS" - * AND WITH ALL FAULTS AND CAVIUM NETWORKS MAKES NO PROMISES, REPRESENTATIONS - * OR WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH - * RESPECT TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY - * REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT - * DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES - * OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR - * PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT, QUIET - * POSSESSION OR CORRESPONDENCE TO DESCRIPTION. THE ENTIRE RISK ARISING OUT - * OF USE OR PERFORMANCE OF THE SOFTWARE LIES WITH YOU. - * - * - * For any questions regarding licensing please contact marketing@caviumnetworks.com - * - ***********************license end**************************************/ - -/* $FreeBSD$ */ + * $FreeBSD$ + */ #include -#include -#include -#include -#include -#include "assym.s" - + .set noreorder - -#define CPU_DISABLE_INTERRUPTS(reg, reg2, reg3) \ - mfc0 reg, MIPS_COP_0_STATUS; \ - nop; \ - move reg3, reg; \ - li reg2, ~MIPS_SR_INT_IE; \ - and reg, reg2, reg; \ - mtc0 reg, MIPS_COP_0_STATUS; \ - COP0_SYNC - - - -#define CPU_ENABLE_INTERRUPTS(reg, reg3) \ - mfc0 reg, MIPS_COP_0_STATUS; \ - nop; \ - or reg, reg, reg3; \ - mtc0 reg, MIPS_COP_0_STATUS; \ - COP0_SYNC - - -#define PUSHR(reg) \ - addiu sp,sp,-16 ; \ - sd reg, 8(sp) ; \ - nop ; - -#define POPR(reg) \ - ld reg, 8(sp) ; \ - addiu sp,sp,16 ; \ - nop ; - - - - +#ifdef SMP /* - * octeon_ciu_get_interrupt_reg_addr - * - * Given Int-X, En-X combination, return the CIU Interrupt Enable Register addr - * a0 = ciu Int-X: 0/1 - * a1 = ciu EN-0: 0/1 + * This function must be 
implemented in assembly because it is called early + * in AP boot without a valid stack. */ -LEAF(octeon_ciu_get_interrupt_reg_addr) - .set noreorder - .set mips3 +LEAF(platform_processor_id) + .set push + .set mips32r2 + jr ra + rdhwr v0, $0 + .set pop +END(platform_processor_id) - beqz a0, ciu_get_interrupt_reg_addr_Int_0 - nop - -ciu_get_interrupt_reg_addr_Int_1: - beqz a1, ciu_get_interrupt_reg_addr_Int_1_En_0 - nop - -ciu_get_interrupt_reg_addr_Int_1_En1: - li a0, OCTEON_CIU_ADDR_HI - dsll32 a0, a0, 0 - nop - ori a0, OCTEON_CIU_EN1_INT1_LO - j ciu_get_interrupt_reg_addr_ret - nop - -ciu_get_interrupt_reg_addr_Int_1_En_0: - li a0, OCTEON_CIU_ADDR_HI - dsll32 a0, a0, 0 - nop - ori a0, OCTEON_CIU_EN0_INT1_LO - j ciu_get_interrupt_reg_addr_ret - nop - -ciu_get_interrupt_reg_addr_Int_0: - beqz a1, ciu_get_interrupt_reg_addr_Int_0_En_0 - nop - -ciu_get_interrupt_reg_addr_Int_0_En_1: - li a0, OCTEON_CIU_ADDR_HI - dsll32 a0, a0, 0 - nop - ori a0, OCTEON_CIU_EN1_INT0_LO - j ciu_get_interrupt_reg_addr_ret - nop - -ciu_get_interrupt_reg_addr_Int_0_En_0: - li a0, OCTEON_CIU_ADDR_HI - dsll32 a0, a0, 0 - nop - ori a0, OCTEON_CIU_EN0_INT0_LO - - -ciu_get_interrupt_reg_addr_ret: - j ra - nop - - .set mips0 - .set reorder -END(octeon_ciu_get_interrupt_reg_addr) - - - /* - * octeon_ciu_mask_all_interrupts - * - * a0 = ciu Interrupt-X: 0/1 - * a1 = ciu Enable-X: 0/1 + * Called on APs to wait until they are told to launch. 
*/ -LEAF(octeon_ciu_mask_all_interrupts) - .set noreorder - .set mips3 +LEAF(octeon_ap_wait) + jal platform_processor_id + nop - PUSHR(ra) - PUSHR(s0) - - move t0, a0 - move t1, a1 - li a0, MIPS_SR_INT_IE - CPU_DISABLE_INTERRUPTS(a2, a1, s0) - move a0, t0 - move t1, a1 - jal octeon_ciu_get_interrupt_reg_addr - nop - ld a2, 0(a0) # Dummy read - nop - move a2, zero # Clear all - sd a2, 0(a0) # Write new Enable bits - nop - CPU_ENABLE_INTERRUPTS(a2, s0) +1: ll t0, octeon_ap_boot + bne v0, t0, 1b + nop - POPR(s0) - POPR(ra) - j ra # Return - nop # (bd slot) + move t0, zero + sc t0, octeon_ap_boot - .set mips0 - .set reorder -END(octeon_ciu_mask_all_interrupts) + beqz t0, 1b + nop + j mpentry + nop +END(octeon_ap_wait) +#endif diff --git a/sys/mips/cavium/dev/rgmii/octeon_fau.c b/sys/mips/cavium/dev/rgmii/octeon_fau.c deleted file mode 100644 index fb4c0ad3a80..00000000000 --- a/sys/mips/cavium/dev/rgmii/octeon_fau.c +++ /dev/null @@ -1,83 +0,0 @@ -/***********************license start*************** - * Copyright (c) 2003-2008 Cavium Networks (support@cavium.com). All rights - * reserved. - * - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * * Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials provided - * with the distribution. - * - * * Neither the name of Cavium Networks nor the names of - * its contributors may be used to endorse or promote products - * derived from this software without specific prior written - * permission. 
- * - * TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS" - * AND WITH ALL FAULTS AND CAVIUM NETWORKS MAKES NO PROMISES, REPRESENTATIONS - * OR WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH - * RESPECT TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY - * REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT - * DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES - * OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR - * PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT, QUIET - * POSSESSION OR CORRESPONDENCE TO DESCRIPTION. THE ENTIRE RISK ARISING OUT - * OF USE OR PERFORMANCE OF THE SOFTWARE LIES WITH YOU. - * - * - * For any questions regarding licensing please contact marketing@caviumnetworks.com - * - ***********************license end**************************************/ - -/*------------------------------------------------------------------ - * octeon_fau.c Fetch & Add Block - * - *------------------------------------------------------------------ - */ - -#include -__FBSDID("$FreeBSD$"); - -#include -#include - -#include -#include "octeon_fau.h" - -/* - * oct_fau_init - * - * How do we initialize FAU unit. I don't even think we can reset it. - */ -void octeon_fau_init (void) -{ -} - - -/* - * oct_fau_enable - * - * Let the Fetch/Add unit roll - */ -void octeon_fau_enable (void) -{ -} - - -/* - * oct_fau_disable - * - * disable fau - * - * Don't know if we can even do that. 
- */ -void octeon_fau_disable (void) -{ -} diff --git a/sys/mips/cavium/dev/rgmii/octeon_fau.h b/sys/mips/cavium/dev/rgmii/octeon_fau.h index 1683ff55e00..acf5132e2e8 100644 --- a/sys/mips/cavium/dev/rgmii/octeon_fau.h +++ b/sys/mips/cavium/dev/rgmii/octeon_fau.h @@ -217,9 +217,4 @@ static inline void octeon_fau_atomic_add64 (octeon_fau_reg_64_t reg, int64_t val } -extern void octeon_fau_init(void); -extern void octeon_fau_enable(void); -extern void octeon_fau_disable(void); - - #endif /* ___OCTEON_FAU__H___ */ diff --git a/sys/mips/cavium/dev/rgmii/octeon_rgmx.c b/sys/mips/cavium/dev/rgmii/octeon_rgmx.c index ada09461778..5b84af5dcaa 100644 --- a/sys/mips/cavium/dev/rgmii/octeon_rgmx.c +++ b/sys/mips/cavium/dev/rgmii/octeon_rgmx.c @@ -1481,7 +1481,7 @@ static void octeon_config_hw_units_post_ports (void) oct_write64(OCTEON_POW_WORKQUEUE_INT_THRESHOLD(OCTEON_POW_RX_GROUP_NUM), thr.word64); #endif - ciu_enable_interrupts(OCTEON_CORE_ID, OCTEON_RGMX_CIU_INTX, OCTEON_RGMX_CIU_ENX, + ciu_enable_interrupts(PCPU_GET(cpuid), OCTEON_RGMX_CIU_INTX, OCTEON_RGMX_CIU_ENX, (OCTEON_POW_RX_GROUP_MASK | CIU_GENTIMER_BITS_ENABLE(CIU_GENTIMER_NUM_1)), CIU_MIPS_IP2); diff --git a/sys/mips/cavium/files.octeon1 b/sys/mips/cavium/files.octeon1 index c10988c46c1..4cf31f84310 100644 --- a/sys/mips/cavium/files.octeon1 +++ b/sys/mips/cavium/files.octeon1 @@ -1,8 +1,7 @@ # $FreeBSD$ # Octeon Support Files # -mips/mips/mp_machdep.c optional smp -mips/cavium/dev/rgmii/octeon_fau.c optional rgmii +mips/cavium/asm_octeon.S optional smp mips/cavium/dev/rgmii/octeon_fpa.c optional rgmii mips/cavium/dev/rgmii/octeon_ipd.c optional rgmii mips/cavium/dev/rgmii/octeon_pko.c optional rgmii @@ -10,6 +9,7 @@ mips/cavium/dev/rgmii/octeon_rgmx.c optional rgmii mips/cavium/obio.c optional uart mips/cavium/octeon_ebt3000_cf.c optional cf mips/cavium/octeon_machdep.c standard +mips/cavium/octeon_mp.c optional smp mips/cavium/uart_bus_octeonusart.c optional uart mips/cavium/uart_cpu_octeonusart.c optional 
uart mips/cavium/uart_dev_oct16550.c optional uart diff --git a/sys/mips/cavium/octeon_machdep.c b/sys/mips/cavium/octeon_machdep.c index f55c7ea4dfb..d5df4b426ea 100644 --- a/sys/mips/cavium/octeon_machdep.c +++ b/sys/mips/cavium/octeon_machdep.c @@ -86,16 +86,6 @@ static void octeon_boot_params_init(register_t ptr); static uint64_t ciu_get_intr_sum_reg_addr(int core_num, int intx, int enx); static uint64_t ciu_get_intr_en_reg_addr(int core_num, int intx, int enx); -static __inline void -mips_wr_ebase(u_int32_t a0) -{ - __asm __volatile("mtc0 %[a0], $15, 1 ;" - : - : [a0] "r"(a0)); - - mips_barrier(); -} - void platform_cpu_init() { @@ -111,25 +101,6 @@ platform_reset(void) oct_write64(OCTEON_CIU_SOFT_RST, 1); } - -static inline uint32_t -octeon_disable_interrupts(void) -{ - uint32_t status_bits; - - status_bits = mips_rd_status(); - mips_wr_status(status_bits & ~MIPS_SR_INT_IE); - return (status_bits); -} - - -static inline void -octeon_set_interrupts(uint32_t status_bits) -{ - mips_wr_status(status_bits); -} - - void octeon_led_write_char(int char_position, char val) { @@ -203,82 +174,6 @@ octeon_led_run_wheel(int *prog_count, int led_position) *prog_count &= 0x7; } -#define LSR_DATAREADY 0x01 /* Data ready */ -#define LSR_THRE 0x20 /* Transmit holding register empty */ -#define LSR_TEMT 0x40 /* Transmitter Empty. THR, TSR & FIFO */ -#define USR_TXFIFO_NOTFULL 0x02 /* Uart TX FIFO Not full */ - -/* - * octeon_uart_write_byte - * - * Put out a single byte off of uart port. 
- */ - -void -octeon_uart_write_byte(int uart_index, uint8_t ch) -{ - uint64_t val, val2; - if (uart_index < 0 || uart_index > 1) - return; - - while (1) { - val = oct_read64(OCTEON_MIO_UART0_LSR + (uart_index * 0x400)); - val2 = oct_read64(OCTEON_MIO_UART0_USR + (uart_index * 0x400)); - if ((((uint8_t) val) & LSR_THRE) || - (((uint8_t) val2) & USR_TXFIFO_NOTFULL)) { - break; - } - } - - /* Write the byte */ - oct_write8(OCTEON_MIO_UART0_THR + (uart_index * 0x400), (uint64_t) ch); - - /* Force Flush the IOBus */ - oct_read64(OCTEON_MIO_BOOT_BIST_STAT); -} - - -void -octeon_uart_write_byte0(uint8_t ch) -{ - uint64_t val, val2; - - while (1) { - val = oct_read64(OCTEON_MIO_UART0_LSR); - val2 = oct_read64(OCTEON_MIO_UART0_USR); - if ((((uint8_t) val) & LSR_THRE) || - (((uint8_t) val2) & USR_TXFIFO_NOTFULL)) { - break; - } - } - - /* Write the byte */ - oct_write8(OCTEON_MIO_UART0_THR, (uint64_t) ch); - - /* Force Flush the IOBus */ - oct_read64(OCTEON_MIO_BOOT_BIST_STAT); -} - -/* - * octeon_uart_write_string - * - */ -void -octeon_uart_write_string(int uart_index, const char *str) -{ - /* Just loop writing one byte at a time */ - - while (*str) { - octeon_uart_write_byte(uart_index, *str); - if (*str == '\n') { - octeon_uart_write_byte(uart_index, '\r'); - } - str++; - } -} - -static char wstr[30]; - void octeon_led_write_hex(uint32_t wl) { @@ -289,44 +184,6 @@ octeon_led_write_hex(uint32_t wl) } -void octeon_uart_write_hex2(uint32_t wl, uint32_t wh) -{ - sprintf(wstr, "0x%X-0x%X ", wh, wl); - octeon_uart_write_string(0, wstr); -} - -void -octeon_uart_write_hex(uint32_t wl) -{ - sprintf(wstr, " 0x%X ", wl); - octeon_uart_write_string(0, wstr); -} - -/* - * octeon_wait_uart_flush - */ -void -octeon_wait_uart_flush(int uart_index, uint8_t ch) -{ - uint64_t val; - int64_t val3; - uint32_t cpu_status_bits; - - if (uart_index < 0 || uart_index > 1) - return; - - cpu_status_bits = octeon_disable_interrupts(); - /* Force Flush the IOBus */ - 
oct_read64(OCTEON_MIO_BOOT_BIST_STAT); - for (val3 = 0xfffffffff; val3 > 0; val3--) { - val = oct_read64(OCTEON_MIO_UART0_LSR + (uart_index * 0x400)); - if (((uint8_t) val) & LSR_TEMT) - break; - } - octeon_set_interrupts(cpu_status_bits); -} - - /* * octeon_debug_symbol * @@ -450,17 +307,17 @@ ciu_get_en_reg_addr_new(int corenum, int intx, int enx, int ciu_ip) /* XXX kasserts? */ if (enx < CIU_EN_0 || enx > CIU_EN_1) { printf("%s: invalid enx value %d, should be %d or %d\n", - __FUNCTION__, enx, CIU_EN_0, CIU_EN_1); + __func__, enx, CIU_EN_0, CIU_EN_1); return 0; } if (intx < CIU_INT_0 || intx > CIU_INT_1) { printf("%s: invalid intx value %d, should be %d or %d\n", - __FUNCTION__, enx, CIU_INT_0, CIU_INT_1); + __func__, enx, CIU_INT_0, CIU_INT_1); return 0; } if (ciu_ip < CIU_MIPS_IP2 || ciu_ip > CIU_MIPS_IP3) { printf("%s: invalid ciu_ip value %d, should be %d or %d\n", - __FUNCTION__, ciu_ip, CIU_MIPS_IP2, CIU_MIPS_IP3); + __func__, ciu_ip, CIU_MIPS_IP2, CIU_MIPS_IP3); return 0; } @@ -517,7 +374,7 @@ ciu_clear_int_summary(int core_num, int intx, int enx, uint64_t write_bits) core_num, intx, enx, write_bits); #endif - cpu_status_bits = octeon_disable_interrupts(); + cpu_status_bits = intr_disable(); ciu_intr_sum_reg_addr = ciu_get_intr_sum_reg_addr(core_num, intx, enx); @@ -535,7 +392,7 @@ ciu_clear_int_summary(int core_num, int intx, int enx, uint64_t write_bits) printf(" Readback: 0x%llX\n\n ", (uint64_t) oct_read64(ciu_intr_sum_reg_addr)); #endif - octeon_set_interrupts(cpu_status_bits); + intr_restore(cpu_status_bits); } /* @@ -550,7 +407,7 @@ ciu_disable_intr(int core_num, int intx, int enx) if (core_num == CIU_THIS_CORE) core_num = octeon_get_core_num(); - cpu_status_bits = octeon_disable_interrupts(); + cpu_status_bits = intr_disable(); ciu_intr_reg_addr = ciu_get_intr_en_reg_addr(core_num, intx, enx); @@ -559,7 +416,7 @@ ciu_disable_intr(int core_num, int intx, int enx) oct_write64(ciu_intr_reg_addr, 0LL); oct_read64(OCTEON_MIO_BOOT_BIST_STAT); /* Bus 
Barrier */ - octeon_set_interrupts(cpu_status_bits); + intr_restore(cpu_status_bits); } void @@ -580,7 +437,7 @@ ciu_dump_interrutps_enabled(int core_num, int intx, int enx, int ciu_ip) #endif if (!ciu_intr_reg_addr) { - printf("Bad call to %s\n", __FUNCTION__); + printf("Bad call to %s\n", __func__); while(1); return; } @@ -612,7 +469,7 @@ void ciu_enable_interrupts(int core_num, int intx, int enx, core_num, intx, enx, ciu_ip, set_these_interrupt_bits); #endif - cpu_status_bits = octeon_disable_interrupts(); + cpu_status_bits = intr_disable(); #ifndef OCTEON_SMP_1 ciu_intr_reg_addr = ciu_get_intr_en_reg_addr(core_num, intx, enx); @@ -621,7 +478,7 @@ void ciu_enable_interrupts(int core_num, int intx, int enx, #endif if (!ciu_intr_reg_addr) { - printf("Bad call to %s\n", __FUNCTION__); + printf("Bad call to %s\n", __func__); while(1); return; /* XXX */ } @@ -634,7 +491,7 @@ void ciu_enable_interrupts(int core_num, int intx, int enx, #endif ciu_intr_bits |= set_these_interrupt_bits; oct_write64(ciu_intr_reg_addr, ciu_intr_bits); -#ifdef OCTEON_SMP +#ifdef SMP mips_wbflush(); #endif oct_read64(OCTEON_MIO_BOOT_BIST_STAT); /* Bus Barrier */ @@ -644,7 +501,7 @@ void ciu_enable_interrupts(int core_num, int intx, int enx, (uint64_t)oct_read64(ciu_intr_reg_addr)); #endif - octeon_set_interrupts(cpu_status_bits); + intr_restore(cpu_status_bits); } unsigned long @@ -659,12 +516,8 @@ octeon_memory_init(void) uint32_t realmem_bytes; if (octeon_board_real()) { - printf("octeon_dram == %jx\n", (intmax_t)octeon_dram); - printf("reduced to ram: %u MB", (uint32_t)octeon_dram >> 20); - realmem_bytes = (octeon_dram - PAGE_SIZE); realmem_bytes &= ~(PAGE_SIZE - 1); - printf("Real memory bytes is %x\n", realmem_bytes); } else { /* Simulator we limit to 96 meg */ realmem_bytes = (96 << 20); @@ -678,8 +531,6 @@ octeon_memory_init(void) phys_avail[1] = realmem_bytes; realmem_bytes -= OCTEON_DRAM_FIRST_256_END; realmem_bytes &= ~(PAGE_SIZE - 1); - printf("phys_avail[0] = %#lx phys_avail[1] = 
%#lx\n", - (long)phys_avail[0], (long)phys_avail[1]); } else { /* Simulator gets 96Meg period. */ phys_avail[1] = (96 << 20); @@ -705,23 +556,14 @@ octeon_memory_init(void) realmem_bytes &= ~(PAGE_SIZE - 1); /* Now map the rest of the memory */ phys_avail[2] = 0x20000000; - printf("realmem_bytes is now at %x\n", realmem_bytes); phys_avail[3] = ((uint32_t) 0x20000000 + realmem_bytes); - printf("Next block of memory goes from %#lx to %#lx\n", - (long)phys_avail[2], (long)phys_avail[3]); physmem += btoc(phys_avail[3] - phys_avail[2]); - } else { - printf("realmem_bytes is %d\n", realmem_bytes); } realmem = physmem; printf("Total DRAM Size %#X\n", (uint32_t) octeon_dram); printf("Bank 0 = %#08lX -> %#08lX\n", (long)phys_avail[0], (long)phys_avail[1]); printf("Bank 1 = %#08lX -> %#08lX\n", (long)phys_avail[2], (long)phys_avail[3]); - printf("physmem: %#lx\n", physmem); - - Maxmem = physmem; - } void @@ -760,7 +602,15 @@ platform_start(__register_t a0, __register_t a1, __register_t a2 __unused, kdb_enter(KDB_WHY_BOOTFLAGS, "Boot flags requested debugger"); #endif platform_counter_freq = octeon_get_clock_rate(); - mips_timer_init_params(platform_counter_freq, 1); + mips_timer_init_params(platform_counter_freq, 0); + +#ifdef SMP + /* + * Clear any pending IPIs and enable the IPI interrupt. 
+ */ + oct_write64(OCTEON_CIU_MBOX_CLRX(0), 0xffffffff); + ciu_enable_interrupts(0, CIU_INT_1, CIU_EN_0, OCTEON_CIU_ENABLE_MBOX_INTR, CIU_MIPS_IP3); +#endif } /* impSTART: This stuff should move back into the Cavium SDK */ @@ -982,7 +832,7 @@ octeon_boot_params_init(register_t ptr) printf("Boot Descriptor Ver: %u -> %u/%u", octeon_bd_ver, octeon_cvmx_bd_ver/100, octeon_cvmx_bd_ver%100); - printf(" CPU clock: %uMHz\n", octeon_cpu_clock/1000000); + printf(" CPU clock: %uMHz Core Mask: %#x\n", octeon_cpu_clock/1000000, octeon_core_mask); printf(" Dram: %u MB", (uint32_t)(octeon_dram >> 20)); printf(" Board Type: %u Revision: %u/%u\n", octeon_board_type, octeon_board_rev_major, octeon_board_rev_minor); diff --git a/sys/mips/cavium/octeon_mp.c b/sys/mips/cavium/octeon_mp.c new file mode 100644 index 00000000000..8ded87ee282 --- /dev/null +++ b/sys/mips/cavium/octeon_mp.c @@ -0,0 +1,102 @@ +/*- + * Copyright (c) 2004-2010 Juli Mallett + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include + +#include +#include + +#include + +unsigned octeon_ap_boot = ~0; + +void +platform_ipi_send(int cpuid) +{ + oct_write64(OCTEON_CIU_MBOX_SETX(cpuid), 1); + mips_wbflush(); +} + +void +platform_ipi_clear(void) +{ + uint64_t action; + + action = oct_read64(OCTEON_CIU_MBOX_CLRX(PCPU_GET(cpuid))); + KASSERT(action == 1, ("unexpected IPIs: %#jx", (uintmax_t)action)); + oct_write64(OCTEON_CIU_MBOX_CLRX(PCPU_GET(cpuid)), action); +} + +int +platform_ipi_intrnum(void) +{ + return (1); +} + +void +platform_init_ap(int cpuid) +{ + /* + * Set the exception base. + */ + mips_wr_ebase(0x80000000 | cpuid); + + /* + * Set up interrupts, clear IPIs and unmask the IPI interrupt. 
+ */ + octeon_ciu_reset(); + + oct_write64(OCTEON_CIU_MBOX_CLRX(cpuid), 0xffffffff); + ciu_enable_interrupts(cpuid, CIU_INT_1, CIU_EN_0, OCTEON_CIU_ENABLE_MBOX_INTR, CIU_MIPS_IP3); + + mips_wbflush(); +} + +int +platform_num_processors(void) +{ + return (fls(octeon_core_mask)); +} + +int +platform_start_ap(int cpuid) +{ + if (atomic_cmpset_32(&octeon_ap_boot, ~0, cpuid) == 0) + return (-1); + for (;;) { + DELAY(1000); + if (atomic_cmpset_32(&octeon_ap_boot, 0, ~0) != 0) + return (0); + printf("Waiting for cpu%d to start\n", cpuid); + } +} diff --git a/sys/mips/cavium/octeon_pcmap_regs.h b/sys/mips/cavium/octeon_pcmap_regs.h index 16dff5b35c9..0ee1a73ea8a 100644 --- a/sys/mips/cavium/octeon_pcmap_regs.h +++ b/sys/mips/cavium/octeon_pcmap_regs.h @@ -54,14 +54,6 @@ #ifndef LOCORE -/* XXXimp: From Cavium's include/pcpu.h, need to port that over */ -#ifndef OCTEON_SMP -#define OCTEON_CORE_ID 0 -#else -extern struct pcpu *cpuid_to_pcpu[]; -#define OCTEON_CORE_ID (mips_rd_coreid()) -#endif - /* * Utility inlines & macros */ @@ -324,62 +316,6 @@ static inline void oct_write32 (uint64_t csr_addr, uint32_t val32) #define OCTEON_SCRATCH_2 32 -static inline uint64_t oct_mf_chord (void) -{ - uint64_t dest; - - __asm __volatile ( ".set push\n" - ".set noreorder\n" - ".set noat\n" - ".set mips64\n" - "dmfc2 $1, 0x400\n" - "move %0, $1\n" - ".set pop\n" - : "=r" (dest) : : "$1"); - return dest; -} - - -#define MIPS64_DMFCz(cop,regnum,cp0reg,select) \ - .word (0x40200000 | (cop << 25) | (regnum << 16) | (cp0reg << 11) | select) - - -#define mips64_getcpz_xstr(s) mips64_getcpz_str(s) -#define mips64_getcpz_str(s) #s - -#define mips64_dgetcpz(cop,cpzreg,sel,val_ptr) \ - ({ __asm __volatile( \ - ".set push\n" \ - ".set mips3\n" \ - ".set noreorder\n" \ - ".set noat\n" \ - mips64_getcpz_xstr(MIPS64_DMFCz(cop,1,cpzreg,sel)) "\n" \ - "nop\n" \ - "nop\n" \ - "nop\n" \ - "nop\n" \ - "sd $1,0(%0)\n" \ - ".set pop" \ - : /* no outputs */ : "r" (val_ptr) : "$1"); \ - }) - - -#define 
mips64_dgetcp2(cp2reg,sel,retval_ptr) \ - mips64_dgetcpz(2,cp2reg,sel,retval_ptr) - - -#define OCTEON_MF_CHORD(dest) mips64_dgetcp2(0x400, 0, &dest) - - - -#define OCTEON_RDHWR(result, regstr) \ - __asm __volatile ( \ - ".set mips3\n" \ - "rdhwr %0,$" OCTEON_TMP_STR(regstr) "\n" \ - ".set mips\n" \ - : "=d" (result)); - -#define CVMX_MF_CHORD(dest) OCTEON_RDHWR(dest, 30) #define OCTEON_CHORD_HEX(dest_ptr) \ ({ __asm __volatile( \ @@ -397,15 +333,6 @@ static inline uint64_t oct_mf_chord (void) : /* no outputs */ : "r" (dest_ptr) : "$2"); \ }) - - -#define OCTEON_MF_CHORD_BAD(dest) \ - __asm __volatile ( \ - ".set mips3\n" \ - "dmfc2 %0, 0x400\n" \ - ".set mips0\n" \ - : "=&r" (dest) : ) - static inline uint64_t oct_scratch_read64 (uint64_t address) { return(*((volatile uint64_t *)(OCTEON_SCRATCH_BASE + address))); @@ -417,17 +344,6 @@ static inline void oct_scratch_write64 (uint64_t address, uint64_t value) } -#define OCTEON_READ_CSR32(addr, val) \ - addr_ptr = addr; \ - oct_read_32_ptr(&addr_ptr, &val); - -#define OCTEON_WRITE_CSR32(addr, val, val_dummy) \ - addr_ptr = addr; \ - oct_write_32_ptr(&addr_ptr, &val); \ - oct_read64(OCTEON_MIO_BOOT_BIST_STAT); - - - /* * Octeon Address Space Definitions */ @@ -791,12 +707,6 @@ extern void octeon_led_write_hexchar(int char_position, char hexval); extern void octeon_led_write_hex(uint32_t wl); extern void octeon_led_write_string(const char *str); extern void octeon_reset(void); -extern void octeon_uart_write_byte(int uart_index, uint8_t ch); -extern void octeon_uart_write_string(int uart_index, const char *str); -extern void octeon_uart_write_hex(uint32_t wl); -extern void octeon_uart_write_hex2(uint32_t wl, uint32_t wh); -extern void octeon_wait_uart_flush(int uart_index, uint8_t ch); -extern void octeon_uart_write_byte0(uint8_t ch); extern void octeon_led_write_char0(char val); extern void octeon_led_run_wheel(int *pos, int led_position); extern void octeon_debug_symbol(void); diff --git 
a/sys/mips/cavium/uart_dev_oct16550.c b/sys/mips/cavium/uart_dev_oct16550.c index 3e6d7c7cce7..5c6e11ee59c 100644 --- a/sys/mips/cavium/uart_dev_oct16550.c +++ b/sys/mips/cavium/uart_dev_oct16550.c @@ -644,12 +644,9 @@ oct16550_bus_ipend(struct uart_softc *sc) if (ipend) octeon_led_run_wheel(&where1, 6 + device_get_unit(sc->sc_dev)); #endif - return ((sc->sc_leaving) ? 0 : ipend); + return (ipend); } - - - static int oct16550_bus_param (struct uart_softc *sc, int baudrate, int databits, int stopbits, int parity) diff --git a/sys/mips/conf/AR71XX b/sys/mips/conf/AR71XX index 4ee6aab8a1d..12c73daa5b8 100644 --- a/sys/mips/conf/AR71XX +++ b/sys/mips/conf/AR71XX @@ -29,11 +29,11 @@ options _KPOSIX_PRIORITY_SCHEDULING #Posix P1003_1B real-time extensions # options NFS_LEGACYRPC # Debugging for use in -current -# options DEADLKRES -# options INVARIANTS -# options INVARIANT_SUPPORT -# options WITNESS -# options WITNESS_SKIPSPIN +options DEADLKRES +options INVARIANTS +options INVARIANT_SUPPORT +options WITNESS +options WITNESS_SKIPSPIN options FFS #Berkeley Fast Filesystem options SOFTUPDATES #Enable FFS soft updates support options UFS_ACL #Support for access control lists @@ -62,6 +62,7 @@ device ath # Atheros pci/cardbus NIC's options ATH_DEBUG device ath_hal option AH_SUPPORT_AR5416 +option AH_RXCFG_SDMAMW_4BYTES # See NOTES for details of this WAR device ath_rate_sample device mii diff --git a/sys/mips/conf/SENTRY5 b/sys/mips/conf/SENTRY5 index a4e32c07d83..c38e45f20a3 100644 --- a/sys/mips/conf/SENTRY5 +++ b/sys/mips/conf/SENTRY5 @@ -75,6 +75,7 @@ device pci # siba_pcib #device ath_hal # pci chip support #options AH_SUPPORT_AR5416 # enable AR5416 tx/rx descriptors +options USB_DEBUG # enable debug msgs device usb # USB Bus (required) device uhci # UHCI PCI->USB interface device ehci # EHCI PCI->USB interface (USB 2.0) diff --git a/sys/mips/conf/SWARM b/sys/mips/conf/SWARM index 3189bcbcb9c..041c94da983 100644 --- a/sys/mips/conf/SWARM +++ b/sys/mips/conf/SWARM @@ 
-8,12 +8,6 @@ options CPU_SB1 files "../sibyte/files.sibyte" hints "SWARM.hints" -# -# 32-bit kernel cannot deal with physical memory beyond 4GB -# XXX pmap assumes that all the memory can be mapped using KSEG0 -# -options MAXMEM=512*1024 - options CFE options CFE_CONSOLE options CFE_ENV diff --git a/sys/mips/conf/XLR b/sys/mips/conf/XLR index d743c1b33f0..a0737b3b618 100644 --- a/sys/mips/conf/XLR +++ b/sys/mips/conf/XLR @@ -132,6 +132,7 @@ device scbus #device ohci # OHCI PCI->USB interface device ehci # EHCI PCI->USB interface (USB 2.0) device usb # USB Bus (required) +options USB_DEBUG # enable debug msgs #device udbp # USB Double Bulk Pipe devices #device ugen # Generic #device uhid # "Human Interface Devices" diff --git a/sys/mips/include/_inttypes.h b/sys/mips/include/_inttypes.h index 7b14b9b83cb..79664e4f383 100644 --- a/sys/mips/include/_inttypes.h +++ b/sys/mips/include/_inttypes.h @@ -38,177 +38,183 @@ * Macros for format specifiers. */ +#if defined(__mips_n64) +#define PRI64 "l" +#else +#define PRI64 "ll" +#endif + /* fprintf(3) macros for signed integers. 
*/ #define PRId8 "d" /* int8_t */ #define PRId16 "d" /* int16_t */ #define PRId32 "d" /* int32_t */ -#define PRId64 "lld" /* int64_t */ +#define PRId64 PRI64"d" /* int64_t */ #define PRIdLEAST8 "d" /* int_least8_t */ #define PRIdLEAST16 "d" /* int_least16_t */ #define PRIdLEAST32 "d" /* int_least32_t */ -#define PRIdLEAST64 "lld" /* int_least64_t */ +#define PRIdLEAST64 PRI64"d" /* int_least64_t */ #define PRIdFAST8 "d" /* int_fast8_t */ #define PRIdFAST16 "d" /* int_fast16_t */ #define PRIdFAST32 "d" /* int_fast32_t */ -#define PRIdFAST64 "lld" /* int_fast64_t */ +#define PRIdFAST64 PRI64"d" /* int_fast64_t */ #define PRIdMAX "jd" /* intmax_t */ -#define PRIdPTR "d" /* intptr_t */ +#define PRIdPTR "ld" /* intptr_t */ #define PRIi8 "i" /* int8_t */ #define PRIi16 "i" /* int16_t */ #define PRIi32 "i" /* int32_t */ -#define PRIi64 "lli" /* int64_t */ +#define PRIi64 PRI64"i" /* int64_t */ #define PRIiLEAST8 "i" /* int_least8_t */ #define PRIiLEAST16 "i" /* int_least16_t */ #define PRIiLEAST32 "i" /* int_least32_t */ -#define PRIiLEAST64 "lli" /* int_least64_t */ +#define PRIiLEAST64 PRI64"i" /* int_least64_t */ #define PRIiFAST8 "i" /* int_fast8_t */ #define PRIiFAST16 "i" /* int_fast16_t */ #define PRIiFAST32 "i" /* int_fast32_t */ -#define PRIiFAST64 "lli" /* int_fast64_t */ +#define PRIiFAST64 PRI64"i" /* int_fast64_t */ #define PRIiMAX "ji" /* intmax_t */ -#define PRIiPTR "i" /* intptr_t */ +#define PRIiPTR "li" /* intptr_t */ /* fprintf(3) macros for unsigned integers. 
*/ #define PRIo8 "o" /* uint8_t */ #define PRIo16 "o" /* uint16_t */ #define PRIo32 "o" /* uint32_t */ -#define PRIo64 "llo" /* uint64_t */ +#define PRIo64 PRI64"o" /* uint64_t */ #define PRIoLEAST8 "o" /* uint_least8_t */ #define PRIoLEAST16 "o" /* uint_least16_t */ #define PRIoLEAST32 "o" /* uint_least32_t */ -#define PRIoLEAST64 "llo" /* uint_least64_t */ +#define PRIoLEAST64 PRI64"o" /* uint_least64_t */ #define PRIoFAST8 "o" /* uint_fast8_t */ #define PRIoFAST16 "o" /* uint_fast16_t */ #define PRIoFAST32 "o" /* uint_fast32_t */ -#define PRIoFAST64 "llo" /* uint_fast64_t */ +#define PRIoFAST64 PRI64"o" /* uint_fast64_t */ #define PRIoMAX "jo" /* uintmax_t */ -#define PRIoPTR "o" /* uintptr_t */ +#define PRIoPTR "lo" /* uintptr_t */ #define PRIu8 "u" /* uint8_t */ #define PRIu16 "u" /* uint16_t */ #define PRIu32 "u" /* uint32_t */ -#define PRIu64 "llu" /* uint64_t */ +#define PRIu64 PRI64"u" /* uint64_t */ #define PRIuLEAST8 "u" /* uint_least8_t */ #define PRIuLEAST16 "u" /* uint_least16_t */ #define PRIuLEAST32 "u" /* uint_least32_t */ -#define PRIuLEAST64 "llu" /* uint_least64_t */ +#define PRIuLEAST64 PRI64"u" /* uint_least64_t */ #define PRIuFAST8 "u" /* uint_fast8_t */ #define PRIuFAST16 "u" /* uint_fast16_t */ #define PRIuFAST32 "u" /* uint_fast32_t */ -#define PRIuFAST64 "llu" /* uint_fast64_t */ +#define PRIuFAST64 PRI64"u" /* uint_fast64_t */ #define PRIuMAX "ju" /* uintmax_t */ -#define PRIuPTR "u" /* uintptr_t */ +#define PRIuPTR "lu" /* uintptr_t */ #define PRIx8 "x" /* uint8_t */ #define PRIx16 "x" /* uint16_t */ #define PRIx32 "x" /* uint32_t */ -#define PRIx64 "llx" /* uint64_t */ +#define PRIx64 PRI64"x" /* uint64_t */ #define PRIxLEAST8 "x" /* uint_least8_t */ #define PRIxLEAST16 "x" /* uint_least16_t */ #define PRIxLEAST32 "x" /* uint_least32_t */ -#define PRIxLEAST64 "llx" /* uint_least64_t */ +#define PRIxLEAST64 PRI64"x" /* uint_least64_t */ #define PRIxFAST8 "x" /* uint_fast8_t */ #define PRIxFAST16 "x" /* uint_fast16_t */ #define 
PRIxFAST32 "x" /* uint_fast32_t */ -#define PRIxFAST64 "llx" /* uint_fast64_t */ +#define PRIxFAST64 PRI64"x" /* uint_fast64_t */ #define PRIxMAX "jx" /* uintmax_t */ -#define PRIxPTR "x" /* uintptr_t */ +#define PRIxPTR "lx" /* uintptr_t */ #define PRIX8 "X" /* uint8_t */ #define PRIX16 "X" /* uint16_t */ #define PRIX32 "X" /* uint32_t */ -#define PRIX64 "llX" /* uint64_t */ +#define PRIX64 PRI64"X" /* uint64_t */ #define PRIXLEAST8 "X" /* uint_least8_t */ #define PRIXLEAST16 "X" /* uint_least16_t */ #define PRIXLEAST32 "X" /* uint_least32_t */ -#define PRIXLEAST64 "llX" /* uint_least64_t */ +#define PRIXLEAST64 PRI64"X" /* uint_least64_t */ #define PRIXFAST8 "X" /* uint_fast8_t */ #define PRIXFAST16 "X" /* uint_fast16_t */ #define PRIXFAST32 "X" /* uint_fast32_t */ -#define PRIXFAST64 "llX" /* uint_fast64_t */ +#define PRIXFAST64 PRI64"X" /* uint_fast64_t */ #define PRIXMAX "jX" /* uintmax_t */ -#define PRIXPTR "X" /* uintptr_t */ +#define PRIXPTR "lX" /* uintptr_t */ /* fscanf(3) macros for signed integers. 
*/ #define SCNd8 "hhd" /* int8_t */ #define SCNd16 "hd" /* int16_t */ #define SCNd32 "d" /* int32_t */ -#define SCNd64 "lld" /* int64_t */ +#define SCNd64 PRI64"d" /* int64_t */ #define SCNdLEAST8 "hhd" /* int_least8_t */ #define SCNdLEAST16 "hd" /* int_least16_t */ #define SCNdLEAST32 "d" /* int_least32_t */ -#define SCNdLEAST64 "lld" /* int_least64_t */ +#define SCNdLEAST64 PRI64"d" /* int_least64_t */ #define SCNdFAST8 "d" /* int_fast8_t */ #define SCNdFAST16 "d" /* int_fast16_t */ #define SCNdFAST32 "d" /* int_fast32_t */ -#define SCNdFAST64 "lld" /* int_fast64_t */ +#define SCNdFAST64 PRI64"d" /* int_fast64_t */ #define SCNdMAX "jd" /* intmax_t */ -#define SCNdPTR "d" /* intptr_t */ +#define SCNdPTR "ld" /* intptr_t */ #define SCNi8 "hhi" /* int8_t */ #define SCNi16 "hi" /* int16_t */ #define SCNi32 "i" /* int32_t */ -#define SCNi64 "lli" /* int64_t */ +#define SCNi64 PRI64"i" /* int64_t */ #define SCNiLEAST8 "hhi" /* int_least8_t */ #define SCNiLEAST16 "hi" /* int_least16_t */ #define SCNiLEAST32 "i" /* int_least32_t */ -#define SCNiLEAST64 "lli" /* int_least64_t */ +#define SCNiLEAST64 PRI64"i" /* int_least64_t */ #define SCNiFAST8 "i" /* int_fast8_t */ #define SCNiFAST16 "i" /* int_fast16_t */ #define SCNiFAST32 "i" /* int_fast32_t */ -#define SCNiFAST64 "lli" /* int_fast64_t */ +#define SCNiFAST64 PRI64"i" /* int_fast64_t */ #define SCNiMAX "ji" /* intmax_t */ -#define SCNiPTR "i" /* intptr_t */ +#define SCNiPTR "li" /* intptr_t */ /* fscanf(3) macros for unsigned integers. 
*/ #define SCNo8 "hho" /* uint8_t */ #define SCNo16 "ho" /* uint16_t */ #define SCNo32 "o" /* uint32_t */ -#define SCNo64 "llo" /* uint64_t */ +#define SCNo64 PRI64"o" /* uint64_t */ #define SCNoLEAST8 "hho" /* uint_least8_t */ #define SCNoLEAST16 "ho" /* uint_least16_t */ #define SCNoLEAST32 "o" /* uint_least32_t */ -#define SCNoLEAST64 "llo" /* uint_least64_t */ +#define SCNoLEAST64 PRI64"o" /* uint_least64_t */ #define SCNoFAST8 "o" /* uint_fast8_t */ #define SCNoFAST16 "o" /* uint_fast16_t */ #define SCNoFAST32 "o" /* uint_fast32_t */ -#define SCNoFAST64 "llo" /* uint_fast64_t */ +#define SCNoFAST64 PRI64"o" /* uint_fast64_t */ #define SCNoMAX "jo" /* uintmax_t */ -#define SCNoPTR "o" /* uintptr_t */ +#define SCNoPTR "lo" /* uintptr_t */ #define SCNu8 "hhu" /* uint8_t */ #define SCNu16 "hu" /* uint16_t */ #define SCNu32 "u" /* uint32_t */ -#define SCNu64 "llu" /* uint64_t */ +#define SCNu64 PRI64"u" /* uint64_t */ #define SCNuLEAST8 "hhu" /* uint_least8_t */ #define SCNuLEAST16 "hu" /* uint_least16_t */ #define SCNuLEAST32 "u" /* uint_least32_t */ -#define SCNuLEAST64 "llu" /* uint_least64_t */ +#define SCNuLEAST64 PRI64"u" /* uint_least64_t */ #define SCNuFAST8 "u" /* uint_fast8_t */ #define SCNuFAST16 "u" /* uint_fast16_t */ #define SCNuFAST32 "u" /* uint_fast32_t */ -#define SCNuFAST64 "llu" /* uint_fast64_t */ +#define SCNuFAST64 PRI64"u" /* uint_fast64_t */ #define SCNuMAX "ju" /* uintmax_t */ -#define SCNuPTR "u" /* uintptr_t */ +#define SCNuPTR "lu" /* uintptr_t */ #define SCNx8 "hhx" /* uint8_t */ #define SCNx16 "hx" /* uint16_t */ #define SCNx32 "x" /* uint32_t */ -#define SCNx64 "llx" /* uint64_t */ +#define SCNx64 PRI64"x" /* uint64_t */ #define SCNxLEAST8 "hhx" /* uint_least8_t */ #define SCNxLEAST16 "hx" /* uint_least16_t */ #define SCNxLEAST32 "x" /* uint_least32_t */ -#define SCNxLEAST64 "llx" /* uint_least64_t */ +#define SCNxLEAST64 PRI64"x" /* uint_least64_t */ #define SCNxFAST8 "x" /* uint_fast8_t */ #define SCNxFAST16 "x" /* uint_fast16_t */ 
#define SCNxFAST32 "x" /* uint_fast32_t */ -#define SCNxFAST64 "llx" /* uint_fast64_t */ +#define SCNxFAST64 PRI64"x" /* uint_fast64_t */ #define SCNxMAX "jx" /* uintmax_t */ -#define SCNxPTR "x" /* uintptr_t */ +#define SCNxPTR "lx" /* uintptr_t */ #endif /* !_MACHINE_INTTYPES_H_ */ diff --git a/sys/mips/include/_limits.h b/sys/mips/include/_limits.h index d544305ce8e..e160a7f4a1d 100644 --- a/sys/mips/include/_limits.h +++ b/sys/mips/include/_limits.h @@ -34,6 +34,10 @@ #ifndef _MACHINE__LIMITS_H_ #define _MACHINE__LIMITS_H_ +#if _MIPS_SZLONG == 64 +#define _LARGE_LONG +#endif + /* * According to ANSI (section 2.2.4.2), the values below must be usable by * #if preprocessing directives. Additionally, the expression must have the @@ -76,9 +80,9 @@ #define __LLONG_MAX 0x7fffffffffffffffLL /* max value for a long long */ #define __LLONG_MIN (-0x7fffffffffffffffLL - 1) /* min for a long long */ -#define __SSIZE_MAX __INT_MAX /* max value for a ssize_t */ +#define __SSIZE_MAX __LONG_MAX /* max value for a ssize_t */ -#define __SIZE_T_MAX __UINT_MAX /* max value for a size_t */ +#define __SIZE_T_MAX __ULONG_MAX /* max value for a size_t */ #define __OFF_MAX __LLONG_MAX /* max value for an off_t */ #define __OFF_MIN __LLONG_MIN /* min value for an off_t */ diff --git a/sys/mips/include/archtype.h b/sys/mips/include/archtype.h deleted file mode 100644 index ed1b5ea6b8f..00000000000 --- a/sys/mips/include/archtype.h +++ /dev/null @@ -1,49 +0,0 @@ -/* $OpenBSD: archtype.h,v 1.6 1999/01/27 04:46:04 imp Exp $ */ -/* - * Copyright (c) 1997 Per Fogelstrom - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. 
Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed under OpenBSD by - * Per Fogelstrom. - * 4. The name of the author may not be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS - * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * JNPR: archtype.h,v 1.6 2007/08/09 11:23:32 katta - * - * $FreeBSD$ - */ - -#ifndef _MACHINE_ARCHTYPE_H_ -#define _MACHINE_ARCHTYPE_H_ -/* - * Define architectural identitys for the different Mips machines. - */ - -/* - * FREEBSD_DEVELOPERS_FIXME - * Define constants for the supported MIPS CPU's - */ -#define MIPS_CLASS_UNKNOWN 0x00 - -#endif /* !_MACHINE_ARCHTYPE_H_ */ diff --git a/sys/mips/include/asm.h b/sys/mips/include/asm.h index 0a9c518e929..23bfde148d6 100644 --- a/sys/mips/include/asm.h +++ b/sys/mips/include/asm.h @@ -98,23 +98,6 @@ #define _C_LABEL(x) x -/* - * Endian-independent assembly-code aliases for unaligned memory accesses. 
- */ -#if BYTE_ORDER == LITTLE_ENDIAN -#define LWLO lwl -#define LWHI lwr -#define SWLO swl -#define SWHI swr -#endif - -#if BYTE_ORDER == BIG_ENDIAN -#define LWLO lwr -#define LWHI lwl -#define SWLO swr -#define SWHI swl -#endif - #ifdef USE_AENT #define AENT(x) \ .aent x, 0 @@ -306,28 +289,32 @@ _C_LABEL(x): /* * Call ast if required + * + * XXX Do we really need to disable interrupts? */ #define DO_AST \ 44: \ - PTR_LA s0, _C_LABEL(disableintr) ;\ - jalr s0 ;\ - nop ;\ - move a0, v0 ;\ + mfc0 t0, MIPS_COP_0_STATUS ;\ + and a0, t0, MIPS_SR_INT_IE ;\ + xor t0, a0, t0 ;\ + mtc0 t0, MIPS_COP_0_STATUS ;\ + COP0_SYNC ;\ GET_CPU_PCPU(s1) ;\ - lw s3, PC_CURPCB(s1) ;\ - lw s1, PC_CURTHREAD(s1) ;\ + PTR_L s3, PC_CURPCB(s1) ;\ + PTR_L s1, PC_CURTHREAD(s1) ;\ lw s2, TD_FLAGS(s1) ;\ li s0, TDF_ASTPENDING | TDF_NEEDRESCHED;\ and s2, s0 ;\ - PTR_LA s0, _C_LABEL(restoreintr) ;\ - jalr s0 ;\ - nop ;\ + mfc0 t0, MIPS_COP_0_STATUS ;\ + or t0, a0, t0 ;\ + mtc0 t0, MIPS_COP_0_STATUS ;\ + COP0_SYNC ;\ beq s2, zero, 4f ;\ nop ;\ PTR_LA s0, _C_LABEL(ast) ;\ jalr s0 ;\ PTR_ADDU a0, s3, U_PCB_REGS ;\ - j 44b ;\ + j 44b ;\ nop ;\ 4: @@ -382,6 +369,45 @@ _C_LABEL(x): #define CALLFRAME_SP (CALLFRAME_SIZ - 2 * SZREG) #define CALLFRAME_RA (CALLFRAME_SIZ - 1 * SZREG) +/* + * Endian-independent assembly-code aliases for unaligned memory accesses. 
+ */ +#if _BYTE_ORDER == _LITTLE_ENDIAN +# define LWHI lwr +# define LWLO lwl +# define SWHI swr +# define SWLO swl +# if SZREG == 4 +# define REG_LHI lwr +# define REG_LLO lwl +# define REG_SHI swr +# define REG_SLO swl +# else +# define REG_LHI ldr +# define REG_LLO ldl +# define REG_SHI sdr +# define REG_SLO sdl +# endif +#endif + +#if _BYTE_ORDER == _BIG_ENDIAN +# define LWHI lwl +# define LWLO lwr +# define SWHI swl +# define SWLO swr +# if SZREG == 4 +# define REG_LHI lwl +# define REG_LLO lwr +# define REG_SHI swl +# define REG_SLO swr +# else +# define REG_LHI ldl +# define REG_LLO ldr +# define REG_SHI sdl +# define REG_SLO sdr +# endif +#endif + /* * While it would be nice to be compatible with the SGI * REG_L and REG_S macros, because they do not take parameters, it @@ -402,6 +428,7 @@ _C_LABEL(x): #define PTR_SUBIU subu #define PTR_L lw #define PTR_LA la +#define PTR_LI li #define PTR_S sw #define PTR_SLL sll #define PTR_SLLV sllv @@ -424,6 +451,7 @@ _C_LABEL(x): #define PTR_SUBIU dsubu #define PTR_L ld #define PTR_LA dla +#define PTR_LI dli #define PTR_S sd #define PTR_SLL dsll #define PTR_SLLV dsllv @@ -765,7 +793,7 @@ _C_LABEL(x): #endif #define GET_CPU_PCPU(reg) \ - lw reg, _C_LABEL(pcpup); + PTR_L reg, _C_LABEL(pcpup); /* * Description of the setjmp buffer diff --git a/sys/mips/include/cpu.h b/sys/mips/include/cpu.h index 1ec1cfc8a86..83b6a85fe31 100644 --- a/sys/mips/include/cpu.h +++ b/sys/mips/include/cpu.h @@ -47,7 +47,6 @@ #ifndef _MACHINE_CPU_H_ #define _MACHINE_CPU_H_ -#include #include #define MIPS_KSEG0_LARGEST_PHYS 0x20000000 @@ -334,6 +333,7 @@ #define cpu_swapout(p) panic("cpu_swapout: can't get here"); #ifndef _LOCORE +#include #include /* * Arguments to hardclock and gatherstats encapsulate the previous @@ -342,7 +342,6 @@ #define clockframe trapframe /* Use normal trap frame */ #define CLKF_USERMODE(framep) ((framep)->sr & SR_KSU_USER) -#define CLKF_BASEPRI(framep) ((framep)->cpl == 0) #define CLKF_PC(framep) ((framep)->pc) #define 
CLKF_INTR(framep) (0) #define MIPS_CLKF_INTR() (intr_nesting_level >= 1) @@ -350,6 +349,11 @@ #define TRAPF_PC(framep) ((framep)->pc) #define cpu_getstack(td) ((td)->td_frame->sp) +/* + * A machine-independent interface to the CPU's counter. + */ +#define get_cyclecount() mips_rd_count() + /* * CPU identification, from PRID register. */ @@ -454,13 +458,9 @@ extern union cpuprid fpu_id; struct tlb; struct user; -u_int32_t mips_cp0_config1_read(void); int Mips_ConfigCache(void); void Mips_SetWIRED(int); void Mips_SetPID(int); -u_int Mips_GetCOUNT(void); -void Mips_SetCOMPARE(u_int); -u_int Mips_GetCOMPARE(void); void Mips_SyncCache(void); void Mips_SyncDCache(vm_offset_t, int); @@ -542,18 +542,6 @@ extern int intr_nesting_level; * Low level access routines to CPU registers */ -void setsoftintr0(void); -void clearsoftintr0(void); -void setsoftintr1(void); -void clearsoftintr1(void); - - -u_int32_t mips_cp0_status_read(void); -void mips_cp0_status_write(u_int32_t); - -int disableintr(void); -void restoreintr(int); -int enableintr(void); int Mips_TLBGetPID(void); void swi_vm(void *); @@ -562,7 +550,6 @@ void cpu_reset(void); u_int32_t set_intr_mask(u_int32_t); u_int32_t get_intr_mask(void); -u_int32_t get_cyclecount(void); #define cpu_spinwait() /* nothing */ diff --git a/sys/mips/include/cpufunc.h b/sys/mips/include/cpufunc.h index f9100eaeb32..6520671137a 100644 --- a/sys/mips/include/cpufunc.h +++ b/sys/mips/include/cpufunc.h @@ -1,5 +1,29 @@ /* $OpenBSD: pio.h,v 1.2 1998/09/15 10:50:12 pefo Exp $ */ +/*- + * Copyright (c) 2002-2004 Juli Mallett. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ /* * Copyright (c) 1995-1999 Per Fogelstrom. All rights reserved. 
* @@ -58,15 +82,17 @@ mips_barrier(void) : : : "memory"); } +static __inline void +mips_cp0_sync(void) +{ + __asm __volatile (__XSTRING(COP0_SYNC)); +} + static __inline void mips_wbflush(void) { __asm __volatile ("sync" : : : "memory"); mips_barrier(); -#if 0 - __asm __volatile("mtc0 %0, $12\n" /* MIPS_COP_0_STATUS */ - : : "r" (flag)); -#endif } static __inline void @@ -82,54 +108,7 @@ mips_write_membar(void) } #ifdef _KERNEL - -static __inline void -mips_tlbp(void) -{ - __asm __volatile ("tlbp"); - mips_barrier(); -#if 0 - register_t ret; - register_t tmp; - - __asm __volatile("mfc0 %0, $12\n" /* MIPS_COP_0_STATUS */ - "and %1, %0, $~1\n" /* MIPS_SR_INT_IE */ - "mtc0 %1, $12\n" /* MIPS_COP_0_STATUS */ - : "=r" (ret), "=r" (tmp)); - return (ret); -#endif -} - -static __inline void -mips_tlbr(void) -{ - __asm __volatile ("tlbr"); - mips_barrier(); -} - -static __inline void -mips_tlbwi(void) -{ - __asm __volatile ("tlbwi"); - mips_barrier(); -#if 0 - __asm __volatile("mfc %0, $12\n" /* MIPS_COP_0_STATUS */ - "or %0, %0, $1\n" /* MIPS_SR_INT_IE */ - "mtc0 %0, $12\n" /* MIPS_COP_0_STATUS */ - : "=r" (tmp)); -#endif -} - -static __inline void -mips_tlbwr(void) -{ - __asm __volatile ("tlbwr"); - mips_barrier(); -} - - -#if 0 /* XXX mips64 */ - +#if defined(__mips_n32) || defined(__mips_n64) #define MIPS_RDRW64_COP0(n,r) \ static __inline uint64_t \ mips_rd_ ## n (void) \ @@ -152,10 +131,12 @@ mips_wr_ ## n (uint64_t a0) \ mips_barrier(); \ } struct __hack +#if defined(__mips_n64) MIPS_RDRW64_COP0(entrylo0, MIPS_COP_0_TLB_LO0); MIPS_RDRW64_COP0(entrylo1, MIPS_COP_0_TLB_LO1); MIPS_RDRW64_COP0(entryhi, MIPS_COP_0_TLB_HI); MIPS_RDRW64_COP0(pagemask, MIPS_COP_0_TLB_PG_MASK); +#endif MIPS_RDRW64_COP0(xcontext, MIPS_COP_0_TLB_XCONTEXT); #undef MIPS_RDRW64_COP0 @@ -185,7 +166,7 @@ mips_wr_ ## n (uint32_t a0) \ #define MIPS_RDRW32_COP0_SEL(n,r,s) \ static __inline uint32_t \ -mips_rd_ ## n ## s(void) \ +mips_rd_ ## n(void) \ { \ int v0; \ __asm __volatile ("mfc0 %[v0], 
$"__XSTRING(r)", "__XSTRING(s)";" \ @@ -194,7 +175,7 @@ mips_rd_ ## n ## s(void) \ return (v0); \ } \ static __inline void \ -mips_wr_ ## n ## s(uint32_t a0) \ +mips_wr_ ## n(uint32_t a0) \ { \ __asm __volatile ("mtc0 %[a0], $"__XSTRING(r)", "__XSTRING(s)";" \ __XSTRING(COP0_SYNC)";" \ @@ -220,9 +201,9 @@ static __inline void mips_sync_icache (void) MIPS_RDRW32_COP0(compare, MIPS_COP_0_COMPARE); MIPS_RDRW32_COP0(config, MIPS_COP_0_CONFIG); -MIPS_RDRW32_COP0_SEL(config, MIPS_COP_0_CONFIG, 1); -MIPS_RDRW32_COP0_SEL(config, MIPS_COP_0_CONFIG, 2); -MIPS_RDRW32_COP0_SEL(config, MIPS_COP_0_CONFIG, 3); +MIPS_RDRW32_COP0_SEL(config1, MIPS_COP_0_CONFIG, 1); +MIPS_RDRW32_COP0_SEL(config2, MIPS_COP_0_CONFIG, 2); +MIPS_RDRW32_COP0_SEL(config3, MIPS_COP_0_CONFIG, 3); MIPS_RDRW32_COP0(count, MIPS_COP_0_COUNT); MIPS_RDRW32_COP0(index, MIPS_COP_0_TLB_INDEX); MIPS_RDRW32_COP0(wired, MIPS_COP_0_TLB_WIRED); @@ -230,26 +211,28 @@ MIPS_RDRW32_COP0(cause, MIPS_COP_0_CAUSE); MIPS_RDRW32_COP0(status, MIPS_COP_0_STATUS); /* XXX: Some of these registers are specific to MIPS32. */ +#if !defined(__mips_n64) MIPS_RDRW32_COP0(entrylo0, MIPS_COP_0_TLB_LO0); MIPS_RDRW32_COP0(entrylo1, MIPS_COP_0_TLB_LO1); -MIPS_RDRW32_COP0(entrylow, MIPS_COP_0_TLB_LOW); MIPS_RDRW32_COP0(entryhi, MIPS_COP_0_TLB_HI); MIPS_RDRW32_COP0(pagemask, MIPS_COP_0_TLB_PG_MASK); +#endif MIPS_RDRW32_COP0(prid, MIPS_COP_0_PRID); +/* XXX 64-bit? 
*/ +MIPS_RDRW32_COP0_SEL(ebase, MIPS_COP_0_PRID, 1); MIPS_RDRW32_COP0(watchlo, MIPS_COP_0_WATCH_LO); -MIPS_RDRW32_COP0_SEL(watchlo, MIPS_COP_0_WATCH_LO, 1); -MIPS_RDRW32_COP0_SEL(watchlo, MIPS_COP_0_WATCH_LO, 2); -MIPS_RDRW32_COP0_SEL(watchlo, MIPS_COP_0_WATCH_LO, 3); +MIPS_RDRW32_COP0_SEL(watchlo1, MIPS_COP_0_WATCH_LO, 1); +MIPS_RDRW32_COP0_SEL(watchlo2, MIPS_COP_0_WATCH_LO, 2); +MIPS_RDRW32_COP0_SEL(watchlo3, MIPS_COP_0_WATCH_LO, 3); MIPS_RDRW32_COP0(watchhi, MIPS_COP_0_WATCH_HI); -MIPS_RDRW32_COP0_SEL(watchhi, MIPS_COP_0_WATCH_HI, 1); -MIPS_RDRW32_COP0_SEL(watchhi, MIPS_COP_0_WATCH_HI, 2); -MIPS_RDRW32_COP0_SEL(watchhi, MIPS_COP_0_WATCH_HI, 3); - -MIPS_RDRW32_COP0_SEL(perfcnt, MIPS_COP_0_PERFCNT, 0); -MIPS_RDRW32_COP0_SEL(perfcnt, MIPS_COP_0_PERFCNT, 1); -MIPS_RDRW32_COP0_SEL(perfcnt, MIPS_COP_0_PERFCNT, 2); -MIPS_RDRW32_COP0_SEL(perfcnt, MIPS_COP_0_PERFCNT, 3); +MIPS_RDRW32_COP0_SEL(watchhi1, MIPS_COP_0_WATCH_HI, 1); +MIPS_RDRW32_COP0_SEL(watchhi2, MIPS_COP_0_WATCH_HI, 2); +MIPS_RDRW32_COP0_SEL(watchhi3, MIPS_COP_0_WATCH_HI, 3); +MIPS_RDRW32_COP0_SEL(perfcnt0, MIPS_COP_0_PERFCNT, 0); +MIPS_RDRW32_COP0_SEL(perfcnt1, MIPS_COP_0_PERFCNT, 1); +MIPS_RDRW32_COP0_SEL(perfcnt2, MIPS_COP_0_PERFCNT, 2); +MIPS_RDRW32_COP0_SEL(perfcnt3, MIPS_COP_0_PERFCNT, 3); #undef MIPS_RDRW32_COP0 @@ -261,7 +244,7 @@ intr_disable(void) s = mips_rd_status(); mips_wr_status(s & ~MIPS_SR_INT_IE); - return (s); + return (s & MIPS_SR_INT_IE); } static __inline register_t @@ -275,7 +258,13 @@ intr_enable(void) return (s); } -#define intr_restore(s) mips_wr_status((s)) +static __inline void +intr_restore(register_t ie) +{ + if (ie == MIPS_SR_INT_IE) { + intr_enable(); + } +} static __inline void breakpoint(void) diff --git a/sys/mips/include/cpuregs.h b/sys/mips/include/cpuregs.h index 74e789c1416..3f4ffd208ba 100644 --- a/sys/mips/include/cpuregs.h +++ b/sys/mips/include/cpuregs.h @@ -78,21 +78,36 @@ * Caching of mapped addresses is controlled by bits in the TLB entry. 
*/ -#define MIPS_KUSEG_START 0x0 -#define MIPS_KSEG0_START 0x80000000 -#define MIPS_KSEG0_END 0x9fffffff -#define MIPS_KSEG1_START 0xa0000000 -#define MIPS_KSEG1_END 0xbfffffff -#define MIPS_KSSEG_START 0xc0000000 -#define MIPS_KSSEG_END 0xdfffffff +#if !defined(_LOCORE) +#define MIPS_KUSEG_START 0x00000000 +#define MIPS_KSEG0_START ((intptr_t)(int32_t)0x80000000) +#define MIPS_KSEG0_END ((intptr_t)(int32_t)0x9fffffff) +#define MIPS_KSEG1_START ((intptr_t)(int32_t)0xa0000000) +#define MIPS_KSEG1_END ((intptr_t)(int32_t)0xbfffffff) +#define MIPS_KSSEG_START ((intptr_t)(int32_t)0xc0000000) +#define MIPS_KSSEG_END ((intptr_t)(int32_t)0xdfffffff) +#define MIPS_KSEG3_START ((intptr_t)(int32_t)0xe0000000) +#define MIPS_KSEG3_END ((intptr_t)(int32_t)0xffffffff) + #define MIPS_KSEG2_START MIPS_KSSEG_START #define MIPS_KSEG2_END MIPS_KSSEG_END -#define MIPS_KSEG3_START 0xe0000000 -#define MIPS_KSEG3_END 0xffffffff +#endif + +#define MIPS_XKPHYS_START 0x8000000000000000 +#define MIPS_XKPHYS_END 0xbfffffffffffffff + +#define MIPS_XKPHYS_CCA_UC 0x02 /* Uncached. */ +#define MIPS_XKPHYS_CCA_CNC 0x03 /* Cacheable non-coherent. */ #define MIPS_PHYS_TO_XKPHYS(cca,x) \ ((0x2ULL << 62) | ((unsigned long long)(cca) << 59) | (x)) -#define MIPS_XKPHYS_TO_PHYS(x) ((x) & 0x0effffffffffffffULL) +#define MIPS_XKPHYS_TO_PHYS(x) ((x) & 0x07ffffffffffffffULL) + +#define MIPS_XUSEG_START 0x0000000000000000 +#define MIPS_XUSEG_END 0x0000010000000000 + +#define MIPS_XKSEG_START 0xc000000000000000 +#define MIPS_XKSEG_END 0xc00000ff80000000 /* CPU dependent mtc0 hazard hook */ #ifdef TARGET_OCTEON @@ -471,7 +486,6 @@ * (3=32bit, 6=64bit, i=impl dep) * 0 MIPS_COP_0_TLB_INDEX 3333 TLB Index. * 1 MIPS_COP_0_TLB_RANDOM 3333 TLB Random. - * 2 MIPS_COP_0_TLB_LOW 3... r3k TLB entry low. * 2 MIPS_COP_0_TLB_LO0 .636 r4k TLB entry low. * 3 MIPS_COP_0_TLB_LO1 .636 r4k TLB entry low, extended. * 4 MIPS_COP_0_TLB_CONTEXT 3636 TLB Context. 
@@ -531,10 +545,6 @@ #define MIPS_COP_0_EXC_PC _(14) #define MIPS_COP_0_PRID _(15) - -/* MIPS-I */ -#define MIPS_COP_0_TLB_LOW _(2) - /* MIPS-III */ #define MIPS_COP_0_TLB_LO0 _(2) #define MIPS_COP_0_TLB_LO1 _(3) diff --git a/sys/mips/include/db_machdep.h b/sys/mips/include/db_machdep.h index d7bf69a427d..f0a84d2ee9a 100644 --- a/sys/mips/include/db_machdep.h +++ b/sys/mips/include/db_machdep.h @@ -38,7 +38,6 @@ #define _MIPS_DB_MACHDEP_H_ #include -#include #include #include diff --git a/sys/mips/include/defs.h b/sys/mips/include/defs.h deleted file mode 100644 index 20d093e029b..00000000000 --- a/sys/mips/include/defs.h +++ /dev/null @@ -1,256 +0,0 @@ -/* - * Copyright (c) 1996, 2001-2003, 2005, Juniper Networks, Inc. - * All rights reserved. - * - * defs.h -- Simple universal types and definitions for use by the microkernel - * Jim Hayes, November 1996 - * - * JNPR: defs.h,v 1.3.2.1 2007/09/10 08:16:32 girish - * $FreeBSD$ - */ - -#ifndef __DEFS_H__ -#define __DEFS_H__ - -/* - * Paranoid compilation. If defined, the PARANOID flag will enable asserts, - * data structure magic stamping and a suite of other debug tools. To disable - * it, comment out its definition. - */ -#define PARANOID - -/* - * This is the ONLY place you should see hardware specific information - * encoded as #ifdefs. (Well, except for stdarg.h, perhaps.) - * I apologize in advance! - */ -#include -#define CPU_GOT_ONE - -#if !defined(CPU_GOT_ONE) -#error "YOU NEED TO SPECIFY ONE CPU TYPE TO USE THIS FILE" -#endif - -#ifdef TRUE -#undef TRUE -#endif - -#ifdef FALSE -#undef FALSE -#endif - -typedef enum boolean_ -{ - FALSE = 0, - TRUE = 1 -} boolean; - -/* - * Make NULL a pointer within the microkernel environment to catch - * pointer semantic miscreants. - * - * The reason it's conditional here is that some of the BSD includes - * define it multiple times as a straight integer and GCC barfs on - * the alternative prototypes. 
- */ - -#ifndef NULL -#define NULL (void *)0 -#endif - -/* - * Define some standard sized types. (Defined in cpu-specific type files - * included above.) - */ - -#define MAX_U8 255 -#define MAX_S8 128 -#define MIN_S8 -127 - -#define MAX_U16 0xffff -#define MIN_S16 ((int16_t)(1 << 15)) -#define MAX_S16 ((int16_t)~MIN_S16) - -#define MAX_U32 0xffffffff -#define MIN_S32 ((int32_t)(1 << 31)) -#define MAX_S32 ((int32_t)~MIN_S32) - -#define MAX_U64 ((u_int64_t)0 - 1) -#define MAX_S64 ((int64_t)(MAX_U64 >> 1)) -#define MIN_S64 (-MAX_S64-1) - -/* - * Solaris uses _SIZE_T to mark the fact that "size_t" has already - * been defined. _SYS_TYPES_H_ is used by BSD. - * - */ -#if !defined(_SYS_TYPES_H_) && !defined(_SIZE_T) -typedef UNSIGNED_32 size_t; -#define _SIZE_T -#endif - -#if !defined(_SYS_TYPES_H_) -typedef char * caddr_t; - -typedef UNSIGNED_8 u_int8_t; -typedef SIGNED_8 int8_t; - -typedef UNSIGNED_16 u_int16_t; -typedef SIGNED_16 int16_t; - -typedef UNSIGNED_32 u_int32_t; -typedef SIGNED_32 int32_t; - -typedef UNSIGNED_64 u_int64_t; -typedef SIGNED_64 int64_t; - -typedef UNSIGNED_32 u_long; -typedef UNSIGNED_16 u_short; -typedef UNSIGNED_8 u_char; - - -/* - * Define the standard terminology used in the diag software - * with regards to bytes, words, etc. - * BYTE = 8 bits - * HWORD (halfword) = 2 bytes or 16 bits - * WORD = 4 bytes or 32 bits - * QUAD = 8 bytes or 64 bits - * - * (The term QUAD seems less-than-intuitive here, but it is - * derived from BSD sources where it is defined as int64_t.) - * - * For consistency use the following defines wherever appropriate. 
- */ - -typedef enum { - NBI_BYTE = (sizeof(u_int8_t) * 8), - NBI_HWORD = (sizeof(u_int16_t) * 8), - NBI_WORD = (sizeof(u_int32_t) * 8), - NBI_QUAD = (sizeof(u_int64_t) * 8) -} num_bits_t; - -typedef enum { - NBY_BYTE = sizeof(u_int8_t), - NBY_HWORD = sizeof(u_int16_t), - NBY_WORD = sizeof(u_int32_t), - NBY_QUAD = sizeof(u_int64_t) -} num_bytes_t; - -/* - * We assume that pid values are 16 bit integers - */ - -typedef u_int16_t pid_t; - -#endif /* _SYS_TYPES_H_ */ - -typedef UNSIGNED_32 magic_t; -typedef int status_t; - -#define BITS_IN_BYTE 8 - -/* - * Packed definition. We use this for fields in network frames where we - * don't want the compiler to pack out to even alignment - */ - -#ifdef PACKED -#undef PACKED -#endif -#define PACKED(x) x __attribute__ ((packed)) - -/* - * __unused is a FreeBSDism that prevents the compiler from choking - * on function parameters that remain unused through the life of a - * function. This is not an issue for the Cygnus toolchain. In general - * it SHOULD NOT BE USED in the martini embedded software repository. - * It should only be used inside of shared code. 
- */ -#ifndef __unused -#define __unused __attribute__ ((__unused__)) -#endif - -/* - * Basic memory multiples - */ - -#define SIZE_1K 0x00000400 -#define SIZE_2K 0x00000800 -#define SIZE_4K 0x00001000 -#define SIZE_8K 0x00002000 -#define SIZE_16K 0x00004000 -#define SIZE_32K 0x00008000 -#define SIZE_64K 0x00010000 -#define SIZE_128K 0x00020000 -#define SIZE_256K 0x00040000 -#define SIZE_512K 0x00080000 -#define SIZE_1M 0x00100000 -#define SIZE_2M 0x00200000 -#define SIZE_4M 0x00400000 -#define SIZE_8M 0x00800000 -#define SIZE_16M 0x01000000 -#define SIZE_32M 0x02000000 -#define SIZE_64M 0x04000000 -#define SIZE_128M 0x08000000 -#define SIZE_256M 0x10000000 -#define SIZE_512M 0x20000000 -#define SIZE_1G 0x40000000 -#define SIZE_2G 0x80000000 - -/* - * swap16_inline - * swap32_inline - * - * Byteswap a 16 and 32 bit quantities - */ - -static inline u_int16_t -swap16_inline(u_int16_t data) -{ - return(((data & 0x00ff) << 8) | - ((data & 0xff00) >> 8)); -} - -static inline u_int32_t -swap32_inline(u_int32_t data) -{ - return(((data & 0x000000ff) << 24) | - ((data & 0x0000ff00) << 8) | - ((data & 0x00ff0000) >> 8) | - ((data & 0xff000000) >> 24)); -} - -/* - * Define errno_t here as it is needed by the rom and ukernel - */ -typedef u_int32_t errno_t; - -#define EOK 0 - -/* - * Define the main communication structure used for passing - * information from the rom to the ukernel (done here as it is - * used by them both) - */ -typedef struct rom_info_ rom_info_t; - -/* - * Typedef the return code from the ukernel to the ROM - */ -typedef u_int32_t rom_return_t; - -/* - * Pull in the relevant global environment header file - * - * This file is shared by the uKernel and the system simulation effort. 
- */ -#if defined(ENV_UKERN) || defined (ENV_SYS_SIM) -#include "ukern.h" -#endif /* ENV_UKERN */ - -#if defined(ENV_ROM) -#include "rom.h" -#endif - -#endif /* __DEFS_H__ */ diff --git a/sys/mips/include/param.h b/sys/mips/include/param.h index 9d487022439..06cdeac0483 100644 --- a/sys/mips/include/param.h +++ b/sys/mips/include/param.h @@ -46,9 +46,7 @@ #include #ifdef _KERNEL -#ifdef _LOCORE -#include -#else +#ifndef _LOCORE #include #endif #endif @@ -102,45 +100,28 @@ #define CACHE_LINE_SHIFT 6 #define CACHE_LINE_SIZE (1 << CACHE_LINE_SHIFT) -#define NBPG 4096 /* bytes/page */ -#define PGOFSET (NBPG-1) /* byte offset into page */ -#define PGSHIFT 12 /* LOG2(NBPG) */ - #define PAGE_SHIFT 12 /* LOG2(PAGE_SIZE) */ #define PAGE_SIZE (1<> PGSHIFT) - #define BLKDEV_IOSIZE 2048 /* xxx: Why is this 1/2 page? */ #define MAXDUMPPGS 1 /* xxx: why is this only one? */ /* * The kernel stack needs to be aligned on a (PAGE_SIZE * 2) boundary. - * - * Although we allocate 3 pages for the kernel stack we end up using - * only the 2 pages that are aligned on a (PAGE_SIZE * 2) boundary. */ -#define KSTACK_PAGES 3 /* kernel stack*/ -#define KSTACK_GUARD_PAGES 1 /* pages of kstack guard; 0 disables */ +#define KSTACK_PAGES 2 /* kernel stack*/ +#define KSTACK_GUARD_PAGES 2 /* pages of kstack guard; 0 disables */ #define UPAGES 2 /* pages ("clicks") (4096 bytes) to disk blocks */ -#define ctod(x) ((x) << (PGSHIFT - DEV_BSHIFT)) -#define dtoc(x) ((x) >> (PGSHIFT - DEV_BSHIFT)) +#define ctod(x) ((x) << (PAGE_SHIFT - DEV_BSHIFT)) +#define dtoc(x) ((x) >> (PAGE_SHIFT - DEV_BSHIFT)) /* * Map a ``block device block'' to a file system block. 
@@ -151,18 +132,18 @@ #define bdbtofsb(bn) ((bn) / (BLKDEV_IOSIZE/DEV_BSIZE)) /* - * Conversion macros + * Mach derived conversion macros */ -#define mips_round_page(x) ((((unsigned long)(x)) + NBPG - 1) & ~(NBPG-1)) -#define mips_trunc_page(x) ((unsigned long)(x) & ~(NBPG-1)) -#define mips_btop(x) ((unsigned long)(x) >> PGSHIFT) -#define mips_ptob(x) ((unsigned long)(x) << PGSHIFT) -#define round_page mips_round_page -#define trunc_page mips_trunc_page -#define atop(x) ((unsigned long)(x) >> PAGE_SHIFT) -#define ptoa(x) ((unsigned long)(x) << PAGE_SHIFT) +#define round_page(x) (((unsigned long)(x) + PAGE_MASK) & ~PAGE_MASK) +#define trunc_page(x) ((unsigned long)(x) & ~PAGE_MASK) -#define pgtok(x) ((x) * (PAGE_SIZE / 1024)) +#define atop(x) ((unsigned long)(x) >> PAGE_SHIFT) +#define ptoa(x) ((unsigned long)(x) << PAGE_SHIFT) + +#define mips_btop(x) ((unsigned long)(x) >> PAGE_SHIFT) +#define mips_ptob(x) ((unsigned long)(x) << PAGE_SHIFT) + +#define pgtok(x) ((unsigned long)(x) * (PAGE_SIZE / 1024)) #ifndef _KERNEL #define DELAY(n) { register int N = (n); while (--N > 0); } diff --git a/sys/mips/include/pcb.h b/sys/mips/include/pcb.h index f95ef4d8d4a..e0982e35f24 100644 --- a/sys/mips/include/pcb.h +++ b/sys/mips/include/pcb.h @@ -51,7 +51,7 @@ struct pcb { struct trapframe pcb_regs; /* saved CPU and registers */ __register_t pcb_context[14]; /* kernel context for resume */ - int pcb_onfault; /* for copyin/copyout faults */ + void *pcb_onfault; /* for copyin/copyout faults */ register_t pcb_tpc; }; diff --git a/sys/mips/include/pmap.h b/sys/mips/include/pmap.h index 51ff5512e79..0091d587917 100644 --- a/sys/mips/include/pmap.h +++ b/sys/mips/include/pmap.h @@ -88,6 +88,8 @@ struct pmap { pd_entry_t *pm_segtab; /* KVA of segment table */ TAILQ_HEAD(, pv_entry) pm_pvlist; /* list of mappings in * pmap */ + uint32_t pm_gen_count; /* generation count (pmap lock dropped) */ + u_int pm_retries; int pm_active; /* active on cpus */ struct { u_int32_t asid:ASID_BITS; /* 
TLB address space tag */ @@ -160,14 +162,8 @@ typedef struct pv_entry { extern vm_offset_t phys_avail[PHYS_AVAIL_ENTRIES + 2]; extern vm_offset_t physmem_desc[PHYS_AVAIL_ENTRIES + 2]; -extern char *ptvmmap; /* poor name! */ extern vm_offset_t virtual_avail; extern vm_offset_t virtual_end; -extern pd_entry_t *segbase; - -extern vm_paddr_t mips_wired_tlb_physmem_start; -extern vm_paddr_t mips_wired_tlb_physmem_end; -extern u_int need_wired_tlb_page_pool; #define pmap_page_get_memattr(m) VM_MEMATTR_DEFAULT #define pmap_page_is_mapped(m) (!TAILQ_EMPTY(&(m)->md.pv_list)) @@ -179,7 +175,6 @@ void pmap_unmapdev(vm_offset_t, vm_size_t); vm_offset_t pmap_steal_memory(vm_size_t size); void pmap_set_modified(vm_offset_t pa); int page_is_managed(vm_offset_t pa); -void pmap_page_is_free(vm_page_t m); void pmap_kenter(vm_offset_t va, vm_paddr_t pa); void pmap_kremove(vm_offset_t va); void *pmap_kenter_temporary(vm_paddr_t pa, int i); @@ -188,37 +183,6 @@ int pmap_compute_pages_to_dump(void); void pmap_update_page(pmap_t pmap, vm_offset_t va, pt_entry_t pte); void pmap_flush_pvcache(vm_page_t m); -/* - * floating virtual pages (FPAGES) - * - * These are the reserved virtual memory areas which can be - * mapped to any physical memory. - */ -#define FPAGES 2 -#define FPAGES_SHARED 2 -#define FSPACE ((FPAGES * MAXCPU + FPAGES_SHARED) * PAGE_SIZE) -#define PMAP_FPAGE1 0x00 /* Used by pmap_zero_page & - * pmap_copy_page */ -#define PMAP_FPAGE2 0x01 /* Used by pmap_copy_page */ - -#define PMAP_FPAGE3 0x00 /* Used by pmap_zero_page_idle */ -#define PMAP_FPAGE_KENTER_TEMP 0x01 /* Used by coredump */ - -struct fpage { - vm_offset_t kva; - u_int state; -}; - -struct sysmaps { - struct mtx lock; - struct fpage fp[FPAGES]; -}; - -vm_offset_t -pmap_map_fpage(vm_paddr_t pa, struct fpage *fp, - boolean_t check_unmaped); -void pmap_unmap_fpage(vm_paddr_t pa, struct fpage *fp); - /* * Function to save TLB contents so that they may be inspected in the debugger. 
*/ diff --git a/sys/mips/include/proc.h b/sys/mips/include/proc.h index 99dab78e165..e71c858788d 100644 --- a/sys/mips/include/proc.h +++ b/sys/mips/include/proc.h @@ -44,7 +44,7 @@ */ struct mdthread { int md_flags; /* machine-dependent flags */ - int md_upte[KSTACK_PAGES - 1]; /* ptes for mapping u pcb */ + int md_upte[KSTACK_PAGES]; /* ptes for mapping u pcb */ int md_ss_addr; /* single step address for ptrace */ int md_ss_instr; /* single step instruction for ptrace */ register_t md_saved_intr; @@ -53,7 +53,6 @@ struct mdthread { int md_pc_ctrl; /* performance counter control */ int md_pc_count; /* performance counter */ int md_pc_spill; /* performance counter spill */ - vm_offset_t md_realstack; void *md_tls; }; @@ -69,4 +68,10 @@ struct thread; void mips_cpu_switch(struct thread *, struct thread *, struct mtx *); void mips_cpu_throw(struct thread *, struct thread *); +#ifdef __mips_n64 +#define KINFO_PROC_SIZE 1088 +#else +#define KINFO_PROC_SIZE 816 +#endif + #endif /* !_MACHINE_PROC_H_ */ diff --git a/sys/mips/include/profile.h b/sys/mips/include/profile.h index 728a468dbf4..6d976d90e77 100644 --- a/sys/mips/include/profile.h +++ b/sys/mips/include/profile.h @@ -84,17 +84,17 @@ #ifdef SMP extern int mcount_lock; #define MCOUNT_ENTER(s) { \ - s = disable_intr(); \ + s = intr_disable(); \ while (!atomic_cmpset_acq_int(&mcount_lock, 0, 1)) \ /* nothing */ ; \ } #define MCOUNT_EXIT(s) { \ atomic_store_rel_int(&mcount_lock, 0); \ - enableintr(s); \ + intr_restore(s); \ } #else -#define MCOUNT_ENTER(s) { s = disable_intr(); } -#define MCOUNT_EXIT(s) (enableintr(s)) +#define MCOUNT_ENTER(s) { s = intr_disable(); } +#define MCOUNT_EXIT(s) (intr_restore(s)) #endif /* REVISIT for mips */ diff --git a/sys/mips/include/pte.h b/sys/mips/include/pte.h index db26cbb0929..e3b46ca80d5 100644 --- a/sys/mips/include/pte.h +++ b/sys/mips/include/pte.h @@ -83,7 +83,7 @@ struct tlb { int tlb_lo1; }; -typedef unsigned long pt_entry_t; +typedef unsigned int pt_entry_t; typedef 
pt_entry_t *pd_entry_t; #define PDESIZE sizeof(pd_entry_t) /* for assembly files */ @@ -126,7 +126,7 @@ typedef pt_entry_t *pd_entry_t; #define pfn_to_vad(x) (((x) & PTE_FRAME) << PTE_SHIFT) /* User virtual to pte offset in page table */ -#define vad_to_pte_offset(adr) (((adr) >> PGSHIFT) & (NPTEPG -1)) +#define vad_to_pte_offset(adr) (((adr) >> PAGE_SHIFT) & (NPTEPG -1)) #define mips_pg_v(entry) ((entry) & PTE_V) #define mips_pg_wired(entry) ((entry) & PTE_WIRED) diff --git a/sys/mips/include/queue.h b/sys/mips/include/queue.h deleted file mode 100644 index d992332c84e..00000000000 --- a/sys/mips/include/queue.h +++ /dev/null @@ -1,171 +0,0 @@ -/*- - * Copyright (c) 1996-1997, 2001, 2005, Juniper Networks, Inc. - * All rights reserved. - * Jim Hayes, November 1996 - * - * queue.h - Description of uKernel queues, for the Juniper Kernel - * - * JNPR: queue.h,v 1.1 2006/08/07 05:38:57 katta - * $FreeBSD$ - * - */ - -#ifndef __QUEUE_H__ -#define __QUEUE_H__ - -/*--------------------------------------------------------------------------- - * QUEUE MANAGEMENT DOCUMENTATION - */ - -/* - -------- - Q_INIT() - -------- - - void q_init(void) - - Initialize the queue management system for the microkernel. - This initializes the debugging flags and sets up accounting. - - --------- - Q_ALLOC() - --------- - - queue_t *q_alloc() - - Allocates a queue from kernel memory, and initializes it for you. - - The default initialization provides a queue that is unbounded. - - If you want to be bounded with special features, use q_control - after initialization. - - q_alloc() returns NULL in the face of peril or low memory. - - -------- - Q_FREE() - -------- - - void *q_free(queue_t *queue_pointer) - - Returns a queue to kernel memory, and frees the queue contents - for you using free() and complains (with a traceback) that you - tried to kill of a non-empty queue. - - If any threads are waiting on the queue, wake them up. 
- - ----------- - Q_CONTROL() - ----------- - void q_control(queue_t *queue_pointer, queue_size_t max_queue_size); - - For now, allows you to limit queue growth. - - ---------------- - Q_DEQUEUE_WAIT() ** MAY CAUSE THREAD TO BLOCK/CANNOT BE CALLED FROM ISRs ** - ---------------- - - void *q_dequeue_wait(queue_t *queue_pointer, wakeup_mask_t *mask) - - Removes and returns a pointer to the next message in the specified - queue. If the queue is empty, the calling thread goes to sleep - until something is queued to the queue. If this call returns NULL, - then an extraordinary event requires this thread's attention-- - check errno in this case. - - --------- - Q_DEQUEUE ** CAN BE CALLED FROM ISRs ** - --------- - - void *q_dequeue(queue_t *queue_pointer) - - Just like q_dequeue_wait(), but instead of blocking, return NULL. - - ----------- - Q_ENQUEUE() ** CAN BE CALLED FROM ISRs ** - ----------- - - boolean q_enqueue(queue_t *queue_pointer, void *element_pointer) - - Add the element to the end of the named queue. If the add fails - because a limit has been reached, return TRUE. Otherwise return - FALSE if everything went OK. - - ---------- - Q_URGENT() - ---------- - - boolean q_urgent(queue_t *queue_pointer, void *element_pointer) - - Same as q_enqueue(), except this element will be placed at the top - of the queue, and will be picked off at the next q_dequeue_wait() - operation. - - -------- - Q_PEEK() ** CAN BE CALLED FROM ISRs ** - -------- - - void *q_peek(queue_t *queue_pointer) - - Returns a pointer to the top element of the queue without actually - dequeuing it. Returns NULL of the queue is empty. - - This routine will never block. - - ---------- - Q_DELETE() - ---------- - - void q_delete(queue_t *queue_pointer, void *element_pointer) - - Delete the element_pointer from the queue, if it exists. This - isn't speedy, and isn't meant for tasks requiring performance. 
- It's primary use is to pull something off the queue when you know - in the common case that it's gonna be at or near the top of the - list. (I.e. waking a thread from a wake list when extraordinary - conditions exist, and you have to pluck it from the middle of the - list.) - - This routine does not block or return anything. - - -------- - Q_SIZE() - -------- - - queue_size_t q_size(queue_t *queue_pointer) - - Returns the number of elements in the queue. - - ------------ - Q_MAX_SIZE() - ------------ - - queue_size_t q_max_size(queue_t *queue_pointer); - - Returns the maximum size of this queue, or 0 if this queue is - unbounded. - -*/ - -/*------------------------------------------------------------------------- - * Basic queue management structures. - */ - -/* - * Typedefs - */ - -typedef u_int32_t queue_size_t; - -/* - * Prototypes - */ - -void q_init(void); -queue_t *q_alloc(void); -void *q_peek(queue_t *queue); -void *q_dequeue(queue_t *queue); -boolean q_enqueue(queue_t *queue, void *item); -boolean q_urgent(queue_t *queue, void *item); - -#endif /* __QUEUE_H__ */ diff --git a/sys/mips/include/regnum.h b/sys/mips/include/regnum.h index baa60bd8b86..1c22bb96961 100644 --- a/sys/mips/include/regnum.h +++ b/sys/mips/include/regnum.h @@ -42,10 +42,6 @@ #ifndef _MACHINE_REGNUM_H_ #define _MACHINE_REGNUM_H_ -#define STAND_ARG_SIZE 16 -#define STAND_FRAME_SIZE 24 -#define STAND_RA_OFFSET 20 - /* This must match the numbers * in pcb.h and is used by * swtch.S diff --git a/sys/mips/include/rm7000.h b/sys/mips/include/rm7000.h deleted file mode 100644 index f1c0c44a0db..00000000000 --- a/sys/mips/include/rm7000.h +++ /dev/null @@ -1,95 +0,0 @@ -/* $OpenBSD$ */ - -/* - * Copyright (c) 2000 Opsycon Open System Consulting AB (www.opsycon.se) - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by Opsycon Open System - * Consulting AB, Sweden under contract to QED, Inc. - * 4. The name of the author may not be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS - * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * JNPR: rm7000.h,v 1.2.4.1 2007/08/29 12:06:30 girish - * $FreeBSD$ - */ - -#ifndef _MACHINE_RM7000_H_ -#define _MACHINE_RM7000_H_ - -/* - * QED RM7000 specific defines. - */ - -/* - * Performance counters. 
- */ - -#define PCNT_SRC_CLOCKS 0x00 /* Clock cycles */ -#define PCNT_SRC_INSTR 0x01 /* Total instructions issued */ -#define PCNT_SRC_FPINSTR 0x02 /* Float instructions issued */ -#define PCNT_SRC_IINSTR 0x03 /* Integer instructions issued */ -#define PCNT_SRC_LOAD 0x04 /* Load instructions issued */ -#define PCNT_SRC_STORE 0x05 /* Store instructions issued */ -#define PCNT_SRC_DUAL 0x06 /* Dual issued pairs */ -#define PCNT_SRC_BRPREF 0x07 /* Branch prefetches */ -#define PCNT_SRC_EXTMISS 0x08 /* External cache misses */ -#define PCNT_SRC_STALL 0x09 /* Stall cycles */ -#define PCNT_SRC_SECMISS 0x0a /* Secondary cache misses */ -#define PCNT_SRC_INSMISS 0x0b /* Instruction cache misses */ -#define PCNT_SRC_DTAMISS 0x0c /* Data cache misses */ -#define PCNT_SRC_DTLBMISS 0x0d /* Data TLB misses */ -#define PCNT_SRC_ITLBMISS 0x0e /* Instruction TLB misses */ -#define PCNT_SRC_JTLBIMISS 0x0f /* Joint TLB instruction misses */ -#define PCNT_SRC_JTLBDMISS 0x10 /* Joint TLB data misses */ -#define PCNT_SRC_BRTAKEN 0x11 /* Branches taken */ -#define PCNT_SRC_BRISSUED 0x12 /* Branches issued */ -#define PCNT_SRC_SECWBACK 0x13 /* Secondary cache writebacks */ -#define PCNT_SRC_PRIWBACK 0x14 /* Primary cache writebacks */ -#define PCNT_SRC_DCSTALL 0x15 /* Dcache miss stall cycles */ -#define PCNT_SRC_MISS 0x16 /* Cache misses */ -#define PCNT_SRC_FPEXC 0x17 /* FP possible execption cycles */ -#define PCNT_SRC_MULSLIP 0x18 /* Slip cycles due to mult. busy */ -#define PCNT_SRC_CP0SLIP 0x19 /* CP0 Slip cycles */ -#define PCNT_SRC_LDSLIP 0x1a /* Slip cycles due to pend. non-b ld */ -#define PCNT_SRC_WBFULL 0x1b /* Write buffer full stall cycles */ -#define PCNT_SRC_CISTALL 0x1c /* Cache instruction stall cycles */ -#define PCNT_SRC_MULSTALL 0x1d /* Multiplier stall cycles */ -#define PCNT_SRC_ELDSTALL 0x1d /* Excepion stall due to non-b ld */ -#define PCNT_SRC_MAX 0x1d /* Maximum PCNT select code */ - -/* - * Counter control bits. 
- */ - -#define PCNT_CE 0x0400 /* Count enable */ -#define PCNT_UM 0x0200 /* Count in User mode */ -#define PCNT_KM 0x0100 /* Count in kernel mode */ - -/* - * Performance counter system call function codes. - */ -#define PCNT_FNC_SELECT 0x0001 /* Select counter source */ -#define PCNT_FNC_READ 0x0002 /* Read current value of counter */ - -#endif /* _MACHINE_RM7000_H_ */ diff --git a/sys/mips/include/sf_buf.h b/sys/mips/include/sf_buf.h index 0a9980cf3d7..b6ee1cc20b6 100644 --- a/sys/mips/include/sf_buf.h +++ b/sys/mips/include/sf_buf.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2003, 2005 Alan L. Cox + * Copyright (c) 2003 Alan L. Cox * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -23,29 +23,20 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * from: src/sys/i386/include/sf_buf.h,v 1.4 2005/02/13 06:23:13 alc * $FreeBSD$ */ #ifndef _MACHINE_SF_BUF_H_ -#define _MACHINE_SF_BUF_H_ +#define _MACHINE_SF_BUF_H_ #include -#include -#include -#include struct vm_page; struct sf_buf { - LIST_ENTRY(sf_buf) list_entry; /* list of buffers */ - TAILQ_ENTRY(sf_buf) free_entry; /* list of buffers */ + SLIST_ENTRY(sf_buf) free_list; /* list of free buffer slots */ struct vm_page *m; /* currently mapped page */ vm_offset_t kva; /* va of mapping */ - int ref_count; /* usage of this mapping */ -#ifdef SMP - cpumask_t cpumask; /* cpus on which mapping is valid */ -#endif }; static __inline vm_offset_t diff --git a/sys/mips/include/trap.h b/sys/mips/include/trap.h index f382e70f488..d8042382157 100644 --- a/sys/mips/include/trap.h +++ b/sys/mips/include/trap.h @@ -74,17 +74,17 @@ #if !defined(SMP) && (defined(DDB) || defined(DEBUG)) struct trapdebug { /* trap history buffer for debugging */ - u_int status; - u_int cause; - u_int vadr; - u_int pc; - u_int ra; - u_int sp; - u_int code; + register_t status; + register_t cause; + register_t vadr; + register_t pc; + register_t ra; + register_t sp; + 
register_t code; }; #define trapdebug_enter(x, cd) { \ - intrmask_t s = disableintr(); \ + register_t s = intr_disable(); \ trp->status = x->sr; \ trp->cause = x->cause; \ trp->vadr = x->badvaddr; \ @@ -94,7 +94,7 @@ struct trapdebug { /* trap history buffer for debugging */ trp->code = cd; \ if (++trp == &trapdebug[TRAPSIZE]) \ trp = trapdebug; \ - restoreintr(s); \ + intr_restore(s); \ } #define TRAPSIZE 10 /* Trap log buffer length */ @@ -116,7 +116,7 @@ void MipsTLBMissException(void); void MipsUserGenException(void); void MipsUserIntr(void); -u_int trap(struct trapframe *); +register_t trap(struct trapframe *); #ifndef LOCORE /* XXX */ int check_address(void *); diff --git a/sys/mips/include/vmparam.h b/sys/mips/include/vmparam.h index a5242934019..3b9b12e43d0 100644 --- a/sys/mips/include/vmparam.h +++ b/sys/mips/include/vmparam.h @@ -97,17 +97,19 @@ /* user/kernel map constants */ #define VM_MIN_ADDRESS ((vm_offset_t)0x00000000) +#define VM_MAX_ADDRESS ((vm_offset_t)(intptr_t)(int32_t)0xffffffff) + +#define VM_MINUSER_ADDRESS ((vm_offset_t)0x00000000) #define VM_MAXUSER_ADDRESS ((vm_offset_t)0x80000000) #define VM_MAX_MMAP_ADDR VM_MAXUSER_ADDRESS -#define VM_MAX_ADDRESS ((vm_offset_t)0x80000000) - -#ifndef VM_KERNEL_ALLOC_OFFSET -#define VM_KERNEL_ALLOC_OFFSET ((vm_offset_t)0x00000000) -#endif #define VM_MIN_KERNEL_ADDRESS ((vm_offset_t)0xC0000000) -#define VM_KERNEL_WIRED_ADDR_END (VM_MIN_KERNEL_ADDRESS + VM_KERNEL_ALLOC_OFFSET) -#define VM_MAX_KERNEL_ADDRESS ((vm_offset_t)0xFFFFC000) +#define VM_MAX_KERNEL_ADDRESS ((vm_offset_t)0xFFFFC000) +#if 0 +#define KERNBASE (VM_MIN_KERNEL_ADDRESS) +#else +#define KERNBASE ((vm_offset_t)(intptr_t)(int32_t)0x80000000) +#endif /* * Disable superpage reservations. (not sure if this is right @@ -150,9 +152,9 @@ #define VM_PHYSSEG_MAX 32 /* - * The physical address space is densely populated. + * The physical address space is sparsely populated. 
*/ -#define VM_PHYSSEG_DENSE +#define VM_PHYSSEG_SPARSE /* * Create three free page pools: VM_FREEPOOL_DEFAULT is the default pool @@ -179,23 +181,8 @@ */ #define VM_NFREEORDER 9 -/* - * XXXMIPS: This values need to be changed!!! - */ -#if 0 -#define VM_MIN_ADDRESS ((vm_offset_t)0x0000000000010000) -#define VM_MAXUSER_ADDRESS ((vm_offset_t)MIPS_KSEG0_START-1) -#define VM_MAX_ADDRESS ((vm_offset_t)0x0000000100000000) -#define VM_MIN_KERNEL_ADDRESS ((vm_offset_t)MIPS_KSEG3_START) -#define VM_MAX_KERNEL_ADDRESS ((vm_offset_t)MIPS_KSEG3_END) -#define KERNBASE (VM_MIN_KERNEL_ADDRESS) - -/* virtual sizes (bytes) for various kernel submaps */ -#define VM_KMEM_SIZE (16*1024*1024) /* XXX ??? */ -#endif - -#define NBSEG 0x400000 /* bytes/segment */ -#define SEGOFSET (NBSEG-1) /* byte offset into segment */ #define SEGSHIFT 22 /* LOG2(NBSEG) */ +#define NBSEG (1 << SEGSHIFT) /* bytes/segment */ +#define SEGOFSET (NBSEG-1) /* byte offset into segment */ #endif /* !_MACHINE_VMPARAM_H_ */ diff --git a/sys/mips/malta/gt_pci.c b/sys/mips/malta/gt_pci.c index 2f0eadaba4e..237e74227e0 100644 --- a/sys/mips/malta/gt_pci.c +++ b/sys/mips/malta/gt_pci.c @@ -109,8 +109,8 @@ struct gt_pci_softc { struct rman sc_mem_rman; struct rman sc_io_rman; struct rman sc_irq_rman; - uint32_t sc_mem; - uint32_t sc_io; + unsigned long sc_mem; + bus_space_handle_t sc_io; struct resource *sc_irq; struct intr_event *sc_eventstab[ICU_LEN]; diff --git a/sys/mips/mips/autoconf.c b/sys/mips/mips/autoconf.c index 99fd541927f..e16c05c147d 100644 --- a/sys/mips/mips/autoconf.c +++ b/sys/mips/mips/autoconf.c @@ -102,6 +102,7 @@ static void configure_final(dummy) void *dummy; { + intr_enable(); cninit_finish(); diff --git a/sys/mips/mips/copystr.S b/sys/mips/mips/copystr.S deleted file mode 100644 index 35e79057a53..00000000000 --- a/sys/mips/mips/copystr.S +++ /dev/null @@ -1,171 +0,0 @@ -/* $NetBSD: copy.S,v 1.5 2007/10/17 19:55:37 garbled Exp $ */ - -/*- - * Copyright (c) 1992, 1993 - * The Regents of the 
University of California. All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * Digital Equipment Corporation and Ralph Campbell. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * Copyright (C) 1989 Digital Equipment Corporation. - * Permission to use, copy, modify, and distribute this software and - * its documentation for any purpose and without fee is hereby granted, - * provided that the above copyright notice appears in all copies. 
- * Digital Equipment Corporation makes no representations about the - * suitability of this software for any purpose. It is provided "as is" - * without express or implied warranty. - * - * from: Header: /sprite/src/kernel/mach/ds3100.md/RCS/loMem.s, - * v 1.1 89/07/11 17:55:04 nelson Exp SPRITE (DECWRL) - * from: Header: /sprite/src/kernel/mach/ds3100.md/RCS/machAsm.s, - * v 9.2 90/01/29 18:00:39 shirriff Exp SPRITE (DECWRL) - * from: Header: /sprite/src/kernel/vm/ds3100.md/vmPmaxAsm.s, - * v 1.1 89/07/10 14:27:41 nelson Exp SPRITE (DECWRL) - * - * @(#)locore.s 8.5 (Berkeley) 1/4/94 - */ - -#include "assym.s" -#include -#include -__FBSDID("$FreeBSD$"); - -#include - -/* - * copystr(9) - * int copystr(const void *src, void *dst, size_t len, - * size_t *done) - */ -ENTRY(copystr) - .set noreorder - .set noat - move v0, zero - beqz a2, 2f - move t1, zero -1: subu a2, 1 /*XXX mips64 unsafe -- long */ - lbu t0, 0(a0) - PTR_ADDU a0, 1 - sb t0, 0(a1) - PTR_ADDU a1, 1 - beqz t0, 3f /* NULL - end of string*/ - addu t1, 1 /*XXX mips64 unsafe -- long */ - bnez a2, 1b - nop -2: /* ENAMETOOLONG */ - li v0, ENAMETOOLONG -3: /* done != NULL -> how many bytes were copied */ - beqz a3, 4f - nop - sw t1, 0(a3) /*XXX mips64 unsafe -- long */ -4: jr ra - nop - .set reorder - .set at -END(copystr) - -/* - * int copyinstr(void *uaddr, void *kaddr, size_t maxlen, size_t *lencopied) - * Copy a NIL-terminated string, at most maxlen characters long, from the - * user's address space. Return the number of characters copied (including - * the NIL) in *lencopied. If the string is too long, return ENAMETOOLONG; - * else return 0 or EFAULT. 
- */ -LEAF(copyinstr) - .set noreorder - .set noat - lw t2, pcpup - lw v1, PC_CURPCB(t2) - PTR_LA v0, _C_LABEL(copystrerr) - blt a0, zero, _C_LABEL(copystrerr) - sw v0, PCB_ONFAULT(v1) - move t0, a2 - beq a2, zero, 4f -1: - lbu v0, 0(a0) - subu a2, a2, 1 /*xxx mips64 unsafe -- long */ - beq v0, zero, 2f - sb v0, 0(a1) - PTR_ADDU a0, a0, 1 - bne a2, zero, 1b - PTR_ADDU a1, a1, 1 -4: - li v0, ENAMETOOLONG -2: - beq a3, zero, 3f - subu a2, t0, a2 /*xxx mips64 unsafe -- long */ - sw a2, 0(a3) /*xxx mips64 unsafe -- long */ -3: - j ra # v0 is 0 or ENAMETOOLONG - sw zero, PCB_ONFAULT(v1) - .set reorder - .set at -END(copyinstr) - -/* - * int copyoutstr(void *uaddr, void *kaddr, size_t maxlen, size_t *lencopied); - * Copy a NIL-terminated string, at most maxlen characters long, into the - * user's address space. Return the number of characters copied (including - * the NIL) in *lencopied. If the string is too long, return ENAMETOOLONG; - * else return 0 or EFAULT. - */ -LEAF(copyoutstr) - .set noreorder - .set noat - lw t2, pcpup - lw v1, PC_CURPCB(t2) - PTR_LA v0, _C_LABEL(copystrerr) - blt a1, zero, _C_LABEL(copystrerr) - sw v0, PCB_ONFAULT(v1) - move t0, a2 - beq a2, zero, 4f -1: - lbu v0, 0(a0) - subu a2, a2, 1 /*xxx mips64 unsafe -- long */ - beq v0, zero, 2f - sb v0, 0(a1) - PTR_ADDU a0, a0, 1 - bne a2, zero, 1b - PTR_ADDU a1, a1, 1 -4: - li v0, ENAMETOOLONG -2: - beq a3, zero, 3f - subu a2, t0, a2 /*xxx mips64 unsafe -- long */ - sw a2, 0(a3) /*xxx mips64 unsafe -- long */ -3: - j ra # v0 is 0 or ENAMETOOLONG - sw zero, PCB_ONFAULT(v1) - .set reorder - .set at -END(copyoutstr) - -LEAF(copystrerr) - sw zero, PCB_ONFAULT(v1) - j ra - li v0, EFAULT # return EFAULT -END(copystrerr) diff --git a/sys/mips/mips/db_trace.c b/sys/mips/mips/db_trace.c index 9417bcc56d5..116470c3329 100644 --- a/sys/mips/mips/db_trace.c +++ b/sys/mips/mips/db_trace.c @@ -181,7 +181,7 @@ loop: } /* check for bad PC */ /*XXX MIPS64 bad: These hard coded constants are lame */ - if (pc & 3 || pc 
< (uintptr_t)0x80000000 || pc >= (uintptr_t)edata) { + if (pc & 3 || pc < (uintptr_t)0x80000000) { (*printfn) ("PC 0x%x: not in kernel\n", pc); ra = 0; goto done; diff --git a/sys/mips/mips/exception.S b/sys/mips/mips/exception.S index 161e9475eff..d6fbc9cbd44 100644 --- a/sys/mips/mips/exception.S +++ b/sys/mips/mips/exception.S @@ -66,37 +66,14 @@ #include "assym.s" -#if defined(ISA_MIPS32) -#undef WITH_64BIT_CP0 -#elif defined(ISA_MIPS64) -#define WITH_64BIT_CP0 -#elif defined(ISA_MIPS3) -#define WITH_64BIT_CP0 -#else -#error "Please write the code for this ISA" -#endif +/* + * Clear the software-managed bits in a PTE in register pr. + */ +#define CLEAR_PTE_SWBITS(pr) \ + sll pr, 2 ; \ + srl pr, 2 # keep bottom 30 bits -#ifdef WITH_64BIT_CP0 -#define _SLL dsll -#define _SRL dsrl -#define _MFC0 dmfc0 -#define _MTC0 dmtc0 -#define WIRED_SHIFT 34 -#else -#define _SLL sll -#define _SRL srl -#define _MFC0 mfc0 -#define _MTC0 mtc0 -#define WIRED_SHIFT 2 -#endif .set noreorder # Noreorder is default style! -#if defined(ISA_MIPS32) - .set mips32 -#elif defined(ISA_MIPS64) - .set mips64 -#elif defined(ISA_MIPS3) - .set mips3 -#endif /* * Reasonable limit @@ -125,12 +102,12 @@ * * */ - - .set noat VECTOR(MipsTLBMiss, unknown) - j _C_LABEL(MipsDoTLBMiss) - mfc0 k0, COP_0_BAD_VADDR # get the fault address - nop + .set push + .set noat + j MipsDoTLBMiss + MFC0 k0, COP_0_BAD_VADDR # get the fault address + .set pop VECTOR_END(MipsTLBMiss) /* @@ -145,42 +122,40 @@ VECTOR_END(MipsTLBMiss) * let the processor trap to load the correct value after service. 
*---------------------------------------------------------------------------- */ + .set push + .set noat MipsDoTLBMiss: - #k0 already has BadVA - bltz k0, 1f #02: k0<0 -> 1f (kernel fault) - srl k0, k0, SEGSHIFT - 2 #03: k0=seg offset (almost) + bltz k0, 1f #02: k0<0 -> 1f (kernel fault) + PTR_SRL k0, k0, SEGSHIFT - 2 #03: k0=seg offset (almost) + GET_CPU_PCPU(k1) - lw k1, PC_SEGBASE(k1) - beqz k1, 2f #05: make sure segbase is not null - andi k0, k0, 0x7fc #06: k0=seg offset (mask 0x3) -#xxx mips64 unsafe? - addu k1, k0, k1 #07: k1=seg entry address - lw k1, 0(k1) #08: k1=seg entry - mfc0 k0, COP_0_BAD_VADDR #09: k0=bad address (again) - beq k1, zero, 2f #0a: ==0 -- no page table - srl k0, PGSHIFT - 2 #0b: k0=VPN (aka va>>10) + PTR_L k1, PC_SEGBASE(k1) + beqz k1, 2f #05: make sure segbase is not null + andi k0, k0, 0xffc #06: k0=seg offset (mask 0x3) + PTR_ADDU k1, k0, k1 #07: k1=seg entry address - andi k0, k0, ((NPTEPG/2) - 1) << 3 #0c: k0=page tab offset -#xxx mips64 unsafe? - addu k1, k1, k0 #0d: k1=pte address - lw k0, 0(k1) #0e: k0=lo0 pte - lw k1, 4(k1) #0f: k1=lo1 pte - _SLL k0, k0, WIRED_SHIFT #10: keep bottom 30 bits - _SRL k0, k0, WIRED_SHIFT #11: keep bottom 30 bits - _MTC0 k0, COP_0_TLB_LO0 #12: lo0 is loaded - _SLL k1, k1, WIRED_SHIFT #13: keep bottom 30 bits - _SRL k1, k1, WIRED_SHIFT #14: keep bottom 30 bits - _MTC0 k1, COP_0_TLB_LO1 #15: lo1 is loaded + PTR_L k1, 0(k1) #08: k1=seg entry + MFC0 k0, COP_0_BAD_VADDR #09: k0=bad address (again) + beq k1, zero, 2f #0a: ==0 -- no page table + srl k0, PAGE_SHIFT - 2 #0b: k0=VPN (aka va>>10) + andi k0, k0, 0xff8 #0c: k0=page tab offset + PTR_ADDU k1, k1, k0 #0d: k1=pte address + lw k0, 0(k1) #0e: k0=lo0 pte + lw k1, 4(k1) #0f: k1=lo1 pte + CLEAR_PTE_SWBITS(k0) + MTC0 k0, COP_0_TLB_LO0 #12: lo0 is loaded + COP0_SYNC + CLEAR_PTE_SWBITS(k1) + MTC0 k1, COP_0_TLB_LO1 #15: lo1 is loaded + COP0_SYNC + tlbwr #1a: write to tlb HAZARD_DELAY - tlbwr #1a: write to tlb - HAZARD_DELAY - eret #1f: retUrn from exception
-1: j _C_LABEL(MipsTLBMissException) #20: kernel exception - nop #21: branch delay slot -2: j SlowFault #22: no page table present - nop #23: branch delay slot - - .set at + eret #1f: retUrn from exception +1: j MipsTLBMissException #20: kernel exception + nop #21: branch delay slot +2: j SlowFault #22: no page table present + nop #23: branch delay slot + .set pop /* * This code is copied to the general exception vector address to @@ -207,7 +182,7 @@ VECTOR(MipsException, unknown) # the cause is already # shifted left by 2 bits so # we dont have to shift. - lw k0, 0(k0) # Get the function address + PTR_L k0, 0(k0) # Get the function address nop j k0 # Jump to the function. nop @@ -244,20 +219,9 @@ SlowFault: * *---------------------------------------------------------------------------- */ -#if defined(ISA_MIPS32) -#define STORE sw /* 32 bit mode regsave instruction */ -#define LOAD lw /* 32 bit mode regload instruction */ -#define RSIZE 4 /* 32 bit mode register size */ -#elif defined(ISA_MIPS64) -#define STORE sd /* 64 bit mode regsave instruction */ -#define LOAD ld /* 64 bit mode regload instruction */ -#define RSIZE 8 /* 64 bit mode register size */ -#else -#error "Please write code for this isa." -#endif #define SAVE_REG(reg, offs, base) \ - STORE reg, STAND_ARG_SIZE + (RSIZE * offs) (base) + REG_S reg, CALLFRAME_SIZ + (SZREG * offs) (base) #ifdef TARGET_OCTEON #define CLEAR_STATUS \ @@ -266,15 +230,17 @@ SlowFault: or a0, a0, a2 ; \ li a2, ~(MIPS_SR_INT_IE | MIPS_SR_EXL | SR_KSU_USER) ; \ and a0, a0, a2 ; \ - mtc0 a0, COP_0_STATUS_REG + mtc0 a0, COP_0_STATUS_REG ; \ + ITLBNOPFIX #else #define CLEAR_STATUS \ mfc0 a0, COP_0_STATUS_REG ;\ li a2, ~(MIPS_SR_INT_IE | MIPS_SR_EXL | SR_KSU_USER) ; \ and a0, a0, a2 ; \ - mtc0 a0, COP_0_STATUS_REG + mtc0 a0, COP_0_STATUS_REG ; \ + ITLBNOPFIX #endif - + /* * Save CPU and CP0 register state. 
* @@ -317,8 +283,8 @@ SlowFault: mfhi v1 ;\ mfc0 a0, COP_0_STATUS_REG ;\ mfc0 a1, COP_0_CAUSE_REG ;\ - mfc0 a2, COP_0_BAD_VADDR ;\ - mfc0 a3, COP_0_EXC_PC ;\ + MFC0 a2, COP_0_BAD_VADDR ;\ + MFC0 a3, COP_0_EXC_PC ;\ SAVE_REG(v0, MULLO, sp) ;\ SAVE_REG(v1, MULHI, sp) ;\ SAVE_REG(a0, SR, sp) ;\ @@ -332,20 +298,20 @@ SlowFault: PTR_ADDU v0, sp, KERN_EXC_FRAME_SIZE ;\ SAVE_REG(v0, SP, sp) ;\ CLEAR_STATUS ;\ - PTR_ADDU a0, sp, STAND_ARG_SIZE ;\ + PTR_ADDU a0, sp, CALLFRAME_SIZ ;\ ITLBNOPFIX #define RESTORE_REG(reg, offs, base) \ - LOAD reg, STAND_ARG_SIZE + (RSIZE * offs) (base) + REG_L reg, CALLFRAME_SIZ + (SZREG * offs) (base) #define RESTORE_CPU \ - mtc0 zero,COP_0_STATUS_REG ;\ + CLEAR_STATUS ;\ RESTORE_REG(k0, SR, sp) ;\ RESTORE_REG(t0, MULLO, sp) ;\ RESTORE_REG(t1, MULHI, sp) ;\ mtlo t0 ;\ mthi t1 ;\ - _MTC0 v0, COP_0_EXC_PC ;\ + MTC0 v0, COP_0_EXC_PC ;\ .set noat ;\ RESTORE_REG(AT, AST, sp) ;\ RESTORE_REG(v0, V0, sp) ;\ @@ -384,13 +350,13 @@ SlowFault: * the status register and the multiply lo and high registers. * In addition, we set this up for linkage conventions. */ -#define KERN_REG_SIZE (NUMSAVEREGS * RSIZE) -#define KERN_EXC_FRAME_SIZE (STAND_FRAME_SIZE + KERN_REG_SIZE + 16) +#define KERN_REG_SIZE (NUMSAVEREGS * SZREG) +#define KERN_EXC_FRAME_SIZE (CALLFRAME_SIZ + KERN_REG_SIZE + 16) NNON_LEAF(MipsKernGenException, KERN_EXC_FRAME_SIZE, ra) .set noat - subu sp, sp, KERN_EXC_FRAME_SIZE - .mask 0x80000000, (STAND_RA_OFFSET - KERN_EXC_FRAME_SIZE) + PTR_SUBU sp, sp, KERN_EXC_FRAME_SIZE + .mask 0x80000000, (CALLFRAME_RA - KERN_EXC_FRAME_SIZE) /* * Save CPU state, building 'frame'. 
*/ @@ -401,7 +367,7 @@ NNON_LEAF(MipsKernGenException, KERN_EXC_FRAME_SIZE, ra) PTR_LA gp, _C_LABEL(_gp) PTR_LA k0, _C_LABEL(trap) jalr k0 - sw a3, STAND_RA_OFFSET + KERN_REG_SIZE(sp) # for debugging + REG_S a3, CALLFRAME_RA + KERN_REG_SIZE(sp) # for debugging /* * Update interrupt mask in saved status register @@ -410,7 +376,6 @@ NNON_LEAF(MipsKernGenException, KERN_EXC_FRAME_SIZE, ra) * in trap handler */ mfc0 a0, COP_0_STATUS_REG - mtc0 zero, COP_0_STATUS_REG and a0, a0, SR_INT_MASK RESTORE_REG(a1, SR, sp) and a1, a1, ~SR_INT_MASK @@ -424,10 +389,10 @@ END(MipsKernGenException) #define SAVE_U_PCB_REG(reg, offs, base) \ - STORE reg, U_PCB_REGS + (RSIZE * offs) (base) + REG_S reg, U_PCB_REGS + (SZREG * offs) (base) #define RESTORE_U_PCB_REG(reg, offs, base) \ - LOAD reg, U_PCB_REGS + (RSIZE * offs) (base) + REG_L reg, U_PCB_REGS + (SZREG * offs) (base) /*---------------------------------------------------------------------------- * @@ -443,14 +408,14 @@ END(MipsKernGenException) * *---------------------------------------------------------------------------- */ -NNON_LEAF(MipsUserGenException, STAND_FRAME_SIZE, ra) +NNON_LEAF(MipsUserGenException, CALLFRAME_SIZ, ra) .set noat - .mask 0x80000000, (STAND_RA_OFFSET - STAND_FRAME_SIZE) + .mask 0x80000000, (CALLFRAME_RA - CALLFRAME_SIZ) /* * Save all of the registers except for the kernel temporaries in u.u_pcb. */ GET_CPU_PCPU(k1) - lw k1, PC_CURPCB(k1) + PTR_L k1, PC_CURPCB(k1) SAVE_U_PCB_REG(AT, AST, k1) .set at SAVE_U_PCB_REG(v0, V0, k1) @@ -476,17 +441,17 @@ NNON_LEAF(MipsUserGenException, STAND_FRAME_SIZE, ra) SAVE_U_PCB_REG(s2, S2, k1) SAVE_U_PCB_REG(s3, S3, k1) SAVE_U_PCB_REG(s4, S4, k1) - mfc0 a2, COP_0_BAD_VADDR # Third arg is the fault addr + MFC0 a2, COP_0_BAD_VADDR # Third arg is the fault addr SAVE_U_PCB_REG(s5, S5, k1) SAVE_U_PCB_REG(s6, S6, k1) SAVE_U_PCB_REG(s7, S7, k1) SAVE_U_PCB_REG(t8, T8, k1) - mfc0 a3, COP_0_EXC_PC # Fourth arg is the pc. + MFC0 a3, COP_0_EXC_PC # Fourth arg is the pc. 
SAVE_U_PCB_REG(t9, T9, k1) SAVE_U_PCB_REG(gp, GP, k1) SAVE_U_PCB_REG(sp, SP, k1) SAVE_U_PCB_REG(s8, S8, k1) - subu sp, k1, STAND_FRAME_SIZE # switch to kernel SP + PTR_SUBU sp, k1, CALLFRAME_SIZ # switch to kernel SP SAVE_U_PCB_REG(ra, RA, k1) SAVE_U_PCB_REG(v0, MULLO, k1) SAVE_U_PCB_REG(v1, MULHI, k1) @@ -494,12 +459,12 @@ NNON_LEAF(MipsUserGenException, STAND_FRAME_SIZE, ra) SAVE_U_PCB_REG(a1, CAUSE, k1) SAVE_U_PCB_REG(a2, BADVADDR, k1) SAVE_U_PCB_REG(a3, PC, k1) - sw a3, STAND_RA_OFFSET(sp) # for debugging + REG_S a3, CALLFRAME_RA(sp) # for debugging PTR_LA gp, _C_LABEL(_gp) # switch to kernel GP # Turn off fpu and enter kernel mode and t0, a0, ~(SR_COP_1_BIT | SR_EXL | SR_KSU_MASK | SR_INT_ENAB) #ifdef TARGET_OCTEON - or t0, t0, (MIPS_SR_KX | MIPS_SR_SX | MIPS_SR_UX) + or t0, t0, (MIPS_SR_KX | MIPS_SR_SX | MIPS_SR_UX | MIPS32_SR_PX) #endif mtc0 t0, COP_0_STATUS_REG PTR_ADDU a0, k1, U_PCB_REGS @@ -518,10 +483,7 @@ NNON_LEAF(MipsUserGenException, STAND_FRAME_SIZE, ra) */ DO_AST - mfc0 t0, COP_0_STATUS_REG # disable int - and t0, t0, ~(MIPS_SR_INT_IE) - mtc0 t0, COP_0_STATUS_REG - ITLBNOPFIX + CLEAR_STATUS /* * The use of k1 for storing the PCB pointer must be done only @@ -529,7 +491,7 @@ NNON_LEAF(MipsUserGenException, STAND_FRAME_SIZE, ra) * by the interrupt code. 
*/ GET_CPU_PCPU(k1) - lw k1, PC_CURPCB(k1) + PTR_L k1, PC_CURPCB(k1) /* * Update interrupt mask in saved status register @@ -549,7 +511,7 @@ NNON_LEAF(MipsUserGenException, STAND_FRAME_SIZE, ra) mthi t1 RESTORE_U_PCB_REG(a0, PC, k1) RESTORE_U_PCB_REG(v0, V0, k1) - _MTC0 a0, COP_0_EXC_PC # set return address + MTC0 a0, COP_0_EXC_PC # set return address RESTORE_U_PCB_REG(v1, V1, k1) RESTORE_U_PCB_REG(a0, A0, k1) RESTORE_U_PCB_REG(a1, A1, k1) @@ -578,9 +540,6 @@ NNON_LEAF(MipsUserGenException, STAND_FRAME_SIZE, ra) RESTORE_U_PCB_REG(k0, SR, k1) RESTORE_U_PCB_REG(s8, S8, k1) RESTORE_U_PCB_REG(ra, RA, k1) -#ifdef TARGET_OCTEON - and k0, k0, ~(MIPS_SR_KX | MIPS_SR_SX | MIPS_SR_UX) -#endif .set noat RESTORE_U_PCB_REG(AT, AST, k1) @@ -610,27 +569,25 @@ END(MipsUserGenException) NNON_LEAF(MipsKernIntr, KERN_EXC_FRAME_SIZE, ra) .set noat - subu sp, sp, KERN_EXC_FRAME_SIZE - .mask 0x80000000, (STAND_RA_OFFSET - KERN_EXC_FRAME_SIZE) + PTR_SUBU sp, sp, KERN_EXC_FRAME_SIZE + .mask 0x80000000, (CALLFRAME_RA - KERN_EXC_FRAME_SIZE) /* - * Save the relevant kernel registers onto the stack. + * Save CPU state, building 'frame'. */ SAVE_CPU - /* - * Call the interrupt handler. + * Call the interrupt handler. a0 points at the saved frame. */ PTR_LA gp, _C_LABEL(_gp) - PTR_ADDU a0, sp, STAND_ARG_SIZE PTR_LA k0, _C_LABEL(cpu_intr) jalr k0 - sw a3, STAND_RA_OFFSET + KERN_REG_SIZE(sp) - /* Why no AST processing here? */ + REG_S a3, CALLFRAME_RA + KERN_REG_SIZE(sp) # for debugging /* * Update interrupt mask in saved status register * Some of interrupts could be disabled by - * intr filters + * intr filters if interrupts are enabled later + * in trap handler */ mfc0 a0, COP_0_STATUS_REG and a0, a0, SR_INT_MASK @@ -638,12 +595,8 @@ NNON_LEAF(MipsKernIntr, KERN_EXC_FRAME_SIZE, ra) and a1, a1, ~SR_INT_MASK or a1, a1, a0 SAVE_REG(a1, SR, sp) - -/* - * Restore registers and return from the interrupt. 
- */ - lw v0, STAND_RA_OFFSET + KERN_REG_SIZE(sp) - RESTORE_CPU + REG_L v0, CALLFRAME_RA + KERN_REG_SIZE(sp) + RESTORE_CPU # v0 contains the return address. sync eret .set at @@ -668,15 +621,15 @@ END(MipsKernIntr) * *---------------------------------------------------------------------------- */ -NNON_LEAF(MipsUserIntr, STAND_FRAME_SIZE, ra) +NNON_LEAF(MipsUserIntr, CALLFRAME_SIZ, ra) .set noat - .mask 0x80000000, (STAND_RA_OFFSET - STAND_FRAME_SIZE) + .mask 0x80000000, (CALLFRAME_RA - CALLFRAME_SIZ) /* * Save the relevant user registers into the u.u_pcb struct. * We don't need to save s0 - s8 because the compiler does it for us. */ GET_CPU_PCPU(k1) - lw k1, PC_CURPCB(k1) + PTR_L k1, PC_CURPCB(k1) SAVE_U_PCB_REG(AT, AST, k1) .set at SAVE_U_PCB_REG(v0, V0, k1) @@ -715,19 +668,19 @@ NNON_LEAF(MipsUserIntr, STAND_FRAME_SIZE, ra) mfhi v1 mfc0 a0, COP_0_STATUS_REG mfc0 a1, COP_0_CAUSE_REG - mfc0 a3, COP_0_EXC_PC + MFC0 a3, COP_0_EXC_PC SAVE_U_PCB_REG(v0, MULLO, k1) SAVE_U_PCB_REG(v1, MULHI, k1) SAVE_U_PCB_REG(a0, SR, k1) SAVE_U_PCB_REG(a1, CAUSE, k1) SAVE_U_PCB_REG(a3, PC, k1) # PC in a3, note used later! - subu sp, k1, STAND_FRAME_SIZE # switch to kernel SP + PTR_SUBU sp, k1, CALLFRAME_SIZ # switch to kernel SP PTR_LA gp, _C_LABEL(_gp) # switch to kernel GP # Turn off fpu, disable interrupts, set kernel mode kernel mode, clear exception level. and t0, a0, ~(SR_COP_1_BIT | SR_EXL | SR_INT_ENAB | SR_KSU_MASK) #ifdef TARGET_OCTEON - or t0, t0, (MIPS_SR_KX | MIPS_SR_SX | MIPS_SR_UX) + or t0, t0, (MIPS_SR_KX | MIPS_SR_SX | MIPS_SR_UX | MIPS32_SR_PX) #endif mtc0 t0, COP_0_STATUS_REG ITLBNOPFIX @@ -737,7 +690,7 @@ NNON_LEAF(MipsUserIntr, STAND_FRAME_SIZE, ra) */ PTR_LA k0, _C_LABEL(cpu_intr) jalr k0 - sw a3, STAND_RA_OFFSET(sp) # for debugging + REG_S a3, CALLFRAME_RA(sp) # for debugging /* * Enable interrupts before doing ast(). @@ -759,13 +712,10 @@ NNON_LEAF(MipsUserIntr, STAND_FRAME_SIZE, ra) /* * Restore user registers and return. 
*/ - mfc0 t0, COP_0_STATUS_REG # disable int - and t0, t0, ~(MIPS_SR_INT_IE) - mtc0 t0, COP_0_STATUS_REG - ITLBNOPFIX + CLEAR_STATUS GET_CPU_PCPU(k1) - lw k1, PC_CURPCB(k1) + PTR_L k1, PC_CURPCB(k1) /* * Update interrupt mask in saved status register @@ -793,7 +743,7 @@ NNON_LEAF(MipsUserIntr, STAND_FRAME_SIZE, ra) RESTORE_U_PCB_REG(t2, PC, k1) mtlo t0 mthi t1 - _MTC0 t2, COP_0_EXC_PC # set return address + MTC0 t2, COP_0_EXC_PC # set return address RESTORE_U_PCB_REG(v0, V0, k1) RESTORE_U_PCB_REG(v1, V1, k1) RESTORE_U_PCB_REG(a0, A0, k1) @@ -814,9 +764,6 @@ NNON_LEAF(MipsUserIntr, STAND_FRAME_SIZE, ra) RESTORE_U_PCB_REG(k0, SR, k1) RESTORE_U_PCB_REG(sp, SP, k1) RESTORE_U_PCB_REG(ra, RA, k1) -#ifdef TARGET_OCTEON - and k0, k0, ~(MIPS_SR_KX | MIPS_SR_SX | MIPS_SR_UX) -#endif .set noat RESTORE_U_PCB_REG(AT, AST, k1) @@ -832,78 +779,78 @@ NLEAF(MipsTLBInvalidException) .set noat .set noreorder - mfc0 k0, COP_0_BAD_VADDR - li k1, VM_MAXUSER_ADDRESS - sltu k1, k0, k1 - bnez k1, 1f + MFC0 k0, COP_0_BAD_VADDR + PTR_LI k1, VM_MAXUSER_ADDRESS + sltu k1, k0, k1 + bnez k1, 1f nop - /* badvaddr = kernel address */ - lui k1, %hi(_C_LABEL(kernel_segmap)) - b 2f - lw k1, %lo(_C_LABEL(kernel_segmap))(k1) + /* Kernel address. */ + lui k1, %hi(kernel_segmap) # k1=hi of segbase + b 2f + PTR_L k1, %lo(kernel_segmap)(k1) # k1=segment tab base -1: - /* badvaddr = user address */ +1: /* User address. */ GET_CPU_PCPU(k1) - lw k1, PC_SEGBASE(k1) + PTR_L k1, PC_SEGBASE(k1) -2: - beqz k1, 3f /* invalid page directory pointer */ +2: /* Validate page directory pointer. */ + beqz k1, 3f nop - srl k0, SEGSHIFT - 2 - andi k0, 0xffc - addu k1, k1, k0 - lw k1, 0(k1) - beqz k1, 3f /* invalid page table page pointer */ + PTR_SRL k0, SEGSHIFT - 2 # k0=seg offset (almost) + beq k1, zero, MipsKernGenException # ==0 -- no seg tab + andi k0, k0, 0xffc # k0=seg offset (mask 0x3) + PTR_ADDU k1, k0, k1 # k1=seg entry address + PTR_L k1, 0(k1) # k1=seg entry + + /* Validate page table pointer. 
*/ + beqz k1, 3f nop - mfc0 k0, COP_0_BAD_VADDR - srl k0, PGSHIFT - 2 - andi k0, 0xffc - addu k1, k1, k0 + MFC0 k0, COP_0_BAD_VADDR # k0=bad address (again) + PTR_SRL k0, PAGE_SHIFT - 2 # k0=VPN + andi k0, k0, 0xffc # k0=page tab offset + PTR_ADDU k1, k1, k0 # k1=pte address + lw k0, 0(k1) # k0=this PTE - lw k0, 0(k1) - andi k0, PTE_V - beqz k0, 3f /* invalid page table entry */ + /* Validate page table entry. */ + andi k0, PTE_V + beqz k0, 3f nop - andi k0, k1, 4 - bnez k0, odd_page + /* Check whether this is an even or odd entry. */ + andi k0, k1, 4 + bnez k0, odd_page nop -even_page: - lw k0, 0(k1) - _SLL k0, k0, WIRED_SHIFT - _SRL k0, k0, WIRED_SHIFT - _MTC0 k0, COP_0_TLB_LO0 + lw k0, 0(k1) + lw k1, 4(k1) + CLEAR_PTE_SWBITS(k0) + MTC0 k0, COP_0_TLB_LO0 + COP0_SYNC + CLEAR_PTE_SWBITS(k1) + MTC0 k1, COP_0_TLB_LO1 + COP0_SYNC - lw k0, 4(k1) - _SLL k0, k0, WIRED_SHIFT - _SRL k0, k0, WIRED_SHIFT - _MTC0 k0, COP_0_TLB_LO1 - - b tlb_insert_entry + b tlb_insert_entry nop odd_page: - lw k0, 0(k1) - _SLL k0, k0, WIRED_SHIFT - _SRL k0, k0, WIRED_SHIFT - _MTC0 k0, COP_0_TLB_LO1 - - lw k0, -4(k1) - _SLL k0, k0, WIRED_SHIFT - _SRL k0, k0, WIRED_SHIFT - _MTC0 k0, COP_0_TLB_LO0 + lw k0, -4(k1) + lw k1, 0(k1) + CLEAR_PTE_SWBITS(k0) + MTC0 k0, COP_0_TLB_LO0 + COP0_SYNC + CLEAR_PTE_SWBITS(k1) + MTC0 k1, COP_0_TLB_LO1 + COP0_SYNC tlb_insert_entry: tlbp HAZARD_DELAY - mfc0 k0, COP_0_TLB_INDEX - HAZARD_DELAY - bltz k0, tlb_insert_random + mfc0 k0, COP_0_TLB_INDEX + bltz k0, tlb_insert_random nop tlbwi eret @@ -927,8 +874,8 @@ tlb_insert_random: * Check for kernel stack overflow. 
*/ GET_CPU_PCPU(k1) - lw k0, PC_CURTHREAD(k1) - lw k0, TD_REALKSTACK(k0) + PTR_L k0, PC_CURTHREAD(k1) + PTR_L k0, TD_KSTACK(k0) sltu k0, k0, sp bnez k0, _C_LABEL(MipsKernGenException) nop @@ -944,8 +891,8 @@ tlb_insert_random: sll k1, k1, PAGE_SHIFT + 1 PTR_LA k0, _C_LABEL(pcpu_space) - addiu k0, (NBPG * 2) - addu k0, k0, k1 + PTR_ADDU k0, PAGE_SIZE * 2 + PTR_ADDU k0, k0, k1 /* * Stash the original value of 'sp' so we can update trapframe later. @@ -954,12 +901,12 @@ tlb_insert_random: move k1, sp move sp, k0 - subu sp, sp, KERN_EXC_FRAME_SIZE + PTR_SUBU sp, sp, KERN_EXC_FRAME_SIZE move k0, ra move ra, zero - sw ra, CALLFRAME_RA(sp) /* stop the ddb backtrace right here */ - sw zero, CALLFRAME_SP(sp) + REG_S ra, CALLFRAME_RA(sp) /* stop the ddb backtrace right here */ + REG_S zero, CALLFRAME_SP(sp) move ra, k0 SAVE_CPU @@ -974,8 +921,8 @@ tlb_insert_random: * Squelch any more overflow checks by setting the stack base to 0. */ GET_CPU_PCPU(k1) - lw k0, PC_CURTHREAD(k1) - sw zero, TD_REALKSTACK(k0) + PTR_L k0, PC_CURTHREAD(k1) + PTR_S zero, TD_KSTACK(k0) move a1, a0 PANIC("kernel stack overflow - trapframe at %p") @@ -1008,34 +955,30 @@ END(MipsTLBInvalidException) */ NLEAF(MipsTLBMissException) .set noat - mfc0 k0, COP_0_BAD_VADDR # k0=bad address - li k1, (VM_MAX_KERNEL_ADDRESS) # check fault address against - sltu k1, k1, k0 # upper bound of kernel_segmap - bnez k1, _C_LABEL(MipsKernGenException) # out of bound - lui k1, %hi(_C_LABEL(kernel_segmap)) # k1=hi of segbase - srl k0, 20 # k0=seg offset (almost) - lw k1, %lo(_C_LABEL(kernel_segmap))(k1) # k1=segment tab base - beq k1, zero, _C_LABEL(MipsKernGenException) # ==0 -- no seg tab - andi k0, k0, 0xffc # k0=seg offset (mask 0x3) -#xxx mips64 unsafe - addu k1, k0, k1 # k1=seg entry address - lw k1, 0(k1) # k1=seg entry - mfc0 k0, COP_0_BAD_VADDR # k0=bad address (again) - beq k1, zero, _C_LABEL(MipsKernGenException) # ==0 -- no page table - srl k0, 10 # k0=VPN (aka va>>10) - andi k0, k0, 0xff8 # k0=page tab offset 
-#xxx mips64 unsafe - addu k1, k1, k0 # k1=pte address - lw k0, 0(k1) # k0=lo0 pte - lw k1, 4(k1) # k1=lo1 pte - _SLL k0, WIRED_SHIFT # chop bits [31..30] - _SRL k0, WIRED_SHIFT # chop bits [31..30] - _MTC0 k0, COP_0_TLB_LO0 # lo0 is loaded - _SLL k1, WIRED_SHIFT # chop bits [31..30] - _SRL k1, WIRED_SHIFT # chop bits [31..30] - _MTC0 k1, COP_0_TLB_LO1 # lo1 is loaded - - HAZARD_DELAY + MFC0 k0, COP_0_BAD_VADDR # k0=bad address + PTR_LI k1, VM_MAX_KERNEL_ADDRESS # check fault address against + sltu k1, k1, k0 # upper bound of kernel_segmap + bnez k1, MipsKernGenException # out of bound + lui k1, %hi(kernel_segmap) # k1=hi of segbase + PTR_SRL k0, SEGSHIFT - 2 # k0=seg offset (almost) + PTR_L k1, %lo(kernel_segmap)(k1) # k1=segment tab base + beq k1, zero, MipsKernGenException # ==0 -- no seg tab + andi k0, k0, 0xffc # k0=seg offset (mask 0x3) + PTR_ADDU k1, k0, k1 # k1=seg entry address + PTR_L k1, 0(k1) # k1=seg entry + MFC0 k0, COP_0_BAD_VADDR # k0=bad address (again) + beq k1, zero, MipsKernGenException # ==0 -- no page table + PTR_SRL k0, PAGE_SHIFT - 2 # k0=VPN + andi k0, k0, 0xff8 # k0=page tab offset + PTR_ADDU k1, k1, k0 # k1=pte address + lw k0, 0(k1) # k0=lo0 pte + lw k1, 4(k1) # k1=lo1 pte + CLEAR_PTE_SWBITS(k0) + MTC0 k0, COP_0_TLB_LO0 # lo0 is loaded + COP0_SYNC + CLEAR_PTE_SWBITS(k1) + MTC0 k1, COP_0_TLB_LO1 # lo1 is loaded + COP0_SYNC tlbwr # write to tlb HAZARD_DELAY eret # return from exception @@ -1061,11 +1004,11 @@ END(MipsTLBMissException) * *---------------------------------------------------------------------------- */ -NON_LEAF(MipsFPTrap, STAND_FRAME_SIZE, ra) - subu sp, sp, STAND_FRAME_SIZE +NON_LEAF(MipsFPTrap, CALLFRAME_SIZ, ra) + PTR_SUBU sp, sp, CALLFRAME_SIZ mfc0 t0, COP_0_STATUS_REG - sw ra, STAND_RA_OFFSET(sp) - .mask 0x80000000, (STAND_RA_OFFSET - STAND_FRAME_SIZE) + REG_S ra, CALLFRAME_RA(sp) + .mask 0x80000000, (CALLFRAME_RA - CALLFRAME_SIZ) or t1, t0, SR_COP_1_BIT mtc0 t1, COP_0_STATUS_REG @@ -1086,10 +1029,10 @@ 
NON_LEAF(MipsFPTrap, STAND_FRAME_SIZE, ra) * The instruction is in the branch delay slot so the branch will have to * be emulated to get the resulting PC. */ - sw a2, STAND_FRAME_SIZE + 8(sp) + PTR_S a2, CALLFRAME_SIZ + 8(sp) GET_CPU_PCPU(a0) #mips64 unsafe? - lw a0, PC_CURPCB(a0) + PTR_L a0, PC_CURPCB(a0) PTR_ADDU a0, a0, U_PCB_REGS # first arg is ptr to CPU registers move a1, a2 # second arg is instruction PC move a2, t1 # third arg is floating point CSR @@ -1100,7 +1043,7 @@ NON_LEAF(MipsFPTrap, STAND_FRAME_SIZE, ra) * Now load the floating-point instruction in the branch delay slot * to be emulated. */ - lw a2, STAND_FRAME_SIZE + 8(sp) # restore EXC pc + PTR_L a2, CALLFRAME_SIZ + 8(sp) # restore EXC pc b 2f lw a0, 4(a2) # a0 = coproc instruction /* @@ -1110,10 +1053,10 @@ NON_LEAF(MipsFPTrap, STAND_FRAME_SIZE, ra) 1: lw a0, 0(a2) # a0 = coproc instruction #xxx mips64 unsafe? - addu v0, a2, 4 # v0 = next pc + PTR_ADDU v0, a2, 4 # v0 = next pc 2: GET_CPU_PCPU(t2) - lw t2, PC_CURPCB(t2) + PTR_L t2, PC_CURPCB(t2) SAVE_U_PCB_REG(v0, PC, t2) # save new pc /* * Check to see if the instruction to be emulated is a floating-point @@ -1127,7 +1070,7 @@ NON_LEAF(MipsFPTrap, STAND_FRAME_SIZE, ra) */ 3: GET_CPU_PCPU(a0) - lw a0, PC_CURTHREAD(a0) # get current thread + PTR_L a0, PC_CURTHREAD(a0) # get current thread cfc1 a2, FPC_CSR # code = FP execptions ctc1 zero, FPC_CSR # Clear exceptions PTR_LA t3, _C_LABEL(trapsignal) @@ -1149,12 +1092,12 @@ NON_LEAF(MipsFPTrap, STAND_FRAME_SIZE, ra) */ FPReturn: mfc0 t0, COP_0_STATUS_REG - lw ra, STAND_RA_OFFSET(sp) + PTR_L ra, CALLFRAME_RA(sp) and t0, t0, ~SR_COP_1_BIT mtc0 t0, COP_0_STATUS_REG ITLBNOPFIX j ra - PTR_ADDU sp, sp, STAND_FRAME_SIZE + PTR_ADDU sp, sp, CALLFRAME_SIZ END(MipsFPTrap) /* @@ -1182,7 +1125,7 @@ VECTOR(MipsCache, unknown) PTR_LA k0, _C_LABEL(MipsCacheException) li k1, MIPS_PHYS_MASK and k0, k1 - li k1, MIPS_KSEG1_START + PTR_LI k1, MIPS_KSEG1_START or k0, k1 j k0 nop @@ -1200,16 +1143,16 @@ 
NESTED_NOPROFILE(MipsCacheException, KERN_EXC_FRAME_SIZE, ra) .mask 0x80000000, -4 PTR_LA k0, _C_LABEL(panic) # return to panic PTR_LA a0, 9f # panicstr - _MFC0 a1, COP_0_ERROR_PC + MFC0 a1, COP_0_ERROR_PC mfc0 a2, COP_0_CACHE_ERR # 3rd arg cache error - _MTC0 k0, COP_0_ERROR_PC # set return address + MTC0 k0, COP_0_ERROR_PC # set return address mfc0 k0, COP_0_STATUS_REG # restore status li k1, SR_DIAG_DE # ignore further errors or k0, k1 mtc0 k0, COP_0_STATUS_REG # restore status - HAZARD_DELAY + COP0_SYNC eret diff --git a/sys/mips/mips/fp.S b/sys/mips/mips/fp.S index afe9f03d1e0..ce1702eb225 100644 --- a/sys/mips/mips/fp.S +++ b/sys/mips/mips/fp.S @@ -94,9 +94,9 @@ * *---------------------------------------------------------------------------- */ -NON_LEAF(MipsEmulateFP, STAND_FRAME_SIZE, ra) - subu sp, sp, STAND_FRAME_SIZE - sw ra, STAND_RA_OFFSET(sp) +NON_LEAF(MipsEmulateFP, CALLFRAME_SIZ, ra) + subu sp, sp, CALLFRAME_SIZ + sw ra, CALLFRAME_RA(sp) /* * Decode the FMT field (bits 24-21) and FUNCTION field (bits 5-0). 
*/ @@ -2247,8 +2247,8 @@ result_fs_d: # result is FS jal set_fd_d # save result (in t0,t1,t2,t3) done: - lw ra, STAND_RA_OFFSET(sp) - addu sp, sp, STAND_FRAME_SIZE + lw ra, CALLFRAME_RA(sp) + addu sp, sp, CALLFRAME_SIZ j ra END(MipsEmulateFP) diff --git a/sys/mips/mips/genassym.c b/sys/mips/mips/genassym.c index 62da8f1f501..53f962b69f3 100644 --- a/sys/mips/mips/genassym.c +++ b/sys/mips/mips/genassym.c @@ -54,6 +54,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include @@ -65,7 +66,7 @@ __FBSDID("$FreeBSD$"); ASSYM(TD_PCB, offsetof(struct thread, td_pcb)); ASSYM(TD_UPTE, offsetof(struct thread, td_md.md_upte)); -ASSYM(TD_REALKSTACK, offsetof(struct thread, td_md.md_realstack)); +ASSYM(TD_KSTACK, offsetof(struct thread, td_kstack)); ASSYM(TD_FLAGS, offsetof(struct thread, td_flags)); ASSYM(TD_LOCK, offsetof(struct thread, td_lock)); ASSYM(TD_FRAME, offsetof(struct thread, td_frame)); @@ -87,15 +88,18 @@ ASSYM(PC_CURPMAP, offsetof(struct pcpu, pc_curpmap)); ASSYM(VM_MAX_KERNEL_ADDRESS, VM_MAX_KERNEL_ADDRESS); ASSYM(VM_MAXUSER_ADDRESS, VM_MAXUSER_ADDRESS); -ASSYM(VM_KERNEL_ALLOC_OFFSET, VM_KERNEL_ALLOC_OFFSET); ASSYM(SIGF_UC, offsetof(struct sigframe, sf_uc)); ASSYM(SIGFPE, SIGFPE); ASSYM(PAGE_SHIFT, PAGE_SHIFT); -ASSYM(PGSHIFT, PGSHIFT); -ASSYM(NBPG, NBPG); +ASSYM(PAGE_SIZE, PAGE_SIZE); +ASSYM(PAGE_MASK, PAGE_MASK); ASSYM(SEGSHIFT, SEGSHIFT); ASSYM(NPTEPG, NPTEPG); ASSYM(TDF_NEEDRESCHED, TDF_NEEDRESCHED); ASSYM(TDF_ASTPENDING, TDF_ASTPENDING); ASSYM(PCPU_SIZE, sizeof(struct pcpu)); ASSYM(MAXCOMLEN, MAXCOMLEN); + +ASSYM(MIPS_KSEG0_START, MIPS_KSEG0_START); +ASSYM(MIPS_KSEG1_START, MIPS_KSEG1_START); +ASSYM(MIPS_KSEG2_START, MIPS_KSEG2_START); diff --git a/sys/mips/mips/locore.S b/sys/mips/mips/locore.S index 1574d7542a1..d626add4215 100644 --- a/sys/mips/mips/locore.S +++ b/sys/mips/mips/locore.S @@ -162,11 +162,23 @@ VECTOR(_locore, unknown) sw a2, _C_LABEL(fenvp) #endif +#if defined(TARGET_OCTEON) && defined(SMP) + .set push + 
.set mips32r2 + rdhwr t2, $0 + beqz t2, 1f + nop + j octeon_ap_wait + nop + .set pop +1: +#endif + /* * Initialize stack and call machine startup. */ PTR_LA sp, _C_LABEL(pcpu_space) - addiu sp, (NBPG * 2) - CALLFRAME_SIZ + addiu sp, (PAGE_SIZE * 2) - CALLFRAME_SIZ sw zero, CALLFRAME_SIZ - 4(sp) # Zero out old ra for debugger sw zero, CALLFRAME_SIZ - 8(sp) # Zero out old fp for debugger @@ -178,10 +190,10 @@ VECTOR(_locore, unknown) nop PTR_LA sp, _C_LABEL(thread0) - lw a0, TD_PCB(sp) - li t0, ~7 + PTR_L a0, TD_PCB(sp) + REG_LI t0, ~7 and a0, a0, t0 - subu sp, a0, CALLFRAME_SIZ + PTR_SUBU sp, a0, CALLFRAME_SIZ jal _C_LABEL(mi_startup) # mi_startup(frame) sw zero, CALLFRAME_SIZ - 8(sp) # Zero out old fp for debugger diff --git a/sys/mips/mips/machdep.c b/sys/mips/mips/machdep.c index ad1dab52206..a9c484c806e 100644 --- a/sys/mips/mips/machdep.c +++ b/sys/mips/mips/machdep.c @@ -142,10 +142,6 @@ vm_offset_t physmem_desc[PHYS_AVAIL_ENTRIES + 2]; struct platform platform; #endif -vm_paddr_t mips_wired_tlb_physmem_start; -vm_paddr_t mips_wired_tlb_physmem_end; -u_int need_wired_tlb_page_pool; - static void cpu_startup(void *); SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL); @@ -302,14 +298,13 @@ mips_proc0_init(void) (long)kstack0)); thread0.td_kstack = kstack0; thread0.td_kstack_pages = KSTACK_PAGES; - thread0.td_md.md_realstack = roundup2(thread0.td_kstack, PAGE_SIZE * 2); /* * Do not use cpu_thread_alloc to initialize these fields * thread0 is the only thread that has kstack located in KSEG0 * while cpu_thread_alloc handles kstack allocated in KSEG2. */ - thread0.td_pcb = (struct pcb *)(thread0.td_md.md_realstack + - (thread0.td_kstack_pages - 1) * PAGE_SIZE) - 1; + thread0.td_pcb = (struct pcb *)(thread0.td_kstack + + thread0.td_kstack_pages * PAGE_SIZE) - 1; thread0.td_frame = &thread0.td_pcb->pcb_regs; /* Steal memory for the dynamic per-cpu area. 
*/ @@ -374,11 +369,9 @@ mips_vector_init(void) * when handler is installed for it */ set_intr_mask(ALL_INT_MASK); - enableintr(); /* Clear BEV in SR so we start handling our own exceptions */ - mips_cp0_status_write(mips_cp0_status_read() & ~SR_BOOT_EXC_VEC); - + mips_wr_status(mips_rd_status() & ~SR_BOOT_EXC_VEC); } /* @@ -475,7 +468,7 @@ spinlock_enter(void) td = curthread; if (td->td_md.md_spinlock_count == 0) - td->td_md.md_saved_intr = disableintr(); + td->td_md.md_saved_intr = intr_disable(); td->td_md.md_spinlock_count++; critical_enter(); } @@ -489,16 +482,7 @@ spinlock_exit(void) critical_exit(); td->td_md.md_spinlock_count--; if (td->td_md.md_spinlock_count == 0) - restoreintr(td->td_md.md_saved_intr); -} - -u_int32_t -get_cyclecount(void) -{ - u_int32_t count; - - mfc0_macro(count, 9); - return (count); + intr_restore(td->td_md.md_saved_intr); } /* @@ -507,7 +491,7 @@ get_cyclecount(void) void cpu_idle(int busy) { - if (mips_cp0_status_read() & SR_INT_ENAB) + if (mips_rd_status() & SR_INT_ENAB) __asm __volatile ("wait"); else panic("ints disabled in idleproc!"); diff --git a/sys/mips/mips/mem.c b/sys/mips/mips/mem.c index c0e88e08e0c..9db282e4206 100644 --- a/sys/mips/mips/mem.c +++ b/sys/mips/mips/mem.c @@ -1,13 +1,12 @@ -/* $OpenBSD: mem.c,v 1.2 1998/08/31 17:42:34 millert Exp $ */ -/* $NetBSD: mem.c,v 1.6 1995/04/10 11:55:03 mycroft Exp $ */ -/* +/*- * Copyright (c) 1988 University of Utah. - * Copyright (c) 1982, 1986, 1990, 1993 - * The Regents of the University of California. All rights reserved. + * Copyright (c) 1982, 1986, 1990 The Regents of the University of California. + * All rights reserved. * * This code is derived from software contributed to Berkeley by * the Systems Programming Group of the University of Utah Computer - * Science Department and Ralph Campbell. + * Science Department, and code derived from software contributed to + * Berkeley by William Jolitz. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -33,161 +32,138 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * @(#)mem.c 8.3 (Berkeley) 1/12/94 - * JNPR: mem.c,v 1.3 2007/08/09 11:23:32 katta Exp $ - */ - -/* - * Memory special file + * from: Utah $Hdr: mem.c 1.13 89/10/08$ + * from: @(#)mem.c 7.2 (Berkeley) 5/9/91 */ #include __FBSDID("$FreeBSD$"); +/* + * Memory special file + */ + #include -#include #include +#include +#include +#include +#include +#include +#include +#include #include -#include -#include -#include -#include -#include -#include #include #include -#include -#include +#include #include -#include -#include -#include -#include + #include -#include +#include + +#include +#include +#include +#include + #include +struct mem_range_softc mem_range_softc; -extern struct sysmaps sysmaps_pcpu[]; -/*ARGSUSED*/ +/* ARGSUSED */ int -memrw(dev, uio, flags) - struct cdev *dev; - struct uio *uio; - int flags; +memrw(struct cdev *dev, struct uio *uio, int flags) { - register vm_offset_t v; - register int c; - register struct iovec *iov; + struct iovec *iov; int error = 0; + vm_offset_t va, eva, off, v; + vm_prot_t prot; + struct vm_page m; + vm_page_t marr; + vm_size_t cnt; - while (uio->uio_resid > 0 && error == 0) { + cnt = 0; + error = 0; + + GIANT_REQUIRED; + + while (uio->uio_resid > 0 && !error) { iov = uio->uio_iov; if (iov->iov_len == 0) { uio->uio_iov++; uio->uio_iovcnt--; if (uio->uio_iovcnt < 0) - panic("mmrw"); + panic("memrw"); continue; } - - /* minor device 0 is physical memory */ if (dev2unit(dev) == CDEV_MINOR_MEM) { v = uio->uio_offset; - c = iov->iov_len; - vm_offset_t va; - vm_paddr_t pa; - register int o; + off = uio->uio_offset & PAGE_MASK; + cnt = PAGE_SIZE - ((vm_offset_t)iov->iov_base & + PAGE_MASK); + cnt = min(cnt, PAGE_SIZE - off); + cnt = min(cnt, iov->iov_len); - if (is_cacheable_mem(v) && - 
is_cacheable_mem(v + c - 1)) { - struct fpage *fp; - struct sysmaps *sysmaps; - - sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)]; - mtx_lock(&sysmaps->lock); - sched_pin(); - - fp = &sysmaps->fp[PMAP_FPAGE1]; - pa = uio->uio_offset & ~PAGE_MASK; - va = pmap_map_fpage(pa, fp, FALSE); - o = (int)uio->uio_offset & PAGE_MASK; - c = (u_int)(PAGE_SIZE - - ((uintptr_t)iov->iov_base & PAGE_MASK)); - c = min(c, (u_int)(PAGE_SIZE - o)); - c = min(c, (u_int)iov->iov_len); - error = uiomove((caddr_t)(va + o), (int)c, uio); - pmap_unmap_fpage(pa, fp); - sched_unpin(); - mtx_unlock(&sysmaps->lock); - } else - return (EFAULT); - continue; + m.phys_addr = trunc_page(v); + marr = &m; + error = uiomove_fromphys(&marr, off, cnt, uio); } - - /* minor device 1 is kernel memory */ else if (dev2unit(dev) == CDEV_MINOR_KMEM) { - v = uio->uio_offset; - c = min(iov->iov_len, MAXPHYS); + va = uio->uio_offset; - vm_offset_t addr, eaddr; - vm_offset_t wired_tlb_virtmem_end; + va = trunc_page(uio->uio_offset); + eva = round_page(uio->uio_offset + + iov->iov_len); - /* - * Make sure that all of the pages are currently - * resident so that we don't create any zero-fill pages. + /* + * Make sure that all the pages are currently resident + * so that we don't create any zero-fill pages. */ - addr = trunc_page(uio->uio_offset); - eaddr = round_page(uio->uio_offset + c); - - if (addr > (vm_offset_t) VM_MIN_KERNEL_ADDRESS) { - wired_tlb_virtmem_end = VM_MIN_KERNEL_ADDRESS + - VM_KERNEL_ALLOC_OFFSET; - if ((addr < wired_tlb_virtmem_end) && - (eaddr >= wired_tlb_virtmem_end)) - addr = wired_tlb_virtmem_end; - - if (addr >= wired_tlb_virtmem_end) { - for (; addr < eaddr; addr += PAGE_SIZE) - if (pmap_extract(kernel_pmap, - addr) == 0) - return EFAULT; - - if (!kernacc( - (caddr_t)(uintptr_t)uio->uio_offset, c, - uio->uio_rw == UIO_READ ? 
- VM_PROT_READ : VM_PROT_WRITE)) + if (va >= VM_MIN_KERNEL_ADDRESS && + eva <= VM_MAX_KERNEL_ADDRESS) { + for (; va < eva; va += PAGE_SIZE) + if (pmap_extract(kernel_pmap, va) == 0) return (EFAULT); - } - } - else if (MIPS_IS_KSEG0_ADDR(v)) { - if (MIPS_KSEG0_TO_PHYS(v + c) >= ctob(physmem)) + + prot = (uio->uio_rw == UIO_READ) + ? VM_PROT_READ : VM_PROT_WRITE; + + va = uio->uio_offset; + if (kernacc((void *) va, iov->iov_len, prot) + == FALSE) return (EFAULT); } - else if (MIPS_IS_KSEG1_ADDR(v)) { - if (MIPS_KSEG1_TO_PHYS(v + c) >= ctob(physmem)) - return (EFAULT); - } - else - return (EFAULT); - - error = uiomove((caddr_t)v, c, uio); + va = uio->uio_offset; + error = uiomove((void *)va, iov->iov_len, uio); continue; } - } + return (error); } -/*ARGSUSED*/ +/* + * allow user processes to MMAP some memory sections + * instead of going through read/write + */ int -memmmap(struct cdev *dev, vm_ooffset_t off, vm_paddr_t *paddr, +memmmap(struct cdev *dev, vm_ooffset_t offset, vm_paddr_t *paddr, int prot, vm_memattr_t *memattr) { + /* + * /dev/mem is the only one that makes sense through this + * interface. For /dev/kmem any physaddr we return here + * could be transient and hence incorrect or invalid at + * a later time. 
+ */ + if (dev2unit(dev) != CDEV_MINOR_MEM) + return (-1); - return (EOPNOTSUPP); + *paddr = offset; + + return (0); } void diff --git a/sys/mips/mips/mp_machdep.c b/sys/mips/mips/mp_machdep.c index f65db9bd6b4..f0c142f0c3a 100644 --- a/sys/mips/mips/mp_machdep.c +++ b/sys/mips/mips/mp_machdep.c @@ -157,6 +157,8 @@ start_ap(int cpuid) cpus = mp_naps; dpcpu = (void *)kmem_alloc(kernel_map, DPCPU_SIZE); + mips_sync(); + if (platform_start_ap(cpuid) != 0) return (-1); /* could not start AP */ @@ -246,6 +248,8 @@ smp_init_secondary(u_int32_t cpuid) mips_dcache_wbinv_all(); mips_icache_sync_all(); + mips_sync(); + MachSetPID(0); pcpu_init(PCPU_ADDR(cpuid), cpuid, sizeof(struct pcpu)); @@ -296,7 +300,7 @@ smp_init_secondary(u_int32_t cpuid) */ mips_wr_compare(mips_rd_count() + counter_freq / hz); - enableintr(); + intr_enable(); /* enter the scheduler */ sched_throw(NULL); diff --git a/sys/mips/mips/mpboot.S b/sys/mips/mips/mpboot.S index ce2982d4005..631099c5964 100644 --- a/sys/mips/mips/mpboot.S +++ b/sys/mips/mips/mpboot.S @@ -36,8 +36,21 @@ .set noat .set noreorder +#ifdef TARGET_OCTEON +#define CLEAR_STATUS \ + mfc0 a0, COP_0_STATUS_REG ;\ + li a2, (MIPS_SR_KX | MIPS_SR_SX | MIPS_SR_UX) ; \ + or a0, a0, a2 ; \ + li a2, ~(MIPS_SR_INT_IE | MIPS_SR_EXL | SR_KSU_USER | MIPS_SR_BEV) ; \ + and a0, a0, a2 ; \ + mtc0 a0, COP_0_STATUS_REG +#else +#define CLEAR_STATUS \ + mtc0 zero, COP_0_STATUS_REG +#endif + GLOBAL(mpentry) - mtc0 zero, COP_0_STATUS_REG /* disable interrupts */ + CLEAR_STATUS /* disable interrupts */ mtc0 zero, COP_0_CAUSE_REG /* clear soft interrupts */ @@ -53,7 +66,7 @@ GLOBAL(mpentry) * Initialize stack and call machine startup */ PTR_LA sp, _C_LABEL(pcpu_space) - addiu sp, (NBPG * 2) - CALLFRAME_SIZ + addiu sp, (PAGE_SIZE * 2) - CALLFRAME_SIZ sll t0, s0, PAGE_SHIFT + 1 addu sp, sp, t0 diff --git a/sys/mips/mips/nexus.c b/sys/mips/mips/nexus.c index 6600980bb02..10b38b5ff86 100644 --- a/sys/mips/mips/nexus.c +++ b/sys/mips/mips/nexus.c @@ -166,16 +166,19 
@@ static int nexus_setup_intr(device_t dev, device_t child, struct resource *res, int flags, driver_filter_t *filt, driver_intr_t *intr, void *arg, void **cookiep) { + register_t s; int irq; - intrmask_t s = disableintr(); + s = intr_disable(); irq = rman_get_start(res); - if (irq >= NUM_MIPS_IRQS) + if (irq >= NUM_MIPS_IRQS) { + intr_restore(s); return (0); + } cpu_establish_hardintr(device_get_nameunit(child), filt, intr, arg, irq, flags, cookiep); - restoreintr(s); + intr_restore(s); return (0); } diff --git a/sys/mips/mips/pmap.c b/sys/mips/mips/pmap.c index 73f57e4fbee..dbce42a6d03 100644 --- a/sys/mips/mips/pmap.c +++ b/sys/mips/mips/pmap.c @@ -147,7 +147,6 @@ unsigned pmap_max_asid; /* max ASID supported by the system */ #define PMAP_ASID_RESERVED 0 - vm_offset_t kernel_vm_end; static struct tlb tlbstash[MAXCPU][MIPS_MAX_TLB_ENTRIES]; @@ -161,10 +160,6 @@ static uma_zone_t pvzone; static struct vm_object pvzone_obj; static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0; -struct fpage fpages_shared[FPAGES_SHARED]; - -struct sysmaps sysmaps_pcpu[MAXCPU]; - static PMAP_INLINE void free_pv_entry(pv_entry_t pv); static pv_entry_t get_pv_entry(pmap_t locked_pmap); static __inline void pmap_changebit(vm_page_t m, int bit, boolean_t setem); @@ -188,7 +183,6 @@ static int pmap_unuse_pt(pmap_t, vm_offset_t, vm_page_t); static int init_pte_prot(vm_offset_t va, vm_page_t m, vm_prot_t prot); static void pmap_TLB_invalidate_kernel(vm_offset_t); static void pmap_TLB_update_kernel(vm_offset_t, pt_entry_t); -static void pmap_init_fpage(void); #ifdef SMP static void pmap_invalidate_page_action(void *arg); @@ -199,10 +193,7 @@ static void pmap_update_page_action(void *arg); struct local_sysmaps { struct mtx lock; - pt_entry_t CMAP1; - pt_entry_t CMAP2; - caddr_t CADDR1; - caddr_t CADDR2; + vm_offset_t base; uint16_t valid1, valid2; }; @@ -216,6 +207,59 @@ struct local_sysmaps { static struct local_sysmaps sysmap_lmem[MAXCPU]; caddr_t virtual_sys_start = 
(caddr_t)0; +#define PMAP_LMEM_MAP1(va, phys) \ + int cpu; \ + struct local_sysmaps *sysm; \ + pt_entry_t *pte, npte; \ + \ + cpu = PCPU_GET(cpuid); \ + sysm = &sysmap_lmem[cpu]; \ + PMAP_LGMEM_LOCK(sysm); \ + intr = intr_disable(); \ + sched_pin(); \ + va = sysm->base; \ + npte = mips_paddr_to_tlbpfn(phys) | \ + PTE_RW | PTE_V | PTE_G | PTE_W | PTE_CACHE; \ + pte = pmap_pte(kernel_pmap, va); \ + *pte = npte; \ + sysm->valid1 = 1; + +#define PMAP_LMEM_MAP2(va1, phys1, va2, phys2) \ + int cpu; \ + struct local_sysmaps *sysm; \ + pt_entry_t *pte, npte; \ + \ + cpu = PCPU_GET(cpuid); \ + sysm = &sysmap_lmem[cpu]; \ + PMAP_LGMEM_LOCK(sysm); \ + intr = intr_disable(); \ + sched_pin(); \ + va1 = sysm->base; \ + va2 = sysm->base + PAGE_SIZE; \ + npte = mips_paddr_to_tlbpfn(phys1) | \ + PTE_RW | PTE_V | PTE_G | PTE_W | PTE_CACHE; \ + pte = pmap_pte(kernel_pmap, va1); \ + *pte = npte; \ + npte = mips_paddr_to_tlbpfn(phys2) | \ + PTE_RW | PTE_V | PTE_G | PTE_W | PTE_CACHE; \ + pte = pmap_pte(kernel_pmap, va2); \ + *pte = npte; \ + sysm->valid1 = 1; \ + sysm->valid2 = 1; + +#define PMAP_LMEM_UNMAP() \ + pte = pmap_pte(kernel_pmap, sysm->base); \ + *pte = PTE_G; \ + pmap_TLB_invalidate_kernel(sysm->base); \ + sysm->valid1 = 0; \ + pte = pmap_pte(kernel_pmap, sysm->base + PAGE_SIZE); \ + *pte = PTE_G; \ + pmap_TLB_invalidate_kernel(sysm->base + PAGE_SIZE); \ + sysm->valid2 = 0; \ + sched_unpin(); \ + intr_restore(intr); \ + PMAP_LGMEM_UNLOCK(sysm); + pd_entry_t pmap_segmap(pmap_t pmap, vm_offset_t va) { @@ -355,7 +399,7 @@ again: kstack0 = pmap_steal_memory(KSTACK_PAGES << PAGE_SHIFT); - virtual_avail = VM_MIN_KERNEL_ADDRESS + VM_KERNEL_ALLOC_OFFSET; + virtual_avail = VM_MIN_KERNEL_ADDRESS; virtual_end = VM_MAX_KERNEL_ADDRESS; #ifdef SMP @@ -387,12 +431,8 @@ again: */ if (memory_larger_than_512meg) { for (i = 0; i < MAXCPU; i++) { - sysmap_lmem[i].CMAP1 = PTE_G; - sysmap_lmem[i].CMAP2 = PTE_G; - sysmap_lmem[i].CADDR1 = (caddr_t)virtual_avail; - virtual_avail += PAGE_SIZE; - 
sysmap_lmem[i].CADDR2 = (caddr_t)virtual_avail; - virtual_avail += PAGE_SIZE; + sysmap_lmem[i].base = virtual_avail; + virtual_avail += PAGE_SIZE * 2; sysmap_lmem[i].valid1 = sysmap_lmem[i].valid2 = 0; PMAP_LGMEM_LOCK_INIT(&sysmap_lmem[i]); } @@ -480,8 +520,6 @@ void pmap_init(void) { - if (need_wired_tlb_page_pool) - pmap_init_fpage(); /* * Initialize the address space (zone) for the pv entries. Set a * high water mark so that the system can recover from excessive @@ -570,7 +608,7 @@ pmap_invalidate_page_action(void *arg) pmap->pm_asid[PCPU_GET(cpuid)].gen = 0; return; } - va = pmap_va_asid(pmap, (va & ~PGOFSET)); + va = pmap_va_asid(pmap, (va & ~PAGE_MASK)); mips_TBIS(va); } @@ -621,7 +659,7 @@ pmap_update_page_action(void *arg) pmap->pm_asid[PCPU_GET(cpuid)].gen = 0; return; } - va = pmap_va_asid(pmap, va); + va = pmap_va_asid(pmap, (va & ~PAGE_MASK)); MachTLBUpdate(va, pte); } @@ -630,6 +668,8 @@ pmap_TLB_update_kernel(vm_offset_t va, pt_entry_t pte) { u_int32_t pid; + va &= ~PAGE_MASK; + MachTLBGetPID(pid); va = va | (pid << VMTLB_PID_SHIFT); @@ -669,18 +709,22 @@ pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot) { pt_entry_t pte; vm_page_t m; + vm_paddr_t pa; m = NULL; - vm_page_lock_queues(); + pa = 0; PMAP_LOCK(pmap); - +retry: pte = *pmap_pte(pmap, va); if (pte != 0 && pmap_pte_v(&pte) && ((pte & PTE_RW) || (prot & VM_PROT_WRITE) == 0)) { + if (vm_page_pa_tryrelock(pmap, mips_tlbpfn_to_paddr(pte), &pa)) + goto retry; + m = PHYS_TO_VM_PAGE(mips_tlbpfn_to_paddr(pte)); vm_page_hold(m); } - vm_page_unlock_queues(); + PA_UNLOCK_COND(pa); PMAP_UNLOCK(pmap); return (m); } @@ -726,7 +770,7 @@ pmap_kremove(vm_offset_t va) /* * Write back all caches from the page being destroyed */ - mips_dcache_wbinv_range_index(va, NBPG); + mips_dcache_wbinv_range_index(va, PAGE_SIZE); pte = pmap_pte(kernel_pmap, va); *pte = PTE_G; @@ -805,136 +849,6 @@ pmap_qremove(vm_offset_t va, int count) * Page table page management routines..... 
***************************************************/ -/* - * floating pages (FPAGES) management routines - * - * FPAGES are the reserved virtual memory areas which can be - * mapped to any physical memory. This gets used typically - * in the following functions: - * - * pmap_zero_page - * pmap_copy_page - */ - -/* - * Create the floating pages, aka FPAGES! - */ -static void -pmap_init_fpage() -{ - vm_offset_t kva; - int i, j; - struct sysmaps *sysmaps; - - /* - * We allocate a total of (FPAGES*MAXCPU + FPAGES_SHARED + 1) pages - * at first. FPAGES & FPAGES_SHARED should be EVEN Then we'll adjust - * 'kva' to be even-page aligned so that the fpage area can be wired - * in the TLB with a single TLB entry. - */ - kva = kmem_alloc_nofault(kernel_map, - (FPAGES * MAXCPU + 1 + FPAGES_SHARED) * PAGE_SIZE); - if ((void *)kva == NULL) - panic("pmap_init_fpage: fpage allocation failed"); - - /* - * Make up start at an even page number so we can wire down the - * fpage area in the tlb with a single tlb entry. - */ - if ((((vm_offset_t)kva) >> PGSHIFT) & 1) { - /* - * 'kva' is not even-page aligned. Adjust it and free the - * first page which is unused. - */ - kmem_free(kernel_map, (vm_offset_t)kva, NBPG); - kva = ((vm_offset_t)kva) + NBPG; - } else { - /* - * 'kva' is even page aligned. We don't need the last page, - * free it. - */ - kmem_free(kernel_map, ((vm_offset_t)kva) + FSPACE, NBPG); - } - - for (i = 0; i < MAXCPU; i++) { - sysmaps = &sysmaps_pcpu[i]; - mtx_init(&sysmaps->lock, "SYSMAPS", NULL, MTX_DEF); - - /* Assign FPAGES pages to the CPU */ - for (j = 0; j < FPAGES; j++) - sysmaps->fp[j].kva = kva + (j) * PAGE_SIZE; - kva = ((vm_offset_t)kva) + (FPAGES * PAGE_SIZE); - } - - /* - * An additional 2 pages are needed, one for pmap_zero_page_idle() - * and one for coredump. 
These pages are shared by all cpu's - */ - fpages_shared[PMAP_FPAGE3].kva = kva; - fpages_shared[PMAP_FPAGE_KENTER_TEMP].kva = kva + PAGE_SIZE; -} - -/* - * Map the page to the fpage virtual address as specified thru' fpage id - */ -vm_offset_t -pmap_map_fpage(vm_paddr_t pa, struct fpage *fp, boolean_t check_unmaped) -{ - vm_offset_t kva; - register pt_entry_t *pte; - pt_entry_t npte; - - KASSERT(curthread->td_pinned > 0, ("curthread not pinned")); - /* - * Check if the fpage is free - */ - if (fp->state) { - if (check_unmaped == TRUE) - pmap_unmap_fpage(pa, fp); - else - panic("pmap_map_fpage: fpage is busy"); - } - fp->state = TRUE; - kva = fp->kva; - - npte = mips_paddr_to_tlbpfn(pa) | PTE_RW | PTE_V | PTE_G | PTE_W | PTE_CACHE; - pte = pmap_pte(kernel_pmap, kva); - *pte = npte; - - pmap_TLB_update_kernel(kva, npte); - - return (kva); -} - -/* - * Unmap the page from the fpage virtual address as specified thru' fpage id - */ -void -pmap_unmap_fpage(vm_paddr_t pa, struct fpage *fp) -{ - vm_offset_t kva; - register pt_entry_t *pte; - - KASSERT(curthread->td_pinned > 0, ("curthread not pinned")); - /* - * Check if the fpage is busy - */ - if (!(fp->state)) { - panic("pmap_unmap_fpage: fpage is free"); - } - kva = fp->kva; - - pte = pmap_pte(kernel_pmap, kva); - *pte = PTE_G; - pmap_TLB_invalidate_kernel(kva); - - fp->state = FALSE; - - /* - * Should there be any flush operation at the end? - */ -} - /* Revision 1.507 * * Simplify the reference counting of page table pages. 
Specifically, use @@ -1051,10 +965,6 @@ pmap_pinit(pmap_t pmap) req = VM_ALLOC_NOOBJ | VM_ALLOC_NORMAL | VM_ALLOC_WIRED | VM_ALLOC_ZERO; -#ifdef VM_ALLOC_WIRED_TLB_PG_POOL - if (need_wired_tlb_page_pool) - req |= VM_ALLOC_WIRED_TLB_PG_POOL; -#endif /* * allocate the page directory page */ @@ -1105,10 +1015,6 @@ _pmap_allocpte(pmap_t pmap, unsigned ptepindex, int flags) ("_pmap_allocpte: flags is neither M_NOWAIT nor M_WAITOK")); req = VM_ALLOC_WIRED | VM_ALLOC_ZERO | VM_ALLOC_NOOBJ; -#ifdef VM_ALLOC_WIRED_TLB_PG_POOL - if (need_wired_tlb_page_pool) - req |= VM_ALLOC_WIRED_TLB_PG_POOL; -#endif /* * Find or fabricate a new pagetable page */ @@ -1263,6 +1169,7 @@ pmap_release(pmap_t pmap) ptdpg->wire_count--; atomic_subtract_int(&cnt.v_wire_count, 1); vm_page_free_zero(ptdpg); + PMAP_LOCK_DESTROY(pmap); } /* @@ -1278,7 +1185,7 @@ pmap_growkernel(vm_offset_t addr) mtx_assert(&kernel_map->system_mtx, MA_OWNED); if (kernel_vm_end == 0) { - kernel_vm_end = VM_MIN_KERNEL_ADDRESS + VM_KERNEL_ALLOC_OFFSET; + kernel_vm_end = VM_MIN_KERNEL_ADDRESS; nkpt = 0; while (segtab_pde(kernel_segmap, kernel_vm_end)) { kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & @@ -1307,10 +1214,6 @@ pmap_growkernel(vm_offset_t addr) * This index is bogus, but out of the way */ req = VM_ALLOC_INTERRUPT | VM_ALLOC_WIRED | VM_ALLOC_NOOBJ; -#ifdef VM_ALLOC_WIRED_TLB_PG_POOL - if (need_wired_tlb_page_pool) - req |= VM_ALLOC_WIRED_TLB_PG_POOL; -#endif nkpg = vm_page_alloc(NULL, nkpt, req); if (!nkpg) panic("pmap_growkernel: no memory to grow kernel"); @@ -1618,7 +1521,7 @@ pmap_remove_page(struct pmap *pmap, vm_offset_t va) /* * Write back all caches from the page being destroyed */ - mips_dcache_wbinv_range_index(va, NBPG); + mips_dcache_wbinv_range_index(va, PAGE_SIZE); /* * get a local va for mappings for this pmap. 
@@ -1705,7 +1608,7 @@ pmap_remove_all(vm_page_t m) * the page being destroyed */ if (m->md.pv_list_count == 1) - mips_dcache_wbinv_range_index(pv->pv_va, NBPG); + mips_dcache_wbinv_range_index(pv->pv_va, PAGE_SIZE); pv->pv_pmap->pm_stats.resident_count--; @@ -1790,16 +1693,10 @@ retry: obits = pbits = *pte; pa = mips_tlbpfn_to_paddr(pbits); - if (page_is_managed(pa)) { + if (page_is_managed(pa) && (pbits & PTE_M) != 0) { m = PHYS_TO_VM_PAGE(pa); - if (m->md.pv_flags & PV_TABLE_REF) { - vm_page_flag_set(m, PG_REFERENCED); - m->md.pv_flags &= ~PV_TABLE_REF; - } - if (pbits & PTE_M) { - vm_page_dirty(m); - m->md.pv_flags &= ~PV_TABLE_MOD; - } + vm_page_dirty(m); + m->md.pv_flags &= ~PV_TABLE_MOD; } pbits = (pbits & ~PTE_M) | PTE_RO; @@ -1987,7 +1884,7 @@ validate: if (origpte & PTE_M) { KASSERT((origpte & PTE_RW), ("pmap_enter: modified page not writable:" - " va: %p, pte: 0x%lx", (void *)va, origpte)); + " va: %p, pte: 0x%x", (void *)va, origpte)); if (page_is_managed(opa)) vm_page_dirty(om); } @@ -2004,8 +1901,8 @@ validate: */ if (!is_kernel_pmap(pmap) && (pmap == &curproc->p_vmspace->vm_pmap) && (prot & VM_PROT_EXECUTE)) { - mips_icache_sync_range(va, NBPG); - mips_dcache_wbinv_range(va, NBPG); + mips_icache_sync_range(va, PAGE_SIZE); + mips_dcache_wbinv_range(va, PAGE_SIZE); } vm_page_unlock_queues(); PMAP_UNLOCK(pmap); @@ -2134,8 +2031,8 @@ pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, * unresolvable TLB miss may occur. */ if (pmap == &curproc->p_vmspace->vm_pmap) { va &= ~PAGE_MASK; - mips_icache_sync_range(va, NBPG); - mips_dcache_wbinv_range(va, NBPG); + mips_icache_sync_range(va, PAGE_SIZE); + mips_dcache_wbinv_range(va, PAGE_SIZE); } } return (mpte); @@ -2149,36 +2046,34 @@ void * pmap_kenter_temporary(vm_paddr_t pa, int i) { vm_offset_t va; - int int_level; + register_t intr; if (i != 0) printf("%s: ERROR!!! 
More than one page of virtual address mapping not supported\n", __func__); -#ifdef VM_ALLOC_WIRED_TLB_PG_POOL - if (need_wired_tlb_page_pool) { - va = pmap_map_fpage(pa, &fpages_shared[PMAP_FPAGE_KENTER_TEMP], - TRUE); - } else -#endif if (pa < MIPS_KSEG0_LARGEST_PHYS) { va = MIPS_PHYS_TO_KSEG0(pa); } else { int cpu; struct local_sysmaps *sysm; + pt_entry_t *pte, npte; + /* If this is used other than for dumps, we may need to leave * interrupts disasbled on return. If crash dumps don't work when * we get to this point, we might want to consider this (leaving things * disabled as a starting point ;-) */ - int_level = disableintr(); + intr = intr_disable(); cpu = PCPU_GET(cpuid); sysm = &sysmap_lmem[cpu]; /* Since this is for the debugger, no locks or any other fun */ - sysm->CMAP1 = mips_paddr_to_tlbpfn(pa) | PTE_RW | PTE_V | PTE_G | PTE_W | PTE_CACHE; + npte = mips_paddr_to_tlbpfn(pa) | PTE_RW | PTE_V | PTE_G | PTE_W | PTE_CACHE; + pte = pmap_pte(kernel_pmap, sysm->base); + *pte = npte; sysm->valid1 = 1; - pmap_TLB_update_kernel((vm_offset_t)sysm->CADDR1, sysm->CMAP1); - va = (vm_offset_t)sysm->CADDR1; - restoreintr(int_level); + pmap_update_page(kernel_pmap, sysm->base, npte); + va = sysm->base; + intr_restore(intr); } return ((void *)va); } @@ -2187,7 +2082,7 @@ void pmap_kenter_temporary_free(vm_paddr_t pa) { int cpu; - int int_level; + register_t intr; struct local_sysmaps *sysm; if (pa < MIPS_KSEG0_LARGEST_PHYS) { @@ -2197,10 +2092,13 @@ pmap_kenter_temporary_free(vm_paddr_t pa) cpu = PCPU_GET(cpuid); sysm = &sysmap_lmem[cpu]; if (sysm->valid1) { - int_level = disableintr(); - pmap_TLB_invalidate_kernel((vm_offset_t)sysm->CADDR1); - restoreintr(int_level); - sysm->CMAP1 = 0; + pt_entry_t *pte; + + intr = intr_disable(); + pte = pmap_pte(kernel_pmap, sysm->base); + *pte = PTE_G; + pmap_invalidate_page(kernel_pmap, sysm->base); + intr_restore(intr); sysm->valid1 = 0; } } @@ -2310,54 +2208,21 @@ pmap_zero_page(vm_page_t m) { vm_offset_t va; vm_paddr_t phys = 
VM_PAGE_TO_PHYS(m); - int int_level; -#ifdef VM_ALLOC_WIRED_TLB_PG_POOL - if (need_wired_tlb_page_pool) { - struct fpage *fp1; - struct sysmaps *sysmaps; + register_t intr; - sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)]; - mtx_lock(&sysmaps->lock); - sched_pin(); - - fp1 = &sysmaps->fp[PMAP_FPAGE1]; - va = pmap_map_fpage(phys, fp1, FALSE); - bzero((caddr_t)va, PAGE_SIZE); - pmap_unmap_fpage(phys, fp1); - sched_unpin(); - mtx_unlock(&sysmaps->lock); - /* - * Should you do cache flush? - */ - } else -#endif if (phys < MIPS_KSEG0_LARGEST_PHYS) { - va = MIPS_PHYS_TO_KSEG0(phys); bzero((caddr_t)va, PAGE_SIZE); mips_dcache_wbinv_range(va, PAGE_SIZE); } else { - int cpu; - struct local_sysmaps *sysm; + PMAP_LMEM_MAP1(va, phys); - cpu = PCPU_GET(cpuid); - sysm = &sysmap_lmem[cpu]; - PMAP_LGMEM_LOCK(sysm); - sched_pin(); - int_level = disableintr(); - sysm->CMAP1 = mips_paddr_to_tlbpfn(phys) | PTE_RW | PTE_V | PTE_G | PTE_W | PTE_CACHE; - sysm->valid1 = 1; - pmap_TLB_update_kernel((vm_offset_t)sysm->CADDR1, sysm->CMAP1); - bzero(sysm->CADDR1, PAGE_SIZE); - pmap_TLB_invalidate_kernel((vm_offset_t)sysm->CADDR1); - restoreintr(int_level); - sysm->CMAP1 = 0; - sysm->valid1 = 0; - sched_unpin(); - PMAP_LGMEM_UNLOCK(sysm); + bzero((caddr_t)va, PAGE_SIZE); + mips_dcache_wbinv_range(va, PAGE_SIZE); + + PMAP_LMEM_UNMAP(); } - } /* @@ -2371,48 +2236,19 @@ pmap_zero_page_area(vm_page_t m, int off, int size) { vm_offset_t va; vm_paddr_t phys = VM_PAGE_TO_PHYS(m); - int int_level; -#ifdef VM_ALLOC_WIRED_TLB_PG_POOL - if (need_wired_tlb_page_pool) { - struct fpage *fp1; - struct sysmaps *sysmaps; + register_t intr; - sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)]; - mtx_lock(&sysmaps->lock); - sched_pin(); - - fp1 = &sysmaps->fp[PMAP_FPAGE1]; - va = pmap_map_fpage(phys, fp1, FALSE); - bzero((caddr_t)va + off, size); - pmap_unmap_fpage(phys, fp1); - - sched_unpin(); - mtx_unlock(&sysmaps->lock); - } else -#endif if (phys < MIPS_KSEG0_LARGEST_PHYS) { va = MIPS_PHYS_TO_KSEG0(phys); bzero((char 
*)(caddr_t)va + off, size); mips_dcache_wbinv_range(va + off, size); } else { - int cpu; - struct local_sysmaps *sysm; + PMAP_LMEM_MAP1(va, phys); - cpu = PCPU_GET(cpuid); - sysm = &sysmap_lmem[cpu]; - PMAP_LGMEM_LOCK(sysm); - int_level = disableintr(); - sched_pin(); - sysm->CMAP1 = mips_paddr_to_tlbpfn(phys) | PTE_RW | PTE_V | PTE_G | PTE_W | PTE_CACHE; - sysm->valid1 = 1; - pmap_TLB_update_kernel((vm_offset_t)sysm->CADDR1, sysm->CMAP1); - bzero((char *)sysm->CADDR1 + off, size); - pmap_TLB_invalidate_kernel((vm_offset_t)sysm->CADDR1); - restoreintr(int_level); - sysm->CMAP1 = 0; - sysm->valid1 = 0; - sched_unpin(); - PMAP_LGMEM_UNLOCK(sysm); + bzero((char *)va + off, size); + mips_dcache_wbinv_range(va + off, size); + + PMAP_LMEM_UNMAP(); } } @@ -2421,41 +2257,20 @@ pmap_zero_page_idle(vm_page_t m) { vm_offset_t va; vm_paddr_t phys = VM_PAGE_TO_PHYS(m); - int int_level; -#ifdef VM_ALLOC_WIRED_TLB_PG_POOL - if (need_wired_tlb_page_pool) { - sched_pin(); - va = pmap_map_fpage(phys, &fpages_shared[PMAP_FPAGE3], FALSE); - bzero((caddr_t)va, PAGE_SIZE); - pmap_unmap_fpage(phys, &fpages_shared[PMAP_FPAGE3]); - sched_unpin(); - } else -#endif + register_t intr; + if (phys < MIPS_KSEG0_LARGEST_PHYS) { va = MIPS_PHYS_TO_KSEG0(phys); bzero((caddr_t)va, PAGE_SIZE); mips_dcache_wbinv_range(va, PAGE_SIZE); } else { - int cpu; - struct local_sysmaps *sysm; + PMAP_LMEM_MAP1(va, phys); - cpu = PCPU_GET(cpuid); - sysm = &sysmap_lmem[cpu]; - PMAP_LGMEM_LOCK(sysm); - int_level = disableintr(); - sched_pin(); - sysm->CMAP1 = mips_paddr_to_tlbpfn(phys) | PTE_RW | PTE_V | PTE_G | PTE_W | PTE_CACHE; - sysm->valid1 = 1; - pmap_TLB_update_kernel((vm_offset_t)sysm->CADDR1, sysm->CMAP1); - bzero(sysm->CADDR1, PAGE_SIZE); - pmap_TLB_invalidate_kernel((vm_offset_t)sysm->CADDR1); - restoreintr(int_level); - sysm->CMAP1 = 0; - sysm->valid1 = 0; - sched_unpin(); - PMAP_LGMEM_UNLOCK(sysm); + bzero((caddr_t)va, PAGE_SIZE); + mips_dcache_wbinv_range(va, PAGE_SIZE); + + PMAP_LMEM_UNMAP(); } - } /* 
@@ -2470,96 +2285,28 @@ pmap_copy_page(vm_page_t src, vm_page_t dst) vm_offset_t va_src, va_dst; vm_paddr_t phy_src = VM_PAGE_TO_PHYS(src); vm_paddr_t phy_dst = VM_PAGE_TO_PHYS(dst); - int int_level; -#ifdef VM_ALLOC_WIRED_TLB_PG_POOL - if (need_wired_tlb_page_pool) { - struct fpage *fp1, *fp2; - struct sysmaps *sysmaps; - - sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)]; - mtx_lock(&sysmaps->lock); - sched_pin(); - - fp1 = &sysmaps->fp[PMAP_FPAGE1]; - fp2 = &sysmaps->fp[PMAP_FPAGE2]; - - va_src = pmap_map_fpage(phy_src, fp1, FALSE); - va_dst = pmap_map_fpage(phy_dst, fp2, FALSE); - - bcopy((caddr_t)va_src, (caddr_t)va_dst, PAGE_SIZE); - - pmap_unmap_fpage(phy_src, fp1); - pmap_unmap_fpage(phy_dst, fp2); - sched_unpin(); - mtx_unlock(&sysmaps->lock); + register_t intr; + if ((phy_src < MIPS_KSEG0_LARGEST_PHYS) && (phy_dst < MIPS_KSEG0_LARGEST_PHYS)) { + /* easy case, all can be accessed via KSEG0 */ /* - * Should you flush the cache? + * Flush all caches for VA that are mapped to this page + * to make sure that data in SDRAM is up to date */ - } else -#endif - { - if ((phy_src < MIPS_KSEG0_LARGEST_PHYS) && (phy_dst < MIPS_KSEG0_LARGEST_PHYS)) { - /* easy case, all can be accessed via KSEG0 */ - /* - * Flush all caches for VA that are mapped to this page - * to make sure that data in SDRAM is up to date - */ - pmap_flush_pvcache(src); - mips_dcache_wbinv_range_index( - MIPS_PHYS_TO_KSEG0(phy_dst), NBPG); - va_src = MIPS_PHYS_TO_KSEG0(phy_src); - va_dst = MIPS_PHYS_TO_KSEG0(phy_dst); - bcopy((caddr_t)va_src, (caddr_t)va_dst, PAGE_SIZE); - mips_dcache_wbinv_range(va_dst, PAGE_SIZE); - } else { - int cpu; - struct local_sysmaps *sysm; + pmap_flush_pvcache(src); + mips_dcache_wbinv_range_index( + MIPS_PHYS_TO_KSEG0(phy_dst), PAGE_SIZE); + va_src = MIPS_PHYS_TO_KSEG0(phy_src); + va_dst = MIPS_PHYS_TO_KSEG0(phy_dst); + bcopy((caddr_t)va_src, (caddr_t)va_dst, PAGE_SIZE); + mips_dcache_wbinv_range(va_dst, PAGE_SIZE); + } else { + PMAP_LMEM_MAP2(va_src, phy_src, va_dst, phy_dst); 
- cpu = PCPU_GET(cpuid); - sysm = &sysmap_lmem[cpu]; - PMAP_LGMEM_LOCK(sysm); - sched_pin(); - int_level = disableintr(); - if (phy_src < MIPS_KSEG0_LARGEST_PHYS) { - /* one side needs mapping - dest */ - va_src = MIPS_PHYS_TO_KSEG0(phy_src); - sysm->CMAP2 = mips_paddr_to_tlbpfn(phy_dst) | PTE_RW | PTE_V | PTE_G | PTE_W | PTE_CACHE; - pmap_TLB_update_kernel((vm_offset_t)sysm->CADDR2, sysm->CMAP2); - sysm->valid2 = 1; - va_dst = (vm_offset_t)sysm->CADDR2; - } else if (phy_dst < MIPS_KSEG0_LARGEST_PHYS) { - /* one side needs mapping - src */ - va_dst = MIPS_PHYS_TO_KSEG0(phy_dst); - sysm->CMAP1 = mips_paddr_to_tlbpfn(phy_src) | PTE_RW | PTE_V | PTE_G | PTE_W | PTE_CACHE; - pmap_TLB_update_kernel((vm_offset_t)sysm->CADDR1, sysm->CMAP1); - va_src = (vm_offset_t)sysm->CADDR1; - sysm->valid1 = 1; - } else { - /* all need mapping */ - sysm->CMAP1 = mips_paddr_to_tlbpfn(phy_src) | PTE_RW | PTE_V | PTE_G | PTE_W | PTE_CACHE; - sysm->CMAP2 = mips_paddr_to_tlbpfn(phy_dst) | PTE_RW | PTE_V | PTE_G | PTE_W | PTE_CACHE; - pmap_TLB_update_kernel((vm_offset_t)sysm->CADDR1, sysm->CMAP1); - pmap_TLB_update_kernel((vm_offset_t)sysm->CADDR2, sysm->CMAP2); - sysm->valid1 = sysm->valid2 = 1; - va_src = (vm_offset_t)sysm->CADDR1; - va_dst = (vm_offset_t)sysm->CADDR2; - } - bcopy((void *)va_src, (void *)va_dst, PAGE_SIZE); - if (sysm->valid1) { - pmap_TLB_invalidate_kernel((vm_offset_t)sysm->CADDR1); - sysm->CMAP1 = 0; - sysm->valid1 = 0; - } - if (sysm->valid2) { - pmap_TLB_invalidate_kernel((vm_offset_t)sysm->CADDR2); - sysm->CMAP2 = 0; - sysm->valid2 = 0; - } - restoreintr(int_level); - sched_unpin(); - PMAP_LGMEM_UNLOCK(sysm); - } + bcopy((void *)va_src, (void *)va_dst, PAGE_SIZE); + mips_dcache_wbinv_range(va_dst, PAGE_SIZE); + + PMAP_LMEM_UNMAP(); } } @@ -2631,9 +2378,8 @@ pmap_remove_pages(pmap_t pmap) *pte = is_kernel_pmap(pmap) ? 
PTE_G : 0; m = PHYS_TO_VM_PAGE(mips_tlbpfn_to_paddr(tpte)); - - KASSERT(m < &vm_page_array[vm_page_array_size], - ("pmap_remove_pages: bad tpte %lx", tpte)); + KASSERT(m != NULL, + ("pmap_remove_pages: bad tpte %x", tpte)); pv->pv_pmap->pm_stats.resident_count--; @@ -2876,6 +2622,20 @@ pmap_clear_modify(vm_page_t m) } } +/* + * pmap_is_referenced: + * + * Return whether or not the specified physical page was referenced + * in any physical maps. + */ +boolean_t +pmap_is_referenced(vm_page_t m) +{ + + return ((m->flags & PG_FICTITIOUS) == 0 && + (m->md.pv_flags & PV_TABLE_REF) != 0); +} + /* * pmap_clear_reference: * @@ -3001,10 +2761,8 @@ pmap_mincore(pmap_t pmap, vm_offset_t addr) * Referenced by us or someone */ vm_page_lock_queues(); - if ((m->flags & PG_REFERENCED) || pmap_ts_referenced(m)) { + if ((m->flags & PG_REFERENCED) || pmap_is_referenced(m)) val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER; - vm_page_flag_set(m, PG_REFERENCED); - } vm_page_unlock_queues(); } return val; @@ -3063,6 +2821,21 @@ pmap_align_superpage(vm_object_t object, vm_ooffset_t offset, *addr = ((*addr + SEGOFSET) & ~SEGOFSET) + superpage_offset; } +/* + * Increase the starting virtual address of the given mapping so + * that it is aligned to not be the second page in a TLB entry. + * This routine assumes that the length is appropriately-sized so + * that the allocation does not share a TLB entry at all if required. 
+ */ +void +pmap_align_tlb(vm_offset_t *addr) +{ + if ((*addr & PAGE_SIZE) == 0) + return; + *addr += PAGE_SIZE; + return; +} + int pmap_pid_dump(int pid); int @@ -3219,10 +2992,12 @@ page_is_managed(vm_offset_t pa) { vm_offset_t pgnum = mips_btop(pa); - if (pgnum >= first_page && (pgnum < (first_page + vm_page_array_size))) { + if (pgnum >= first_page) { vm_page_t m; m = PHYS_TO_VM_PAGE(pa); + if (m == NULL) + return 0; if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0) return 1; } @@ -3254,20 +3029,6 @@ init_pte_prot(vm_offset_t va, vm_page_t m, vm_prot_t prot) return rw; } -/* - * pmap_page_is_free: - * - * Called when a page is freed to allow pmap to clean up - * any extra state associated with the page. In this case - * clear modified/referenced bits. - */ -void -pmap_page_is_free(vm_page_t m) -{ - - m->md.pv_flags = 0; -} - /* * pmap_set_modified: * @@ -3307,44 +3068,16 @@ pmap_kextract(vm_offset_t va) else if (va >= MIPS_KSEG1_START && va < MIPS_KSEG2_START) pa = MIPS_KSEG1_TO_PHYS(va); -#ifdef VM_ALLOC_WIRED_TLB_PG_POOL - else if (need_wired_tlb_page_pool && ((va >= VM_MIN_KERNEL_ADDRESS) && - (va < (VM_MIN_KERNEL_ADDRESS + VM_KERNEL_ALLOC_OFFSET)))) - pa = MIPS_KSEG0_TO_PHYS(va); -#endif else if (va >= MIPS_KSEG2_START && va < VM_MAX_KERNEL_ADDRESS) { pt_entry_t *ptep; /* Is the kernel pmap initialized? */ if (kernel_pmap->pm_active) { - if (va >= (vm_offset_t)virtual_sys_start) { - /* Its inside the virtual address range */ - ptep = pmap_pte(kernel_pmap, va); - if (ptep) - pa = mips_tlbpfn_to_paddr(*ptep) | - (va & PAGE_MASK); - } else { - int i; - - /* - * its inside the special mapping area, I - * don't think this should happen, but if it - * does I want it toa all work right :-) - * Note if it does happen, we assume the - * caller has the lock? FIXME, this needs to - * be checked FIXEM - RRS. 
- */ - for (i = 0; i < MAXCPU; i++) { - if ((sysmap_lmem[i].valid1) && ((vm_offset_t)sysmap_lmem[i].CADDR1 == va)) { - pa = mips_tlbpfn_to_paddr(sysmap_lmem[i].CMAP1); - break; - } - if ((sysmap_lmem[i].valid2) && ((vm_offset_t)sysmap_lmem[i].CADDR2 == va)) { - pa = mips_tlbpfn_to_paddr(sysmap_lmem[i].CMAP2); - break; - } - } - } + /* Its inside the virtual address range */ + ptep = pmap_pte(kernel_pmap, va); + if (ptep) + pa = mips_tlbpfn_to_paddr(*ptep) | + (va & PAGE_MASK); } } return pa; @@ -3358,7 +3091,7 @@ pmap_flush_pvcache(vm_page_t m) if (m != NULL) { for (pv = TAILQ_FIRST(&m->md.pv_list); pv; pv = TAILQ_NEXT(pv, pv_list)) { - mips_dcache_wbinv_range_index(pv->pv_va, NBPG); + mips_dcache_wbinv_range_index(pv->pv_va, PAGE_SIZE); } } } diff --git a/sys/mips/mips/psraccess.S b/sys/mips/mips/psraccess.S index 0bcb04d3d33..9b40a234458 100644 --- a/sys/mips/mips/psraccess.S +++ b/sys/mips/mips/psraccess.S @@ -53,109 +53,6 @@ .set noreorder # Noreorder is default style! -/* - * Set/clear software interrupt. - */ - -LEAF(setsoftintr0) - mfc0 v0, COP_0_CAUSE_REG # read cause register - nop - or v0, v0, SOFT_INT_MASK_0 # set soft clock interrupt - mtc0 v0, COP_0_CAUSE_REG # save it - j ra - nop -END(setsoftintr0) - -LEAF(clearsoftintr0) - mfc0 v0, COP_0_CAUSE_REG # read cause register - nop - and v0, v0, ~SOFT_INT_MASK_0 # clear soft clock interrupt - mtc0 v0, COP_0_CAUSE_REG # save it - j ra - nop -END(clearsoftintr0) - -LEAF(setsoftintr1) - mfc0 v0, COP_0_CAUSE_REG # read cause register - nop - or v0, v0, SOFT_INT_MASK_1 # set soft net interrupt - mtc0 v0, COP_0_CAUSE_REG # save it - j ra - nop -END(setsoftintr1) - -LEAF(clearsoftintr1) - mfc0 v0, COP_0_CAUSE_REG # read cause register - nop - and v0, v0, ~SOFT_INT_MASK_1 # clear soft net interrupt - mtc0 v0, COP_0_CAUSE_REG # save it - j ra - nop -END(clearsoftintr1) - -/* - * Set/change interrupt priority routines. - * These routines return the previous state. 
- */ -LEAF(restoreintr) - mfc0 t0,COP_0_STATUS_REG - and t1,t0,SR_INT_ENAB - beq a0,t1,1f - xor t0,SR_INT_ENAB - - .set noreorder - - mtc0 t0,COP_0_STATUS_REG - nop - nop - nop - nop -1: - j ra - nop -END(restoreintr) - -/* - * Set/change interrupt priority routines. - * These routines return the previous state. - */ - -LEAF(enableintr) -#ifdef TARGET_OCTEON - .set mips64r2 - ei v0 - and v0, SR_INT_ENAB # return old interrupt enable bit - .set mips0 -#else - mfc0 v0, COP_0_STATUS_REG # read status register - nop - or v1, v0, SR_INT_ENAB - mtc0 v1, COP_0_STATUS_REG # enable all interrupts - and v0, SR_INT_ENAB # return old interrupt enable -#endif - j ra - nop -END(enableintr) - - -LEAF(disableintr) -#ifdef TARGET_OCTEON - .set mips64r2 - di v0 - and v0, SR_INT_ENAB # return old interrupt enable bit - .set mips0 -#else - mfc0 v0, COP_0_STATUS_REG # read status register - nop - and v1, v0, ~SR_INT_ENAB - mtc0 v1, COP_0_STATUS_REG # disable all interrupts - MIPS_CPU_NOP_DELAY - and v0, SR_INT_ENAB # return old interrupt enable -#endif - j ra - nop -END(disableintr) - LEAF(set_intr_mask) li t0, SR_INT_MASK # 1 means masked so invert. not a0, a0 # 1 means masked so invert. @@ -182,17 +79,3 @@ LEAF(get_intr_mask) nop END(get_intr_mask) - -/* - * u_int32_t mips_cp0_config1_read(void) - * - * Return the current value of the CP0 Config (Select 1) register. - */ -LEAF(mips_cp0_config1_read) - .set push - .set mips32 - mfc0 v0, COP_0_CONFIG, 1 - j ra - nop - .set pop -END(mips_cp0_config1_read) diff --git a/sys/mips/mips/support.S b/sys/mips/mips/support.S index 2aed3e613fd..9cbc9982293 100644 --- a/sys/mips/mips/support.S +++ b/sys/mips/mips/support.S @@ -103,47 +103,22 @@ * Primitives */ -/* - * This table is indexed by u.u_pcb.pcb_onfault in trap(). - * The reason for using this table rather than storing an address in - * u.u_pcb.pcb_onfault is simply to make the code faster. 
- */ - .globl onfault_table - .data - .align 3 -onfault_table: - .word 0 # invalid index number -#define BADERR 1 - .word baderr -#define COPYERR 2 - .word copyerr -#define FSWBERR 3 - .word fswberr -#define FSWINTRBERR 4 - .word fswintrberr -#if defined(DDB) || defined(DEBUG) -#define DDBERR 5 - .word ddberr -#else - .word 0 -#endif - .text /* * See if access to addr with a len type instruction causes a machine check. - * len is length of access (1=byte, 2=short, 4=long) + * len is length of access (1=byte, 2=short, 4=int) * * badaddr(addr, len) * char *addr; * int len; */ LEAF(badaddr) - li v0, BADERR + PTR_LA v0, baderr GET_CPU_PCPU(v1) - lw v1, PC_CURPCB(v1) + PTR_L v1, PC_CURPCB(v1) bne a1, 1, 2f - sw v0, U_PCB_ONFAULT(v1) + PTR_S v0, U_PCB_ONFAULT(v1) b 5f lbu v0, (a0) 2: @@ -154,7 +129,7 @@ LEAF(badaddr) 4: lw v0, (a0) 5: - sw zero, U_PCB_ONFAULT(v1) + PTR_S zero, U_PCB_ONFAULT(v1) j ra move v0, zero # made it w/o errors baderr: @@ -169,24 +144,24 @@ END(badaddr) * string is too long, return ENAMETOOLONG; else return 0. */ LEAF(copystr) - move t0, a2 - beq a2, zero, 4f + move t0, a2 + beq a2, zero, 4f 1: - lbu v0, 0(a0) - subu a2, a2, 1 - beq v0, zero, 2f - sb v0, 0(a1) # each byte until NIL - addu a0, a0, 1 - bne a2, zero, 1b # less than maxlen - addu a1, a1, 1 + lbu v0, 0(a0) + PTR_SUBU a2, a2, 1 + beq v0, zero, 2f + sb v0, 0(a1) # each byte until NIL + PTR_ADDU a0, a0, 1 + bne a2, zero, 1b # less than maxlen + PTR_ADDU a1, a1, 1 4: - li v0, ENAMETOOLONG # run out of space + li v0, ENAMETOOLONG # run out of space 2: - beq a3, zero, 3f # return num. of copied bytes - subu a2, t0, a2 # if the 4th arg was non-NULL - sw a2, 0(a3) + beq a3, zero, 3f # return num. 
of copied bytes + PTR_SUBU a2, t0, a2 # if the 4th arg was non-NULL + PTR_S a2, 0(a3) 3: - j ra # v0 is 0 or ENAMETOOLONG + j ra # v0 is 0 or ENAMETOOLONG nop END(copystr) @@ -196,12 +171,12 @@ END(copystr) */ LEAF(fillw) 1: - addiu a2, a2, -1 - sh a0, 0(a1) - bne a2,zero, 1b - addiu a1, a1, 2 + PTR_ADDU a2, a2, -1 + sh a0, 0(a1) + bne a2,zero, 1b + PTR_ADDU a1, a1, 2 - jr ra + jr ra nop END(fillw) @@ -210,13 +185,13 @@ END(fillw) * mem_zero_page(addr); */ LEAF(mem_zero_page) - li v0, NBPG + li v0, PAGE_SIZE 1: - subu v0, 8 - sd zero, 0(a0) - bne zero, v0, 1b - addu a0, 8 - jr ra + PTR_SUBU v0, 8 + sd zero, 0(a0) + bne zero, v0, 1b + PTR_ADDU a0, 8 + jr ra nop END(mem_zero_page) @@ -228,56 +203,56 @@ END(mem_zero_page) * a2 = count */ LEAF(insb) - beq a2, zero, 2f - addu a2, a1 + beq a2, zero, 2f + PTR_ADDU a2, a1 1: - lbu v0, 0(a0) - addiu a1, 1 - bne a1, a2, 1b - sb v0, -1(a1) + lbu v0, 0(a0) + PTR_ADDU a1, 1 + bne a1, a2, 1b + sb v0, -1(a1) 2: - jr ra + jr ra nop END(insb) LEAF(insw) - beq a2, zero, 2f - addu a2, a2 - addu a2, a1 + beq a2, zero, 2f + PTR_ADDU a2, a2 + PTR_ADDU a2, a1 1: - lhu v0, 0(a0) - addiu a1, 2 - bne a1, a2, 1b - sh v0, -2(a1) + lhu v0, 0(a0) + PTR_ADDU a1, 2 + bne a1, a2, 1b + sh v0, -2(a1) 2: - jr ra + jr ra nop END(insw) LEAF(insl) - beq a2, zero, 2f - sll a2, 2 - addu a2, a1 + beq a2, zero, 2f + sll a2, 2 + PTR_ADDU a2, a1 1: - lw v0, 0(a0) - addiu a1, 4 - bne a1, a2, 1b - sw v0, -4(a1) + lw v0, 0(a0) + PTR_ADDU a1, 4 + bne a1, a2, 1b + sw v0, -4(a1) 2: - jr ra + jr ra nop END(insl) LEAF(outsb) - beq a2, zero, 2f - addu a2, a1 + beq a2, zero, 2f + PTR_ADDU a2, a1 1: - lbu v0, 0(a1) - addiu a1, 1 - bne a1, a2, 1b - sb v0, 0(a0) + lbu v0, 0(a1) + PTR_ADDU a1, 1 + bne a1, a2, 1b + sb v0, 0(a0) 2: - jr ra + jr ra nop END(outsb) @@ -343,22 +318,22 @@ END(outsl) * u_int maxlength; * u_int *lencopied; */ -NON_LEAF(copyinstr, STAND_FRAME_SIZE, ra) - subu sp, sp, STAND_FRAME_SIZE - .mask 0x80000000, (STAND_RA_OFFSET - STAND_FRAME_SIZE) - sw ra, 
STAND_RA_OFFSET(sp) +NON_LEAF(copyinstr, CALLFRAME_SIZ, ra) + PTR_SUBU sp, sp, CALLFRAME_SIZ + .mask 0x80000000, (CALLFRAME_RA - CALLFRAME_SIZ) + PTR_LA v0, copyerr blt a0, zero, _C_LABEL(copyerr) # make sure address is in user space - li v0, COPYERR + REG_S ra, CALLFRAME_RA(sp) GET_CPU_PCPU(v1) - lw v1, PC_CURPCB(v1) + PTR_L v1, PC_CURPCB(v1) jal _C_LABEL(copystr) - sw v0, U_PCB_ONFAULT(v1) - lw ra, STAND_RA_OFFSET(sp) + PTR_S v0, U_PCB_ONFAULT(v1) + REG_L ra, CALLFRAME_RA(sp) GET_CPU_PCPU(v1) - lw v1, PC_CURPCB(v1) - sw zero, U_PCB_ONFAULT(v1) + PTR_L v1, PC_CURPCB(v1) + PTR_S zero, U_PCB_ONFAULT(v1) j ra - addu sp, sp, STAND_FRAME_SIZE + PTR_ADDU sp, sp, CALLFRAME_SIZ END(copyinstr) /* @@ -371,22 +346,22 @@ END(copyinstr) * u_int maxlength; * u_int *lencopied; */ -NON_LEAF(copyoutstr, STAND_FRAME_SIZE, ra) - subu sp, sp, STAND_FRAME_SIZE - .mask 0x80000000, (STAND_RA_OFFSET - STAND_FRAME_SIZE) - sw ra, STAND_RA_OFFSET(sp) +NON_LEAF(copyoutstr, CALLFRAME_SIZ, ra) + PTR_SUBU sp, sp, CALLFRAME_SIZ + .mask 0x80000000, (CALLFRAME_RA - CALLFRAME_SIZ) + PTR_LA v0, copyerr blt a1, zero, _C_LABEL(copyerr) # make sure address is in user space - li v0, COPYERR + REG_S ra, CALLFRAME_RA(sp) GET_CPU_PCPU(v1) - lw v1, PC_CURPCB(v1) + PTR_L v1, PC_CURPCB(v1) jal _C_LABEL(copystr) - sw v0, U_PCB_ONFAULT(v1) - lw ra, STAND_RA_OFFSET(sp) + PTR_S v0, U_PCB_ONFAULT(v1) + REG_L ra, CALLFRAME_RA(sp) GET_CPU_PCPU(v1) - lw v1, PC_CURPCB(v1) - sw zero, U_PCB_ONFAULT(v1) + PTR_L v1, PC_CURPCB(v1) + PTR_S zero, U_PCB_ONFAULT(v1) j ra - addu sp, sp, STAND_FRAME_SIZE + PTR_ADDU sp, sp, CALLFRAME_SIZ END(copyoutstr) /* @@ -396,21 +371,21 @@ END(copyoutstr) * caddr_t *to; (kernel destination address) * unsigned len; */ -NON_LEAF(copyin, STAND_FRAME_SIZE, ra) - subu sp, sp, STAND_FRAME_SIZE - .mask 0x80000000, (STAND_RA_OFFSET - STAND_FRAME_SIZE) - sw ra, STAND_RA_OFFSET(sp) +NON_LEAF(copyin, CALLFRAME_SIZ, ra) + PTR_SUBU sp, sp, CALLFRAME_SIZ + .mask 0x80000000, (CALLFRAME_RA - CALLFRAME_SIZ) 
+ PTR_LA v0, copyerr blt a0, zero, _C_LABEL(copyerr) # make sure address is in user space - li v0, COPYERR + REG_S ra, CALLFRAME_RA(sp) GET_CPU_PCPU(v1) - lw v1, PC_CURPCB(v1) + PTR_L v1, PC_CURPCB(v1) jal _C_LABEL(bcopy) - sw v0, U_PCB_ONFAULT(v1) - lw ra, STAND_RA_OFFSET(sp) + PTR_S v0, U_PCB_ONFAULT(v1) + REG_L ra, CALLFRAME_RA(sp) GET_CPU_PCPU(v1) - lw v1, PC_CURPCB(v1) # bcopy modified v1, so reload - sw zero, U_PCB_ONFAULT(v1) - addu sp, sp, STAND_FRAME_SIZE + PTR_L v1, PC_CURPCB(v1) # bcopy modified v1, so reload + PTR_S zero, U_PCB_ONFAULT(v1) + PTR_ADDU sp, sp, CALLFRAME_SIZ j ra move v0, zero END(copyin) @@ -422,31 +397,28 @@ END(copyin) * caddr_t *to; (user destination address) * unsigned len; */ -NON_LEAF(copyout, STAND_FRAME_SIZE, ra) - subu sp, sp, STAND_FRAME_SIZE - .mask 0x80000000, (STAND_RA_OFFSET - STAND_FRAME_SIZE) - sw ra, STAND_RA_OFFSET(sp) +NON_LEAF(copyout, CALLFRAME_SIZ, ra) + PTR_SUBU sp, sp, CALLFRAME_SIZ + .mask 0x80000000, (CALLFRAME_RA - CALLFRAME_SIZ) + PTR_LA v0, copyerr blt a1, zero, _C_LABEL(copyerr) # make sure address is in user space - li v0, COPYERR + REG_S ra, CALLFRAME_RA(sp) GET_CPU_PCPU(v1) - lw v1, PC_CURPCB(v1) + PTR_L v1, PC_CURPCB(v1) jal _C_LABEL(bcopy) - sw v0, U_PCB_ONFAULT(v1) - lw ra, STAND_RA_OFFSET(sp) + PTR_S v0, U_PCB_ONFAULT(v1) + REG_L ra, CALLFRAME_RA(sp) GET_CPU_PCPU(v1) - lw v1, PC_CURPCB(v1) # bcopy modified v1, so reload - sw zero, U_PCB_ONFAULT(v1) - addu sp, sp, STAND_FRAME_SIZE + PTR_L v1, PC_CURPCB(v1) # bcopy modified v1, so reload + PTR_S zero, U_PCB_ONFAULT(v1) + PTR_ADDU sp, sp, CALLFRAME_SIZ j ra move v0, zero END(copyout) LEAF(copyerr) - lw ra, STAND_RA_OFFSET(sp) - GET_CPU_PCPU(v1) - lw v1, PC_CURPCB(v1) - sw zero, U_PCB_ONFAULT(v1) - addu sp, sp, STAND_FRAME_SIZE + REG_L ra, CALLFRAME_RA(sp) + PTR_ADDU sp, sp, CALLFRAME_SIZ j ra li v0, EFAULT # return error END(copyerr) @@ -460,51 +432,55 @@ END(copyerr) LEAF(fuword) ALEAF(fuword32) ALEAF(fuiword) + PTR_LA v0, fswberr blt a0, zero, fswberr # 
make sure address is in user space - li v0, FSWBERR + nop GET_CPU_PCPU(v1) - lw v1, PC_CURPCB(v1) - sw v0, U_PCB_ONFAULT(v1) + PTR_L v1, PC_CURPCB(v1) + PTR_S v0, U_PCB_ONFAULT(v1) lw v0, 0(a0) # fetch word j ra - sw zero, U_PCB_ONFAULT(v1) + PTR_S zero, U_PCB_ONFAULT(v1) END(fuword) LEAF(fusword) ALEAF(fuisword) + PTR_LA v0, fswberr blt a0, zero, fswberr # make sure address is in user space - li v0, FSWBERR + nop GET_CPU_PCPU(v1) - lw v1, PC_CURPCB(v1) - sw v0, U_PCB_ONFAULT(v1) + PTR_L v1, PC_CURPCB(v1) + PTR_S v0, U_PCB_ONFAULT(v1) lhu v0, 0(a0) # fetch short j ra - sw zero, U_PCB_ONFAULT(v1) + PTR_S zero, U_PCB_ONFAULT(v1) END(fusword) LEAF(fubyte) ALEAF(fuibyte) + PTR_LA v0, fswberr blt a0, zero, fswberr # make sure address is in user space - li v0, FSWBERR + nop GET_CPU_PCPU(v1) - lw v1, PC_CURPCB(v1) - sw v0, U_PCB_ONFAULT(v1) + PTR_L v1, PC_CURPCB(v1) + PTR_S v0, U_PCB_ONFAULT(v1) lbu v0, 0(a0) # fetch byte j ra - sw zero, U_PCB_ONFAULT(v1) + PTR_S zero, U_PCB_ONFAULT(v1) END(fubyte) LEAF(suword32) #ifndef __mips_n64 XLEAF(suword) #endif + PTR_LA v0, fswberr blt a0, zero, fswberr # make sure address is in user space - li v0, FSWBERR + nop GET_CPU_PCPU(v1) - lw v1, PC_CURPCB(v1) - sw v0, U_PCB_ONFAULT(v1) + PTR_L v1, PC_CURPCB(v1) + PTR_S v0, U_PCB_ONFAULT(v1) sw a1, 0(a0) # store word - sw zero, U_PCB_ONFAULT(v1) + PTR_S zero, U_PCB_ONFAULT(v1) j ra move v0, zero END(suword32) @@ -512,13 +488,14 @@ END(suword32) #ifdef __mips_n64 LEAF(suword64) XLEAF(suword) + PTR_LA v0, fswberr blt a0, zero, fswberr # make sure address is in user space - li v0, FSWBERR + nop GET_CPU_PCPU(v1) - lw v1, PC_CURPCB(v1) - sw v0, U_PCB_ONFAULT(v1) + PTR_L v1, PC_CURPCB(v1) + PTR_S v0, U_PCB_ONFAULT(v1) sd a1, 0(a0) # store word - sw zero, U_PCB_ONFAULT(v1) + PTR_S zero, U_PCB_ONFAULT(v1) j ra move v0, zero END(suword64) @@ -537,11 +514,12 @@ LEAF(casuword32) #ifndef __mips_n64 XLEAF(casuword) #endif + PTR_LA v0, fswberr blt a0, zero, fswberr # make sure address is in user space - 
li v0, FSWBERR + nop GET_CPU_PCPU(v1) - lw v1, PC_CURPCB(v1) - sw v0, U_PCB_ONFAULT(v1) + PTR_L v1, PC_CURPCB(v1) + PTR_S v0, U_PCB_ONFAULT(v1) 1: move t0, a2 ll v0, 0(a0) @@ -555,7 +533,7 @@ XLEAF(casuword) 2: li v0, -1 3: - sw zero, U_PCB_ONFAULT(v1) + PTR_S zero, U_PCB_ONFAULT(v1) jr ra nop END(casuword32) @@ -563,11 +541,12 @@ END(casuword32) #ifdef __mips_n64 LEAF(casuword64) XLEAF(casuword) + PTR_LA v0, fswberr blt a0, zero, fswberr # make sure address is in user space - li v0, FSWBERR + nop GET_CPU_PCPU(v1) - lw v1, PC_CURPCB(v1) - sw v0, U_PCB_ONFAULT(v1) + PTR_L v1, PC_CURPCB(v1) + PTR_S v0, U_PCB_ONFAULT(v1) 1: move t0, a2 lld v0, 0(a0) @@ -581,7 +560,7 @@ XLEAF(casuword) 2: li v0, -1 3: - sw zero, U_PCB_ONFAULT(v1) + PTR_S zero, U_PCB_ONFAULT(v1) jr ra nop END(casuword64) @@ -593,13 +572,14 @@ END(casuword64) * Have to flush instruction cache afterwards. */ LEAF(suiword) + PTR_LA v0, fswberr blt a0, zero, fswberr # make sure address is in user space - li v0, FSWBERR + nop GET_CPU_PCPU(v1) - lw v1, PC_CURPCB(v1) - sw v0, U_PCB_ONFAULT(v1) + PTR_L v1, PC_CURPCB(v1) + PTR_S v0, U_PCB_ONFAULT(v1) sw a1, 0(a0) # store word - sw zero, U_PCB_ONFAULT(v1) + PTR_S zero, U_PCB_ONFAULT(v1) j _C_LABEL(Mips_SyncICache) # FlushICache sets v0 = 0. 
(Ugly) li a1, 4 # size of word END(suiword) @@ -610,26 +590,28 @@ END(suiword) */ LEAF(susword) ALEAF(suisword) + PTR_LA v0, fswberr blt a0, zero, fswberr # make sure address is in user space - li v0, FSWBERR + nop GET_CPU_PCPU(v1) - lw v1, PC_CURPCB(v1) - sw v0, U_PCB_ONFAULT(v1) + PTR_L v1, PC_CURPCB(v1) + PTR_S v0, U_PCB_ONFAULT(v1) sh a1, 0(a0) # store short - sw zero, U_PCB_ONFAULT(v1) + PTR_S zero, U_PCB_ONFAULT(v1) j ra move v0, zero END(susword) LEAF(subyte) ALEAF(suibyte) + PTR_LA v0, fswberr blt a0, zero, fswberr # make sure address is in user space - li v0, FSWBERR + nop GET_CPU_PCPU(v1) - lw v1, PC_CURPCB(v1) - sw v0, U_PCB_ONFAULT(v1) + PTR_L v1, PC_CURPCB(v1) + PTR_S v0, U_PCB_ONFAULT(v1) sb a1, 0(a0) # store byte - sw zero, U_PCB_ONFAULT(v1) + PTR_S zero, U_PCB_ONFAULT(v1) j ra move v0, zero END(subyte) @@ -645,24 +627,26 @@ END(fswberr) * The important thing is to prevent sleep() and switch(). */ LEAF(fuswintr) + PTR_LA v0, fswintrberr blt a0, zero, fswintrberr # make sure address is in user space - li v0, FSWINTRBERR + nop GET_CPU_PCPU(v1) - lw v1, PC_CURPCB(v1) - sw v0, U_PCB_ONFAULT(v1) + PTR_L v1, PC_CURPCB(v1) + PTR_S v0, U_PCB_ONFAULT(v1) lhu v0, 0(a0) # fetch short j ra - sw zero, U_PCB_ONFAULT(v1) + PTR_S zero, U_PCB_ONFAULT(v1) END(fuswintr) LEAF(suswintr) + PTR_LA v0, fswintrberr blt a0, zero, fswintrberr # make sure address is in user space - li v0, FSWINTRBERR + nop GET_CPU_PCPU(v1) - lw v1, PC_CURPCB(v1) - sw v0, U_PCB_ONFAULT(v1) + PTR_L v1, PC_CURPCB(v1) + PTR_S v0, U_PCB_ONFAULT(v1) sh a1, 0(a0) # store short - sw zero, U_PCB_ONFAULT(v1) + PTR_S zero, U_PCB_ONFAULT(v1) j ra move v0, zero END(suswintr) @@ -672,111 +656,6 @@ LEAF(fswintrberr) li v0, -1 END(fswintrberr) -/* - * Insert 'p' after 'q'. 
- * _insque(p, q) - * caddr_t p, q; - */ -LEAF(_insque) - lw v0, 0(a1) # v0 = q->next - sw a1, 4(a0) # p->prev = q - sw v0, 0(a0) # p->next = q->next - sw a0, 4(v0) # q->next->prev = p - j ra - sw a0, 0(a1) # q->next = p -END(_insque) - -/* - * Remove item 'p' from queue. - * _remque(p) - * caddr_t p; - */ -LEAF(_remque) - lw v0, 0(a0) # v0 = p->next - lw v1, 4(a0) # v1 = p->prev - nop - sw v0, 0(v1) # p->prev->next = p->next - j ra - sw v1, 4(v0) # p->next->prev = p->prev -END(_remque) - -/*-------------------------------------------------------------------------- - * - * Mips_GetCOUNT -- - * - * Mips_GetCOUNT() - * - * Results: - * Returns the current COUNT reg. - * - * Side effects: - * None. - * - *-------------------------------------------------------------------------- - */ -LEAF(Mips_GetCOUNT) - mfc0 v0, COP_0_COUNT - nop #??? - nop #??? - j ra - nop -END(Mips_GetCOUNT) - -/*-------------------------------------------------------------------------- - * - * Mips_SetCOMPARE -- - * - * Mips_SetCOMPARE() - * - * Results: - * Sets a new value to the COMPARE register. - * - * Side effects: - * The COMPARE equal interrupt is acknowledged. - * - *-------------------------------------------------------------------------- - */ -LEAF(Mips_SetCOMPARE) - mtc0 a0, COP_0_COMPARE - j ra - nop -END(Mips_SetCOMPARE) - -LEAF(Mips_GetCOMPARE) - mfc0 v0, COP_0_COMPARE - j ra - nop -END(Mips_GetCOMPARE) - -/* - * u_int32_t mips_cp0_status_read(void) - * - * Return the current value of the CP0 Status register. - */ -LEAF(mips_cp0_status_read) - mfc0 v0, COP_0_STATUS_REG - j ra - nop -END(mips_cp0_status_read) - -/* - * void mips_cp0_status_write(u_int32_t) - * - * Set the value of the CP0 Status register. - * - * Note: This is almost certainly not the way you want to write a - * "permanent" value to to the CP0 Status register, since it gets - * saved in trap frames and restores. 
- */ -LEAF(mips_cp0_status_write) - mtc0 a0, COP_0_STATUS_REG - nop - nop - j ra - nop -END(mips_cp0_status_write) - - /* * memcpy(to, from, len) * {ov}bcopy(from, to, len) @@ -789,7 +668,7 @@ LEAF(memcpy) ALEAF(bcopy) ALEAF(ovbcopy) .set noreorder - addu t0, a0, a2 # t0 = end of s1 region + PTR_ADDU t0, a0, a2 # t0 = end of s1 region sltu t1, a1, t0 sltu t2, a0, a1 and t1, t1, t2 # t1 = true if from < to < (from+len) @@ -797,11 +676,11 @@ ALEAF(ovbcopy) slt t2, a2, 12 # check for small copy ble a2, zero, 2f - addu t1, a1, a2 # t1 = end of to region + PTR_ADDU t1, a1, a2 # t1 = end of to region 1: lb v1, -1(t0) # copy bytes backwards, - subu t0, t0, 1 # doesnt happen often so do slow way - subu t1, t1, 1 + PTR_SUBU t0, t0, 1 # doesnt happen often so do slow way + PTR_SUBU t1, t1, 1 bne t0, a0, 1b sb v1, 0(t1) 2: @@ -811,59 +690,59 @@ forward: bne t2, zero, smallcpy # do a small bcopy xor v1, a0, a1 # compare low two bits of addresses and v1, v1, 3 - subu a3, zero, a1 # compute # bytes to word align address + PTR_SUBU a3, zero, a1 # compute # bytes to word align address beq v1, zero, aligned # addresses can be word aligned and a3, a3, 3 beq a3, zero, 1f - subu a2, a2, a3 # subtract from remaining count + PTR_SUBU a2, a2, a3 # subtract from remaining count LWHI v1, 0(a0) # get next 4 bytes (unaligned) LWLO v1, 3(a0) - addu a0, a0, a3 + PTR_ADDU a0, a0, a3 SWHI v1, 0(a1) # store 1, 2, or 3 bytes to align a1 - addu a1, a1, a3 + PTR_ADDU a1, a1, a3 1: and v1, a2, 3 # compute number of words left - subu a3, a2, v1 + PTR_SUBU a3, a2, v1 move a2, v1 - addu a3, a3, a0 # compute ending address + PTR_ADDU a3, a3, a0 # compute ending address 2: LWHI v1, 0(a0) # copy words a0 unaligned, a1 aligned LWLO v1, 3(a0) - addu a0, a0, 4 + PTR_ADDU a0, a0, 4 sw v1, 0(a1) - addu a1, a1, 4 + PTR_ADDU a1, a1, 4 bne a0, a3, 2b nop # We have to do this mmu-bug. 
b smallcpy nop aligned: beq a3, zero, 1f - subu a2, a2, a3 # subtract from remaining count + PTR_SUBU a2, a2, a3 # subtract from remaining count LWHI v1, 0(a0) # copy 1, 2, or 3 bytes to align - addu a0, a0, a3 + PTR_ADDU a0, a0, a3 SWHI v1, 0(a1) - addu a1, a1, a3 + PTR_ADDU a1, a1, a3 1: and v1, a2, 3 # compute number of whole words left - subu a3, a2, v1 + PTR_SUBU a3, a2, v1 move a2, v1 - addu a3, a3, a0 # compute ending address + PTR_ADDU a3, a3, a0 # compute ending address 2: lw v1, 0(a0) # copy words - addu a0, a0, 4 + PTR_ADDU a0, a0, 4 sw v1, 0(a1) bne a0, a3, 2b - addu a1, a1, 4 + PTR_ADDU a1, a1, 4 smallcpy: ble a2, zero, 2f - addu a3, a2, a0 # compute ending address + PTR_ADDU a3, a2, a0 # compute ending address 1: lbu v1, 0(a0) # copy bytes - addu a0, a0, 1 + PTR_ADDU a0, a0, 1 sb v1, 0(a1) bne a0, a3, 1b - addu a1, a1, 1 # MMU BUG ? can not do -1(a1) at 0x80000000!! + PTR_ADDU a1, a1, 1 # MMU BUG ? can not do -1(a1) at 0x80000000!! 2: j ra nop @@ -883,19 +762,19 @@ LEAF(memset) sll t2, t1, 16 # shift that left 16 or t1, t2, t1 # or together - subu t0, zero, a0 # compute # bytes to word align address + PTR_SUBU t0, zero, a0 # compute # bytes to word align address and t0, t0, 3 beq t0, zero, 1f # skip if word aligned - subu a2, a2, t0 # subtract from remaining count + PTR_SUBU a2, a2, t0 # subtract from remaining count SWHI t1, 0(a0) # store 1, 2, or 3 bytes to align - addu a0, a0, t0 + PTR_ADDU a0, a0, t0 1: and v1, a2, 3 # compute number of whole words left - subu t0, a2, v1 - subu a2, a2, t0 - addu t0, t0, a0 # compute ending address + PTR_SUBU t0, a2, v1 + PTR_SUBU a2, a2, t0 + PTR_ADDU t0, t0, a0 # compute ending address 2: - addu a0, a0, 4 # clear words + PTR_ADDU a0, a0, 4 # clear words #ifdef MIPS3_5900 nop nop @@ -907,9 +786,9 @@ LEAF(memset) memsetsmallclr: ble a2, zero, 2f - addu t0, a2, a0 # compute ending address + PTR_ADDU t0, a2, a0 # compute ending address 1: - addu a0, a0, 1 # clear bytes + PTR_ADDU a0, a0, 1 # clear bytes #ifdef 
MIPS3_5900 nop nop @@ -931,26 +810,26 @@ LEAF(bzero) ALEAF(blkclr) .set noreorder blt a1, 12, smallclr # small amount to clear? - subu a3, zero, a0 # compute # bytes to word align address + PTR_SUBU a3, zero, a0 # compute # bytes to word align address and a3, a3, 3 beq a3, zero, 1f # skip if word aligned - subu a1, a1, a3 # subtract from remaining count + PTR_SUBU a1, a1, a3 # subtract from remaining count SWHI zero, 0(a0) # clear 1, 2, or 3 bytes to align - addu a0, a0, a3 + PTR_ADDU a0, a0, a3 1: and v0, a1, 3 # compute number of words left - subu a3, a1, v0 + PTR_SUBU a3, a1, v0 move a1, v0 - addu a3, a3, a0 # compute ending address + PTR_ADDU a3, a3, a0 # compute ending address 2: - addu a0, a0, 4 # clear words + PTR_ADDU a0, a0, 4 # clear words bne a0, a3, 2b # unrolling loop does not help sw zero, -4(a0) # since we are limited by memory speed smallclr: ble a1, zero, 2f - addu a3, a1, a0 # compute ending address + PTR_ADDU a3, a1, a0 # compute ending address 1: - addu a0, a0, 1 # clear bytes + PTR_ADDU a0, a0, 1 # clear bytes bne a0, a3, 1b sb zero, -1(a0) 2: @@ -967,66 +846,66 @@ LEAF(bcmp) blt a2, 16, smallcmp # is it worth any trouble? 
xor v0, a0, a1 # compare low two bits of addresses and v0, v0, 3 - subu a3, zero, a1 # compute # bytes to word align address + PTR_SUBU a3, zero, a1 # compute # bytes to word align address bne v0, zero, unalignedcmp # not possible to align addresses and a3, a3, 3 beq a3, zero, 1f - subu a2, a2, a3 # subtract from remaining count + PTR_SUBU a2, a2, a3 # subtract from remaining count move v0, v1 # init v0,v1 so unmodified bytes match LWHI v0, 0(a0) # read 1, 2, or 3 bytes LWHI v1, 0(a1) - addu a1, a1, a3 + PTR_ADDU a1, a1, a3 bne v0, v1, nomatch - addu a0, a0, a3 + PTR_ADDU a0, a0, a3 1: and a3, a2, ~3 # compute number of whole words left - subu a2, a2, a3 # which has to be >= (16-3) & ~3 - addu a3, a3, a0 # compute ending address + PTR_SUBU a2, a2, a3 # which has to be >= (16-3) & ~3 + PTR_ADDU a3, a3, a0 # compute ending address 2: lw v0, 0(a0) # compare words lw v1, 0(a1) - addu a0, a0, 4 + PTR_ADDU a0, a0, 4 bne v0, v1, nomatch - addu a1, a1, 4 + PTR_ADDU a1, a1, 4 bne a0, a3, 2b nop b smallcmp # finish remainder nop unalignedcmp: beq a3, zero, 2f - subu a2, a2, a3 # subtract from remaining count - addu a3, a3, a0 # compute ending address + PTR_SUBU a2, a2, a3 # subtract from remaining count + PTR_ADDU a3, a3, a0 # compute ending address 1: lbu v0, 0(a0) # compare bytes until a1 word aligned lbu v1, 0(a1) - addu a0, a0, 1 + PTR_ADDU a0, a0, 1 bne v0, v1, nomatch - addu a1, a1, 1 + PTR_ADDU a1, a1, 1 bne a0, a3, 1b nop 2: and a3, a2, ~3 # compute number of whole words left - subu a2, a2, a3 # which has to be >= (16-3) & ~3 - addu a3, a3, a0 # compute ending address + PTR_SUBU a2, a2, a3 # which has to be >= (16-3) & ~3 + PTR_ADDU a3, a3, a0 # compute ending address 3: LWHI v0, 0(a0) # compare words a0 unaligned, a1 aligned LWLO v0, 3(a0) lw v1, 0(a1) - addu a0, a0, 4 + PTR_ADDU a0, a0, 4 bne v0, v1, nomatch - addu a1, a1, 4 + PTR_ADDU a1, a1, 4 bne a0, a3, 3b nop smallcmp: ble a2, zero, match - addu a3, a2, a0 # compute ending address + PTR_ADDU a3, a2, a0 # 
compute ending address 1: lbu v0, 0(a0) lbu v1, 0(a1) - addu a0, a0, 1 + PTR_ADDU a0, a0, 1 bne v0, v1, nomatch - addu a1, a1, 1 + PTR_ADDU a1, a1, 1 bne a0, a3, 1b nop match: @@ -1367,9 +1246,6 @@ END(atomic_subtract_8) */ .set noreorder # Noreorder is default style! -#ifndef _MIPS_ARCH_XLR - .set mips3 -#endif #if !defined(__mips_n64) && !defined(__mips_n32) /* @@ -1426,22 +1302,22 @@ END(atomic_load_64) #if defined(DDB) || defined(DEBUG) LEAF(kdbpeek) - li v1, DDBERR + PTR_LA v1, ddberr and v0, a0, 3 # unaligned ? GET_CPU_PCPU(t1) - lw t1, PC_CURPCB(t1) + PTR_L t1, PC_CURPCB(t1) bne v0, zero, 1f - sw v1, U_PCB_ONFAULT(t1) + PTR_S v1, U_PCB_ONFAULT(t1) lw v0, (a0) jr ra - sw zero, U_PCB_ONFAULT(t1) + PTR_S zero, U_PCB_ONFAULT(t1) 1: LWHI v0, 0(a0) LWLO v0, 3(a0) jr ra - sw zero, U_PCB_ONFAULT(t1) + PTR_S zero, U_PCB_ONFAULT(t1) END(kdbpeek) ddberr: @@ -1450,44 +1326,31 @@ ddberr: #if defined(DDB) LEAF(kdbpoke) - li v1, DDBERR + PTR_LA v1, ddberr and v0, a0, 3 # unaligned ? GET_CPU_PCPU(t1) - lw t1, PC_CURPCB(t1) + PTR_L t1, PC_CURPCB(t1) bne v0, zero, 1f - sw v1, U_PCB_ONFAULT(t1) + PTR_S v1, U_PCB_ONFAULT(t1) sw a1, (a0) jr ra - sw zero, U_PCB_ONFAULT(t1) + PTR_S zero, U_PCB_ONFAULT(t1) 1: SWHI a1, 0(a0) SWLO a1, 3(a0) jr ra - sw zero, U_PCB_ONFAULT(t1) + PTR_S zero, U_PCB_ONFAULT(t1) END(kdbpoke) .data .globl esym esym: .word 0 -#ifndef _MIPS_ARCH_XLR - .set mips2 -#endif #endif /* DDB */ #endif /* DDB || DEBUG */ -#ifndef MIPS_ISAIII -#define STORE sw /* 32 bit mode regsave instruction */ -#define LOAD lw /* 32 bit mode regload instruction */ -#define RSIZE 4 /* 32 bit mode register size */ -#else -#define STORE sd /* 64 bit mode regsave instruction */ -#define LOAD ld /* 64 bit mode regload instruction */ -#define RSIZE 8 /* 64 bit mode register size */ -#endif - #define ITLBNOPFIX nop;nop;nop;nop;nop;nop;nop;nop;nop;nop; .text @@ -1499,35 +1362,35 @@ LEAF(breakpoint) LEAF(setjmp) mfc0 v0, COP_0_STATUS_REG # Later the "real" spl value! 
- STORE s0, (RSIZE * PREG_S0)(a0) - STORE s1, (RSIZE * PREG_S1)(a0) - STORE s2, (RSIZE * PREG_S2)(a0) - STORE s3, (RSIZE * PREG_S3)(a0) - STORE s4, (RSIZE * PREG_S4)(a0) - STORE s5, (RSIZE * PREG_S5)(a0) - STORE s6, (RSIZE * PREG_S6)(a0) - STORE s7, (RSIZE * PREG_S7)(a0) - STORE s8, (RSIZE * PREG_S8)(a0) - STORE sp, (RSIZE * PREG_SP)(a0) - STORE ra, (RSIZE * PREG_RA)(a0) - STORE v0, (RSIZE * PREG_SR)(a0) + REG_S s0, (SZREG * PREG_S0)(a0) + REG_S s1, (SZREG * PREG_S1)(a0) + REG_S s2, (SZREG * PREG_S2)(a0) + REG_S s3, (SZREG * PREG_S3)(a0) + REG_S s4, (SZREG * PREG_S4)(a0) + REG_S s5, (SZREG * PREG_S5)(a0) + REG_S s6, (SZREG * PREG_S6)(a0) + REG_S s7, (SZREG * PREG_S7)(a0) + REG_S s8, (SZREG * PREG_S8)(a0) + REG_S sp, (SZREG * PREG_SP)(a0) + REG_S ra, (SZREG * PREG_RA)(a0) + REG_S v0, (SZREG * PREG_SR)(a0) jr ra li v0, 0 # setjmp return END(setjmp) LEAF(longjmp) - LOAD v0, (RSIZE * PREG_SR)(a0) - LOAD ra, (RSIZE * PREG_RA)(a0) - LOAD s0, (RSIZE * PREG_S0)(a0) - LOAD s1, (RSIZE * PREG_S1)(a0) - LOAD s2, (RSIZE * PREG_S2)(a0) - LOAD s3, (RSIZE * PREG_S3)(a0) - LOAD s4, (RSIZE * PREG_S4)(a0) - LOAD s5, (RSIZE * PREG_S5)(a0) - LOAD s6, (RSIZE * PREG_S6)(a0) - LOAD s7, (RSIZE * PREG_S7)(a0) - LOAD s8, (RSIZE * PREG_S8)(a0) - LOAD sp, (RSIZE * PREG_SP)(a0) + REG_L v0, (SZREG * PREG_SR)(a0) + REG_L ra, (SZREG * PREG_RA)(a0) + REG_L s0, (SZREG * PREG_S0)(a0) + REG_L s1, (SZREG * PREG_S1)(a0) + REG_L s2, (SZREG * PREG_S2)(a0) + REG_L s3, (SZREG * PREG_S3)(a0) + REG_L s4, (SZREG * PREG_S4)(a0) + REG_L s5, (SZREG * PREG_S5)(a0) + REG_L s6, (SZREG * PREG_S6)(a0) + REG_L s7, (SZREG * PREG_S7)(a0) + REG_L s8, (SZREG * PREG_S8)(a0) + REG_L sp, (SZREG * PREG_SP)(a0) mtc0 v0, COP_0_STATUS_REG # Later the "real" spl value! ITLBNOPFIX jr ra @@ -1538,7 +1401,6 @@ LEAF(fusufault) GET_CPU_PCPU(t0) lw t0, PC_CURTHREAD(t0) lw t0, TD_PCB(t0) - sw zero, U_PCB_ONFAULT(t0) li v0, -1 j ra END(fusufault) @@ -1547,8 +1409,7 @@ END(fusufault) a pointer that is in user space. 
It will be used as the basic primitive for a kernel supported user space lock implementation. */ LEAF(casuptr) - - li t0, VM_MAXUSER_ADDRESS /* verify address validity */ + PTR_LI t0, VM_MAXUSER_ADDRESS /* verify address validity */ blt a0, t0, fusufault /* trap faults */ nop @@ -1556,8 +1417,8 @@ LEAF(casuptr) lw t1, PC_CURTHREAD(t1) lw t1, TD_PCB(t1) - lw t2, fusufault - sw t2, U_PCB_ONFAULT(t1) + PTR_LA t2, fusufault + PTR_S t2, U_PCB_ONFAULT(t1) 1: ll v0, 0(a0) /* try to load the old value */ beq v0, a1, 2f /* compare */ @@ -1565,7 +1426,7 @@ LEAF(casuptr) sc t0, 0(a0) /* write if address still locked */ beq t0, zero, 1b /* if it failed, spin */ 2: - sw zero, U_PCB_ONFAULT(t1) /* clean up */ + PTR_S zero, U_PCB_ONFAULT(t1) /* clean up */ j ra END(casuptr) @@ -1593,7 +1454,7 @@ END(octeon_get_shadow) * octeon_set_control(addr, uint32_t val) */ LEAF(octeon_set_control) - .set mips64r2 + .set push or t1, a1, zero /* dmfc0 a1, 9, 7*/ .word 0x40254807 @@ -1603,20 +1464,21 @@ LEAF(octeon_set_control) .word 0x40a54807 jr ra nop - .set mips0 + .set pop END(octeon_set_control) /* * octeon_get_control(addr) */ LEAF(octeon_get_control) + .set push .set mips64r2 /* dmfc0 a1, 9, 7 */ .word 0x40254807 sd a1, 0(a0) jr ra nop - .set mips0 + .set pop END(octeon_get_control) #endif diff --git a/sys/mips/mips/swtch.S b/sys/mips/mips/swtch.S index dd66ecec684..f287476a3b3 100644 --- a/sys/mips/mips/swtch.S +++ b/sys/mips/mips/swtch.S @@ -65,53 +65,7 @@ #include "assym.s" -#if defined(ISA_MIPS32) -#undef WITH_64BIT_CP0 -#elif defined(ISA_MIPS64) -#define WITH_64BIT_CP0 -#elif defined(ISA_MIPS3) -#define WITH_64BIT_CP0 -#else -#error "Please write the code for this ISA" -#endif - -#ifdef WITH_64BIT_CP0 -#define _SLL dsll -#define _SRL dsrl -#define _MFC0 dmfc0 -#define _MTC0 dmtc0 -#define WIRED_SHIFT 34 -#else -#define _SLL sll -#define _SRL srl -#define _MFC0 mfc0 -#define _MTC0 mtc0 -#define WIRED_SHIFT 2 -#endif .set noreorder # Noreorder is default style! 
-#if defined(ISA_MIPS32) - .set mips32 -#elif defined(ISA_MIPS64) - .set mips64 -#elif defined(ISA_MIPS3) - .set mips3 -#endif - -#if defined(ISA_MIPS32) -#define STORE sw /* 32 bit mode regsave instruction */ -#define LOAD lw /* 32 bit mode regload instruction */ -#define RSIZE 4 /* 32 bit mode register size */ -#define STORE_FP swc1 /* 32 bit mode fp regsave instruction */ -#define LOAD_FP lwc1 /* 32 bit mode fp regload instruction */ -#define FP_RSIZE 4 /* 32 bit mode fp register size */ -#else -#define STORE sd /* 64 bit mode regsave instruction */ -#define LOAD ld /* 64 bit mode regload instruction */ -#define RSIZE 8 /* 64 bit mode register size */ -#define STORE_FP sdc1 /* 64 bit mode fp regsave instruction */ -#define LOAD_FP ldc1 /* 64 bit mode fp regload instruction */ -#define FP_RSIZE 8 /* 64 bit mode fp register size */ -#endif /* * FREEBSD_DEVELOPERS_FIXME @@ -125,28 +79,28 @@ #endif #define SAVE_U_PCB_REG(reg, offs, base) \ - STORE reg, U_PCB_REGS + (RSIZE * offs) (base) + REG_S reg, U_PCB_REGS + (SZREG * offs) (base) #define RESTORE_U_PCB_REG(reg, offs, base) \ - LOAD reg, U_PCB_REGS + (RSIZE * offs) (base) + REG_L reg, U_PCB_REGS + (SZREG * offs) (base) #define SAVE_U_PCB_FPREG(reg, offs, base) \ - STORE_FP reg, U_PCB_FPREGS + (FP_RSIZE * offs) (base) + FP_S reg, U_PCB_FPREGS + (SZFPREG * offs) (base) #define RESTORE_U_PCB_FPREG(reg, offs, base) \ - LOAD_FP reg, U_PCB_FPREGS + (FP_RSIZE * offs) (base) + FP_L reg, U_PCB_FPREGS + (SZFPREG * offs) (base) #define SAVE_U_PCB_FPSR(reg, offs, base) \ - STORE reg, U_PCB_FPREGS + (FP_RSIZE * offs) (base) + REG_S reg, U_PCB_FPREGS + (SZFPREG * offs) (base) #define RESTORE_U_PCB_FPSR(reg, offs, base) \ - LOAD reg, U_PCB_FPREGS + (FP_RSIZE * offs) (base) + REG_L reg, U_PCB_FPREGS + (SZFPREG * offs) (base) #define SAVE_U_PCB_CONTEXT(reg, offs, base) \ - STORE reg, U_PCB_CONTEXT + (RSIZE * offs) (base) + REG_S reg, U_PCB_CONTEXT + (SZREG * offs) (base) #define RESTORE_U_PCB_CONTEXT(reg, offs, base) \ - LOAD reg, 
U_PCB_CONTEXT + (RSIZE * offs) (base) + REG_L reg, U_PCB_CONTEXT + (SZREG * offs) (base) #define ITLBNOPFIX nop;nop;nop;nop;nop;nop;nop;nop;nop;nop; @@ -172,7 +126,7 @@ LEAF(fork_trampoline) */ .set noat GET_CPU_PCPU(k1) - lw k1, PC_CURPCB(k1) + PTR_L k1, PC_CURPCB(k1) RESTORE_U_PCB_REG(t0, MULLO, k1) RESTORE_U_PCB_REG(t1, MULHI, k1) @@ -181,7 +135,7 @@ LEAF(fork_trampoline) RESTORE_U_PCB_REG(a0, PC, k1) RESTORE_U_PCB_REG(AT, AST, k1) RESTORE_U_PCB_REG(v0, V0, k1) - _MTC0 a0, COP_0_EXC_PC # set return address + MTC0 a0, COP_0_EXC_PC # set return address RESTORE_U_PCB_REG(v1, V1, k1) RESTORE_U_PCB_REG(a0, A0, k1) @@ -265,7 +219,7 @@ END(savectx) KSEG0TEXT_START; -NON_LEAF(mips_cpu_throw, STAND_FRAME_SIZE, ra) +NON_LEAF(mips_cpu_throw, CALLFRAME_SIZ, ra) mfc0 t0, COP_0_STATUS_REG # t0 = saved status register nop nop @@ -285,7 +239,7 @@ END(mips_cpu_throw) * a2 - mtx * Find the highest priority process and resume it. */ -NON_LEAF(cpu_switch, STAND_FRAME_SIZE, ra) +NON_LEAF(cpu_switch, CALLFRAME_SIZ, ra) mfc0 t0, COP_0_STATUS_REG # t0 = saved status register nop nop @@ -294,11 +248,11 @@ NON_LEAF(cpu_switch, STAND_FRAME_SIZE, ra) ITLBNOPFIX beqz a0, mips_sw1 move a3, a0 - lw a0, TD_PCB(a0) # load PCB addr of curproc + PTR_L a0, TD_PCB(a0) # load PCB addr of curproc SAVE_U_PCB_CONTEXT(sp, PREG_SP, a0) # save old sp - subu sp, sp, STAND_FRAME_SIZE - sw ra, STAND_RA_OFFSET(sp) - .mask 0x80000000, (STAND_RA_OFFSET - STAND_FRAME_SIZE) + PTR_SUBU sp, sp, CALLFRAME_SIZ + REG_S ra, CALLFRAME_RA(sp) + .mask 0x80000000, (CALLFRAME_RA - CALLFRAME_SIZ) SAVE_U_PCB_CONTEXT(s0, PREG_S0, a0) # do a 'savectx()' SAVE_U_PCB_CONTEXT(s1, PREG_S1, a0) SAVE_U_PCB_CONTEXT(s2, PREG_S2, a0) @@ -321,13 +275,13 @@ getpc: * to be saved with the other registers do so here. 
*/ - sw a2, TD_LOCK(a3) # Switchout td_lock + PTR_S a2, TD_LOCK(a3) # Switchout td_lock mips_sw1: #if defined(SMP) && defined(SCHED_ULE) PTR_LA t0, _C_LABEL(blocked_lock) blocked_loop: - lw t1, TD_LOCK(a1) + PTR_L t1, TD_LOCK(a1) beq t0, t1, blocked_loop nop #endif @@ -336,20 +290,16 @@ blocked_loop: * Switch to new context. */ GET_CPU_PCPU(a3) - sw a1, PC_CURTHREAD(a3) - lw a2, TD_PCB(a1) - sw a2, PC_CURPCB(a3) - lw v0, TD_REALKSTACK(a1) - li s0, (MIPS_KSEG2_START+VM_KERNEL_ALLOC_OFFSET) # If Uarea addr is below kseg2, + PTR_S a1, PC_CURTHREAD(a3) + PTR_L a2, TD_PCB(a1) + PTR_S a2, PC_CURPCB(a3) + PTR_L v0, TD_KSTACK(a1) +#if !defined(__mips_n64) + PTR_LI s0, MIPS_KSEG2_START # If Uarea addr is below kseg2, bltu v0, s0, sw2 # no need to insert in TLB. - lw a1, TD_UPTE+0(s7) # t0 = first u. pte - lw a2, TD_UPTE+4(s7) # t1 = 2nd u. pte - and s0, v0, PTE_ODDPG - beq s0, zero, entry0 - nop - - PANIC_KSEG0("USPACE sat on odd page boundary", t1) - +#endif + lw a1, TD_UPTE + 0(s7) # a1 = u. pte #0 + lw a2, TD_UPTE + 4(s7) # a2 = u. pte #1 /* * Wiredown the USPACE of newproc in TLB entry#0. Check whether target * USPACE is already in another place of TLB before that, and if so @@ -357,31 +307,32 @@ blocked_loop: * NOTE: This is hard coded to UPAGES == 2. * Also, there should be no TLB faults at this point. */ -entry0: - mtc0 v0, COP_0_TLB_HI # VPN = va + MTC0 v0, COP_0_TLB_HI # VPN = va HAZARD_DELAY tlbp # probe VPN HAZARD_DELAY mfc0 s0, COP_0_TLB_INDEX - nop -pgm: + HAZARD_DELAY + + PTR_LI t1, MIPS_KSEG0_START # invalidate tlb entry bltz s0, entry0set - li t1, MIPS_KSEG0_START # invalidate tlb entry + nop sll s0, PAGE_SHIFT + 1 addu t1, s0 - mtc0 t1, COP_0_TLB_HI + MTC0 t1, COP_0_TLB_HI mtc0 zero, COP_0_TLB_LO0 mtc0 zero, COP_0_TLB_LO1 HAZARD_DELAY tlbwi HAZARD_DELAY - mtc0 v0, COP_0_TLB_HI # set VPN again + MTC0 v0, COP_0_TLB_HI # set VPN again + entry0set: /* SMP!! - Works only for unshared TLB case - i.e. 
no v-cpus */ mtc0 zero, COP_0_TLB_INDEX # TLB entry #0 -# or a1, PG_G + HAZARD_DELAY mtc0 a1, COP_0_TLB_LO0 # upte[0] -# or a2, PG_G + HAZARD_DELAY mtc0 a2, COP_0_TLB_LO1 # upte[1] HAZARD_DELAY tlbwi # set TLB entry #0 @@ -396,7 +347,7 @@ sw2: /* * Restore registers and return. */ - lw a0, TD_PCB(s7) + PTR_L a0, TD_PCB(s7) RESTORE_U_PCB_CONTEXT(gp, PREG_GP, a0) RESTORE_U_PCB_CONTEXT(v0, PREG_SR, a0) # restore kernel context RESTORE_U_PCB_CONTEXT(ra, PREG_RA, a0) @@ -457,7 +408,7 @@ LEAF(MipsSwitchFPState) * First read out the status register to make sure that all FP operations * have completed. */ - lw a0, TD_PCB(a0) # get pointer to pcb for proc + PTR_L a0, TD_PCB(a0) # get pointer to pcb for proc cfc1 t0, FPC_CSR # stall til FP done cfc1 t0, FPC_CSR # now get status li t3, ~SR_COP_1_BIT @@ -567,13 +518,13 @@ END(MipsSwitchFPState) *---------------------------------------------------------------------------- */ LEAF(MipsSaveCurFPState) - lw a0, TD_PCB(a0) # get pointer to pcb for thread + PTR_L a0, TD_PCB(a0) # get pointer to pcb for thread mfc0 t1, COP_0_STATUS_REG # Disable interrupts and li t0, SR_COP_1_BIT # enable the coprocessor mtc0 t0, COP_0_STATUS_REG ITLBNOPFIX GET_CPU_PCPU(a1) - sw zero, PC_FPCURTHREAD(a1) # indicate state has been saved + PTR_S zero, PC_FPCURTHREAD(a1) # indicate state has been saved /* * First read out the status register to make sure that all FP operations * have completed. diff --git a/sys/mips/mips/tick.c b/sys/mips/mips/tick.c index 60b3511e0c0..5931a5b103d 100644 --- a/sys/mips/mips/tick.c +++ b/sys/mips/mips/tick.c @@ -295,22 +295,31 @@ clock_intr(void *arg) */ if (delta > cycles_per_hz) delta = cycles_per_hz; - +#ifdef KDTRACE_HOOKS + /* + * If the DTrace hooks are configured and a callback function + * has been registered, then call it to process the high speed + * timers. + */ + int cpu = PCPU_GET(cpuid); + if (cyclic_clock_func[cpu] != NULL) + (*cyclic_clock_func[cpu])(tf); +#endif /* Fire hardclock at hz. 
*/ cpu_ticks->hard_ticks += delta; if (cpu_ticks->hard_ticks >= cycles_per_hz) { cpu_ticks->hard_ticks -= cycles_per_hz; if (PCPU_GET(cpuid) == 0) - hardclock(USERMODE(tf->sr), tf->pc); + hardclock(TRAPF_USERMODE(tf), tf->pc); else - hardclock_cpu(USERMODE(tf->sr)); + hardclock_cpu(TRAPF_USERMODE(tf)); } /* Fire statclock at stathz. */ cpu_ticks->stat_ticks += delta; if (cpu_ticks->stat_ticks >= cycles_per_stathz) { cpu_ticks->stat_ticks -= cycles_per_stathz; - statclock(USERMODE(tf->sr)); + statclock(TRAPF_USERMODE(tf)); } /* Fire profclock at profhz, but only when needed. */ @@ -318,7 +327,7 @@ clock_intr(void *arg) if (cpu_ticks->prof_ticks >= cycles_per_profhz) { cpu_ticks->prof_ticks -= cycles_per_profhz; if (profprocs != 0) - profclock(USERMODE(tf->sr), tf->pc); + profclock(TRAPF_USERMODE(tf), tf->pc); } critical_exit(); #if 0 /* TARGET_OCTEON */ diff --git a/sys/mips/mips/tlb.S b/sys/mips/mips/tlb.S index 46a15f81ca4..f6e7934ed3e 100644 --- a/sys/mips/mips/tlb.S +++ b/sys/mips/mips/tlb.S @@ -240,7 +240,7 @@ LEAF(Mips_TLBFlush) # MIPS_KSEG0_START + 2 * i * PAGE_SIZE; # One bogus value for every TLB entry might cause MCHECK exception # - sll t3, t1, PGSHIFT + 1 + sll t3, t1, PAGE_SHIFT + 1 li v0, MIPS_KSEG0_START # invalid address addu v0, t3 /* @@ -299,7 +299,7 @@ LEAF(Mips_TLBFlushAddr) # address calculated by following expression: # MIPS_KSEG0_START + 2 * i * PAGE_SIZE; # One bogus value for every TLB entry might cause MCHECK exception - sll v0, PGSHIFT + 1 + sll v0, PAGE_SHIFT + 1 addu t1, v0 _MTC0 t1, COP_0_TLB_HI # Mark entry high as invalid @@ -482,7 +482,7 @@ LEAF(mips_TBIAP) # MIPS_KSEG0_START + 2 * i * PAGE_SIZE; # One bogus value for every TLB entry might cause MCHECK exception # - sll t3, t1, PGSHIFT + 1 + sll t3, t1, PAGE_SHIFT + 1 li v0, MIPS_KSEG0_START # invalid address addu v0, t3 @@ -507,7 +507,7 @@ LEAF(mips_TBIAP) tlbwi # invalidate the TLB entry 2: addu t1, t1, 1 - addu v0, 1 << (PGSHIFT + 1) + addu v0, 1 << (PAGE_SHIFT + 1) bne t1, t2, 1b 
nop diff --git a/sys/mips/mips/trap.c b/sys/mips/mips/trap.c index 124087cc039..17a6be5e7fd 100644 --- a/sys/mips/mips/trap.c +++ b/sys/mips/mips/trap.c @@ -75,7 +75,6 @@ __FBSDID("$FreeBSD$"); #include #include -#include #include #include #include @@ -83,8 +82,6 @@ __FBSDID("$FreeBSD$"); #include #include #include -#include -#include #include #ifdef DDB @@ -102,8 +99,7 @@ __FBSDID("$FreeBSD$"); int trap_debug = 1; #endif -extern unsigned onfault_table[]; - +static void log_illegal_instruction(const char *, struct trapframe *); static void log_bad_page_fault(char *, struct trapframe *, int); static void log_frame_dump(struct trapframe *frame); static void get_mapping_info(vm_offset_t, pd_entry_t **, pt_entry_t **); @@ -229,8 +225,8 @@ void stacktrace(struct trapframe *); void logstacktrace(struct trapframe *); #endif -#define KERNLAND(x) ((int)(x) < 0) -#define DELAYBRANCH(x) ((int)(x) < 0) +#define KERNLAND(x) ((vm_offset_t)(x) >= VM_MIN_KERNEL_ADDRESS && (vm_offset_t)(x) < VM_MAX_KERNEL_ADDRESS) +#define DELAYBRANCH(x) ((int)(x) < 0) /* * MIPS load/store access type @@ -266,6 +262,7 @@ SYSCTL_INT(_vm, OID_AUTO, allow_unaligned_acc, CTLFLAG_RW, static int emulate_unaligned_access(struct trapframe *frame); extern char *syscallnames[]; +extern void fswintrberr(void); /* XXX */ /* * Handle an exception. @@ -274,7 +271,7 @@ extern char *syscallnames[]; * In the case of a kernel trap, we return the pc where to resume if * p->p_addr->u_pcb.pcb_onfault is set, otherwise, return old pc. 
*/ -u_int +register_t trap(struct trapframe *trapframe) { int type, usermode; @@ -284,18 +281,17 @@ trap(struct trapframe *trapframe) struct proc *p = curproc; vm_prot_t ftype; pt_entry_t *pte; - unsigned int entry; pmap_t pmap; - int quad_syscall = 0; int access_type; ksiginfo_t ksi; char *msg = NULL; - register_t addr = 0; + intptr_t addr = 0; + register_t pc; trapdebug_enter(trapframe, 0); type = (trapframe->cause & CR_EXC_CODE) >> CR_EXC_CODE_SHIFT; - if (USERMODE(trapframe->sr)) { + if (TRAPF_USERMODE(trapframe)) { type |= T_USER; usermode = 1; } else { @@ -309,9 +305,9 @@ trap(struct trapframe *trapframe) */ if (trapframe->sr & SR_INT_ENAB) { set_intr_mask(~(trapframe->sr & ALL_INT_MASK)); - enableintr(); + intr_enable(); } else { - disableintr(); + intr_disable(); } #ifdef TRAP_DEBUG @@ -333,9 +329,9 @@ trap(struct trapframe *trapframe) printf("cpuid = %d\n", PCPU_GET(cpuid)); #endif MachTLBGetPID(pid); - printf("badaddr = 0x%0x, pc = 0x%0x, ra = 0x%0x, sp = 0x%0x, sr = 0x%x, pid = %d, ASID = 0x%x\n", - trapframe->badvaddr, trapframe->pc, trapframe->ra, - trapframe->sp, trapframe->sr, + printf("badaddr = %#jx, pc = %#jx, ra = %#jx, sp = %#jx, sr = %jx, pid = %d, ASID = %u\n", + (intmax_t)trapframe->badvaddr, (intmax_t)trapframe->pc, (intmax_t)trapframe->ra, + (intmax_t)trapframe->sp, (intmax_t)trapframe->sr, (curproc ? 
curproc->p_pid : -1), pid); switch (type & ~T_USER) { @@ -357,7 +353,7 @@ trap(struct trapframe *trapframe) ((type & ~T_USER) != T_SYSCALL)) { if (++count == 3) { trap_frame_dump(trapframe); - panic("too many faults at %x\n", last_badvaddr); + panic("too many faults at %p\n", (void *)last_badvaddr); } } else { last_badvaddr = this_badvaddr; @@ -378,35 +374,30 @@ trap(struct trapframe *trapframe) vm_offset_t pa; PMAP_LOCK(kernel_pmap); - if (!(pte = pmap_segmap(kernel_pmap, - trapframe->badvaddr))) - panic("trap: ktlbmod: invalid segmap"); - pte += (trapframe->badvaddr >> PGSHIFT) & (NPTEPG - 1); - entry = *pte; + pte = pmap_pte(kernel_pmap, trapframe->badvaddr); + if (pte == NULL) + panic("trap: ktlbmod: can't find PTE"); #ifdef SMP /* It is possible that some other CPU changed m-bit */ - if (!mips_pg_v(entry) || (entry & mips_pg_m_bit())) { - trapframe->badvaddr &= ~PGOFSET; + if (!mips_pg_v(*pte) || (*pte & mips_pg_m_bit())) { pmap_update_page(kernel_pmap, - trapframe->badvaddr, entry); + trapframe->badvaddr, *pte); PMAP_UNLOCK(kernel_pmap); return (trapframe->pc); } #else - if (!mips_pg_v(entry) || (entry & mips_pg_m_bit())) + if (!mips_pg_v(*pte) || (*pte & mips_pg_m_bit())) panic("trap: ktlbmod: invalid pte"); #endif - if (entry & mips_pg_ro_bit()) { + if (*pte & mips_pg_ro_bit()) { /* write to read only page in the kernel */ ftype = VM_PROT_WRITE; PMAP_UNLOCK(kernel_pmap); goto kernel_fault; } - entry |= mips_pg_m_bit(); - *pte = entry; - trapframe->badvaddr &= ~PGOFSET; - pmap_update_page(kernel_pmap, trapframe->badvaddr, entry); - pa = mips_tlbpfn_to_paddr(entry); + *pte |= mips_pg_m_bit(); + pmap_update_page(kernel_pmap, trapframe->badvaddr, *pte); + pa = mips_tlbpfn_to_paddr(*pte); if (!page_is_managed(pa)) panic("trap: ktlbmod: unmanaged page"); pmap_set_modified(pa); @@ -422,36 +413,30 @@ trap(struct trapframe *trapframe) pmap = &p->p_vmspace->vm_pmap; PMAP_LOCK(pmap); - if (!(pte = pmap_segmap(pmap, trapframe->badvaddr))) - panic("trap: utlbmod: 
invalid segmap"); - pte += (trapframe->badvaddr >> PGSHIFT) & (NPTEPG - 1); - entry = *pte; + pte = pmap_pte(pmap, trapframe->badvaddr); + if (pte == NULL) + panic("trap: utlbmod: can't find PTE"); #ifdef SMP /* It is possible that some other CPU changed m-bit */ - if (!mips_pg_v(entry) || (entry & mips_pg_m_bit())) { - trapframe->badvaddr = (trapframe->badvaddr & ~PGOFSET); - pmap_update_page(pmap, trapframe->badvaddr, entry); + if (!mips_pg_v(*pte) || (*pte & mips_pg_m_bit())) { + pmap_update_page(pmap, trapframe->badvaddr, *pte); PMAP_UNLOCK(pmap); goto out; } #else - if (!mips_pg_v(entry) || (entry & mips_pg_m_bit())) { + if (!mips_pg_v(*pte) || (*pte & mips_pg_m_bit())) panic("trap: utlbmod: invalid pte"); - } #endif - if (entry & mips_pg_ro_bit()) { + if (*pte & mips_pg_ro_bit()) { /* write to read only page */ ftype = VM_PROT_WRITE; PMAP_UNLOCK(pmap); goto dofault; } - entry |= mips_pg_m_bit(); - *pte = entry; - trapframe->badvaddr = (trapframe->badvaddr & ~PGOFSET); - pmap_update_page(pmap, trapframe->badvaddr, entry); - trapframe->badvaddr |= (pmap->pm_asid[PCPU_GET(cpuid)].asid << VMTLB_PID_SHIFT); - pa = mips_tlbpfn_to_paddr(entry); + *pte |= mips_pg_m_bit(); + pmap_update_page(pmap, trapframe->badvaddr, *pte); + pa = mips_tlbpfn_to_paddr(*pte); if (!page_is_managed(pa)) panic("trap: utlbmod: unmanaged page"); pmap_set_modified(pa); @@ -476,22 +461,29 @@ trap(struct trapframe *trapframe) rv = vm_fault(kernel_map, va, ftype, VM_FAULT_NORMAL); if (rv == KERN_SUCCESS) return (trapframe->pc); - if ((i = td->td_pcb->pcb_onfault) != 0) { - td->td_pcb->pcb_onfault = 0; - return (onfault_table[i]); + if (td->td_pcb->pcb_onfault != NULL) { + pc = (register_t)(intptr_t)td->td_pcb->pcb_onfault; + td->td_pcb->pcb_onfault = NULL; + return (pc); } goto err; } - /* + + /* * It is an error for the kernel to access user space except * through the copyin/copyout routines. 
*/ - if ((i = td->td_pcb->pcb_onfault) == 0) + if (td->td_pcb->pcb_onfault == NULL) goto err; + /* check for fuswintr() or suswintr() getting a page fault */ - if (i == 4) { - return (onfault_table[i]); + /* XXX There must be a nicer way to do this. */ + if (td->td_pcb->pcb_onfault == fswintrberr) { + pc = (register_t)(intptr_t)td->td_pcb->pcb_onfault; + td->td_pcb->pcb_onfault = NULL; + return (pc); } + goto dofault; case T_TLB_LD_MISS + T_USER: @@ -510,7 +502,7 @@ dofault: vm = p->p_vmspace; map = &vm->vm_map; va = trunc_page((vm_offset_t)trapframe->badvaddr); - if ((vm_offset_t)trapframe->badvaddr >= VM_MIN_KERNEL_ADDRESS) { + if (KERNLAND(trapframe->badvaddr)) { /* * Don't allow user-mode faults in kernel * address space. @@ -532,9 +524,9 @@ dofault: --p->p_lock; PROC_UNLOCK(p); #ifdef VMFAULT_TRACE - printf("vm_fault(%p (pmap %p), %x (%x), %x, %d) -> %x at pc %x\n", - map, &vm->vm_pmap, va, trapframe->badvaddr, ftype, VM_FAULT_NORMAL, - rv, trapframe->pc); + printf("vm_fault(%p (pmap %p), %p (%p), %x, %d) -> %x at pc %p\n", + map, &vm->vm_pmap, (void *)va, (void *)(intptr_t)trapframe->badvaddr, + ftype, VM_FAULT_NORMAL, rv, (void *)(intptr_t)trapframe->pc); #endif if (rv == KERN_SUCCESS) { @@ -545,9 +537,10 @@ dofault: } nogo: if (!usermode) { - if ((i = td->td_pcb->pcb_onfault) != 0) { - td->td_pcb->pcb_onfault = 0; - return (onfault_table[i]); + if (td->td_pcb->pcb_onfault != NULL) { + pc = (register_t)(intptr_t)td->td_pcb->pcb_onfault; + td->td_pcb->pcb_onfault = NULL; + return (pc); } goto err; } @@ -609,6 +602,8 @@ dofault: int nargs, nsaved; register_t args[8]; + bzero(args, sizeof args); + /* * note: PCPU_LAZY_INC() can only be used if we can * afford occassional inaccuracy in the count. 
@@ -657,7 +652,6 @@ dofault: args[0] = locr0->a2; args[1] = locr0->a3; nsaved = 2; - quad_syscall = 1; break; default: @@ -682,7 +676,7 @@ dofault: nargs = callp->sy_narg; if (nargs > nsaved) { - i = copyin((caddr_t)(locr0->sp + + i = copyin((caddr_t)(intptr_t)(locr0->sp + 4 * sizeof(register_t)), (caddr_t)&args[nsaved], (u_int)(nargs - nsaved) * sizeof(register_t)); if (i) { @@ -773,7 +767,8 @@ dofault: case T_BREAK + T_USER: { - uintptr_t va, instr; + intptr_t va; + uint32_t instr; /* compute address of break instruction */ va = trapframe->pc; @@ -806,7 +801,7 @@ dofault: case T_IWATCH + T_USER: case T_DWATCH + T_USER: { - uintptr_t va; + intptr_t va; /* compute address of trapped instruction */ va = trapframe->pc; @@ -820,7 +815,8 @@ dofault: case T_TRAP + T_USER: { - uintptr_t va, instr; + intptr_t va; + uint32_t instr; struct trapframe *locr0 = td->td_frame; /* compute address of trap instruction */ @@ -842,6 +838,7 @@ dofault: } case T_RES_INST + T_USER: + log_illegal_instruction("RES_INST", trapframe); i = SIGILL; addr = trapframe->pc; break; @@ -856,11 +853,13 @@ dofault: #if !defined(CPU_HAVEFPU) /* FP (COP1) instruction */ if ((trapframe->cause & CR_COP_ERR) == 0x10000000) { + log_illegal_instruction("COP1_UNUSABLE", trapframe); i = SIGILL; break; } #endif if ((trapframe->cause & CR_COP_ERR) != 0x10000000) { + log_illegal_instruction("COPn_UNUSABLE", trapframe); i = SIGILL; /* only FPU instructions allowed */ break; } @@ -875,13 +874,13 @@ dofault: #if !defined(SMP) && (defined(DDB) || defined(DEBUG)) trapDump("fpintr"); #else - printf("FPU Trap: PC %x CR %x SR %x\n", - trapframe->pc, trapframe->cause, trapframe->sr); + printf("FPU Trap: PC %#jx CR %x SR %x\n", + (intmax_t)trapframe->pc, (unsigned)trapframe->cause, (unsigned)trapframe->sr); goto err; #endif case T_FPE + T_USER: - MachFPTrap(trapframe->sr, trapframe->cause, trapframe->pc); + MipsFPTrap(trapframe->sr, trapframe->cause, trapframe->pc); goto out; case T_OVFLOW + T_USER: @@ -892,8 +891,8 @@ 
dofault: case T_ADDR_ERR_LD: /* misaligned access */ case T_ADDR_ERR_ST: /* misaligned access */ #ifdef TRAP_DEBUG - printf("+++ ADDR_ERR: type = %d, badvaddr = %x\n", type, - trapframe->badvaddr); + printf("+++ ADDR_ERR: type = %d, badvaddr = %#jx\n", type, + (intmax_t)trapframe->badvaddr); #endif /* Only allow emulation on a user address */ if (allow_unaligned_acc && @@ -925,10 +924,12 @@ dofault: /* FALLTHROUGH */ case T_BUS_ERR_LD_ST: /* BERR asserted to cpu */ - if ((i = td->td_pcb->pcb_onfault) != 0) { - td->td_pcb->pcb_onfault = 0; - return (onfault_table[i]); + if (td->td_pcb->pcb_onfault != NULL) { + pc = (register_t)(intptr_t)td->td_pcb->pcb_onfault; + td->td_pcb->pcb_onfault = NULL; + return (pc); } + /* FALLTHROUGH */ default: @@ -950,9 +951,9 @@ err: printf("kernel mode)\n"); #ifdef TRAP_DEBUG - printf("badvaddr = %x, pc = %x, ra = %x, sr = 0x%x\n", - trapframe->badvaddr, trapframe->pc, trapframe->ra, - trapframe->sr); + printf("badvaddr = %#jx, pc = %#jx, ra = %#jx, sr = %#jx\n", + (intmax_t)trapframe->badvaddr, (intmax_t)trapframe->pc, (intmax_t)trapframe->ra, + (intmax_t)trapframe->sr); #endif #ifdef KDB @@ -985,9 +986,10 @@ out: void trapDump(char *msg) { - int i, s; + register_t s; + int i; - s = disableintr(); + s = intr_disable(); printf("trapDump(%s)\n", msg); for (i = 0; i < TRAPSIZE; i++) { if (trp == trapdebug) { @@ -999,15 +1001,14 @@ trapDump(char *msg) if (trp->cause == 0) break; - printf("%s: ADR %x PC %x CR %x SR %x\n", + printf("%s: ADR %jx PC %jx CR %jx SR %jx\n", trap_type[(trp->cause & CR_EXC_CODE) >> CR_EXC_CODE_SHIFT], - trp->vadr, trp->pc, trp->cause, trp->status); + (intmax_t)trp->vadr, (intmax_t)trp->pc, (intmax_t)trp->cause, (intmax_t)trp->status); - printf(" RA %x SP %x code %d\n", trp->ra, trp->sp, trp->code); + printf(" RA %jx SP %jx code %d\n", (intmax_t)trp->ra, (intmax_t)trp->sp, (int)trp->code); } - restoreintr(s); + intr_restore(s); } - #endif @@ -1168,39 +1169,39 @@ static void log_frame_dump(struct trapframe *frame)
{ log(LOG_ERR, "Trapframe Register Dump:\n"); - log(LOG_ERR, "\tzero: %p\tat: %p\tv0: %p\tv1: %p\n", - (void *)0, (void *)frame->ast, (void *)frame->v0, (void *)frame->v1); + log(LOG_ERR, "\tzero: %#jx\tat: %#jx\tv0: %#jx\tv1: %#jx\n", + (intmax_t)0, (intmax_t)frame->ast, (intmax_t)frame->v0, (intmax_t)frame->v1); - log(LOG_ERR, "\ta0: %p\ta1: %p\ta2: %p\ta3: %p\n", - (void *)frame->a0, (void *)frame->a1, (void *)frame->a2, (void *)frame->a3); + log(LOG_ERR, "\ta0: %#jx\ta1: %#jx\ta2: %#jx\ta3: %#jx\n", + (intmax_t)frame->a0, (intmax_t)frame->a1, (intmax_t)frame->a2, (intmax_t)frame->a3); - log(LOG_ERR, "\tt0: %p\tt1: %p\tt2: %p\tt3: %p\n", - (void *)frame->t0, (void *)frame->t1, (void *)frame->t2, (void *)frame->t3); + log(LOG_ERR, "\tt0: %#jx\tt1: %#jx\tt2: %#jx\tt3: %#jx\n", + (intmax_t)frame->t0, (intmax_t)frame->t1, (intmax_t)frame->t2, (intmax_t)frame->t3); - log(LOG_ERR, "\tt4: %p\tt5: %p\tt6: %p\tt7: %p\n", - (void *)frame->t4, (void *)frame->t5, (void *)frame->t6, (void *)frame->t7); + log(LOG_ERR, "\tt4: %#jx\tt5: %#jx\tt6: %#jx\tt7: %#jx\n", + (intmax_t)frame->t4, (intmax_t)frame->t5, (intmax_t)frame->t6, (intmax_t)frame->t7); - log(LOG_ERR, "\tt8: %p\tt9: %p\ts0: %p\ts1: %p\n", - (void *)frame->t8, (void *)frame->t9, (void *)frame->s0, (void *)frame->s1); + log(LOG_ERR, "\tt8: %#jx\tt9: %#jx\ts0: %#jx\ts1: %#jx\n", + (intmax_t)frame->t8, (intmax_t)frame->t9, (intmax_t)frame->s0, (intmax_t)frame->s1); - log(LOG_ERR, "\ts2: %p\ts3: %p\ts4: %p\ts5: %p\n", - (void *)frame->s2, (void *)frame->s3, (void *)frame->s4, (void *)frame->s5); + log(LOG_ERR, "\ts2: %#jx\ts3: %#jx\ts4: %#jx\ts5: %#jx\n", + (intmax_t)frame->s2, (intmax_t)frame->s3, (intmax_t)frame->s4, (intmax_t)frame->s5); - log(LOG_ERR, "\ts6: %p\ts7: %p\tk0: %p\tk1: %p\n", - (void *)frame->s6, (void *)frame->s7, (void *)frame->k0, (void *)frame->k1); + log(LOG_ERR, "\ts6: %#jx\ts7: %#jx\tk0: %#jx\tk1: %#jx\n", + (intmax_t)frame->s6, (intmax_t)frame->s7, (intmax_t)frame->k0, (intmax_t)frame->k1); - 
log(LOG_ERR, "\tgp: %p\tsp: %p\ts8: %p\tra: %p\n", - (void *)frame->gp, (void *)frame->sp, (void *)frame->s8, (void *)frame->ra); + log(LOG_ERR, "\tgp: %#jx\tsp: %#jx\ts8: %#jx\tra: %#jx\n", + (intmax_t)frame->gp, (intmax_t)frame->sp, (intmax_t)frame->s8, (intmax_t)frame->ra); - log(LOG_ERR, "\tsr: %p\tmullo: %p\tmulhi: %p\tbadvaddr: %p\n", - (void *)frame->sr, (void *)frame->mullo, (void *)frame->mulhi, (void *)frame->badvaddr); + log(LOG_ERR, "\tsr: %#jx\tmullo: %#jx\tmulhi: %#jx\tbadvaddr: %#jx\n", + (intmax_t)frame->sr, (intmax_t)frame->mullo, (intmax_t)frame->mulhi, (intmax_t)frame->badvaddr); #ifdef IC_REG - log(LOG_ERR, "\tcause: %p\tpc: %p\tic: %p\n", - (void *)frame->cause, (void *)frame->pc, (void *)frame->ic); + log(LOG_ERR, "\tcause: %#jx\tpc: %#jx\tic: %#jx\n", + (intmax_t)frame->cause, (intmax_t)frame->pc, (intmax_t)frame->ic); #else - log(LOG_ERR, "\tcause: %p\tpc: %p\n", - (void *)frame->cause, (void *)frame->pc); + log(LOG_ERR, "\tcause: %#jx\tpc: %#jx\n", + (intmax_t)frame->cause, (intmax_t)frame->pc); #endif } @@ -1209,39 +1210,39 @@ static void trap_frame_dump(struct trapframe *frame) { printf("Trapframe Register Dump:\n"); - printf("\tzero: %p\tat: %p\tv0: %p\tv1: %p\n", - (void *)0, (void *)frame->ast, (void *)frame->v0, (void *)frame->v1); + printf("\tzero: %#jx\tat: %#jx\tv0: %#jx\tv1: %#jx\n", + (intmax_t)0, (intmax_t)frame->ast, (intmax_t)frame->v0, (intmax_t)frame->v1); - printf("\ta0: %p\ta1: %p\ta2: %p\ta3: %p\n", - (void *)frame->a0, (void *)frame->a1, (void *)frame->a2, (void *)frame->a3); + printf("\ta0: %#jx\ta1: %#jx\ta2: %#jx\ta3: %#jx\n", + (intmax_t)frame->a0, (intmax_t)frame->a1, (intmax_t)frame->a2, (intmax_t)frame->a3); - printf("\tt0: %p\tt1: %p\tt2: %p\tt3: %p\n", - (void *)frame->t0, (void *)frame->t1, (void *)frame->t2, (void *)frame->t3); + printf("\tt0: %#jx\tt1: %#jx\tt2: %#jx\tt3: %#jx\n", + (intmax_t)frame->t0, (intmax_t)frame->t1, (intmax_t)frame->t2, (intmax_t)frame->t3); - printf("\tt4: %p\tt5: %p\tt6: %p\tt7: 
%p\n", - (void *)frame->t4, (void *)frame->t5, (void *)frame->t6, (void *)frame->t7); + printf("\tt4: %#jx\tt5: %#jx\tt6: %#jx\tt7: %#jx\n", + (intmax_t)frame->t4, (intmax_t)frame->t5, (intmax_t)frame->t6, (intmax_t)frame->t7); - printf("\tt8: %p\tt9: %p\ts0: %p\ts1: %p\n", - (void *)frame->t8, (void *)frame->t9, (void *)frame->s0, (void *)frame->s1); + printf("\tt8: %#jx\tt9: %#jx\ts0: %#jx\ts1: %#jx\n", + (intmax_t)frame->t8, (intmax_t)frame->t9, (intmax_t)frame->s0, (intmax_t)frame->s1); - printf("\ts2: %p\ts3: %p\ts4: %p\ts5: %p\n", - (void *)frame->s2, (void *)frame->s3, (void *)frame->s4, (void *)frame->s5); + printf("\ts2: %#jx\ts3: %#jx\ts4: %#jx\ts5: %#jx\n", + (intmax_t)frame->s2, (intmax_t)frame->s3, (intmax_t)frame->s4, (intmax_t)frame->s5); - printf("\ts6: %p\ts7: %p\tk0: %p\tk1: %p\n", - (void *)frame->s6, (void *)frame->s7, (void *)frame->k0, (void *)frame->k1); + printf("\ts6: %#jx\ts7: %#jx\tk0: %#jx\tk1: %#jx\n", + (intmax_t)frame->s6, (intmax_t)frame->s7, (intmax_t)frame->k0, (intmax_t)frame->k1); - printf("\tgp: %p\tsp: %p\ts8: %p\tra: %p\n", - (void *)frame->gp, (void *)frame->sp, (void *)frame->s8, (void *)frame->ra); + printf("\tgp: %#jx\tsp: %#jx\ts8: %#jx\tra: %#jx\n", + (intmax_t)frame->gp, (intmax_t)frame->sp, (intmax_t)frame->s8, (intmax_t)frame->ra); - printf("\tsr: %p\tmullo: %p\tmulhi: %p\tbadvaddr: %p\n", - (void *)frame->sr, (void *)frame->mullo, (void *)frame->mulhi, (void *)frame->badvaddr); + printf("\tsr: %#jx\tmullo: %#jx\tmulhi: %#jx\tbadvaddr: %#jx\n", + (intmax_t)frame->sr, (intmax_t)frame->mullo, (intmax_t)frame->mulhi, (intmax_t)frame->badvaddr); #ifdef IC_REG - printf("\tcause: %p\tpc: %p\tic: %p\n", - (void *)frame->cause, (void *)frame->pc, (void *)frame->ic); + printf("\tcause: %#jx\tpc: %#jx\tic: %#jx\n", + (intmax_t)frame->cause, (intmax_t)frame->pc, (intmax_t)frame->ic); #else - printf("\tcause: %p\tpc: %p\n", - (void *)frame->cause, (void *)frame->pc); + printf("\tcause: %#jx\tpc: %#jx\n", + (intmax_t)frame->cause, 
(intmax_t)frame->pc); #endif } @@ -1255,7 +1256,7 @@ get_mapping_info(vm_offset_t va, pd_entry_t **pdepp, pt_entry_t **ptepp) pd_entry_t *pdep; struct proc *p = curproc; - pdep = (&(p->p_vmspace->vm_pmap.pm_segtab[va >> SEGSHIFT])); + pdep = (&(p->p_vmspace->vm_pmap.pm_segtab[(va >> SEGSHIFT) & (NPDEPG - 1)])); if (*pdep) ptep = pmap_pte(&p->p_vmspace->vm_pmap, va); else @@ -1265,6 +1266,50 @@ get_mapping_info(vm_offset_t va, pd_entry_t **pdepp, pt_entry_t **ptepp) *ptepp = ptep; } +static void +log_illegal_instruction(const char *msg, struct trapframe *frame) +{ + pt_entry_t *ptep; + pd_entry_t *pdep; + unsigned int *addr; + struct proc *p = curproc; + register_t pc; + +#ifdef SMP + printf("cpuid = %d\n", PCPU_GET(cpuid)); +#endif + pc = frame->pc + (DELAYBRANCH(frame->cause) ? 4 : 0); + log(LOG_ERR, "%s: pid %d (%s), uid %d: pc %#jx ra %#jx\n", + msg, p->p_pid, p->p_comm, + p->p_ucred ? p->p_ucred->cr_uid : -1, + (intmax_t)pc, + (intmax_t)frame->ra); + + /* log registers in trap frame */ + log_frame_dump(frame); + + get_mapping_info((vm_offset_t)pc, &pdep, &ptep); + + /* + * Dump a few words around faulting instruction, if the address is + * valid. + */ + if (!(pc & 3) && + useracc((caddr_t)(intptr_t)pc, sizeof(int) * 4, VM_PROT_READ)) { + /* dump page table entry for faulting instruction */ + log(LOG_ERR, "Page table info for pc address %#jx: pde = %p, pte = %#x\n", + (intmax_t)pc, (void *)(intptr_t)*pdep, ptep ? *ptep : 0); + + addr = (unsigned int *)(intptr_t)pc; + log(LOG_ERR, "Dumping 4 words starting at pc address %p: \n", + addr); + log(LOG_ERR, "%08x %08x %08x %08x\n", + addr[0], addr[1], addr[2], addr[3]); + } else { + log(LOG_ERR, "pc address %#jx is inaccessible, pde = %p, pte = %#x\n", + (intmax_t)pc, (void *)(intptr_t)*pdep, ptep ? 
*ptep : 0); + } +} static void log_bad_page_fault(char *msg, struct trapframe *frame, int trap_type) @@ -1296,12 +1341,12 @@ log_bad_page_fault(char *msg, struct trapframe *frame, int trap_type) } pc = frame->pc + (DELAYBRANCH(frame->cause) ? 4 : 0); - log(LOG_ERR, "%s: pid %d (%s), uid %d: pc %p got a %s fault at %p\n", + log(LOG_ERR, "%s: pid %d (%s), uid %d: pc %#jx got a %s fault at %#jx\n", msg, p->p_pid, p->p_comm, p->p_ucred ? p->p_ucred->cr_uid : -1, - (void *)pc, + (intmax_t)pc, read_or_write, - (void *)frame->badvaddr); + (intmax_t)frame->badvaddr); /* log registers in trap frame */ log_frame_dump(frame); @@ -1314,21 +1359,24 @@ log_bad_page_fault(char *msg, struct trapframe *frame, int trap_type) */ if (!(pc & 3) && (pc != frame->badvaddr) && (trap_type != T_BUS_ERR_IFETCH) && - useracc((caddr_t)pc, sizeof(int) * 4, VM_PROT_READ)) { + useracc((caddr_t)(intptr_t)pc, sizeof(int) * 4, VM_PROT_READ)) { /* dump page table entry for faulting instruction */ - log(LOG_ERR, "Page table info for pc address %p: pde = %p, pte = 0x%lx\n", - (void *)pc, *pdep, ptep ? *ptep : 0); + log(LOG_ERR, "Page table info for pc address %#jx: pde = %p, pte = %#x\n", + (intmax_t)pc, (void *)(intptr_t)*pdep, ptep ? *ptep : 0); - addr = (unsigned int *)pc; + addr = (unsigned int *)(intptr_t)pc; log(LOG_ERR, "Dumping 4 words starting at pc address %p: \n", addr); log(LOG_ERR, "%08x %08x %08x %08x\n", addr[0], addr[1], addr[2], addr[3]); } else { - log(LOG_ERR, "pc address %p is inaccessible, pde = 0x%p, pte = 0x%lx\n", - (void *)pc, *pdep, ptep ? *ptep : 0); + log(LOG_ERR, "pc address %#jx is inaccessible, pde = %p, pte = %#x\n", + (intmax_t)pc, (void *)(intptr_t)*pdep, ptep ? *ptep : 0); } - /* panic("Bad trap");*/ + + get_mapping_info((vm_offset_t)frame->badvaddr, &pdep, &ptep); + log(LOG_ERR, "Page table info for bad address %#jx: pde = %p, pte = %#x\n", + (intmax_t)frame->badvaddr, (void *)(intptr_t)*pdep, ptep ? 
*ptep : 0); } @@ -1339,7 +1387,7 @@ static int mips_unaligned_load_store(struct trapframe *frame, register_t addr, register_t pc) { register_t *reg = (register_t *) frame; - u_int32_t inst = *((u_int32_t *) pc); + u_int32_t inst = *((u_int32_t *)(intptr_t)pc); u_int32_t value_msb, value; int access_type = 0; @@ -1435,9 +1483,9 @@ emulate_unaligned_access(struct trapframe *frame) else frame->pc += 4; - log(LOG_INFO, "Unaligned %s: pc=%p, badvaddr=%p\n", - access_name[access_type - 1], (void *)pc, - (void *)frame->badvaddr); + log(LOG_INFO, "Unaligned %s: pc=%#jx, badvaddr=%#jx\n", + access_name[access_type - 1], (intmax_t)pc, + (intmax_t)frame->badvaddr); } } return access_type; diff --git a/sys/mips/mips/uio_machdep.c b/sys/mips/mips/uio_machdep.c index 0872b4d388e..10deff667e5 100644 --- a/sys/mips/mips/uio_machdep.c +++ b/sys/mips/mips/uio_machdep.c @@ -32,8 +32,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * @(#)kern_subr.c 8.3 (Berkeley) 1/21/94 - * from: src/sys/i386/i386/uio_machdep.c,v 1.8 2005/02/13 23:09:36 alc + * @(#)kern_subr.c 8.3 (Berkeley) 1/21/94 */ #include @@ -44,17 +43,18 @@ __FBSDID("$FreeBSD$"); #include #include #include -#include #include #include #include #include #include +#include /* - * Implement uiomove(9) from physical memory using sf_bufs to reduce - * the creation and destruction of ephemeral mappings. + * Implement uiomove(9) from physical memory using a combination + * of the direct mapping and sf_bufs to reduce the creation and + * destruction of ephemeral mappings. 
*/ int uiomove_fromphys(vm_page_t ma[], vm_offset_t offset, int n, struct uio *uio) @@ -64,6 +64,8 @@ uiomove_fromphys(vm_page_t ma[], vm_offset_t offset, int n, struct uio *uio) struct iovec *iov; void *cp; vm_offset_t page_offset; + vm_paddr_t pa; + vm_page_t m; size_t cnt; int error = 0; int save = 0; @@ -85,10 +87,16 @@ uiomove_fromphys(vm_page_t ma[], vm_offset_t offset, int n, struct uio *uio) if (cnt > n) cnt = n; page_offset = offset & PAGE_MASK; - cnt = min(cnt, PAGE_SIZE - page_offset); - sched_pin(); - sf = sf_buf_alloc(ma[offset >> PAGE_SHIFT], SFB_CPUPRIVATE); - cp = (char *)sf_buf_kva(sf) + page_offset; + cnt = ulmin(cnt, PAGE_SIZE - page_offset); + m = ma[offset >> PAGE_SHIFT]; + pa = VM_PAGE_TO_PHYS(m); + if (pa < MIPS_KSEG0_LARGEST_PHYS) { + cp = (char *)MIPS_PHYS_TO_KSEG0(pa); + sf = NULL; + } else { + sf = sf_buf_alloc(m, 0); + cp = (char *)sf_buf_kva(sf) + page_offset; + } switch (uio->uio_segflg) { case UIO_USERSPACE: if (ticks - PCPU_GET(switchticks) >= hogticks) @@ -98,8 +106,8 @@ uiomove_fromphys(vm_page_t ma[], vm_offset_t offset, int n, struct uio *uio) else error = copyin(iov->iov_base, cp, cnt); if (error) { - sf_buf_free(sf); - sched_unpin(); + if (sf != NULL) + sf_buf_free(sf); goto out; } break; @@ -112,8 +120,8 @@ uiomove_fromphys(vm_page_t ma[], vm_offset_t offset, int n, struct uio *uio) case UIO_NOCOPY: break; } - sf_buf_free(sf); - sched_unpin(); + if (sf != NULL) + sf_buf_free(sf); iov->iov_base = (char *)iov->iov_base + cnt; iov->iov_len -= cnt; uio->uio_resid -= cnt; diff --git a/sys/mips/mips/vm_machdep.c b/sys/mips/mips/vm_machdep.c index 3f8e6cc7f29..20243038d2b 100644 --- a/sys/mips/mips/vm_machdep.c +++ b/sys/mips/mips/vm_machdep.c @@ -56,6 +56,7 @@ __FBSDID("$FreeBSD$"); #include #include +#include #include #include #include @@ -63,12 +64,15 @@ __FBSDID("$FreeBSD$"); #include #include -#include -#include -#include -#include -#include #include +#include +#include +#include +#include +#include +#include +#include +#include 
#include #include @@ -81,25 +85,17 @@ __FBSDID("$FreeBSD$"); static void sf_buf_init(void *arg); SYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL); -LIST_HEAD(sf_head, sf_buf); - - /* - * A hash table of active sendfile(2) buffers + * Expanded sf_freelist head. Really an SLIST_HEAD() in disguise, with the + * sf_freelist head with the sf_lock mutex. */ -static struct sf_head *sf_buf_active; -static u_long sf_buf_hashmask; +static struct { + SLIST_HEAD(, sf_buf) sf_head; + struct mtx sf_lock; +} sf_freelist; -#define SF_BUF_HASH(m) (((m) - vm_page_array) & sf_buf_hashmask) - -static TAILQ_HEAD(, sf_buf) sf_buf_freelist; static u_int sf_buf_alloc_want; -/* - * A lock used to synchronize access to the hash table and free list - */ -static struct mtx sf_buf_lock; - /* * Finish a fork operation, with process p2 nearly set up. * Copy and update the pcb, set up the stack so that the child @@ -148,7 +144,7 @@ cpu_fork(register struct thread *td1,register struct proc *p2, pcb2->pcb_context[PCB_REG_RA] = (register_t)fork_trampoline; /* Make sp 64-bit aligned */ pcb2->pcb_context[PCB_REG_SP] = (register_t)(((vm_offset_t)td2->td_pcb & - ~(sizeof(__int64_t) - 1)) - STAND_FRAME_SIZE); + ~(sizeof(__int64_t) - 1)) - CALLFRAME_SIZ); pcb2->pcb_context[PCB_REG_S0] = (register_t)fork_return; pcb2->pcb_context[PCB_REG_S1] = (register_t)td2; pcb2->pcb_context[PCB_REG_S2] = (register_t)td2->td_frame; @@ -214,16 +210,6 @@ cpu_thread_swapin(struct thread *td) { pt_entry_t *pte; int i; - vm_offset_t unused_kstack_page; - - /* - * Unmap the unused kstack page. - */ - unused_kstack_page = td->td_kstack; - if (td->td_md.md_realstack == td->td_kstack) - unused_kstack_page += (KSTACK_PAGES - 1) * PAGE_SIZE; - - pmap_kremove(unused_kstack_page); /* * The kstack may be at a different physical address now. @@ -231,13 +217,9 @@ cpu_thread_swapin(struct thread *td) * part of the thread struct so cpu_switch() can quickly map in * the pcb struct and kernel stack. 
*/ - if (!(pte = pmap_segmap(kernel_pmap, td->td_md.md_realstack))) - panic("cpu_thread_swapin: invalid segmap"); - pte += ((vm_offset_t)td->td_md.md_realstack >> PGSHIFT) & (NPTEPG - 1); - - for (i = 0; i < KSTACK_PAGES - 1; i++) { + for (i = 0; i < KSTACK_PAGES; i++) { + pte = pmap_pte(kernel_pmap, td->td_kstack + i * PAGE_SIZE); td->td_md.md_upte[i] = *pte & ~(PTE_RO|PTE_WIRED); - pte++; } } @@ -249,31 +231,17 @@ cpu_thread_swapout(struct thread *td) void cpu_thread_alloc(struct thread *td) { - vm_offset_t unused_kstack_page; pt_entry_t *pte; int i; - if (td->td_kstack & (1 << PAGE_SHIFT)) { - td->td_md.md_realstack = td->td_kstack + PAGE_SIZE; - unused_kstack_page = td->td_kstack; - } else { - td->td_md.md_realstack = td->td_kstack; - unused_kstack_page = td->td_kstack + - (KSTACK_PAGES - 1) * PAGE_SIZE; - } - pmap_kremove(unused_kstack_page); - - td->td_pcb = (struct pcb *)(td->td_md.md_realstack + - (td->td_kstack_pages - 1) * PAGE_SIZE) - 1; + KASSERT((td->td_kstack & (1 << PAGE_SHIFT)) == 0, ("kernel stack must be aligned.")); + td->td_pcb = (struct pcb *)(td->td_kstack + + td->td_kstack_pages * PAGE_SIZE) - 1; td->td_frame = &td->td_pcb->pcb_regs; - if (!(pte = pmap_segmap(kernel_pmap, td->td_md.md_realstack))) - panic("cpu_thread_alloc: invalid segmap"); - pte += ((vm_offset_t)td->td_md.md_realstack >> PGSHIFT) & (NPTEPG - 1); - - for (i = 0; i < KSTACK_PAGES - 1; i++) { + for (i = 0; i < KSTACK_PAGES; i++) { + pte = pmap_pte(kernel_pmap, td->td_kstack + i * PAGE_SIZE); td->td_md.md_upte[i] = *pte & ~(PTE_RO|PTE_WIRED); - pte++; } } @@ -371,7 +339,7 @@ cpu_set_upcall(struct thread *td, struct thread *td0) pcb2->pcb_context[PCB_REG_RA] = (register_t)fork_trampoline; /* Make sp 64-bit aligned */ pcb2->pcb_context[PCB_REG_SP] = (register_t)(((vm_offset_t)td->td_pcb & - ~(sizeof(__int64_t) - 1)) - STAND_FRAME_SIZE); + ~(sizeof(__int64_t) - 1)) - CALLFRAME_SIZ); pcb2->pcb_context[PCB_REG_S0] = (register_t)fork_return; pcb2->pcb_context[PCB_REG_S1] = 
(register_t)td; pcb2->pcb_context[PCB_REG_S2] = (register_t)td->td_frame; @@ -418,7 +386,7 @@ cpu_set_upcall_kse(struct thread *td, void (*entry)(void *), void *arg, * in ``See MIPS Run'' by D. Sweetman, p. 269 * align stack */ sp = ((register_t)(stack->ss_sp + stack->ss_size) & ~0x7) - - STAND_FRAME_SIZE; + CALLFRAME_SIZ; /* * Set the trap frame to point at the beginning of the uts @@ -487,56 +455,34 @@ sf_buf_init(void *arg) nsfbufs = NSFBUFS; TUNABLE_INT_FETCH("kern.ipc.nsfbufs", &nsfbufs); - sf_buf_active = hashinit(nsfbufs, M_TEMP, &sf_buf_hashmask); - TAILQ_INIT(&sf_buf_freelist); + mtx_init(&sf_freelist.sf_lock, "sf_bufs list lock", NULL, MTX_DEF); + SLIST_INIT(&sf_freelist.sf_head); sf_base = kmem_alloc_nofault(kernel_map, nsfbufs * PAGE_SIZE); sf_bufs = malloc(nsfbufs * sizeof(struct sf_buf), M_TEMP, M_NOWAIT | M_ZERO); for (i = 0; i < nsfbufs; i++) { sf_bufs[i].kva = sf_base + i * PAGE_SIZE; - TAILQ_INSERT_TAIL(&sf_buf_freelist, &sf_bufs[i], free_entry); + SLIST_INSERT_HEAD(&sf_freelist.sf_head, &sf_bufs[i], free_list); } sf_buf_alloc_want = 0; - mtx_init(&sf_buf_lock, "sf_buf", NULL, MTX_DEF); } /* - * Allocate an sf_buf for the given vm_page. On this machine, however, there - * is no sf_buf object. Instead, an opaque pointer to the given vm_page is - * returned. + * Get an sf_buf from the freelist. Will block if none are available. 
*/ struct sf_buf * sf_buf_alloc(struct vm_page *m, int flags) { - struct sf_head *hash_list; struct sf_buf *sf; int error; - hash_list = &sf_buf_active[SF_BUF_HASH(m)]; - mtx_lock(&sf_buf_lock); - LIST_FOREACH(sf, hash_list, list_entry) { - if (sf->m == m) { - sf->ref_count++; - if (sf->ref_count == 1) { - TAILQ_REMOVE(&sf_buf_freelist, sf, free_entry); - nsfbufsused++; - nsfbufspeak = imax(nsfbufspeak, nsfbufsused); - } - /* - * Flush all mappings in order to have up to date - * physycal memory - */ - pmap_flush_pvcache(sf->m); - mips_dcache_inv_range(sf->kva, PAGE_SIZE); - goto done; - } - } - while ((sf = TAILQ_FIRST(&sf_buf_freelist)) == NULL) { + mtx_lock(&sf_freelist.sf_lock); + while ((sf = SLIST_FIRST(&sf_freelist.sf_head)) == NULL) { if (flags & SFB_NOWAIT) - goto done; + break; sf_buf_alloc_want++; mbstat.sf_allocwait++; - error = msleep(&sf_buf_freelist, &sf_buf_lock, + error = msleep(&sf_freelist, &sf_freelist.sf_lock, (flags & SFB_CATCH) ? PCATCH | PVM : PVM, "sfbufa", 0); sf_buf_alloc_want--; @@ -544,42 +490,33 @@ sf_buf_alloc(struct vm_page *m, int flags) * If we got a signal, don't risk going back to sleep. */ if (error) - goto done; + break; } - TAILQ_REMOVE(&sf_buf_freelist, sf, free_entry); - if (sf->m != NULL) - LIST_REMOVE(sf, list_entry); - LIST_INSERT_HEAD(hash_list, sf, list_entry); - sf->ref_count = 1; - sf->m = m; - nsfbufsused++; - nsfbufspeak = imax(nsfbufspeak, nsfbufsused); - pmap_qenter(sf->kva, &sf->m, 1); -done: - mtx_unlock(&sf_buf_lock); + if (sf != NULL) { + SLIST_REMOVE_HEAD(&sf_freelist.sf_head, free_list); + sf->m = m; + nsfbufsused++; + nsfbufspeak = imax(nsfbufspeak, nsfbufsused); + pmap_qenter(sf->kva, &sf->m, 1); + } + mtx_unlock(&sf_freelist.sf_lock); return (sf); } /* - * Free the sf_buf. In fact, do nothing because there are no resources - * associated with the sf_buf. + * Release resources back to the system. 
*/ void sf_buf_free(struct sf_buf *sf) { - mtx_lock(&sf_buf_lock); - sf->ref_count--; - /* - * Make sure all changes in KVA end up in physical memory - */ - mips_dcache_wbinv_range(sf->kva, PAGE_SIZE); - if (sf->ref_count == 0) { - TAILQ_INSERT_TAIL(&sf_buf_freelist, sf, free_entry); - nsfbufsused--; - if (sf_buf_alloc_want > 0) - wakeup_one(&sf_buf_freelist); - } - mtx_unlock(&sf_buf_lock); + + pmap_qremove(sf->kva, 1); + mtx_lock(&sf_freelist.sf_lock); + SLIST_INSERT_HEAD(&sf_freelist.sf_head, sf, free_list); + nsfbufsused--; + if (sf_buf_alloc_want > 0) + wakeup_one(&sf_freelist); + mtx_unlock(&sf_freelist.sf_lock); } /* @@ -698,7 +635,7 @@ DB_SHOW_COMMAND(pcb, ddb_dump_pcb) DB_PRINT_REG_ARRAY(pcb, pcb_context, PCB_REG_GP); DB_PRINT_REG_ARRAY(pcb, pcb_context, PCB_REG_PC); - db_printf("PCB onfault = %d\n", pcb->pcb_onfault); + db_printf("PCB onfault = %p\n", pcb->pcb_onfault); db_printf("md_saved_intr = 0x%0lx\n", (long)td->td_md.md_saved_intr); db_printf("md_spinlock_count = %d\n", td->td_md.md_spinlock_count); diff --git a/sys/mips/rmi/clock.c b/sys/mips/rmi/clock.c index 6964522e9c9..c123edf589e 100644 --- a/sys/mips/rmi/clock.c +++ b/sys/mips/rmi/clock.c @@ -116,11 +116,11 @@ count_compare_clockhandler(struct trapframe *tf) cycles += XLR_CPU_HZ / hz; mips_wr_compare(cycles); - hardclock_cpu(USERMODE(tf->sr)); + hardclock_cpu(TRAPF_USERMODE(tf)); if (count_scale_factor[cpu] == STAT_PROF_CLOCK_SCALE_FACTOR) { - statclock(USERMODE(tf->sr)); + statclock(TRAPF_USERMODE(tf)); if (profprocs != 0) { - profclock(USERMODE(tf->sr), tf->pc); + profclock(TRAPF_USERMODE(tf), tf->pc); } count_scale_factor[cpu] = 0; } @@ -148,11 +148,11 @@ pic_hardclockhandler(struct trapframe *tf) printf("Clock tick foo at %ld\n", clock_tick_foo); } */ - hardclock(USERMODE(tf->sr), tf->pc); + hardclock(TRAPF_USERMODE(tf), tf->pc); if (scale_factor == STAT_PROF_CLOCK_SCALE_FACTOR) { - statclock(USERMODE(tf->sr)); + statclock(TRAPF_USERMODE(tf)); if (profprocs != 0) { - 
profclock(USERMODE(tf->sr), tf->pc); + profclock(TRAPF_USERMODE(tf), tf->pc); } scale_factor = 0; } diff --git a/sys/mips/rmi/debug.h b/sys/mips/rmi/debug.h old mode 100755 new mode 100644 diff --git a/sys/mips/rmi/dev/sec/desc.h b/sys/mips/rmi/dev/sec/desc.h old mode 100755 new mode 100644 diff --git a/sys/mips/rmi/msgring.h b/sys/mips/rmi/msgring.h old mode 100755 new mode 100644 diff --git a/sys/mips/rmi/shared_structs.h b/sys/mips/rmi/shared_structs.h old mode 100755 new mode 100644 diff --git a/sys/mips/rmi/shared_structs_func.h b/sys/mips/rmi/shared_structs_func.h old mode 100755 new mode 100644 diff --git a/sys/mips/rmi/shared_structs_offsets.h b/sys/mips/rmi/shared_structs_offsets.h old mode 100755 new mode 100644 diff --git a/sys/mips/sibyte/sb_machdep.c b/sys/mips/sibyte/sb_machdep.c index dca2869a5f2..559bf741502 100644 --- a/sys/mips/sibyte/sb_machdep.c +++ b/sys/mips/sibyte/sb_machdep.c @@ -158,6 +158,17 @@ mips_init(void) TUNABLE_INT_FETCH("hw.physmem", &tmp); maxmem = (uint64_t)tmp * 1024; + /* + * XXX + * If we used vm_paddr_t consistently in pmap, etc., we could + * use 64-bit page numbers on !n64 systems, too, like i386 + * does with PAE. + */ +#if !defined(__mips_n64) + if (maxmem == 0 || maxmem > 0xffffffff) + maxmem = 0xffffffff; +#endif + #ifdef CFE /* * Query DRAM memory map from CFE. 
diff --git a/sys/modules/Makefile b/sys/modules/Makefile index 3af154c750b..601b73be9bb 100644 --- a/sys/modules/Makefile +++ b/sys/modules/Makefile @@ -188,6 +188,7 @@ SUBDIR= ${_3dfx} \ msdosfs_iconv \ ${_mse} \ msk \ + mvs \ mwl \ mxge \ my \ @@ -252,6 +253,7 @@ SUBDIR= ${_3dfx} \ sdhci \ sem \ sf \ + sge \ siba_bwn \ siis \ sis \ diff --git a/sys/modules/acpi/acpi/Makefile b/sys/modules/acpi/acpi/Makefile index 6fa36ffcd70..2606023dbaf 100644 --- a/sys/modules/acpi/acpi/Makefile +++ b/sys/modules/acpi/acpi/Makefile @@ -33,8 +33,8 @@ SRCS+= dmbuffer.c dmnames.c dmopcode.c dmobject.c dmresrc.c dmresrcl.c SRCS+= dmresrcs.c dmutils.c dmwalk.c SRCS+= dsfield.c dsinit.c dsmethod.c dsmthdat.c dsobject.c dsopcode.c SRCS+= dsutils.c dswexec.c dswload.c dswscope.c dswstate.c -SRCS+= evevent.c evgpe.c evgpeblk.c evmisc.c evregion.c evrgnini.c evsci.c -SRCS+= evxface.c evxfevnt.c evxfregn.c +SRCS+= evevent.c evgpe.c evgpeblk.c evgpeinit.c evgpeutil.c evmisc.c +SRCS+= evregion.c evrgnini.c evsci.c evxface.c evxfevnt.c evxfregn.c SRCS+= exconfig.c exconvrt.c excreate.c exdebug.c exdump.c exfield.c SRCS+= exfldio.c exmisc.c exmutex.c exnames.c exoparg1.c exoparg2.c SRCS+= exoparg3.c exoparg6.c exprep.c exregion.c exresnte.c exresolv.c diff --git a/sys/modules/cyclic/Makefile b/sys/modules/cyclic/Makefile index db99488e138..371dac68054 100644 --- a/sys/modules/cyclic/Makefile +++ b/sys/modules/cyclic/Makefile @@ -10,7 +10,7 @@ SRCS+= vnode_if.h CFLAGS+= -I${.CURDIR}/../../cddl/compat/opensolaris \ -I${.CURDIR}/../../cddl/contrib/opensolaris/uts/common \ -I${.CURDIR}/../.. 
\ - -I${.CURDIR}/../../cddl/dev/cyclic/${MACHINE_ARCH} + -I${.CURDIR}/../../cddl/dev/cyclic/${MACHINE_ARCH:S/amd64/i386/} CFLAGS+= -DDEBUG=1 diff --git a/sys/modules/mvs/Makefile b/sys/modules/mvs/Makefile new file mode 100644 index 00000000000..f33a0927858 --- /dev/null +++ b/sys/modules/mvs/Makefile @@ -0,0 +1,10 @@ +# $FreeBSD$ + +.PATH: ${.CURDIR}/../../dev/mvs + +KMOD= mvs +SRCS= mvs.c mvs_pci.c mvs.h mvs_if.h device_if.h bus_if.h pci_if.h opt_cam.h + +MFILES= kern/bus_if.m kern/device_if.m dev/pci/pci_if.m dev/mvs/mvs_if.m + +.include diff --git a/sys/modules/sge/Makefile b/sys/modules/sge/Makefile new file mode 100644 index 00000000000..5f8c5879554 --- /dev/null +++ b/sys/modules/sge/Makefile @@ -0,0 +1,8 @@ +# $FreeBSD$ + +.PATH: ${.CURDIR}/../../dev/sge + +KMOD= if_sge +SRCS= if_sge.c device_if.h bus_if.h pci_if.h miibus_if.h + +.include diff --git a/sys/modules/usb/usb/Makefile b/sys/modules/usb/usb/Makefile index 5b0104e82ae..32a21075005 100644 --- a/sys/modules/usb/usb/Makefile +++ b/sys/modules/usb/usb/Makefile @@ -31,7 +31,7 @@ S= ${.CURDIR}/../../.. KMOD= usb SRCS= bus_if.h device_if.h usb_if.h usb_if.c vnode_if.h \ - opt_usb.h opt_bus.h \ + opt_usb.h opt_bus.h opt_ddb.h \ usbdevs.h usbdevs_data.h \ usb_busdma.c usb_controller.c usb_compat_linux.c usb_core.c usb_debug.c \ usb_dev.c usb_device.c usb_dynamic.c usb_error.c usb_generic.c \ diff --git a/sys/net/bpf.c b/sys/net/bpf.c index bac3da40308..16245c947da 100644 --- a/sys/net/bpf.c +++ b/sys/net/bpf.c @@ -38,6 +38,7 @@ __FBSDID("$FreeBSD$"); #include "opt_bpf.h" +#include "opt_compat.h" #include "opt_netgraph.h" #include @@ -89,6 +90,43 @@ MALLOC_DEFINE(M_BPF, "BPF", "BPF data"); #define PRINET 26 /* interruptible */ +#ifdef COMPAT_FREEBSD32 +#include +#include +#define BPF_ALIGNMENT32 sizeof(int32_t) +#define BPF_WORDALIGN32(x) (((x)+(BPF_ALIGNMENT32-1))&~(BPF_ALIGNMENT32-1)) + +/* + * 32-bit version of structure prepended to each packet. 
We use this header + * instead of the standard one for 32-bit streams. We mark a stream as + * 32-bit the first time we see a 32-bit compat ioctl request. + */ +struct bpf_hdr32 { + struct timeval32 bh_tstamp; /* time stamp */ + uint32_t bh_caplen; /* length of captured portion */ + uint32_t bh_datalen; /* original length of packet */ + uint16_t bh_hdrlen; /* length of bpf header (this struct + plus alignment padding) */ +}; + +struct bpf_program32 { + u_int bf_len; + uint32_t bf_insns; +}; + +struct bpf_dltlist32 { + u_int bfl_len; + u_int bfl_list; +}; + +#define BIOCSETF32 _IOW('B', 103, struct bpf_program32) +#define BIOCSRTIMEOUT32 _IOW('B',109, struct timeval32) +#define BIOCGRTIMEOUT32 _IOR('B',110, struct timeval32) +#define BIOCGDLTLIST32 _IOWR('B',121, struct bpf_dltlist32) +#define BIOCSETWF32 _IOW('B',123, struct bpf_program32) +#define BIOCSETFNR32 _IOW('B',130, struct bpf_program32) +#endif + /* * bpf_iflist is a list of BPF interface structures, each corresponding to a * specific DLT. The same network interface might have several BPF interface @@ -1005,8 +1043,14 @@ bpfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags, case BIOCFLUSH: case BIOCGDLT: case BIOCGDLTLIST: +#ifdef COMPAT_FREEBSD32 + case BIOCGDLTLIST32: +#endif case BIOCGETIF: case BIOCGRTIMEOUT: +#ifdef COMPAT_FREEBSD32 + case BIOCGRTIMEOUT32: +#endif case BIOCGSTATS: case BIOCVERSION: case BIOCGRSIG: @@ -1015,6 +1059,9 @@ bpfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags, case FIONREAD: case BIOCLOCK: case BIOCSRTIMEOUT: +#ifdef COMPAT_FREEBSD32 + case BIOCSRTIMEOUT32: +#endif case BIOCIMMEDIATE: case TIOCGPGRP: case BIOCROTZBUF: @@ -1023,6 +1070,22 @@ bpfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags, return (EPERM); } } +#ifdef COMPAT_FREEBSD32 + /* + * If we see a 32-bit compat ioctl, mark the stream as 32-bit so + * that it will get 32-bit packet headers. 
+ */ + switch (cmd) { + case BIOCSETF32: + case BIOCSETFNR32: + case BIOCSETWF32: + case BIOCGDLTLIST32: + case BIOCGRTIMEOUT32: + case BIOCSRTIMEOUT32: + d->bd_compat32 = 1; + } +#endif + CURVNET_SET(TD_TO_VNET(td)); switch (cmd) { @@ -1080,6 +1143,11 @@ bpfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags, case BIOCSETF: case BIOCSETFNR: case BIOCSETWF: +#ifdef COMPAT_FREEBSD32 + case BIOCSETF32: + case BIOCSETFNR32: + case BIOCSETWF32: +#endif error = bpf_setf(d, (struct bpf_program *)addr, cmd); break; @@ -1123,6 +1191,26 @@ bpfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags, /* * Get a list of supported data link types. */ +#ifdef COMPAT_FREEBSD32 + case BIOCGDLTLIST32: + { + struct bpf_dltlist32 *list32; + struct bpf_dltlist dltlist; + + list32 = (struct bpf_dltlist32 *)addr; + dltlist.bfl_len = list32->bfl_len; + dltlist.bfl_list = PTRIN(list32->bfl_list); + if (d->bd_bif == NULL) + error = EINVAL; + else { + error = bpf_getdltlist(d, &dltlist); + if (error == 0) + list32->bfl_len = dltlist.bfl_len; + } + break; + } +#endif + case BIOCGDLTLIST: if (d->bd_bif == NULL) error = EINVAL; @@ -1166,8 +1254,23 @@ bpfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags, * Set read timeout. */ case BIOCSRTIMEOUT: +#ifdef COMPAT_FREEBSD32 + case BIOCSRTIMEOUT32: +#endif { struct timeval *tv = (struct timeval *)addr; +#ifdef COMPAT_FREEBSD32 + struct timeval32 *tv32; + struct timeval tv64; + + if (cmd == BIOCSRTIMEOUT32) { + tv32 = (struct timeval32 *)addr; + tv = &tv64; + tv->tv_sec = tv32->tv_sec; + tv->tv_usec = tv32->tv_usec; + } else +#endif + tv = (struct timeval *)addr; /* * Subtract 1 tick from tvtohz() since this isn't @@ -1182,11 +1285,31 @@ bpfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags, * Get read timeout. 
*/ case BIOCGRTIMEOUT: +#ifdef COMPAT_FREEBSD32 + case BIOCGRTIMEOUT32: +#endif { - struct timeval *tv = (struct timeval *)addr; + struct timeval *tv; +#ifdef COMPAT_FREEBSD32 + struct timeval32 *tv32; + struct timeval tv64; + + if (cmd == BIOCGRTIMEOUT32) + tv = &tv64; + else +#endif + tv = (struct timeval *)addr; tv->tv_sec = d->bd_rtout / hz; tv->tv_usec = (d->bd_rtout % hz) * tick; +#ifdef COMPAT_FREEBSD32 + if (cmd == BIOCGRTIMEOUT32) { + tv32 = (struct timeval32 *)addr; + tv32->tv_sec = tv->tv_sec; + tv32->tv_usec = tv->tv_usec; + } +#endif + break; } @@ -1331,6 +1454,7 @@ bpfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags, /* FALLSTHROUGH */ default: + CURVNET_RESTORE(); return (EINVAL); } @@ -1338,6 +1462,7 @@ bpfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags, if (d->bd_sbuf != NULL || d->bd_hbuf != NULL || d->bd_fbuf != NULL || d->bd_bif != NULL) { BPFD_UNLOCK(d); + CURVNET_RESTORE(); return (EBUSY); } d->bd_bufmode = *(u_int *)addr; @@ -1345,13 +1470,16 @@ bpfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags, break; case BIOCGETZMAX: - return (bpf_ioctl_getzmax(td, d, (size_t *)addr)); + error = bpf_ioctl_getzmax(td, d, (size_t *)addr); + break; case BIOCSETZBUF: - return (bpf_ioctl_setzbuf(td, d, (struct bpf_zbuf *)addr)); + error = bpf_ioctl_setzbuf(td, d, (struct bpf_zbuf *)addr); + break; case BIOCROTZBUF: - return (bpf_ioctl_rotzbuf(td, d, (struct bpf_zbuf *)addr)); + error = bpf_ioctl_rotzbuf(td, d, (struct bpf_zbuf *)addr); + break; } CURVNET_RESTORE(); return (error); @@ -1369,7 +1497,19 @@ bpf_setf(struct bpf_d *d, struct bpf_program *fp, u_long cmd) #ifdef BPF_JITTER bpf_jit_filter *ofunc; #endif +#ifdef COMPAT_FREEBSD32 + struct bpf_program32 *fp32; + struct bpf_program fp_swab; + if (cmd == BIOCSETWF32 || cmd == BIOCSETF32 || cmd == BIOCSETFNR32) { + fp32 = (struct bpf_program32 *)fp; + fp_swab.bf_len = fp32->bf_len; + fp_swab.bf_insns = (struct bpf_insn *)(uintptr_t)fp32->bf_insns; + fp = &fp_swab; + if (cmd 
== BIOCSETWF32) + cmd = BIOCSETWF; + } +#endif if (cmd == BIOCSETWF) { old = d->bd_wfilter; wfilter = 1; @@ -1771,6 +1911,9 @@ catchpacket(struct bpf_d *d, u_char *pkt, u_int pktlen, u_int snaplen, struct timeval *tv) { struct bpf_hdr hdr; +#ifdef COMPAT_FREEBSD32 + struct bpf_hdr32 hdr32; +#endif int totlen, curlen; int hdrlen = d->bd_bif->bif_hdrlen; int do_wakeup = 0; @@ -1809,7 +1952,12 @@ catchpacket(struct bpf_d *d, u_char *pkt, u_int pktlen, u_int snaplen, * buffer is considered immutable by the buffer model, try to rotate * the buffer and wakeup pending processes. */ - curlen = BPF_WORDALIGN(d->bd_slen); +#ifdef COMPAT_FREEBSD32 + if (d->bd_compat32) + curlen = BPF_WORDALIGN32(d->bd_slen); + else +#endif + curlen = BPF_WORDALIGN(d->bd_slen); if (curlen + totlen > d->bd_bufsize || !bpf_canwritebuf(d)) { if (d->bd_fbuf == NULL) { /* @@ -1831,6 +1979,22 @@ catchpacket(struct bpf_d *d, u_char *pkt, u_int pktlen, u_int snaplen, * reader should be woken up. */ do_wakeup = 1; +#ifdef COMPAT_FREEBSD32 + /* + * If this is a 32-bit stream, then stick a 32-bit header at the + * front and copy the data into the buffer. + */ + if (d->bd_compat32) { + bzero(&hdr32, sizeof(hdr32)); + hdr32.bh_tstamp.tv_sec = tv->tv_sec; + hdr32.bh_tstamp.tv_usec = tv->tv_usec; + hdr32.bh_datalen = pktlen; + hdr32.bh_hdrlen = hdrlen; + hdr.bh_caplen = hdr32.bh_caplen = totlen - hdrlen; + bpf_append_bytes(d, d->bd_sbuf, curlen, &hdr32, sizeof(hdr32)); + goto copy; + } +#endif /* * Append the bpf header. Note we append the actual header size, but @@ -1846,6 +2010,9 @@ catchpacket(struct bpf_d *d, u_char *pkt, u_int pktlen, u_int snaplen, /* * Copy the packet data into the store buffer and update its length. 
*/ +#ifdef COMPAT_FREEBSD32 + copy: +#endif (*cpfn)(d, d->bd_sbuf, curlen + hdrlen, pkt, hdr.bh_caplen); d->bd_slen = curlen + totlen; diff --git a/sys/net/bpf_zerocopy.c b/sys/net/bpf_zerocopy.c index a1dd923a4f5..91efd649db4 100644 --- a/sys/net/bpf_zerocopy.c +++ b/sys/net/bpf_zerocopy.c @@ -112,11 +112,11 @@ static void zbuf_page_free(vm_page_t pp) { - vm_page_lock_queues(); + vm_page_lock(pp); vm_page_unwire(pp, 0); if (pp->wire_count == 0 && pp->object == NULL) vm_page_free(pp); - vm_page_unlock_queues(); + vm_page_unlock(pp); } /* @@ -168,10 +168,10 @@ zbuf_sfbuf_get(struct vm_map *map, vm_offset_t uaddr) VM_PROT_WRITE); if (pp == NULL) return (NULL); - vm_page_lock_queues(); + vm_page_lock(pp); vm_page_wire(pp); vm_page_unhold(pp); - vm_page_unlock_queues(); + vm_page_unlock(pp); sf = sf_buf_alloc(pp, SFB_NOWAIT); if (sf == NULL) { zbuf_page_free(pp); diff --git a/sys/net/bpfdesc.h b/sys/net/bpfdesc.h index 5784763ed8a..03cb20dd519 100644 --- a/sys/net/bpfdesc.h +++ b/sys/net/bpfdesc.h @@ -97,6 +97,7 @@ struct bpf_d { u_int64_t bd_wfcount; /* number of packets that matched write filter */ u_int64_t bd_wdcount; /* number of packets dropped during a write */ u_int64_t bd_zcopy; /* number of zero copy operations */ + u_char bd_compat32; /* 32-bit stream on LP64 system */ }; /* Values for bd_state */ diff --git a/sys/net/flowtable.c b/sys/net/flowtable.c index 39b6b40a996..0e4beb4a07f 100644 --- a/sys/net/flowtable.c +++ b/sys/net/flowtable.c @@ -1626,7 +1626,7 @@ flowtable_init(const void *unused __unused) EVENTHANDLER_PRI_ANY); flowclean_freq = 20*hz; } -SYSINIT(flowtable_init, SI_SUB_SMP, SI_ORDER_MIDDLE, +SYSINIT(flowtable_init, SI_SUB_KTHREAD_INIT, SI_ORDER_FIRST, flowtable_init, NULL); diff --git a/sys/net/if.c b/sys/net/if.c index e4a20054963..780f2c291aa 100644 --- a/sys/net/if.c +++ b/sys/net/if.c @@ -104,6 +104,10 @@ struct ifindex_entry { SYSCTL_NODE(_net, PF_LINK, link, CTLFLAG_RW, 0, "Link layers"); SYSCTL_NODE(_net_link, 0, generic, CTLFLAG_RW, 0, 
"Generic link-management"); +TUNABLE_INT("net.link.ifqmaxlen", &ifqmaxlen); +SYSCTL_UINT(_net_link, OID_AUTO, ifqmaxlen, CTLFLAG_RDTUN, + &ifqmaxlen, 0, "max send queue size"); + /* Log link state change events */ static int log_link_state_change = 1; @@ -163,9 +167,11 @@ static void if_detach_internal(struct ifnet *, int); extern void nd6_setmtu(struct ifnet *); #endif +VNET_DEFINE(int, if_index); +int ifqmaxlen = IFQ_MAXLEN; VNET_DEFINE(struct ifnethead, ifnet); /* depend on static init XXX */ VNET_DEFINE(struct ifgrouphead, ifg_head); -VNET_DEFINE(int, if_index); + static VNET_DEFINE(int, if_indexlim) = 8; /* Table of ifnet by index. */ @@ -174,8 +180,6 @@ static VNET_DEFINE(struct ifindex_entry *, ifindex_table); #define V_if_indexlim VNET(if_indexlim) #define V_ifindex_table VNET(ifindex_table) -int ifqmaxlen = IFQ_MAXLEN; - /* * The global network interface list (V_ifnet) and related state (such as * if_index, if_indexlim, and ifindex_table) are protected by an sxlock and @@ -2049,14 +2053,13 @@ ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td) case SIOCGIFDESCR: error = 0; sx_slock(&ifdescr_sx); - if (ifp->if_description == NULL) { - ifr->ifr_buffer.length = 0; + if (ifp->if_description == NULL) error = ENOMSG; - } else { + else { /* space for terminating nul */ descrlen = strlen(ifp->if_description) + 1; if (ifr->ifr_buffer.length < descrlen) - error = ENAMETOOLONG; + ifr->ifr_buffer.buffer = NULL; else error = copyout(ifp->if_description, ifr->ifr_buffer.buffer, descrlen); diff --git a/sys/net/if_ef.c b/sys/net/if_ef.c index 94fb03d13a4..61f7cb7933f 100644 --- a/sys/net/if_ef.c +++ b/sys/net/if_ef.c @@ -129,7 +129,7 @@ ef_attach(struct efnet *sc) ifp->if_start = ef_start; ifp->if_init = ef_init; - ifp->if_snd.ifq_maxlen = IFQ_MAXLEN; + ifp->if_snd.ifq_maxlen = ifqmaxlen; ifp->if_flags = (IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST); /* * Attach the interface diff --git a/sys/net/if_gif.c b/sys/net/if_gif.c index 
22ef6abc8ee..76838396bc4 100644 --- a/sys/net/if_gif.c +++ b/sys/net/if_gif.c @@ -94,20 +94,9 @@ */ static struct mtx gif_mtx; static MALLOC_DEFINE(M_GIF, "gif", "Generic Tunnel Interface"); - static VNET_DEFINE(LIST_HEAD(, gif_softc), gif_softc_list); - #define V_gif_softc_list VNET(gif_softc_list) -#ifdef INET -VNET_DEFINE(int, ip_gif_ttl) = GIF_TTL; -#define V_ip_gif_ttl VNET(ip_gif_ttl) -#endif -#ifdef INET6 -VNET_DEFINE(int, ip6_gif_hlim) = GIF_HLIM; -#define V_ip6_gif_hlim VNET(ip6_gif_hlim) -#endif - void (*ng_gif_input_p)(struct ifnet *ifp, struct mbuf **mp, int af); void (*ng_gif_input_orphan_p)(struct ifnet *ifp, struct mbuf *m, int af); void (*ng_gif_attach_p)(struct ifnet *ifp); @@ -135,19 +124,11 @@ SYSCTL_NODE(_net_link, IFT_GIF, gif, CTLFLAG_RW, 0, */ #define MAX_GIF_NEST 1 #endif - static VNET_DEFINE(int, max_gif_nesting) = MAX_GIF_NEST; #define V_max_gif_nesting VNET(max_gif_nesting) - SYSCTL_VNET_INT(_net_link_gif, OID_AUTO, max_nesting, CTLFLAG_RW, &VNET_NAME(max_gif_nesting), 0, "Max nested tunnels"); -#ifdef INET6 -SYSCTL_DECL(_net_inet6_ip6); -SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_GIF_HLIM, gifhlim, CTLFLAG_RW, - &VNET_NAME(ip6_gif_hlim), 0, ""); -#endif - /* * By default, we disallow creation of multiple tunnels between the same * pair of addresses. 
Some applications require this functionality so @@ -159,7 +140,6 @@ static VNET_DEFINE(int, parallel_tunnels) = 1; static VNET_DEFINE(int, parallel_tunnels) = 0; #endif #define V_parallel_tunnels VNET(parallel_tunnels) - SYSCTL_VNET_INT(_net_link_gif, OID_AUTO, parallel_tunnels, CTLFLAG_RW, &VNET_NAME(parallel_tunnels), 0, "Allow parallel tunnels?"); @@ -205,7 +185,7 @@ gif_clone_create(ifc, unit, params) GIF2IFP(sc)->if_ioctl = gif_ioctl; GIF2IFP(sc)->if_start = gif_start; GIF2IFP(sc)->if_output = gif_output; - GIF2IFP(sc)->if_snd.ifq_maxlen = IFQ_MAXLEN; + GIF2IFP(sc)->if_snd.ifq_maxlen = ifqmaxlen; if_attach(GIF2IFP(sc)); bpfattach(GIF2IFP(sc), DLT_NULL, sizeof(u_int32_t)); if (ng_gif_attach_p != NULL) diff --git a/sys/net/if_gif.h b/sys/net/if_gif.h index 18b7831a45b..5d0db6d1433 100644 --- a/sys/net/if_gif.h +++ b/sys/net/if_gif.h @@ -118,10 +118,6 @@ int gif_ioctl(struct ifnet *, u_long, caddr_t); int gif_set_tunnel(struct ifnet *, struct sockaddr *, struct sockaddr *); void gif_delete_tunnel(struct ifnet *); int gif_encapcheck(const struct mbuf *, int, int, void *); - -VNET_DECLARE(int, ip_gif_ttl); -#define V_ip_gif_ttl VNET(ip_gif_ttl) - #endif /* _KERNEL */ #define GIFGOPTS _IOWR('i', 150, struct ifreq) diff --git a/sys/net/if_gre.c b/sys/net/if_gre.c index bac3d7954fa..1f1e1082d9b 100644 --- a/sys/net/if_gre.c +++ b/sys/net/if_gre.c @@ -184,7 +184,7 @@ gre_clone_create(ifc, unit, params) GRE2IFP(sc)->if_softc = sc; if_initname(GRE2IFP(sc), ifc->ifc_name, unit); - GRE2IFP(sc)->if_snd.ifq_maxlen = IFQ_MAXLEN; + GRE2IFP(sc)->if_snd.ifq_maxlen = ifqmaxlen; GRE2IFP(sc)->if_addrlen = 0; GRE2IFP(sc)->if_hdrlen = 24; /* IP + GRE */ GRE2IFP(sc)->if_mtu = GREMTU; diff --git a/sys/net/if_stf.c b/sys/net/if_stf.c index 5c082c4926a..1ef5581ae62 100644 --- a/sys/net/if_stf.c +++ b/sys/net/if_stf.c @@ -252,7 +252,7 @@ stf_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params) ifp->if_mtu = IPV6_MMTU; ifp->if_ioctl = stf_ioctl; ifp->if_output = 
stf_output; - ifp->if_snd.ifq_maxlen = IFQ_MAXLEN; + ifp->if_snd.ifq_maxlen = ifqmaxlen; if_attach(ifp); bpfattach(ifp, DLT_NULL, sizeof(u_int32_t)); return (0); diff --git a/sys/net/route.c b/sys/net/route.c index e500ed13033..b45361e7168 100644 --- a/sys/net/route.c +++ b/sys/net/route.c @@ -88,16 +88,15 @@ SYSCTL_INT(_net, OID_AUTO, add_addr_allfibs, CTLFLAG_RW, &rt_add_addr_allfibs, 0, ""); TUNABLE_INT("net.add_addr_allfibs", &rt_add_addr_allfibs); -VNET_DEFINE(struct radix_node_head *, rt_tables); -static VNET_DEFINE(uma_zone_t, rtzone); /* Routing table UMA zone. */ -VNET_DEFINE(int, rttrash); /* routes not in table but not freed */ VNET_DEFINE(struct rtstat, rtstat); - -#define V_rt_tables VNET(rt_tables) -#define V_rtzone VNET(rtzone) -#define V_rttrash VNET(rttrash) #define V_rtstat VNET(rtstat) +VNET_DEFINE(struct radix_node_head *, rt_tables); +#define V_rt_tables VNET(rt_tables) + +VNET_DEFINE(int, rttrash); /* routes not in table but not freed */ +#define V_rttrash VNET(rttrash) + /* compare two sockaddr structures */ #define sa_equal(a1, a2) (bcmp((a1), (a2), (a1)->sa_len) == 0) @@ -114,6 +113,9 @@ VNET_DEFINE(struct rtstat, rtstat); */ #define RNTORT(p) ((struct rtentry *)(p)) +static VNET_DEFINE(uma_zone_t, rtzone); /* Routing table UMA zone. 
*/ +#define V_rtzone VNET(rtzone) + #if 0 /* default fib for tunnels to use */ u_int tunnel_fib = 0; diff --git a/sys/net/rtsock.c b/sys/net/rtsock.c index df4f9ae762b..de6548228ea 100644 --- a/sys/net/rtsock.c +++ b/sys/net/rtsock.c @@ -29,6 +29,7 @@ * @(#)rtsock.c 8.7 (Berkeley) 10/12/95 * $FreeBSD$ */ +#include "opt_compat.h" #include "opt_sctp.h" #include "opt_mpath.h" #include "opt_inet.h" @@ -71,6 +72,49 @@ extern void sctp_addr_change(struct ifaddr *ifa, int cmd); #endif /* SCTP */ #endif +#ifdef COMPAT_FREEBSD32 +#include +#include + +struct if_data32 { + uint8_t ifi_type; + uint8_t ifi_physical; + uint8_t ifi_addrlen; + uint8_t ifi_hdrlen; + uint8_t ifi_link_state; + uint8_t ifi_spare_char1; + uint8_t ifi_spare_char2; + uint8_t ifi_datalen; + uint32_t ifi_mtu; + uint32_t ifi_metric; + uint32_t ifi_baudrate; + uint32_t ifi_ipackets; + uint32_t ifi_ierrors; + uint32_t ifi_opackets; + uint32_t ifi_oerrors; + uint32_t ifi_collisions; + uint32_t ifi_ibytes; + uint32_t ifi_obytes; + uint32_t ifi_imcasts; + uint32_t ifi_omcasts; + uint32_t ifi_iqdrops; + uint32_t ifi_noproto; + uint32_t ifi_hwassist; + int32_t ifi_epoch; + struct timeval32 ifi_lastchange; +}; + +struct if_msghdr32 { + uint16_t ifm_msglen; + uint8_t ifm_version; + uint8_t ifm_type; + int32_t ifm_addrs; + int32_t ifm_flags; + uint16_t ifm_index; + struct if_data32 ifm_data; +}; +#endif + MALLOC_DEFINE(M_RTABLE, "routetbl", "routing tables"); /* NB: these are not modified */ @@ -1001,6 +1045,12 @@ again: break; case RTM_IFINFO: +#ifdef COMPAT_FREEBSD32 + if (w != NULL && w->w_req->flags & SCTL_MASK32) { + len = sizeof(struct if_msghdr32); + break; + } +#endif len = sizeof(struct if_msghdr); break; @@ -1367,6 +1417,38 @@ sysctl_dumpentry(struct radix_node *rn, void *vw) return (error); } +#ifdef COMPAT_FREEBSD32 +static void +copy_ifdata32(struct if_data *src, struct if_data32 *dst) +{ + + bzero(dst, sizeof(*dst)); + CP(*src, *dst, ifi_type); + CP(*src, *dst, ifi_physical); + CP(*src, *dst, 
ifi_addrlen); + CP(*src, *dst, ifi_hdrlen); + CP(*src, *dst, ifi_link_state); + CP(*src, *dst, ifi_datalen); + CP(*src, *dst, ifi_mtu); + CP(*src, *dst, ifi_metric); + CP(*src, *dst, ifi_baudrate); + CP(*src, *dst, ifi_ipackets); + CP(*src, *dst, ifi_ierrors); + CP(*src, *dst, ifi_opackets); + CP(*src, *dst, ifi_oerrors); + CP(*src, *dst, ifi_collisions); + CP(*src, *dst, ifi_ibytes); + CP(*src, *dst, ifi_obytes); + CP(*src, *dst, ifi_imcasts); + CP(*src, *dst, ifi_omcasts); + CP(*src, *dst, ifi_iqdrops); + CP(*src, *dst, ifi_noproto); + CP(*src, *dst, ifi_hwassist); + CP(*src, *dst, ifi_epoch); + TV_CP(*src, *dst, ifi_lastchange); +} +#endif + static int sysctl_iflist(int af, struct walkarg *w) { @@ -1387,12 +1469,30 @@ sysctl_iflist(int af, struct walkarg *w) if (w->w_req && w->w_tmem) { struct if_msghdr *ifm; +#ifdef COMPAT_FREEBSD32 + if (w->w_req->flags & SCTL_MASK32) { + struct if_msghdr32 *ifm32; + + ifm32 = (struct if_msghdr32 *)w->w_tmem; + ifm32->ifm_index = ifp->if_index; + ifm32->ifm_flags = ifp->if_flags | + ifp->if_drv_flags; + copy_ifdata32(&ifp->if_data, &ifm32->ifm_data); + ifm32->ifm_addrs = info.rti_addrs; + error = SYSCTL_OUT(w->w_req, (caddr_t)ifm32, + len); + goto sysctl_out; + } +#endif ifm = (struct if_msghdr *)w->w_tmem; ifm->ifm_index = ifp->if_index; ifm->ifm_flags = ifp->if_flags | ifp->if_drv_flags; ifm->ifm_data = ifp->if_data; ifm->ifm_addrs = info.rti_addrs; - error = SYSCTL_OUT(w->w_req,(caddr_t)ifm, len); + error = SYSCTL_OUT(w->w_req, (caddr_t)ifm, len); +#ifdef COMPAT_FREEBSD32 + sysctl_out: +#endif if (error) goto done; } diff --git a/sys/net/vnet.c b/sys/net/vnet.c index 1dac03abf1d..8013f5ed594 100644 --- a/sys/net/vnet.c +++ b/sys/net/vnet.c @@ -153,15 +153,6 @@ struct vnet *vnet0; * module will find every network stack instance with proper default values. */ -/* - * Location of the kernel's 'set_vnet' linker set. 
- */ -extern uintptr_t *__start_set_vnet; -extern uintptr_t *__stop_set_vnet; - -#define VNET_START (uintptr_t)&__start_set_vnet -#define VNET_STOP (uintptr_t)&__stop_set_vnet - /* * Number of bytes of data in the 'set_vnet' linker set, and hence the total * size of all kernel virtualized global variables, and the malloc(9) type diff --git a/sys/net/vnet.h b/sys/net/vnet.h index fb2cc393cb7..4cdfdef3063 100644 --- a/sys/net/vnet.h +++ b/sys/net/vnet.h @@ -91,6 +91,15 @@ struct vnet { #include #include +/* + * Location of the kernel's 'set_vnet' linker set. + */ +extern uintptr_t *__start_set_vnet; +extern uintptr_t *__stop_set_vnet; + +#define VNET_START (uintptr_t)&__start_set_vnet +#define VNET_STOP (uintptr_t)&__stop_set_vnet + /* * Functions to allocate and destroy virtual network stacks. */ diff --git a/sys/net80211/ieee80211.c b/sys/net80211/ieee80211.c index 3d5669caec5..97ff7c21ad4 100644 --- a/sys/net80211/ieee80211.c +++ b/sys/net80211/ieee80211.c @@ -394,8 +394,8 @@ ieee80211_vap_setup(struct ieee80211com *ic, struct ieee80211vap *vap, ifp->if_ioctl = ieee80211_ioctl; ifp->if_init = ieee80211_init; /* NB: input+output filled in by ether_ifattach */ - IFQ_SET_MAXLEN(&ifp->if_snd, IFQ_MAXLEN); - ifp->if_snd.ifq_drv_maxlen = IFQ_MAXLEN; + IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen); + ifp->if_snd.ifq_drv_maxlen = ifqmaxlen; IFQ_SET_READY(&ifp->if_snd); vap->iv_ifp = ifp; diff --git a/sys/net80211/ieee80211_amrr.c b/sys/net80211/ieee80211_amrr.c index 61110586999..7902545c280 100644 --- a/sys/net80211/ieee80211_amrr.c +++ b/sys/net80211/ieee80211_amrr.c @@ -136,16 +136,16 @@ amrr_node_init(struct ieee80211_node *ni) struct ieee80211_amrr *amrr = vap->iv_rs; struct ieee80211_amrr_node *amn; - KASSERT(ni->ni_rctls == NULL, ("%s: ni_rctls already initialized", - __func__)); - - ni->ni_rctls = amn = malloc(sizeof(struct ieee80211_amrr_node), - M_80211_RATECTL, M_NOWAIT|M_ZERO); - if (amn == NULL) { - if_printf(vap->iv_ifp, "couldn't alloc per-node ratectl " - 
"structure\n"); - return; - } + if (ni->ni_rctls == NULL) { + ni->ni_rctls = amn = malloc(sizeof(struct ieee80211_amrr_node), + M_80211_RATECTL, M_NOWAIT|M_ZERO); + if (amn == NULL) { + if_printf(vap->iv_ifp, "couldn't alloc per-node ratectl " + "structure\n"); + return; + } + } else + amn = ni->ni_rctls; amn->amn_amrr = amrr; amn->amn_success = 0; amn->amn_recovery = 0; diff --git a/sys/net80211/ieee80211_freebsd.h b/sys/net80211/ieee80211_freebsd.h index 861ac5153e3..abfc2c0a26e 100644 --- a/sys/net80211/ieee80211_freebsd.h +++ b/sys/net80211/ieee80211_freebsd.h @@ -147,6 +147,16 @@ typedef struct mtx acl_lock_t; #define ACL_LOCK_ASSERT(_as) \ mtx_assert((&(_as)->as_lock), MA_OWNED) +/* + * Scan table definitions. + */ +typedef struct mtx ieee80211_scan_table_lock_t; +#define IEEE80211_SCAN_TABLE_LOCK_INIT(_st, _name) \ + mtx_init(&(_st)->st_lock, _name, "802.11 scan table", MTX_DEF) +#define IEEE80211_SCAN_TABLE_LOCK_DESTROY(_st) mtx_destroy(&(_st)->st_lock) +#define IEEE80211_SCAN_TABLE_LOCK(_st) mtx_lock(&(_st)->st_lock) +#define IEEE80211_SCAN_TABLE_UNLOCK(_st) mtx_unlock(&(_st)->st_lock) + /* * Node reference counting definitions. 
* diff --git a/sys/net80211/ieee80211_ht.h b/sys/net80211/ieee80211_ht.h index 7b0eab7ace1..9cdc1ea9a4d 100644 --- a/sys/net80211/ieee80211_ht.h +++ b/sys/net80211/ieee80211_ht.h @@ -142,7 +142,8 @@ struct ieee80211_rx_ampdu { int rxa_age; /* age of oldest frame in window */ int rxa_nframes; /* frames since ADDBA */ struct mbuf *rxa_m[IEEE80211_AGGR_BAWMAX]; - uint64_t rxa_pad[4]; + void *rxa_private; + uint64_t rxa_pad[3]; }; void ieee80211_ht_attach(struct ieee80211com *); diff --git a/sys/net80211/ieee80211_node.c b/sys/net80211/ieee80211_node.c index b17f42fd51a..992ea0f314c 100644 --- a/sys/net80211/ieee80211_node.c +++ b/sys/net80211/ieee80211_node.c @@ -432,7 +432,7 @@ ieee80211_reset_bss(struct ieee80211vap *vap) ieee80211_reset_erp(ic); ni = ieee80211_alloc_node(&ic->ic_sta, vap, vap->iv_myaddr); - KASSERT(ni != NULL, ("unable to setup inital BSS node")); + KASSERT(ni != NULL, ("unable to setup initial BSS node")); obss = vap->iv_bss; vap->iv_bss = ieee80211_ref_node(ni); if (obss != NULL) { diff --git a/sys/net80211/ieee80211_rssadapt.c b/sys/net80211/ieee80211_rssadapt.c index ad329e0470d..f82c01ee930 100644 --- a/sys/net80211/ieee80211_rssadapt.c +++ b/sys/net80211/ieee80211_rssadapt.c @@ -169,13 +169,17 @@ rssadapt_node_init(struct ieee80211_node *ni) struct ieee80211_rssadapt *rsa = vap->iv_rs; const struct ieee80211_rateset *rs = &ni->ni_rates; - ni->ni_rctls = ra = malloc(sizeof(struct ieee80211_rssadapt_node), - M_80211_RATECTL, M_NOWAIT|M_ZERO); - if (ra == NULL) { - if_printf(vap->iv_ifp, "couldn't alloc per-node ratectl " - "structure\n"); - return; - } + if (ni->ni_rctls == NULL) { + ni->ni_rctls = ra = + malloc(sizeof(struct ieee80211_rssadapt_node), + M_80211_RATECTL, M_NOWAIT|M_ZERO); + if (ra == NULL) { + if_printf(vap->iv_ifp, "couldn't alloc per-node ratectl " + "structure\n"); + return; + } + } else + ra = ni->ni_rctls; ra->ra_rs = rsa; ra->ra_rates = *rs; rssadapt_updatestats(ra); diff --git a/sys/net80211/ieee80211_scan_sta.c 
b/sys/net80211/ieee80211_scan_sta.c index e697ad4a1a2..a4808e29671 100644 --- a/sys/net80211/ieee80211_scan_sta.c +++ b/sys/net80211/ieee80211_scan_sta.c @@ -97,7 +97,7 @@ struct sta_entry { CTASSERT(MAX_IEEE_CHAN >= 256); struct sta_table { - struct mtx st_lock; /* on scan table */ + ieee80211_scan_table_lock_t st_lock; /* on scan table */ TAILQ_HEAD(, sta_entry) st_entry; /* all entries */ LIST_HEAD(, sta_entry) st_hash[STA_HASHSIZE]; struct mtx st_scanlock; /* on st_scaniter */ @@ -161,7 +161,7 @@ sta_attach(struct ieee80211_scan_state *ss) M_80211_SCAN, M_NOWAIT | M_ZERO); if (st == NULL) return 0; - mtx_init(&st->st_lock, "scantable", "802.11 scan table", MTX_DEF); + IEEE80211_SCAN_TABLE_LOCK_INIT(st, "scantable"); mtx_init(&st->st_scanlock, "scangen", "802.11 scangen", MTX_DEF); TAILQ_INIT(&st->st_entry); ss->ss_priv = st; @@ -179,7 +179,7 @@ sta_detach(struct ieee80211_scan_state *ss) if (st != NULL) { sta_flush_table(st); - mtx_destroy(&st->st_lock); + IEEE80211_SCAN_TABLE_LOCK_DESTROY(st); mtx_destroy(&st->st_scanlock); free(st, M_80211_SCAN); KASSERT(nrefs > 0, ("imbalanced attach/detach")); @@ -196,9 +196,9 @@ sta_flush(struct ieee80211_scan_state *ss) { struct sta_table *st = ss->ss_priv; - mtx_lock(&st->st_lock); + IEEE80211_SCAN_TABLE_LOCK(st); sta_flush_table(st); - mtx_unlock(&st->st_lock); + IEEE80211_SCAN_TABLE_UNLOCK(st); ss->ss_last = 0; return 0; } @@ -244,14 +244,14 @@ sta_add(struct ieee80211_scan_state *ss, hash = STA_HASH(macaddr); - mtx_lock(&st->st_lock); + IEEE80211_SCAN_TABLE_LOCK(st); LIST_FOREACH(se, &st->st_hash[hash], se_hash) if (IEEE80211_ADDR_EQ(se->base.se_macaddr, macaddr)) goto found; se = (struct sta_entry *) malloc(sizeof(struct sta_entry), M_80211_SCAN, M_NOWAIT | M_ZERO); if (se == NULL) { - mtx_unlock(&st->st_lock); + IEEE80211_SCAN_TABLE_UNLOCK(st); return 0; } se->se_scangen = st->st_scaniter-1; @@ -370,7 +370,7 @@ found: if (rssi > st->st_maxrssi[sp->bchan]) st->st_maxrssi[sp->bchan] = rssi; - mtx_unlock(&st->st_lock); 
+ IEEE80211_SCAN_TABLE_UNLOCK(st); /* * If looking for a quick choice and nothing's @@ -1132,7 +1132,7 @@ sta_update_notseen(struct sta_table *st) { struct sta_entry *se; - mtx_lock(&st->st_lock); + IEEE80211_SCAN_TABLE_LOCK(st); TAILQ_FOREACH(se, &st->st_entry, se_list) { /* * If seen the reset and don't bump the count; @@ -1146,7 +1146,7 @@ sta_update_notseen(struct sta_table *st) else se->se_notseen++; } - mtx_unlock(&st->st_lock); + IEEE80211_SCAN_TABLE_UNLOCK(st); } static void @@ -1154,11 +1154,11 @@ sta_dec_fails(struct sta_table *st) { struct sta_entry *se; - mtx_lock(&st->st_lock); + IEEE80211_SCAN_TABLE_LOCK(st); TAILQ_FOREACH(se, &st->st_entry, se_list) if (se->se_fails) se->se_fails--; - mtx_unlock(&st->st_lock); + IEEE80211_SCAN_TABLE_UNLOCK(st); } static struct sta_entry * @@ -1169,7 +1169,7 @@ select_bss(struct ieee80211_scan_state *ss, struct ieee80211vap *vap, int debug) IEEE80211_DPRINTF(vap, debug, " %s\n", "macaddr bssid chan rssi rate flag wep essid"); - mtx_lock(&st->st_lock); + IEEE80211_SCAN_TABLE_LOCK(st); TAILQ_FOREACH(se, &st->st_entry, se_list) { ieee80211_ies_expand(&se->base.se_ies); if (match_bss(vap, ss, se, debug) == 0) { @@ -1179,7 +1179,7 @@ select_bss(struct ieee80211_scan_state *ss, struct ieee80211vap *vap, int debug) selbs = se; } } - mtx_unlock(&st->st_lock); + IEEE80211_SCAN_TABLE_UNLOCK(st); return selbs; } @@ -1258,11 +1258,11 @@ sta_lookup(struct sta_table *st, const uint8_t macaddr[IEEE80211_ADDR_LEN]) struct sta_entry *se; int hash = STA_HASH(macaddr); - mtx_lock(&st->st_lock); + IEEE80211_SCAN_TABLE_LOCK(st); LIST_FOREACH(se, &st->st_hash[hash], se_hash) if (IEEE80211_ADDR_EQ(se->base.se_macaddr, macaddr)) break; - mtx_unlock(&st->st_lock); + IEEE80211_SCAN_TABLE_UNLOCK(st); return se; /* NB: unlocked */ } @@ -1382,18 +1382,18 @@ sta_iterate(struct ieee80211_scan_state *ss, mtx_lock(&st->st_scanlock); gen = st->st_scaniter++; restart: - mtx_lock(&st->st_lock); + IEEE80211_SCAN_TABLE_LOCK(st); TAILQ_FOREACH(se, 
&st->st_entry, se_list) { if (se->se_scangen != gen) { se->se_scangen = gen; /* update public state */ se->base.se_age = ticks - se->se_lastupdate; - mtx_unlock(&st->st_lock); + IEEE80211_SCAN_TABLE_UNLOCK(st); (*f)(arg, &se->base); goto restart; } } - mtx_unlock(&st->st_lock); + IEEE80211_SCAN_TABLE_UNLOCK(st); mtx_unlock(&st->st_scanlock); } @@ -1510,7 +1510,7 @@ adhoc_pick_channel(struct ieee80211_scan_state *ss, int flags) bestchan = NULL; bestrssi = -1; - mtx_lock(&st->st_lock); + IEEE80211_SCAN_TABLE_LOCK(st); for (i = 0; i < ss->ss_last; i++) { c = ss->ss_chans[i]; /* never consider a channel with radar */ @@ -1532,7 +1532,7 @@ adhoc_pick_channel(struct ieee80211_scan_state *ss, int flags) if (bestchan == NULL || maxrssi < bestrssi) bestchan = c; } - mtx_unlock(&st->st_lock); + IEEE80211_SCAN_TABLE_UNLOCK(st); return bestchan; } @@ -1638,7 +1638,7 @@ adhoc_age(struct ieee80211_scan_state *ss) struct sta_table *st = ss->ss_priv; struct sta_entry *se, *next; - mtx_lock(&st->st_lock); + IEEE80211_SCAN_TABLE_LOCK(st); TAILQ_FOREACH_SAFE(se, &st->st_entry, se_list, next) { if (se->se_notseen > STA_PURGE_SCANS) { TAILQ_REMOVE(&st->st_entry, se, se_list); @@ -1647,7 +1647,7 @@ adhoc_age(struct ieee80211_scan_state *ss) free(se, M_80211_SCAN); } } - mtx_unlock(&st->st_lock); + IEEE80211_SCAN_TABLE_UNLOCK(st); } static const struct ieee80211_scanner adhoc_default = { diff --git a/sys/net80211/ieee80211_var.h b/sys/net80211/ieee80211_var.h index ff3694f56a5..eea8dbe2855 100644 --- a/sys/net80211/ieee80211_var.h +++ b/sys/net80211/ieee80211_var.h @@ -213,6 +213,8 @@ struct ieee80211com { enum ieee80211_protmode ic_htprotmode; /* HT protection mode */ int ic_lastnonerp; /* last time non-ERP sta noted*/ int ic_lastnonht; /* last time non-HT sta noted */ + uint8_t ic_rxstream; /* # RX streams */ + uint8_t ic_txstream; /* # TX streams */ /* optional state for Atheros SuperG protocol extensions */ struct ieee80211_superg *ic_superg; @@ -315,7 +317,6 @@ struct ieee80211com { 
void (*ic_ampdu_rx_stop)(struct ieee80211_node *, struct ieee80211_rx_ampdu *); uint64_t ic_spare[7]; - uint32_t ic_spare2; }; struct ieee80211_aclator; diff --git a/sys/netgraph/ng_base.c b/sys/netgraph/ng_base.c index fdfe878dbc9..2a93e588820 100644 --- a/sys/netgraph/ng_base.c +++ b/sys/netgraph/ng_base.c @@ -3067,28 +3067,42 @@ ng_mod_event(module_t mod, int event, void *data) static void vnet_netgraph_uninit(const void *unused __unused) { -#if 0 - node_p node, last_killed = NULL; + node_p node = NULL, last_killed = NULL; + int i; - /* XXXRW: utterly bogus. */ - while ((node = LIST_FIRST(&V_ng_allnodes)) != NULL) { - if (node == last_killed) { - /* This should never happen */ - node->nd_flags |= NGF_REALLY_DIE; - printf("netgraph node %s needs NGF_REALLY_DIE\n", - node->nd_name); - ng_rmnode(node, NULL, NULL, 0); - /* This must never happen */ - if (node == LIST_FIRST(&V_ng_allnodes)) - panic("netgraph node %s won't die", - node->nd_name); + do { + /* Find a node to kill */ + mtx_lock(&ng_namehash_mtx); + for (i = 0; i < NG_NAME_HASH_SIZE; i++) { + LIST_FOREACH(node, &V_ng_name_hash[i], nd_nodes) { + if (node != &ng_deadnode) { + NG_NODE_REF(node); + break; + } + } + if (node != NULL) + break; } - ng_rmnode(node, NULL, NULL, 0); - last_killed = node; - } -#endif + mtx_unlock(&ng_namehash_mtx); + + /* Attempt to kill it only if it is a regular node */ + if (node != NULL) { + if (node == last_killed) { + /* This should never happen */ + printf("ng node %s needs " + "NGF_REALLY_DIE\n", node->nd_name); + if (node->nd_flags & NGF_REALLY_DIE) + panic("ng node %s won't die", + node->nd_name); + node->nd_flags |= NGF_REALLY_DIE; + } + ng_rmnode(node, NULL, NULL, 0); + NG_NODE_UNREF(node); + last_killed = node; + } + } while (node != NULL); } -VNET_SYSUNINIT(vnet_netgraph_uninit, SI_SUB_NETGRAPH, SI_ORDER_ANY, +VNET_SYSUNINIT(vnet_netgraph_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, vnet_netgraph_uninit, NULL); #endif /* VIMAGE */ diff --git 
a/sys/netgraph/ng_bridge.c b/sys/netgraph/ng_bridge.c index 35e13927812..0bf08662353 100644 --- a/sys/netgraph/ng_bridge.c +++ b/sys/netgraph/ng_bridge.c @@ -84,7 +84,7 @@ #include #ifdef NG_SEPARATE_MALLOC -MALLOC_DEFINE(M_NETGRAPH_BRIDGE, "netgraph_bridge", "netgraph bridge node "); +MALLOC_DEFINE(M_NETGRAPH_BRIDGE, "netgraph_bridge", "netgraph bridge node"); #else #define M_NETGRAPH_BRIDGE M_NETGRAPH #endif @@ -106,6 +106,7 @@ struct ng_bridge_private { u_int numBuckets; /* num buckets in table */ u_int hashMask; /* numBuckets - 1 */ int numLinks; /* num connected links */ + int persistent; /* can exist w/o hooks */ struct callout timer; /* one second periodic timer */ }; typedef struct ng_bridge_private *priv_p; @@ -271,6 +272,13 @@ static const struct ng_cmdlist ng_bridge_cmdlist[] = { NULL, &ng_bridge_host_ary_type }, + { + NGM_BRIDGE_COOKIE, + NGM_BRIDGE_SET_PERSISTENT, + "setpersistent", + NULL, + NULL + }, { 0 } }; @@ -495,6 +503,11 @@ ng_bridge_rcvmsg(node_p node, item_p item, hook_p lasthook) } break; } + case NGM_BRIDGE_SET_PERSISTENT: + { + priv->persistent = 1; + break; + } default: error = EINVAL; break; @@ -800,7 +813,8 @@ ng_bridge_disconnect(hook_p hook) /* If no more hooks, go away */ if ((NG_NODE_NUMHOOKS(NG_HOOK_NODE(hook)) == 0) - && (NG_NODE_IS_VALID(NG_HOOK_NODE(hook)))) { + && (NG_NODE_IS_VALID(NG_HOOK_NODE(hook))) + && !priv->persistent) { ng_rmnode_self(NG_HOOK_NODE(hook)); } return (0); diff --git a/sys/netgraph/ng_bridge.h b/sys/netgraph/ng_bridge.h index 0524a96cb36..c34d27d9966 100644 --- a/sys/netgraph/ng_bridge.h +++ b/sys/netgraph/ng_bridge.h @@ -149,6 +149,7 @@ enum { NGM_BRIDGE_CLR_STATS, /* clear link stats */ NGM_BRIDGE_GETCLR_STATS, /* atomically get & clear link stats */ NGM_BRIDGE_GET_TABLE, /* get link table */ + NGM_BRIDGE_SET_PERSISTENT, /* set persistent mode */ }; #endif /* _NETGRAPH_NG_BRIDGE_H_ */ diff --git a/sys/netgraph/ng_eiface.c b/sys/netgraph/ng_eiface.c index 72cc55caae2..4239328967e 100644 --- 
a/sys/netgraph/ng_eiface.c +++ b/sys/netgraph/ng_eiface.c @@ -369,7 +369,7 @@ ng_eiface_constructor(node_p node) ifp->if_output = ether_output; ifp->if_start = ng_eiface_start; ifp->if_ioctl = ng_eiface_ioctl; - ifp->if_snd.ifq_maxlen = IFQ_MAXLEN; + ifp->if_snd.ifq_maxlen = ifqmaxlen; ifp->if_flags = (IFF_SIMPLEX | IFF_BROADCAST | IFF_MULTICAST); /* Give this node the same name as the interface (if possible) */ diff --git a/sys/netgraph/ng_fec.c b/sys/netgraph/ng_fec.c index 919947acab4..ce5a2d58f81 100644 --- a/sys/netgraph/ng_fec.c +++ b/sys/netgraph/ng_fec.c @@ -1227,7 +1227,7 @@ ng_fec_constructor(node_p node) ifp->if_start = ng_fec_start; ifp->if_ioctl = ng_fec_ioctl; ifp->if_init = ng_fec_init; - ifp->if_snd.ifq_maxlen = IFQ_MAXLEN; + ifp->if_snd.ifq_maxlen = ifqmaxlen; ifp->if_mtu = NG_FEC_MTU_DEFAULT; ifp->if_flags = (IFF_SIMPLEX|IFF_BROADCAST|IFF_MULTICAST); ifp->if_addrlen = 0; /* XXX */ diff --git a/sys/netgraph/ng_hub.c b/sys/netgraph/ng_hub.c index ba298e56b13..077074575b9 100644 --- a/sys/netgraph/ng_hub.c +++ b/sys/netgraph/ng_hub.c @@ -36,16 +36,46 @@ #include #include +#ifdef NG_SEPARATE_MALLOC +MALLOC_DEFINE(M_NETGRAPH_HUB, "netgraph_hub", "netgraph hub node"); +#else +#define M_NETGRAPH_HUB M_NETGRAPH +#endif + +/* Per-node private data */ +struct ng_hub_private { + int persistent; /* can exist w/o hooks */ +}; +typedef struct ng_hub_private *priv_p; + +/* Netgraph node methods */ static ng_constructor_t ng_hub_constructor; +static ng_rcvmsg_t ng_hub_rcvmsg; +static ng_shutdown_t ng_hub_shutdown; static ng_rcvdata_t ng_hub_rcvdata; static ng_disconnect_t ng_hub_disconnect; +/* List of commands and how to convert arguments to/from ASCII */ +static const struct ng_cmdlist ng_hub_cmdlist[] = { + { + NGM_HUB_COOKIE, + NGM_HUB_SET_PERSISTENT, + "setpersistent", + NULL, + NULL + }, + { 0 } +}; + static struct ng_type ng_hub_typestruct = { .version = NG_ABI_VERSION, .name = NG_HUB_NODE_TYPE, .constructor = ng_hub_constructor, + .rcvmsg = ng_hub_rcvmsg, 
+ .shutdown = ng_hub_shutdown, .rcvdata = ng_hub_rcvdata, .disconnect = ng_hub_disconnect, + .cmdlist = ng_hub_cmdlist, }; NETGRAPH_INIT(hub, &ng_hub_typestruct); @@ -53,10 +83,39 @@ NETGRAPH_INIT(hub, &ng_hub_typestruct); static int ng_hub_constructor(node_p node) { + priv_p priv; + /* Allocate and initialize private info */ + priv = malloc(sizeof(*priv), M_NETGRAPH_HUB, M_NOWAIT | M_ZERO); + if (priv == NULL) + return (ENOMEM); + + NG_NODE_SET_PRIVATE(node, priv); return (0); } +/* + * Receive a control message + */ +static int +ng_hub_rcvmsg(node_p node, item_p item, hook_p lasthook) +{ + const priv_p priv = NG_NODE_PRIVATE(node); + int error = 0; + struct ng_mesg *msg; + + NGI_GET_MSG(item, msg); + if (msg->header.typecookie == NGM_HUB_COOKIE && + msg->header.cmd == NGM_HUB_SET_PERSISTENT) { + priv->persistent = 1; + } else { + error = EINVAL; + } + + NG_FREE_MSG(msg); + return (error); +} + static int ng_hub_rcvdata(hook_p hook, item_p item) { @@ -89,12 +148,25 @@ ng_hub_rcvdata(hook_p hook, item_p item) return (error); } +/* + * Shutdown node + */ +static int +ng_hub_shutdown(node_p node) +{ + const priv_p priv = NG_NODE_PRIVATE(node); + + free(priv, M_NETGRAPH_HUB); + return (0); +} + static int ng_hub_disconnect(hook_p hook) { + const priv_p priv = NG_NODE_PRIVATE(NG_HOOK_NODE(hook)); if (NG_NODE_NUMHOOKS(NG_HOOK_NODE(hook)) == 0 && - NG_NODE_IS_VALID(NG_HOOK_NODE(hook))) + NG_NODE_IS_VALID(NG_HOOK_NODE(hook)) && !priv->persistent) ng_rmnode_self(NG_HOOK_NODE(hook)); return (0); } diff --git a/sys/netgraph/ng_hub.h b/sys/netgraph/ng_hub.h index a735d7b64df..d6b16e076c3 100644 --- a/sys/netgraph/ng_hub.h +++ b/sys/netgraph/ng_hub.h @@ -33,4 +33,9 @@ #define NG_HUB_NODE_TYPE "hub" #define NGM_HUB_COOKIE 1082189597 +/* Netgraph control messages */ +enum { + NGM_HUB_SET_PERSISTENT = 1, /* set persistent mode */ +}; + #endif /* _NETGRAPH_NG_HUB_H_ */ diff --git a/sys/netgraph/ng_iface.c b/sys/netgraph/ng_iface.c index d53bf768b10..f94fd14930f 100644 --- 
a/sys/netgraph/ng_iface.c +++ b/sys/netgraph/ng_iface.c @@ -564,8 +564,8 @@ ng_iface_constructor(node_p node) ifp->if_addrlen = 0; /* XXX */ ifp->if_hdrlen = 0; /* XXX */ ifp->if_baudrate = 64000; /* XXX */ - IFQ_SET_MAXLEN(&ifp->if_snd, IFQ_MAXLEN); - ifp->if_snd.ifq_drv_maxlen = IFQ_MAXLEN; + IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen); + ifp->if_snd.ifq_drv_maxlen = ifqmaxlen; IFQ_SET_READY(&ifp->if_snd); /* Give this node the same name as the interface (if possible) */ diff --git a/sys/netgraph/ng_ksocket.c b/sys/netgraph/ng_ksocket.c index 260eba90d7e..f6cbccca97c 100644 --- a/sys/netgraph/ng_ksocket.c +++ b/sys/netgraph/ng_ksocket.c @@ -1272,7 +1272,7 @@ ng_ksocket_finish_accept(priv_p priv) soupcall_set(so, SO_RCV, ng_ksocket_incoming, node); SOCKBUF_UNLOCK(&so->so_rcv); SOCKBUF_LOCK(&so->so_snd); - soupcall_set(so, SO_RCV, ng_ksocket_incoming, node); + soupcall_set(so, SO_SND, ng_ksocket_incoming, node); SOCKBUF_UNLOCK(&so->so_snd); /* Fill in the response data and send it or return it to the caller */ diff --git a/sys/netgraph/ng_pipe.c b/sys/netgraph/ng_pipe.c index a094646e660..a4546248dd9 100644 --- a/sys/netgraph/ng_pipe.c +++ b/sys/netgraph/ng_pipe.c @@ -816,14 +816,17 @@ pipe_dequeue(struct hookinfo *hinfo, struct timeval *now) { } /* Randomly discard the frame, according to BER setting */ - if (hinfo->cfg.ber && - ((oldrand = rand) ^ (rand = random())<<17) >= - hinfo->ber_p[priv->overhead + m->m_pkthdr.len] ) { - hinfo->stats.out_disc_frames++; - hinfo->stats.out_disc_octets += m->m_pkthdr.len; - uma_zfree(ngp_zone, ngp_h); - m_freem(m); - continue; + if (hinfo->cfg.ber) { + oldrand = rand; + rand = random(); + if (((oldrand ^ rand) << 17) >= + hinfo->ber_p[priv->overhead + m->m_pkthdr.len]) { + hinfo->stats.out_disc_frames++; + hinfo->stats.out_disc_octets += m->m_pkthdr.len; + uma_zfree(ngp_zone, ngp_h); + m_freem(m); + continue; + } } /* Discard frame if outbound queue size limit exceeded */ diff --git a/sys/netgraph/ng_tty.c b/sys/netgraph/ng_tty.c 
index 2fa33fc8005..84083170d8a 100644 --- a/sys/netgraph/ng_tty.c +++ b/sys/netgraph/ng_tty.c @@ -164,7 +164,7 @@ ngt_constructor(node_p node) sc->node = node; mtx_init(&sc->outq.ifq_mtx, "ng_tty node+queue", NULL, MTX_DEF); - IFQ_SET_MAXLEN(&sc->outq, IFQ_MAXLEN); + IFQ_SET_MAXLEN(&sc->outq, ifqmaxlen); return (0); } diff --git a/sys/netinet/icmp6.h b/sys/netinet/icmp6.h index c3a8ac7fcac..5faae7c1f2d 100644 --- a/sys/netinet/icmp6.h +++ b/sys/netinet/icmp6.h @@ -652,7 +652,6 @@ struct rtentry; struct rttimer; struct in6_multi; # endif -void icmp6_init(void); void icmp6_paramerror(struct mbuf *, int); void icmp6_error(struct mbuf *, int, int, int); void icmp6_error2(struct mbuf *, int, int, int, struct ifnet *); diff --git a/sys/netinet/if_ether.c b/sys/netinet/if_ether.c index 25fba9f10eb..08a162ff911 100644 --- a/sys/netinet/if_ether.c +++ b/sys/netinet/if_ether.c @@ -82,16 +82,15 @@ SYSCTL_DECL(_net_link_ether); SYSCTL_NODE(_net_link_ether, PF_INET, inet, CTLFLAG_RW, 0, ""); SYSCTL_NODE(_net_link_ether, PF_ARP, arp, CTLFLAG_RW, 0, ""); -VNET_DEFINE(int, useloopback) = 1; /* use loopback interface for - * local traffic */ - /* timer values */ static VNET_DEFINE(int, arpt_keep) = (20*60); /* once resolved, good for 20 * minutes */ +static VNET_DEFINE(int, arp_maxtries) = 5; +VNET_DEFINE(int, useloopback) = 1; /* use loopback interface for + * local traffic */ +static VNET_DEFINE(int, arp_proxyall) = 0; static VNET_DEFINE(int, arpt_down) = 20; /* keep incomplete entries for * 20 seconds */ -static VNET_DEFINE(int, arp_maxtries) = 5; -static VNET_DEFINE(int, arp_proxyall); static VNET_DEFINE(struct arpstat, arpstat); /* ARP statistics, see if_arp.h */ #define V_arpt_keep VNET(arpt_keep) @@ -103,7 +102,6 @@ static VNET_DEFINE(struct arpstat, arpstat); /* ARP statistics, see if_arp.h */ SYSCTL_VNET_INT(_net_link_ether_inet, OID_AUTO, max_age, CTLFLAG_RW, &VNET_NAME(arpt_keep), 0, "ARP entry lifetime in seconds"); - SYSCTL_VNET_INT(_net_link_ether_inet, OID_AUTO, 
maxtries, CTLFLAG_RW, &VNET_NAME(arp_maxtries), 0, "ARP resolution attempts before returning error"); diff --git a/sys/netinet/igmp.c b/sys/netinet/igmp.c index f9f6381f3fc..d4c8e99cdab 100644 --- a/sys/netinet/igmp.c +++ b/sys/netinet/igmp.c @@ -1468,12 +1468,6 @@ igmp_input(struct mbuf *m, int off) } ip = mtod(m, struct ip *); - if (ip->ip_ttl != 1) { - IGMPSTAT_INC(igps_rcv_badttl); - m_freem(m); - return; - } - /* * Validate checksum. */ @@ -1488,6 +1482,17 @@ igmp_input(struct mbuf *m, int off) m->m_data -= iphlen; m->m_len += iphlen; + /* + * IGMP control traffic is link-scope, and must have a TTL of 1. + * DVMRP traffic (e.g. mrinfo, mtrace) is an exception; + * probe packets may come from beyond the LAN. + */ + if (igmp->igmp_type != IGMP_DVMRP && ip->ip_ttl != 1) { + IGMPSTAT_INC(igps_rcv_badttl); + m_freem(m); + return; + } + switch (igmp->igmp_type) { case IGMP_HOST_MEMBERSHIP_QUERY: if (igmplen == IGMP_MINLEN) { diff --git a/sys/netinet/in.c b/sys/netinet/in.c index 0a0fe472590..96bfa0ea67d 100644 --- a/sys/netinet/in.c +++ b/sys/netinet/in.c @@ -77,20 +77,19 @@ static int in_ifinit(struct ifnet *, static void in_purgemaddrs(struct ifnet *); static VNET_DEFINE(int, subnetsarelocal); -static VNET_DEFINE(int, sameprefixcarponly); -VNET_DECLARE(struct inpcbinfo, ripcbinfo); - #define V_subnetsarelocal VNET(subnetsarelocal) -#define V_sameprefixcarponly VNET(sameprefixcarponly) -#define V_ripcbinfo VNET(ripcbinfo) - SYSCTL_VNET_INT(_net_inet_ip, OID_AUTO, subnets_are_local, CTLFLAG_RW, &VNET_NAME(subnetsarelocal), 0, "Treat all subnets as directly connected"); +static VNET_DEFINE(int, sameprefixcarponly); +#define V_sameprefixcarponly VNET(sameprefixcarponly) SYSCTL_VNET_INT(_net_inet_ip, OID_AUTO, same_prefix_carp_only, CTLFLAG_RW, &VNET_NAME(sameprefixcarponly), 0, "Refuse to create same prefixes on different interfaces"); +VNET_DECLARE(struct inpcbinfo, ripcbinfo); +#define V_ripcbinfo VNET(ripcbinfo) + /* * Return 1 if an internet address is for a 
``local'' host * (one to which we have a connection). If subnetsarelocal diff --git a/sys/netinet/in_gif.c b/sys/netinet/in_gif.c index 44b9961d44b..6c60390aa6a 100644 --- a/sys/netinet/in_gif.c +++ b/sys/netinet/in_gif.c @@ -85,6 +85,8 @@ struct protosw in_gif_protosw = { .pr_usrreqs = &rip_usrreqs }; +VNET_DEFINE(int, ip_gif_ttl) = GIF_TTL; +#define V_ip_gif_ttl VNET(ip_gif_ttl) SYSCTL_VNET_INT(_net_inet_ip, IPCTL_GIF_TTL, gifttl, CTLFLAG_RW, &VNET_NAME(ip_gif_ttl), 0, ""); diff --git a/sys/netinet/in_proto.c b/sys/netinet/in_proto.c index d9cab845aee..6d72fbe3da3 100644 --- a/sys/netinet/in_proto.c +++ b/sys/netinet/in_proto.c @@ -208,7 +208,6 @@ struct protosw inetsw[] = { .pr_flags = PR_ATOMIC|PR_ADDR|PR_LASTHDR, .pr_input = icmp_input, .pr_ctloutput = rip_ctloutput, - .pr_init = icmp_init, .pr_usrreqs = &rip_usrreqs }, { diff --git a/sys/netinet/in_rmx.c b/sys/netinet/in_rmx.c index 6516277d0c4..5b8e0f619c8 100644 --- a/sys/netinet/in_rmx.c +++ b/sys/netinet/in_rmx.c @@ -131,22 +131,22 @@ in_matroute(void *v_arg, struct radix_node_head *head) return rn; } -static VNET_DEFINE(int, rtq_reallyold); -static VNET_DEFINE(int, rtq_minreallyold); -static VNET_DEFINE(int, rtq_toomany); - +static VNET_DEFINE(int, rtq_reallyold) = 60*60; /* one hour is "really old" */ #define V_rtq_reallyold VNET(rtq_reallyold) -#define V_rtq_minreallyold VNET(rtq_minreallyold) -#define V_rtq_toomany VNET(rtq_toomany) - SYSCTL_VNET_INT(_net_inet_ip, IPCTL_RTEXPIRE, rtexpire, CTLFLAG_RW, &VNET_NAME(rtq_reallyold), 0, "Default expiration time on dynamically learned routes"); +/* never automatically crank down to less */ +static VNET_DEFINE(int, rtq_minreallyold) = 10; +#define V_rtq_minreallyold VNET(rtq_minreallyold) SYSCTL_VNET_INT(_net_inet_ip, IPCTL_RTMINEXPIRE, rtminexpire, CTLFLAG_RW, &VNET_NAME(rtq_minreallyold), 0, "Minimum time to attempt to hold onto dynamically learned routes"); +/* 128 cached routes is "too many" */ +static VNET_DEFINE(int, rtq_toomany) = 128; +#define 
V_rtq_toomany VNET(rtq_toomany) SYSCTL_VNET_INT(_net_inet_ip, IPCTL_RTMAXCACHE, rtmaxcache, CTLFLAG_RW, &VNET_NAME(rtq_toomany), 0, "Upper limit on dynamically learned routes"); @@ -239,7 +239,7 @@ in_rtqkill(struct radix_node *rn, void *rock) } #define RTQ_TIMEOUT 60*10 /* run no less than once every ten minutes */ -static VNET_DEFINE(int, rtq_timeout); +static VNET_DEFINE(int, rtq_timeout) = RTQ_TIMEOUT; static VNET_DEFINE(struct callout, rtq_timer); #define V_rtq_timeout VNET(rtq_timeout) @@ -362,11 +362,6 @@ in_inithead(void **head, int off) if (off == 0) /* XXX MRT see above */ return 1; /* only do the rest for a real routing table */ - V_rtq_reallyold = 60*60; /* one hour is "really old" */ - V_rtq_minreallyold = 10; /* never automatically crank down to less */ - V_rtq_toomany = 128; /* 128 cached routes is "too many" */ - V_rtq_timeout = RTQ_TIMEOUT; - rnh = *head; rnh->rnh_addaddr = in_addroute; rnh->rnh_matchaddr = in_matroute; diff --git a/sys/netinet/ip_dummynet.h b/sys/netinet/ip_dummynet.h index 0bbc32638c9..dc2c3412a08 100644 --- a/sys/netinet/ip_dummynet.h +++ b/sys/netinet/ip_dummynet.h @@ -87,14 +87,14 @@ enum { DN_SYSCTL_SET, DN_LAST, -} ; +}; enum { /* subtype for schedulers, flowset and the like */ DN_SCHED_UNKNOWN = 0, DN_SCHED_FIFO = 1, DN_SCHED_WF2QP = 2, /* others are in individual modules */ -} ; +}; enum { /* user flags */ DN_HAVE_MASK = 0x0001, /* fs or sched has a mask */ @@ -113,16 +113,16 @@ enum { /* user flags */ struct dn_link { struct dn_id oid; - /* + /* * Userland sets bw and delay in bits/s and milliseconds. * The kernel converts this back and forth to bits/tick and ticks. * XXX what about burst ? - */ + */ int32_t link_nr; int bandwidth; /* bit/s or bits/tick. */ int delay; /* ms and ticks */ uint64_t burst; /* scaled. bits*Hz XXX */ -} ; +}; /* * A flowset, which is a template for flows. 
Contains parameters @@ -132,13 +132,13 @@ struct dn_link { */ struct dn_fs { struct dn_id oid; - uint32_t fs_nr; /* the flowset number */ - uint32_t flags; /* userland flags */ - int qsize ; /* queue size in slots or bytes */ - int32_t plr; /* PLR, pkt loss rate (2^31-1 means 100%) */ + uint32_t fs_nr; /* the flowset number */ + uint32_t flags; /* userland flags */ + int qsize; /* queue size in slots or bytes */ + int32_t plr; /* PLR, pkt loss rate (2^31-1 means 100%) */ uint32_t buckets; /* buckets used for the queue hash table */ - struct ipfw_flow_id flow_mask ; + struct ipfw_flow_id flow_mask; uint32_t sched_nr; /* the scheduler we attach to */ /* generic scheduler parameters. Leave them at -1 if unset. * Now we use 0: weight, 1: lmax, 2: priority @@ -149,14 +149,14 @@ struct dn_fs { * weight and probabilities are in the range 0..1 represented * in fixed point arithmetic with SCALE_RED decimal bits. */ -#define SCALE_RED 16 -#define SCALE(x) ( (x) << SCALE_RED ) -#define SCALE_VAL(x) ( (x) >> SCALE_RED ) -#define SCALE_MUL(x,y) ( ( (x) * (y) ) >> SCALE_RED ) - int w_q ; /* queue weight (scaled) */ - int max_th ; /* maximum threshold for queue (scaled) */ - int min_th ; /* minimum threshold for queue (scaled) */ - int max_p ; /* maximum value for p_b (scaled) */ +#define SCALE_RED 16 +#define SCALE(x) ( (x) << SCALE_RED ) +#define SCALE_VAL(x) ( (x) >> SCALE_RED ) +#define SCALE_MUL(x,y) ( ( (x) * (y) ) >> SCALE_RED ) + int w_q ; /* queue weight (scaled) */ + int max_th ; /* maximum threshold for queue (scaled) */ + int min_th ; /* minimum threshold for queue (scaled) */ + int max_p ; /* maximum value for p_b (scaled) */ }; @@ -177,10 +177,10 @@ struct dn_flow { }; - /* +/* * Scheduler template, mostly indicating the name, number, * sched_mask and buckets. 
- */ + */ struct dn_sch { struct dn_id oid; uint32_t sched_nr; /* N, scheduler number */ @@ -199,14 +199,14 @@ struct dn_sch { #define ED_MAX_SAMPLES_NO 1024 struct dn_profile { struct dn_id oid; - /* fields to simulate a delay profile */ + /* fields to simulate a delay profile */ #define ED_MAX_NAME_LEN 32 - char name[ED_MAX_NAME_LEN]; - int link_nr; - int loss_level; - int bandwidth; // XXX use link bandwidth? - int samples_no; /* actual length of samples[] */ - int samples[ED_MAX_SAMPLES_NO]; /* may be shorter */ + char name[ED_MAX_NAME_LEN]; + int link_nr; + int loss_level; + int bandwidth; // XXX use link bandwidth? + int samples_no; /* actual len of samples[] */ + int samples[ED_MAX_SAMPLES_NO]; /* may be shorter */ }; diff --git a/sys/netinet/ip_icmp.c b/sys/netinet/ip_icmp.c index b13bc7ca827..8dc08d9336c 100644 --- a/sys/netinet/ip_icmp.c +++ b/sys/netinet/ip_icmp.c @@ -75,65 +75,60 @@ __FBSDID("$FreeBSD$"); * routines to turnaround packets back to the originator, and * host table maintenance routines. 
*/ - VNET_DEFINE(struct icmpstat, icmpstat); -static VNET_DEFINE(int, icmpmaskrepl); -static VNET_DEFINE(u_int, icmpmaskfake); -static VNET_DEFINE(int, drop_redirect); -static VNET_DEFINE(int, log_redirect); -static VNET_DEFINE(int, icmplim); -static VNET_DEFINE(int, icmplim_output); -static VNET_DEFINE(char, reply_src[IFNAMSIZ]); -static VNET_DEFINE(int, icmp_rfi); -static VNET_DEFINE(int, icmp_quotelen); -static VNET_DEFINE(int, icmpbmcastecho); - -#define V_icmpmaskrepl VNET(icmpmaskrepl) -#define V_icmpmaskfake VNET(icmpmaskfake) -#define V_drop_redirect VNET(drop_redirect) -#define V_log_redirect VNET(log_redirect) -#define V_icmplim VNET(icmplim) -#define V_icmplim_output VNET(icmplim_output) -#define V_reply_src VNET(reply_src) -#define V_icmp_rfi VNET(icmp_rfi) -#define V_icmp_quotelen VNET(icmp_quotelen) -#define V_icmpbmcastecho VNET(icmpbmcastecho) - SYSCTL_VNET_STRUCT(_net_inet_icmp, ICMPCTL_STATS, stats, CTLFLAG_RW, &VNET_NAME(icmpstat), icmpstat, ""); +static VNET_DEFINE(int, icmpmaskrepl) = 0; +#define V_icmpmaskrepl VNET(icmpmaskrepl) SYSCTL_VNET_INT(_net_inet_icmp, ICMPCTL_MASKREPL, maskrepl, CTLFLAG_RW, &VNET_NAME(icmpmaskrepl), 0, "Reply to ICMP Address Mask Request packets."); +static VNET_DEFINE(u_int, icmpmaskfake) = 0; +#define V_icmpmaskfake VNET(icmpmaskfake) SYSCTL_VNET_UINT(_net_inet_icmp, OID_AUTO, maskfake, CTLFLAG_RW, &VNET_NAME(icmpmaskfake), 0, "Fake reply to ICMP Address Mask Request packets."); +static VNET_DEFINE(int, drop_redirect) = 0; +#define V_drop_redirect VNET(drop_redirect) SYSCTL_VNET_INT(_net_inet_icmp, OID_AUTO, drop_redirect, CTLFLAG_RW, &VNET_NAME(drop_redirect), 0, "Ignore ICMP redirects"); +static VNET_DEFINE(int, log_redirect) = 0; +#define V_log_redirect VNET(log_redirect) SYSCTL_VNET_INT(_net_inet_icmp, OID_AUTO, log_redirect, CTLFLAG_RW, &VNET_NAME(log_redirect), 0, "Log ICMP redirects to the console"); +static VNET_DEFINE(int, icmplim) = 200; +#define V_icmplim VNET(icmplim) SYSCTL_VNET_INT(_net_inet_icmp, 
ICMPCTL_ICMPLIM, icmplim, CTLFLAG_RW, &VNET_NAME(icmplim), 0, "Maximum number of ICMP responses per second"); +static VNET_DEFINE(int, icmplim_output) = 1; +#define V_icmplim_output VNET(icmplim_output) SYSCTL_VNET_INT(_net_inet_icmp, OID_AUTO, icmplim_output, CTLFLAG_RW, &VNET_NAME(icmplim_output), 0, "Enable rate limiting of ICMP responses"); +static VNET_DEFINE(char, reply_src[IFNAMSIZ]); +#define V_reply_src VNET(reply_src) SYSCTL_VNET_STRING(_net_inet_icmp, OID_AUTO, reply_src, CTLFLAG_RW, &VNET_NAME(reply_src), IFNAMSIZ, "icmp reply source for non-local packets."); +static VNET_DEFINE(int, icmp_rfi) = 0; +#define V_icmp_rfi VNET(icmp_rfi) SYSCTL_VNET_INT(_net_inet_icmp, OID_AUTO, reply_from_interface, CTLFLAG_RW, &VNET_NAME(icmp_rfi), 0, "ICMP reply from incoming interface for non-local packets"); +static VNET_DEFINE(int, icmp_quotelen) = 8; +#define V_icmp_quotelen VNET(icmp_quotelen) SYSCTL_VNET_INT(_net_inet_icmp, OID_AUTO, quotelen, CTLFLAG_RW, &VNET_NAME(icmp_quotelen), 0, "Number of bytes from original packet to quote in ICMP reply"); @@ -141,7 +136,8 @@ SYSCTL_VNET_INT(_net_inet_icmp, OID_AUTO, quotelen, CTLFLAG_RW, /* * ICMP broadcast echo sysctl */ - +static VNET_DEFINE(int, icmpbmcastecho) = 0; +#define V_icmpbmcastecho VNET(icmpbmcastecho) SYSCTL_VNET_INT(_net_inet_icmp, OID_AUTO, bmcastecho, CTLFLAG_RW, &VNET_NAME(icmpbmcastecho), 0, ""); @@ -156,21 +152,6 @@ static void icmp_send(struct mbuf *, struct mbuf *); extern struct protosw inetsw[]; -void -icmp_init(void) -{ - - V_icmpmaskrepl = 0; - V_icmpmaskfake = 0; - V_drop_redirect = 0; - V_log_redirect = 0; - V_icmplim = 200; - V_icmplim_output = 1; - V_icmp_rfi = 0; - V_icmp_quotelen = 8; - V_icmpbmcastecho = 0; -} - /* * Kernel module interface for updating icmpstat. The argument is an index * into icmpstat treated as an array of u_long. 
While this encodes the diff --git a/sys/netinet/ip_icmp.h b/sys/netinet/ip_icmp.h index e4ee7f7402c..9cabdb58c57 100644 --- a/sys/netinet/ip_icmp.h +++ b/sys/netinet/ip_icmp.h @@ -208,7 +208,6 @@ struct icmp { #ifdef _KERNEL void icmp_error(struct mbuf *, int, int, uint32_t, int); void icmp_input(struct mbuf *, int); -void icmp_init(void); int ip_next_mtu(int, int); #endif diff --git a/sys/netinet/ip_input.c b/sys/netinet/ip_input.c index 8be51fb992d..a17907c397c 100644 --- a/sys/netinet/ip_input.c +++ b/sys/netinet/ip_input.c @@ -89,66 +89,40 @@ __FBSDID("$FreeBSD$"); CTASSERT(sizeof(struct ip) == 20); #endif -static VNET_DEFINE(int, ipsendredirects) = 1; /* XXX */ -static VNET_DEFINE(int, ip_checkinterface); -static VNET_DEFINE(int, ip_keepfaith); -static VNET_DEFINE(int, ip_sendsourcequench); - -#define V_ipsendredirects VNET(ipsendredirects) -#define V_ip_checkinterface VNET(ip_checkinterface) -#define V_ip_keepfaith VNET(ip_keepfaith) -#define V_ip_sendsourcequench VNET(ip_sendsourcequench) - -VNET_DEFINE(int, ip_defttl) = IPDEFTTL; -VNET_DEFINE(int, ip_do_randomid); -VNET_DEFINE(int, ipforwarding); - -VNET_DEFINE(struct in_ifaddrhead, in_ifaddrhead); /* first inet address */ -VNET_DEFINE(struct in_ifaddrhashhead *, in_ifaddrhashtbl); /* inet addr hash table */ -VNET_DEFINE(u_long, in_ifaddrhmask); /* mask for hash table */ -VNET_DEFINE(struct ipstat, ipstat); - -static VNET_DEFINE(int, ip_rsvp_on); -VNET_DEFINE(struct socket *, ip_rsvpd); -VNET_DEFINE(int, rsvp_on); - -#define V_ip_rsvp_on VNET(ip_rsvp_on) - -static VNET_DEFINE(TAILQ_HEAD(ipqhead, ipq), ipq[IPREASS_NHASH]); -static VNET_DEFINE(int, maxnipq); /* Administrative limit on # reass queues. 
*/ -static VNET_DEFINE(int, maxfragsperpacket); -static VNET_DEFINE(int, nipq); /* Total # of reass queues */ - -#define V_ipq VNET(ipq) -#define V_maxnipq VNET(maxnipq) -#define V_maxfragsperpacket VNET(maxfragsperpacket) -#define V_nipq VNET(nipq) - -VNET_DEFINE(int, ipstealth); - struct rwlock in_ifaddr_lock; RW_SYSINIT(in_ifaddr_lock, &in_ifaddr_lock, "in_ifaddr_lock"); +VNET_DEFINE(int, rsvp_on); + +VNET_DEFINE(int, ipforwarding); SYSCTL_VNET_INT(_net_inet_ip, IPCTL_FORWARDING, forwarding, CTLFLAG_RW, &VNET_NAME(ipforwarding), 0, "Enable IP forwarding between interfaces"); +static VNET_DEFINE(int, ipsendredirects) = 1; /* XXX */ +#define V_ipsendredirects VNET(ipsendredirects) SYSCTL_VNET_INT(_net_inet_ip, IPCTL_SENDREDIRECTS, redirect, CTLFLAG_RW, &VNET_NAME(ipsendredirects), 0, "Enable sending IP redirects"); +VNET_DEFINE(int, ip_defttl) = IPDEFTTL; SYSCTL_VNET_INT(_net_inet_ip, IPCTL_DEFTTL, ttl, CTLFLAG_RW, &VNET_NAME(ip_defttl), 0, "Maximum TTL on IP packets"); +static VNET_DEFINE(int, ip_keepfaith); +#define V_ip_keepfaith VNET(ip_keepfaith) SYSCTL_VNET_INT(_net_inet_ip, IPCTL_KEEPFAITH, keepfaith, CTLFLAG_RW, &VNET_NAME(ip_keepfaith), 0, "Enable packet capture for FAITH IPv4->IPv6 translater daemon"); +static VNET_DEFINE(int, ip_sendsourcequench); +#define V_ip_sendsourcequench VNET(ip_sendsourcequench) SYSCTL_VNET_INT(_net_inet_ip, OID_AUTO, sendsourcequench, CTLFLAG_RW, &VNET_NAME(ip_sendsourcequench), 0, "Enable the transmission of source quench packets"); +VNET_DEFINE(int, ip_do_randomid); SYSCTL_VNET_INT(_net_inet_ip, OID_AUTO, random_id, CTLFLAG_RW, &VNET_NAME(ip_do_randomid), 0, "Assign random ip_id values"); @@ -166,6 +140,8 @@ SYSCTL_VNET_INT(_net_inet_ip, OID_AUTO, random_id, CTLFLAG_RW, * to the loopback interface instead of the interface where the * packets for those addresses are received. 
*/ +static VNET_DEFINE(int, ip_checkinterface); +#define V_ip_checkinterface VNET(ip_checkinterface) SYSCTL_VNET_INT(_net_inet_ip, OID_AUTO, check_interface, CTLFLAG_RW, &VNET_NAME(ip_checkinterface), 0, "Verify packet arrives on correct interface"); @@ -182,16 +158,22 @@ static struct netisr_handler ip_nh = { extern struct domain inetdomain; extern struct protosw inetsw[]; u_char ip_protox[IPPROTO_MAX]; +VNET_DEFINE(struct in_ifaddrhead, in_ifaddrhead); /* first inet address */ +VNET_DEFINE(struct in_ifaddrhashhead *, in_ifaddrhashtbl); /* inet addr hash table */ +VNET_DEFINE(u_long, in_ifaddrhmask); /* mask for hash table */ +VNET_DEFINE(struct ipstat, ipstat); SYSCTL_VNET_STRUCT(_net_inet_ip, IPCTL_STATS, stats, CTLFLAG_RW, &VNET_NAME(ipstat), ipstat, "IP statistics (struct ipstat, netinet/ip_var.h)"); static VNET_DEFINE(uma_zone_t, ipq_zone); -#define V_ipq_zone VNET(ipq_zone) - +static VNET_DEFINE(TAILQ_HEAD(ipqhead, ipq), ipq[IPREASS_NHASH]); static struct mtx ipqlock; +#define V_ipq_zone VNET(ipq_zone) +#define V_ipq VNET(ipq) + #define IPQ_LOCK() mtx_lock(&ipqlock) #define IPQ_UNLOCK() mtx_unlock(&ipqlock) #define IPQ_LOCK_INIT() mtx_init(&ipqlock, "ipqlock", NULL, MTX_DEF) @@ -201,10 +183,16 @@ static void maxnipq_update(void); static void ipq_zone_change(void *); static void ip_drain_locked(void); +static VNET_DEFINE(int, maxnipq); /* Administrative limit on # reass queues. 
*/ +static VNET_DEFINE(int, nipq); /* Total # of reass queues */ +#define V_maxnipq VNET(maxnipq) +#define V_nipq VNET(nipq) SYSCTL_VNET_INT(_net_inet_ip, OID_AUTO, fragpackets, CTLFLAG_RD, &VNET_NAME(nipq), 0, "Current number of IPv4 fragment reassembly queue entries"); +static VNET_DEFINE(int, maxfragsperpacket); +#define V_maxfragsperpacket VNET(maxfragsperpacket) SYSCTL_VNET_INT(_net_inet_ip, OID_AUTO, maxfragsperpacket, CTLFLAG_RW, &VNET_NAME(maxfragsperpacket), 0, "Maximum number of IPv4 fragments allowed per packet"); @@ -217,6 +205,7 @@ SYSCTL_INT(_net_inet_ip, IPCTL_DEFMTU, mtu, CTLFLAG_RW, #endif #ifdef IPSTEALTH +VNET_DEFINE(int, ipstealth); SYSCTL_VNET_INT(_net_inet_ip, OID_AUTO, stealth, CTLFLAG_RW, &VNET_NAME(ipstealth), 0, "IP stealth mode, no TTL decrementation on forwarding"); @@ -1590,7 +1579,7 @@ ip_forward(struct mbuf *m, int srcrt) * If IPsec is configured for this path, * override any possibly mtu value set by ip_output. */ - mtu = ip_ipsec_mtu(m, mtu); + mtu = ip_ipsec_mtu(mcopy, mtu); #endif /* IPSEC */ /* * If the MTU was set before make sure we are below the @@ -1740,6 +1729,11 @@ makedummy: * locking. This code remains in ip_input.c as ip_mroute.c is optionally * compiled. 
*/ +static VNET_DEFINE(int, ip_rsvp_on); +VNET_DEFINE(struct socket *, ip_rsvpd); + +#define V_ip_rsvp_on VNET(ip_rsvp_on) + int ip_rsvp_init(struct socket *so) { diff --git a/sys/netinet/ip_var.h b/sys/netinet/ip_var.h index 389ad6ee316..4415001fc27 100644 --- a/sys/netinet/ip_var.h +++ b/sys/netinet/ip_var.h @@ -181,9 +181,13 @@ VNET_DECLARE(int, ipforwarding); /* ip forwarding */ #ifdef IPSTEALTH VNET_DECLARE(int, ipstealth); /* stealth forwarding */ #endif -VNET_DECLARE(int, rsvp_on); +extern u_char ip_protox[]; VNET_DECLARE(struct socket *, ip_rsvpd); /* reservation protocol daemon*/ VNET_DECLARE(struct socket *, ip_mrouter); /* multicast routing daemon */ +extern int (*legal_vif_num)(int); +extern u_long (*ip_mcast_src)(int); +VNET_DECLARE(int, rsvp_on); +extern struct pr_usrreqs rip_usrreqs; #define V_ipstat VNET(ipstat) #define V_ip_id VNET(ip_id) @@ -192,14 +196,9 @@ VNET_DECLARE(struct socket *, ip_mrouter); /* multicast routing daemon */ #ifdef IPSTEALTH #define V_ipstealth VNET(ipstealth) #endif -#define V_rsvp_on VNET(rsvp_on) #define V_ip_rsvpd VNET(ip_rsvpd) #define V_ip_mrouter VNET(ip_mrouter) - -extern u_char ip_protox[]; -extern int (*legal_vif_num)(int); -extern u_long (*ip_mcast_src)(int); -extern struct pr_usrreqs rip_usrreqs; +#define V_rsvp_on VNET(rsvp_on) void inp_freemoptions(struct ip_moptions *); int inp_getmoptions(struct inpcb *, struct sockopt *); diff --git a/sys/netinet/ipfw/dn_sched.h b/sys/netinet/ipfw/dn_sched.h index fe54b0205cf..b6bf24e466a 100644 --- a/sys/netinet/ipfw/dn_sched.h +++ b/sys/netinet/ipfw/dn_sched.h @@ -140,9 +140,9 @@ struct dn_alg { /* MSVC does not support initializers so we need this ugly macro */ #ifdef _WIN32 -#define _SI(fld) +#define _SI(fld) #else -#define _SI(fld) fld +#define _SI(fld) fld #endif /* diff --git a/sys/netinet/ipfw/dn_sched_rr.c b/sys/netinet/ipfw/dn_sched_rr.c index fc7be001b30..1bbd80057c3 100644 --- a/sys/netinet/ipfw/dn_sched_rr.c +++ b/sys/netinet/ipfw/dn_sched_rr.c @@ -94,7 +94,7 @@ 
rr_remove_head(struct rr_si *si) if (si->head == NULL) return; /* empty queue */ si->head->status = 0; - + if (si->head == si->tail) { si->head = si->tail = NULL; return; @@ -141,7 +141,7 @@ next_pointer(struct rr_si *si) si->tail = si->tail->qnext; } -static int +static int rr_enqueue(struct dn_sch_inst *_si, struct dn_queue *q, struct mbuf *m) { struct rr_si *si; @@ -154,7 +154,7 @@ rr_enqueue(struct dn_sch_inst *_si, struct dn_queue *q, struct mbuf *m) return 0; } - /* If reach this point, queue q was idle */ + /* If reach this point, queue q was idle */ si = (struct rr_si *)(_si + 1); rrq = (struct rr_queue *)q; diff --git a/sys/netinet/ipfw/dn_sched_wf2q.c b/sys/netinet/ipfw/dn_sched_wf2q.c index 1fbc1202e40..55a49550b7f 100644 --- a/sys/netinet/ipfw/dn_sched_wf2q.c +++ b/sys/netinet/ipfw/dn_sched_wf2q.c @@ -125,7 +125,7 @@ idle_check(struct wf2qp_si *si, int n, int force) } } -static int +static int wf2qp_enqueue(struct dn_sch_inst *_si, struct dn_queue *q, struct mbuf *m) { struct dn_fsk *fs = q->fs; @@ -140,7 +140,7 @@ wf2qp_enqueue(struct dn_sch_inst *_si, struct dn_queue *q, struct mbuf *m) return 0; } - /* If reach this point, queue q was idle */ + /* If reach this point, queue q was idle */ alg_fq = (struct wf2qp_queue *)q; if (DN_KEY_LT(alg_fq->F, alg_fq->S)) { @@ -318,7 +318,7 @@ wf2qp_free_queue(struct dn_queue *q) { struct wf2qp_queue *alg_fq = (struct wf2qp_queue *)q; struct wf2qp_si *si = (struct wf2qp_si *)(q->_si + 1); - + if (alg_fq->S >= alg_fq->F + 1) return 0; /* nothing to do, not in any heap */ si->wsum -= q->fs->fs.par[0]; @@ -361,7 +361,7 @@ static struct dn_alg wf2qp_desc = { _SI( .destroy = ) NULL, _SI( .new_sched = ) wf2qp_new_sched, _SI( .free_sched = ) wf2qp_free_sched, - + _SI( .new_fsk = ) wf2qp_new_fsk, _SI( .free_fsk = ) NULL, diff --git a/sys/netinet/ipfw/ip_dn_private.h b/sys/netinet/ipfw/ip_dn_private.h index f1a7f3fa509..03b43dba55d 100644 --- a/sys/netinet/ipfw/ip_dn_private.h +++ b/sys/netinet/ipfw/ip_dn_private.h @@ 
-149,7 +149,7 @@ struct dn_parms { int drain_sch; uint32_t expire; uint32_t expire_cycle; /* tick count */ - + int init_done; /* if the upper half is busy doing something long, diff --git a/sys/netinet/ipfw/ip_dummynet.c b/sys/netinet/ipfw/ip_dummynet.c index 0a6d16dc018..01714aa6634 100644 --- a/sys/netinet/ipfw/ip_dummynet.c +++ b/sys/netinet/ipfw/ip_dummynet.c @@ -1547,28 +1547,28 @@ config_profile(struct dn_profile *pf, struct dn_id *arg) /* XXX other sanity checks */ DN_BH_WLOCK(); for (; i < 2*DN_MAX_ID; i += DN_MAX_ID) { - s = locate_scheduler(i); + s = locate_scheduler(i); - if (s == NULL) { + if (s == NULL) { err = EINVAL; break; - } - dn_cfg.id++; - /* - * If we had a profile and the new one does not fit, - * or it is deleted, then we need to free memory. - */ - if (s->profile && (pf->samples_no == 0 || - s->profile->oid.len < pf->oid.len)) { - free(s->profile, M_DUMMYNET); - s->profile = NULL; - } + } + dn_cfg.id++; + /* + * If we had a profile and the new one does not fit, + * or it is deleted, then we need to free memory. + */ + if (s->profile && (pf->samples_no == 0 || + s->profile->oid.len < pf->oid.len)) { + free(s->profile, M_DUMMYNET); + s->profile = NULL; + } if (pf->samples_no == 0) continue; - /* + /* * new profile, possibly allocate memory - * and copy data. - */ + * and copy data. + */ if (s->profile == NULL) s->profile = malloc(pf->oid.len, M_DUMMYNET, M_NOWAIT | M_ZERO); @@ -1642,7 +1642,8 @@ do_config(void *p, int l) default: D("cmd %d not implemented", o->type); break; -#ifdef EMULATE_SYSCTL + +#ifdef EMULATE_SYSCTL /* sysctl emulation. 
* if we recognize the command, jump to the correct * handler and return @@ -1651,6 +1652,7 @@ do_config(void *p, int l) err = kesysctl_emu_set(p, l); return err; #endif + case DN_CMD_CONFIG: /* simply a header */ break; diff --git a/sys/netinet/ipfw/ip_fw_pfil.c b/sys/netinet/ipfw/ip_fw_pfil.c index b4e31d4c8c8..e87a4c973fe 100644 --- a/sys/netinet/ipfw/ip_fw_pfil.c +++ b/sys/netinet/ipfw/ip_fw_pfil.c @@ -147,8 +147,8 @@ again: switch (ipfw) { case IP_FW_PASS: /* next_hop may be set by ipfw_chk */ - if (args.next_hop == NULL) - break; /* pass */ + if (args.next_hop == NULL) + break; /* pass */ #ifndef IPFIREWALL_FORWARD ret = EACCES; #else @@ -347,14 +347,14 @@ ipfw_attach_hooks(int arg) if (arg == 0) /* detach */ ipfw_hook(0, AF_INET); - else if (V_fw_enable && ipfw_hook(1, AF_INET) != 0) { + else if (V_fw_enable && ipfw_hook(1, AF_INET) != 0) { error = ENOENT; /* see ip_fw_pfil.c::ipfw_hook() */ printf("ipfw_hook() error\n"); } #ifdef INET6 if (arg == 0) /* detach */ ipfw_hook(0, AF_INET6); - else if (V_fw6_enable && ipfw_hook(1, AF_INET6) != 0) { + else if (V_fw6_enable && ipfw_hook(1, AF_INET6) != 0) { error = ENOENT; printf("ipfw6_hook() error\n"); } diff --git a/sys/netinet/ipfw/ip_fw_private.h b/sys/netinet/ipfw/ip_fw_private.h index c29ae0ad90b..ac55433750a 100644 --- a/sys/netinet/ipfw/ip_fw_private.h +++ b/sys/netinet/ipfw/ip_fw_private.h @@ -214,7 +214,7 @@ struct ip_fw_chain { struct ip_fw *default_rule; int n_rules; /* number of static rules */ int static_len; /* total len of static rules */ - struct ip_fw **map; /* array of rule ptrs to ease lookup */ + struct ip_fw **map; /* array of rule ptrs to ease lookup */ LIST_HEAD(nat_list, cfg_nat) nat; /* list of nat entries */ struct radix_node_head *tables[IPFW_TABLES_MAX]; #if defined( __linux__ ) || defined( _WIN32 ) diff --git a/sys/netinet/raw_ip.c b/sys/netinet/raw_ip.c index 1db37746c83..0b77b5b8496 100644 --- a/sys/netinet/raw_ip.c +++ b/sys/netinet/raw_ip.c @@ -427,11 +427,24 @@ rip_output(struct 
mbuf *m, struct socket *so, u_long dst) ip->ip_p = inp->inp_ip_p; ip->ip_len = m->m_pkthdr.len; ip->ip_src = inp->inp_laddr; - error = prison_get_ip4(inp->inp_cred, &ip->ip_src); - if (error != 0) { - INP_RUNLOCK(inp); - m_freem(m); - return (error); + if (jailed(inp->inp_cred)) { + /* + * prison_local_ip4() would be good enough but would + * let a source of INADDR_ANY pass, which we do not + * want to see from jails. We do not go through the + * pain of in_pcbladdr() for raw sockets. + */ + if (ip->ip_src.s_addr == INADDR_ANY) + error = prison_get_ip4(inp->inp_cred, + &ip->ip_src); + else + error = prison_local_ip4(inp->inp_cred, + &ip->ip_src); + if (error != 0) { + INP_RUNLOCK(inp); + m_freem(m); + return (error); + } } ip->ip_dst.s_addr = dst; ip->ip_ttl = inp->inp_ip_ttl; diff --git a/sys/netinet/sctp_asconf.c b/sys/netinet/sctp_asconf.c index f7de414ed1a..2d16ef01bae 100644 --- a/sys/netinet/sctp_asconf.c +++ b/sys/netinet/sctp_asconf.c @@ -1113,7 +1113,7 @@ sctp_assoc_immediate_retrans(struct sctp_tcb *stcb, struct sctp_nets *dstnet) } SCTP_TCB_LOCK_ASSERT(stcb); #ifdef SCTP_AUDITING_ENABLED - sctp_auditing(4, stcb->sctp_ep, stcb->asoc.deleted_primary); + sctp_auditing(4, stcb->sctp_ep, stcb, stcb->asoc.deleted_primary); #endif sctp_chunk_output(stcb->sctp_ep, stcb, SCTP_OUTPUT_FROM_T3, SCTP_SO_NOT_LOCKED); if ((stcb->asoc.num_send_timers_up == 0) && diff --git a/sys/netinet/sctp_indata.c b/sys/netinet/sctp_indata.c index 53dcf8f2a6f..2ed6c1660d7 100644 --- a/sys/netinet/sctp_indata.c +++ b/sys/netinet/sctp_indata.c @@ -1466,7 +1466,7 @@ sctp_process_a_data_chunk(struct sctp_tcb *stcb, struct sctp_association *asoc, asoc->send_sack = 1; } protocol_id = ch->dp.protocol_id; - ordered = ((ch->ch.chunk_flags & SCTP_DATA_UNORDERED) == 0); + ordered = ((chunk_flags & SCTP_DATA_UNORDERED) == 0); if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MAP_LOGGING_ENABLE) { sctp_log_map(tsn, asoc->cumulative_tsn, asoc->highest_tsn_inside_map, SCTP_MAP_TSN_ENTERS); } @@ 
-2123,6 +2123,10 @@ failed_pdapi_express_del: } } finish_express_del: + if (tsn == (asoc->cumulative_tsn + 1)) { + /* Update cum-ack */ + asoc->cumulative_tsn = tsn; + } if (last_chunk) { *m = NULL; } @@ -2300,7 +2304,12 @@ sctp_slide_mapping_arrays(struct sctp_tcb *stcb) if ((asoc->cumulative_tsn == highest_tsn) && (at >= 8)) { /* The complete array was completed by a single FR */ /* highest becomes the cum-ack */ - int clr, i; + int clr; + +#ifdef INVARIANTS + unsigned int i; + +#endif /* clear the array */ clr = ((at + 7) >> 3); @@ -2309,12 +2318,14 @@ sctp_slide_mapping_arrays(struct sctp_tcb *stcb) } memset(asoc->mapping_array, 0, clr); memset(asoc->nr_mapping_array, 0, clr); +#ifdef INVARIANTS for (i = 0; i < asoc->mapping_array_size; i++) { if ((asoc->mapping_array[i]) || (asoc->nr_mapping_array[i])) { printf("Error Mapping array's not clean at clear\n"); sctp_print_mapping_array(asoc); } } +#endif asoc->mapping_array_base_tsn = asoc->cumulative_tsn + 1; asoc->highest_tsn_inside_nr_map = asoc->highest_tsn_inside_map = asoc->cumulative_tsn; } else if (at >= 8) { @@ -2371,16 +2382,20 @@ sctp_slide_mapping_arrays(struct sctp_tcb *stcb) int ii; for (ii = 0; ii < distance; ii++) { - asoc->mapping_array[ii] = - asoc->mapping_array[slide_from + ii]; - asoc->nr_mapping_array[ii] = - asoc->nr_mapping_array[slide_from + ii]; + asoc->mapping_array[ii] = asoc->mapping_array[slide_from + ii]; + asoc->nr_mapping_array[ii] = asoc->nr_mapping_array[slide_from + ii]; } for (ii = distance; ii < asoc->mapping_array_size; ii++) { asoc->mapping_array[ii] = 0; asoc->nr_mapping_array[ii] = 0; } + if (asoc->highest_tsn_inside_map + 1 == asoc->mapping_array_base_tsn) { + asoc->highest_tsn_inside_map += (slide_from << 3); + } + if (asoc->highest_tsn_inside_nr_map + 1 == asoc->mapping_array_base_tsn) { + asoc->highest_tsn_inside_nr_map += (slide_from << 3); + } asoc->mapping_array_base_tsn += (slide_from << 3); if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MAP_LOGGING_ENABLE) { 
sctp_log_map(asoc->mapping_array_base_tsn, @@ -2808,25 +2823,7 @@ sctp_process_data(struct mbuf **mm, int iphlen, int *offset, int length, stcb->asoc.send_sack = 1; } /* Start a sack timer or QUEUE a SACK for sending */ - if ((stcb->asoc.cumulative_tsn == stcb->asoc.highest_tsn_inside_map) && - (stcb->asoc.mapping_array[0] != 0xff)) { - if ((stcb->asoc.data_pkts_seen >= stcb->asoc.sack_freq) || - (stcb->asoc.delayed_ack == 0) || - (stcb->asoc.numduptsns) || - (stcb->asoc.send_sack == 1)) { - if (SCTP_OS_TIMER_PENDING(&stcb->asoc.dack_timer.timer)) { - (void)SCTP_OS_TIMER_STOP(&stcb->asoc.dack_timer.timer); - } - sctp_send_sack(stcb); - } else { - if (!SCTP_OS_TIMER_PENDING(&stcb->asoc.dack_timer.timer)) { - sctp_timer_start(SCTP_TIMER_TYPE_RECV, - stcb->sctp_ep, stcb, NULL); - } - } - } else { - sctp_sack_check(stcb, was_a_gap, &abort_flag); - } + sctp_sack_check(stcb, was_a_gap, &abort_flag); if (abort_flag) return (2); diff --git a/sys/netinet/sctp_output.c b/sys/netinet/sctp_output.c index c7a806c0d9e..47590c76963 100644 --- a/sys/netinet/sctp_output.c +++ b/sys/netinet/sctp_output.c @@ -8973,7 +8973,7 @@ sctp_chunk_retransmission(struct sctp_inpcb *inp, /* (void)SCTP_GETTIME_TIMEVAL(&chk->whoTo->last_sent_time); */ *cnt_out += 1; chk->sent = SCTP_DATAGRAM_SENT; - sctp_ucount_decr(asoc->sent_queue_retran_cnt); + /* sctp_ucount_decr(asoc->sent_queue_retran_cnt); */ if (fwd_tsn == 0) { return (0); } else { diff --git a/sys/netinet/sctputil.c b/sys/netinet/sctputil.c index 0f2805fb613..04f76732cf1 100644 --- a/sys/netinet/sctputil.c +++ b/sys/netinet/sctputil.c @@ -674,7 +674,7 @@ sctp_auditing(int from, struct sctp_inpcb *inp, struct sctp_tcb *stcb, sctp_audit_indx = 0; } rep = 1; - SCTP_PRINTF("tot_flt_book:%d\n", tot_book); + SCTP_PRINTF("tot_flt_book:%d\n", tot_book_cnt); stcb->asoc.total_flight_count = tot_book_cnt; } @@ -703,8 +703,8 @@ sctp_auditing(int from, struct sctp_inpcb *inp, struct sctp_tcb *stcb, } } if (lnet->flight_size != tot_out) { - 
SCTP_PRINTF("net:%x flight was %d corrected to %d\n", - (uint32_t) lnet, lnet->flight_size, + SCTP_PRINTF("net:%p flight was %d corrected to %d\n", + lnet, lnet->flight_size, tot_out); lnet->flight_size = tot_out; } @@ -1215,7 +1215,7 @@ sctp_print_mapping_array(struct sctp_association *asoc) } printf("Non renegable mapping array (last %d entries are zero):\n", asoc->mapping_array_size - limit); for (i = 0; i < limit; i++) { - printf("%2.2x%c", asoc->mapping_array[i], ((i + 1) % 16) ? ' ' : '\n'); + printf("%2.2x%c", asoc->nr_mapping_array[i], ((i + 1) % 16) ? ' ' : '\n'); } if (limit % 16) printf("\n"); diff --git a/sys/netinet/tcp_hostcache.c b/sys/netinet/tcp_hostcache.c index a0c4012f21f..d20adb7567d 100644 --- a/sys/netinet/tcp_hostcache.c +++ b/sys/netinet/tcp_hostcache.c @@ -107,9 +107,9 @@ __FBSDID("$FreeBSD$"); #define TCP_HOSTCACHE_PRUNE 5*60 /* every 5 minutes */ static VNET_DEFINE(struct tcp_hostcache, tcp_hostcache); -static VNET_DEFINE(struct callout, tcp_hc_callout); - #define V_tcp_hostcache VNET(tcp_hostcache) + +static VNET_DEFINE(struct callout, tcp_hc_callout); #define V_tcp_hc_callout VNET(tcp_hc_callout) static struct hc_metrics *tcp_hc_lookup(struct in_conninfo *); diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c index 0254cff0b47..cbba9cd4ed0 100644 --- a/sys/netinet/tcp_input.c +++ b/sys/netinet/tcp_input.c @@ -99,20 +99,6 @@ __FBSDID("$FreeBSD$"); static const int tcprexmtthresh = 3; VNET_DEFINE(struct tcpstat, tcpstat); -VNET_DEFINE(int, blackhole); -VNET_DEFINE(int, tcp_delack_enabled); -VNET_DEFINE(int, drop_synfin); -VNET_DEFINE(int, tcp_do_rfc3042); -VNET_DEFINE(int, tcp_do_rfc3390); -VNET_DEFINE(int, tcp_do_ecn); -VNET_DEFINE(int, tcp_ecn_maxretries); -VNET_DEFINE(int, tcp_insecure_rst); -VNET_DEFINE(int, tcp_do_autorcvbuf); -VNET_DEFINE(int, tcp_autorcvbuf_inc); -VNET_DEFINE(int, tcp_autorcvbuf_max); -VNET_DEFINE(int, tcp_do_rfc3465); -VNET_DEFINE(int, tcp_abc_l_var); - SYSCTL_VNET_STRUCT(_net_inet_tcp, TCPCTL_STATS, 
stats, CTLFLAG_RW, &VNET_NAME(tcpstat), tcpstat, "TCP statistics (struct tcpstat, netinet/tcp_var.h)"); @@ -122,56 +108,79 @@ SYSCTL_INT(_net_inet_tcp, OID_AUTO, log_in_vain, CTLFLAG_RW, &tcp_log_in_vain, 0, "Log all incoming TCP segments to closed ports"); +VNET_DEFINE(int, blackhole) = 0; +#define V_blackhole VNET(blackhole) SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, blackhole, CTLFLAG_RW, &VNET_NAME(blackhole), 0, "Do not send RST on segments to closed ports"); +VNET_DEFINE(int, tcp_delack_enabled) = 1; SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, delayed_ack, CTLFLAG_RW, &VNET_NAME(tcp_delack_enabled), 0, "Delay ACK to try and piggyback it onto a data packet"); +VNET_DEFINE(int, drop_synfin) = 0; +#define V_drop_synfin VNET(drop_synfin) SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, drop_synfin, CTLFLAG_RW, &VNET_NAME(drop_synfin), 0, "Drop TCP packets with SYN+FIN set"); +VNET_DEFINE(int, tcp_do_rfc3042) = 1; +#define V_tcp_do_rfc3042 VNET(tcp_do_rfc3042) SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, rfc3042, CTLFLAG_RW, &VNET_NAME(tcp_do_rfc3042), 0, "Enable RFC 3042 (Limited Transmit)"); +VNET_DEFINE(int, tcp_do_rfc3390) = 1; +#define V_tcp_do_rfc3390 VNET(tcp_do_rfc3390) SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, rfc3390, CTLFLAG_RW, &VNET_NAME(tcp_do_rfc3390), 0, "Enable RFC 3390 (Increasing TCP's Initial Congestion Window)"); +VNET_DEFINE(int, tcp_do_rfc3465) = 1; +#define V_tcp_do_rfc3465 VNET(tcp_do_rfc3465) SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, rfc3465, CTLFLAG_RW, &VNET_NAME(tcp_do_rfc3465), 0, "Enable RFC 3465 (Appropriate Byte Counting)"); +VNET_DEFINE(int, tcp_abc_l_var) = 2; +#define V_tcp_abc_l_var VNET(tcp_abc_l_var) SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, abc_l_var, CTLFLAG_RW, &VNET_NAME(tcp_abc_l_var), 2, "Cap the max cwnd increment during slow-start to this number of segments"); SYSCTL_NODE(_net_inet_tcp, OID_AUTO, ecn, CTLFLAG_RW, 0, "TCP ECN"); +VNET_DEFINE(int, tcp_do_ecn) = 0; SYSCTL_VNET_INT(_net_inet_tcp_ecn, OID_AUTO, enable, CTLFLAG_RW, 
&VNET_NAME(tcp_do_ecn), 0, "TCP ECN support"); +VNET_DEFINE(int, tcp_ecn_maxretries) = 1; SYSCTL_VNET_INT(_net_inet_tcp_ecn, OID_AUTO, maxretries, CTLFLAG_RW, &VNET_NAME(tcp_ecn_maxretries), 0, "Max retries before giving up on ECN"); +VNET_DEFINE(int, tcp_insecure_rst) = 0; +#define V_tcp_insecure_rst VNET(tcp_insecure_rst) SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, insecure_rst, CTLFLAG_RW, &VNET_NAME(tcp_insecure_rst), 0, "Follow the old (insecure) criteria for accepting RST packets"); +VNET_DEFINE(int, tcp_do_autorcvbuf) = 1; +#define V_tcp_do_autorcvbuf VNET(tcp_do_autorcvbuf) SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, recvbuf_auto, CTLFLAG_RW, &VNET_NAME(tcp_do_autorcvbuf), 0, "Enable automatic receive buffer sizing"); +VNET_DEFINE(int, tcp_autorcvbuf_inc) = 16*1024; +#define V_tcp_autorcvbuf_inc VNET(tcp_autorcvbuf_inc) SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, recvbuf_inc, CTLFLAG_RW, &VNET_NAME(tcp_autorcvbuf_inc), 0, "Incrementor step size of automatic receive buffer"); +VNET_DEFINE(int, tcp_autorcvbuf_max) = 256*1024; +#define V_tcp_autorcvbuf_max VNET(tcp_autorcvbuf_max) SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, recvbuf_max, CTLFLAG_RW, &VNET_NAME(tcp_autorcvbuf_max), 0, "Max size of automatic receive buffer"); @@ -181,8 +190,8 @@ SYSCTL_INT(_net_inet_tcp, OID_AUTO, read_locking, CTLFLAG_RW, &tcp_read_locking, 0, "Enable read locking strategy"); VNET_DEFINE(struct inpcbhead, tcb); -VNET_DEFINE(struct inpcbinfo, tcbinfo); #define tcb6 tcb /* for KAME src sync over BSD*'s */ +VNET_DEFINE(struct inpcbinfo, tcbinfo); static void tcp_dooptions(struct tcpopt *, u_char *, int, int); static void tcp_do_segment(struct mbuf *, struct tcphdr *, diff --git a/sys/netinet/tcp_output.c b/sys/netinet/tcp_output.c index ebe5e36ee3f..50973c4d455 100644 --- a/sys/netinet/tcp_output.c +++ b/sys/netinet/tcp_output.c @@ -87,43 +87,46 @@ __FBSDID("$FreeBSD$"); extern struct mbuf *m_copypack(); #endif -VNET_DEFINE(int, path_mtu_discovery); -VNET_DEFINE(int, ss_fltsz); 
-VNET_DEFINE(int, ss_fltsz_local); -VNET_DEFINE(int, tcp_do_newreno); -VNET_DEFINE(int, tcp_do_tso); -VNET_DEFINE(int, tcp_do_autosndbuf); -VNET_DEFINE(int, tcp_autosndbuf_inc); -VNET_DEFINE(int, tcp_autosndbuf_max); - +VNET_DEFINE(int, path_mtu_discovery) = 1; SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, path_mtu_discovery, CTLFLAG_RW, &VNET_NAME(path_mtu_discovery), 1, "Enable Path MTU Discovery"); +VNET_DEFINE(int, ss_fltsz) = 1; SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, slowstart_flightsize, CTLFLAG_RW, &VNET_NAME(ss_fltsz), 1, "Slow start flight size"); +VNET_DEFINE(int, ss_fltsz_local) = 4; SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, local_slowstart_flightsize, CTLFLAG_RW, &VNET_NAME(ss_fltsz_local), 1, "Slow start flight size for local networks"); +VNET_DEFINE(int, tcp_do_newreno) = 1; SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, newreno, CTLFLAG_RW, &VNET_NAME(tcp_do_newreno), 0, "Enable NewReno Algorithms"); +VNET_DEFINE(int, tcp_do_tso) = 1; +#define V_tcp_do_tso VNET(tcp_do_tso) SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, tso, CTLFLAG_RW, &VNET_NAME(tcp_do_tso), 0, "Enable TCP Segmentation Offload"); +VNET_DEFINE(int, tcp_do_autosndbuf) = 1; +#define V_tcp_do_autosndbuf VNET(tcp_do_autosndbuf) SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, sendbuf_auto, CTLFLAG_RW, &VNET_NAME(tcp_do_autosndbuf), 0, "Enable automatic send buffer sizing"); +VNET_DEFINE(int, tcp_autosndbuf_inc) = 8*1024; +#define V_tcp_autosndbuf_inc VNET(tcp_autosndbuf_inc) SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, sendbuf_inc, CTLFLAG_RW, &VNET_NAME(tcp_autosndbuf_inc), 0, "Incrementor step size of automatic send buffer"); +VNET_DEFINE(int, tcp_autosndbuf_max) = 256*1024; +#define V_tcp_autosndbuf_max VNET(tcp_autosndbuf_max) SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, sendbuf_max, CTLFLAG_RW, &VNET_NAME(tcp_autosndbuf_max), 0, "Max size of automatic send buffer"); @@ -1048,7 +1051,7 @@ send: * XXX: Fixme: This is currently not the case for IPv6. 
*/ if (tso) { - m->m_pkthdr.csum_flags = CSUM_TSO; + m->m_pkthdr.csum_flags |= CSUM_TSO; m->m_pkthdr.tso_segsz = tp->t_maxopd - optlen; } diff --git a/sys/netinet/tcp_reass.c b/sys/netinet/tcp_reass.c index cd7eb1ce39f..bfb53d3a742 100644 --- a/sys/netinet/tcp_reass.c +++ b/sys/netinet/tcp_reass.c @@ -74,30 +74,28 @@ __FBSDID("$FreeBSD$"); #include #endif /* TCPDEBUG */ -static VNET_DEFINE(int, tcp_reass_maxseg); -VNET_DEFINE(int, tcp_reass_qsize); -static VNET_DEFINE(int, tcp_reass_maxqlen); -static VNET_DEFINE(int, tcp_reass_overflows); - -#define V_tcp_reass_maxseg VNET(tcp_reass_maxseg) -#define V_tcp_reass_maxqlen VNET(tcp_reass_maxqlen) -#define V_tcp_reass_overflows VNET(tcp_reass_overflows) - SYSCTL_NODE(_net_inet_tcp, OID_AUTO, reass, CTLFLAG_RW, 0, "TCP Segment Reassembly Queue"); +static VNET_DEFINE(int, tcp_reass_maxseg) = 0; +#define V_tcp_reass_maxseg VNET(tcp_reass_maxseg) SYSCTL_VNET_INT(_net_inet_tcp_reass, OID_AUTO, maxsegments, CTLFLAG_RDTUN, &VNET_NAME(tcp_reass_maxseg), 0, "Global maximum number of TCP Segments in Reassembly Queue"); +VNET_DEFINE(int, tcp_reass_qsize) = 0; SYSCTL_VNET_INT(_net_inet_tcp_reass, OID_AUTO, cursegments, CTLFLAG_RD, &VNET_NAME(tcp_reass_qsize), 0, "Global number of TCP Segments currently in Reassembly Queue"); +static VNET_DEFINE(int, tcp_reass_maxqlen) = 48; +#define V_tcp_reass_maxqlen VNET(tcp_reass_maxqlen) SYSCTL_VNET_INT(_net_inet_tcp_reass, OID_AUTO, maxqlen, CTLFLAG_RW, &VNET_NAME(tcp_reass_maxqlen), 0, "Maximum number of TCP Segments per individual Reassembly Queue"); +static VNET_DEFINE(int, tcp_reass_overflows) = 0; +#define V_tcp_reass_overflows VNET(tcp_reass_overflows) SYSCTL_VNET_INT(_net_inet_tcp_reass, OID_AUTO, overflows, CTLFLAG_RD, &VNET_NAME(tcp_reass_overflows), 0, "Global number of TCP Segment Reassembly Queue Overflows"); @@ -117,11 +115,6 @@ void tcp_reass_init(void) { - V_tcp_reass_maxseg = 0; - V_tcp_reass_qsize = 0; - V_tcp_reass_maxqlen = 48; - V_tcp_reass_overflows = 0; - 
V_tcp_reass_maxseg = nmbclusters / 16; TUNABLE_INT_FETCH("net.inet.tcp.reass.maxsegments", &V_tcp_reass_maxseg); diff --git a/sys/netinet/tcp_sack.c b/sys/netinet/tcp_sack.c index 06fb50269fa..737c2b2af09 100644 --- a/sys/netinet/tcp_sack.c +++ b/sys/netinet/tcp_sack.c @@ -123,29 +123,28 @@ __FBSDID("$FreeBSD$"); #include VNET_DECLARE(struct uma_zone *, sack_hole_zone); -VNET_DEFINE(int, tcp_do_sack); -VNET_DEFINE(int, tcp_sack_maxholes); -VNET_DEFINE(int, tcp_sack_globalmaxholes); -VNET_DEFINE(int, tcp_sack_globalholes); - #define V_sack_hole_zone VNET(sack_hole_zone) -#define V_tcp_do_sack VNET(tcp_do_sack) -#define V_tcp_sack_maxholes VNET(tcp_sack_maxholes) -#define V_tcp_sack_globalmaxholes VNET(tcp_sack_globalmaxholes) -#define V_tcp_sack_globalholes VNET(tcp_sack_globalholes) SYSCTL_NODE(_net_inet_tcp, OID_AUTO, sack, CTLFLAG_RW, 0, "TCP SACK"); +VNET_DEFINE(int, tcp_do_sack) = 1; +#define V_tcp_do_sack VNET(tcp_do_sack) SYSCTL_VNET_INT(_net_inet_tcp_sack, OID_AUTO, enable, CTLFLAG_RW, &VNET_NAME(tcp_do_sack), 0, "Enable/Disable TCP SACK support"); +VNET_DEFINE(int, tcp_sack_maxholes) = 128; +#define V_tcp_sack_maxholes VNET(tcp_sack_maxholes) SYSCTL_VNET_INT(_net_inet_tcp_sack, OID_AUTO, maxholes, CTLFLAG_RW, &VNET_NAME(tcp_sack_maxholes), 0, "Maximum number of TCP SACK holes allowed per connection"); +VNET_DEFINE(int, tcp_sack_globalmaxholes) = 65536; +#define V_tcp_sack_globalmaxholes VNET(tcp_sack_globalmaxholes) SYSCTL_VNET_INT(_net_inet_tcp_sack, OID_AUTO, globalmaxholes, CTLFLAG_RW, &VNET_NAME(tcp_sack_globalmaxholes), 0, "Global maximum number of TCP SACK holes"); +VNET_DEFINE(int, tcp_sack_globalholes) = 0; +#define V_tcp_sack_globalholes VNET(tcp_sack_globalholes) SYSCTL_VNET_INT(_net_inet_tcp_sack, OID_AUTO, globalholes, CTLFLAG_RD, &VNET_NAME(tcp_sack_globalholes), 0, "Global number of TCP SACK holes currently allocated"); diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c index 9ec434c0c34..43ed1e4e750 100644 --- 
a/sys/netinet/tcp_subr.c +++ b/sys/netinet/tcp_subr.c @@ -111,28 +111,10 @@ __FBSDID("$FreeBSD$"); #include -VNET_DEFINE(int, tcp_mssdflt); +VNET_DEFINE(int, tcp_mssdflt) = TCP_MSS; #ifdef INET6 -VNET_DEFINE(int, tcp_v6mssdflt); +VNET_DEFINE(int, tcp_v6mssdflt) = TCP6_MSS; #endif -VNET_DEFINE(int, tcp_minmss); -VNET_DEFINE(int, tcp_do_rfc1323); - -static VNET_DEFINE(int, icmp_may_rst); -static VNET_DEFINE(int, tcp_isn_reseed_interval); -static VNET_DEFINE(int, tcp_inflight_enable); -static VNET_DEFINE(int, tcp_inflight_rttthresh); -static VNET_DEFINE(int, tcp_inflight_min); -static VNET_DEFINE(int, tcp_inflight_max); -static VNET_DEFINE(int, tcp_inflight_stab); - -#define V_icmp_may_rst VNET(icmp_may_rst) -#define V_tcp_isn_reseed_interval VNET(tcp_isn_reseed_interval) -#define V_tcp_inflight_enable VNET(tcp_inflight_enable) -#define V_tcp_inflight_rttthresh VNET(tcp_inflight_rttthresh) -#define V_tcp_inflight_min VNET(tcp_inflight_min) -#define V_tcp_inflight_max VNET(tcp_inflight_max) -#define V_tcp_inflight_stab VNET(tcp_inflight_stab) static int sysctl_net_inet_tcp_mss_check(SYSCTL_HANDLER_ARGS) @@ -194,10 +176,12 @@ vnet_sysctl_msec_to_ticks(SYSCTL_HANDLER_ARGS) * with packet generation and sending. Set to zero to disable MINMSS * checking. This setting prevents us from sending too small packets. 
*/ +VNET_DEFINE(int, tcp_minmss) = TCP_MINMSS; SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, minmss, CTLFLAG_RW, &VNET_NAME(tcp_minmss), 0, "Minmum TCP Maximum Segment Size"); +VNET_DEFINE(int, tcp_do_rfc1323) = 1; SYSCTL_VNET_INT(_net_inet_tcp, TCPCTL_DO_RFC1323, rfc1323, CTLFLAG_RW, &VNET_NAME(tcp_do_rfc1323), 0, "Enable rfc1323 (high performance TCP) extensions"); @@ -217,10 +201,14 @@ SYSCTL_INT(_net_inet_tcp, OID_AUTO, do_tcpdrain, CTLFLAG_RW, &do_tcpdrain, 0, SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, pcbcount, CTLFLAG_RD, &VNET_NAME(tcbinfo.ipi_count), 0, "Number of active PCBs"); +static VNET_DEFINE(int, icmp_may_rst) = 1; +#define V_icmp_may_rst VNET(icmp_may_rst) SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, icmp_may_rst, CTLFLAG_RW, &VNET_NAME(icmp_may_rst), 0, "Certain ICMP unreachable messages may abort connections in SYN_SENT"); +static VNET_DEFINE(int, tcp_isn_reseed_interval) = 0; +#define V_tcp_isn_reseed_interval VNET(tcp_isn_reseed_interval) SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, isn_reseed_interval, CTLFLAG_RW, &VNET_NAME(tcp_isn_reseed_interval), 0, "Seconds between reseeding of ISN secret"); @@ -233,6 +221,8 @@ SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, isn_reseed_interval, CTLFLAG_RW, SYSCTL_NODE(_net_inet_tcp, OID_AUTO, inflight, CTLFLAG_RW, 0, "TCP inflight data limiting"); +static VNET_DEFINE(int, tcp_inflight_enable) = 1; +#define V_tcp_inflight_enable VNET(tcp_inflight_enable) SYSCTL_VNET_INT(_net_inet_tcp_inflight, OID_AUTO, enable, CTLFLAG_RW, &VNET_NAME(tcp_inflight_enable), 0, "Enable automatic TCP inflight data limiting"); @@ -242,19 +232,27 @@ SYSCTL_INT(_net_inet_tcp_inflight, OID_AUTO, debug, CTLFLAG_RW, &tcp_inflight_debug, 0, "Debug TCP inflight calculations"); +static VNET_DEFINE(int, tcp_inflight_rttthresh); +#define V_tcp_inflight_rttthresh VNET(tcp_inflight_rttthresh) SYSCTL_VNET_PROC(_net_inet_tcp_inflight, OID_AUTO, rttthresh, CTLTYPE_INT|CTLFLAG_RW, &VNET_NAME(tcp_inflight_rttthresh), 0, vnet_sysctl_msec_to_ticks, "I", "RTT 
threshold below which inflight will deactivate itself"); +static VNET_DEFINE(int, tcp_inflight_min) = 6144; +#define V_tcp_inflight_min VNET(tcp_inflight_min) SYSCTL_VNET_INT(_net_inet_tcp_inflight, OID_AUTO, min, CTLFLAG_RW, &VNET_NAME(tcp_inflight_min), 0, "Lower-bound for TCP inflight window"); +static VNET_DEFINE(int, tcp_inflight_max) = TCP_MAXWIN << TCP_MAX_WINSHIFT; +#define V_tcp_inflight_max VNET(tcp_inflight_max) SYSCTL_VNET_INT(_net_inet_tcp_inflight, OID_AUTO, max, CTLFLAG_RW, &VNET_NAME(tcp_inflight_max), 0, "Upper-bound for TCP inflight window"); +static VNET_DEFINE(int, tcp_inflight_stab) = 20; +#define V_tcp_inflight_stab VNET(tcp_inflight_stab) SYSCTL_VNET_INT(_net_inet_tcp_inflight, OID_AUTO, stab, CTLFLAG_RW, &VNET_NAME(tcp_inflight_stab), 0, "Inflight Algorithm Stabilization 20 = 2 packets"); @@ -329,53 +327,6 @@ tcp_init(void) { int hashsize; - V_blackhole = 0; - V_tcp_delack_enabled = 1; - V_drop_synfin = 0; - V_tcp_do_rfc3042 = 1; - V_tcp_do_rfc3390 = 1; - V_tcp_do_ecn = 0; - V_tcp_ecn_maxretries = 1; - V_tcp_insecure_rst = 0; - V_tcp_do_autorcvbuf = 1; - V_tcp_autorcvbuf_inc = 16*1024; - V_tcp_autorcvbuf_max = 256*1024; - V_tcp_do_rfc3465 = 1; - V_tcp_abc_l_var = 2; - - V_tcp_mssdflt = TCP_MSS; -#ifdef INET6 - V_tcp_v6mssdflt = TCP6_MSS; -#endif - V_tcp_minmss = TCP_MINMSS; - V_tcp_do_rfc1323 = 1; - V_icmp_may_rst = 1; - V_tcp_isn_reseed_interval = 0; - V_tcp_inflight_enable = 1; - V_tcp_inflight_min = 6144; - V_tcp_inflight_max = TCP_MAXWIN << TCP_MAX_WINSHIFT; - V_tcp_inflight_stab = 20; - - V_path_mtu_discovery = 1; - V_ss_fltsz = 1; - V_ss_fltsz_local = 4; - V_tcp_do_newreno = 1; - V_tcp_do_tso = 1; - V_tcp_do_autosndbuf = 1; - V_tcp_autosndbuf_inc = 8*1024; - V_tcp_autosndbuf_max = 256*1024; - - V_nolocaltimewait = 0; - - V_tcp_do_sack = 1; - V_tcp_sack_maxholes = 128; - V_tcp_sack_globalmaxholes = 65536; - V_tcp_sack_globalholes = 0; - - V_tcp_inflight_rttthresh = TCPTV_INFLIGHT_RTTTHRESH; - - 
TUNABLE_INT_FETCH("net.inet.tcp.sack.enable", &V_tcp_do_sack); - hashsize = TCBHASHSIZE; TUNABLE_INT_FETCH("net.inet.tcp.tcbhashsize", &hashsize); if (!powerof2(hashsize)) { @@ -385,16 +336,21 @@ tcp_init(void) in_pcbinfo_init(&V_tcbinfo, "tcp", &V_tcb, hashsize, hashsize, "tcp_inpcb", tcp_inpcb_init, NULL, UMA_ZONE_NOFREE); + V_tcp_inflight_rttthresh = TCPTV_INFLIGHT_RTTTHRESH; + /* * These have to be type stable for the benefit of the timers. */ V_tcpcb_zone = uma_zcreate("tcpcb", sizeof(struct tcpcb_mem), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); uma_zone_set_max(V_tcpcb_zone, maxsockets); + tcp_tw_init(); syncache_init(); tcp_hc_init(); tcp_reass_init(); + + TUNABLE_INT_FETCH("net.inet.tcp.sack.enable", &V_tcp_do_sack); V_sack_hole_zone = uma_zcreate("sackhole", sizeof(struct sackhole), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); diff --git a/sys/netinet/tcp_syncache.c b/sys/netinet/tcp_syncache.c index 4d9d487b503..c47493c2df3 100644 --- a/sys/netinet/tcp_syncache.c +++ b/sys/netinet/tcp_syncache.c @@ -97,19 +97,14 @@ __FBSDID("$FreeBSD$"); #include -static VNET_DEFINE(struct tcp_syncache, tcp_syncache); -static VNET_DEFINE(int, tcp_syncookies); -static VNET_DEFINE(int, tcp_syncookiesonly); -VNET_DEFINE(int, tcp_sc_rst_sock_fail); - -#define V_tcp_syncache VNET(tcp_syncache) +static VNET_DEFINE(int, tcp_syncookies) = 1; #define V_tcp_syncookies VNET(tcp_syncookies) -#define V_tcp_syncookiesonly VNET(tcp_syncookiesonly) - SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, syncookies, CTLFLAG_RW, &VNET_NAME(tcp_syncookies), 0, "Use TCP SYN cookies if the syncache overflows"); +static VNET_DEFINE(int, tcp_syncookiesonly) = 0; +#define V_tcp_syncookiesonly VNET(tcp_syncookiesonly) SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, syncookies_only, CTLFLAG_RW, &VNET_NAME(tcp_syncookiesonly), 0, "Use only TCP SYN cookies"); @@ -148,6 +143,9 @@ static struct syncache #define TCP_SYNCACHE_HASHSIZE 512 #define TCP_SYNCACHE_BUCKETLIMIT 30 +static 
VNET_DEFINE(struct tcp_syncache, tcp_syncache); +#define V_tcp_syncache VNET(tcp_syncache) + SYSCTL_NODE(_net_inet_tcp, OID_AUTO, syncache, CTLFLAG_RW, 0, "TCP SYN cache"); SYSCTL_VNET_INT(_net_inet_tcp_syncache, OID_AUTO, bucketlimit, CTLFLAG_RDTUN, @@ -170,6 +168,7 @@ SYSCTL_VNET_INT(_net_inet_tcp_syncache, OID_AUTO, rexmtlimit, CTLFLAG_RW, &VNET_NAME(tcp_syncache.rexmt_limit), 0, "Limit on SYN/ACK retransmissions"); +VNET_DEFINE(int, tcp_sc_rst_sock_fail) = 1; SYSCTL_VNET_INT(_net_inet_tcp_syncache, OID_AUTO, rst_on_sock_fail, CTLFLAG_RW, &VNET_NAME(tcp_sc_rst_sock_fail), 0, "Send reset on socket allocation failure"); @@ -224,10 +223,6 @@ syncache_init(void) { int i; - V_tcp_syncookies = 1; - V_tcp_syncookiesonly = 0; - V_tcp_sc_rst_sock_fail = 1; - V_tcp_syncache.cache_count = 0; V_tcp_syncache.hashsize = TCP_SYNCACHE_HASHSIZE; V_tcp_syncache.bucket_limit = TCP_SYNCACHE_BUCKETLIMIT; diff --git a/sys/netinet/tcp_timewait.c b/sys/netinet/tcp_timewait.c index 6e5b013ae98..42df4fe9463 100644 --- a/sys/netinet/tcp_timewait.c +++ b/sys/netinet/tcp_timewait.c @@ -92,6 +92,8 @@ __FBSDID("$FreeBSD$"); #include +static VNET_DEFINE(uma_zone_t, tcptw_zone); +#define V_tcptw_zone VNET(tcptw_zone) static int maxtcptw; /* @@ -100,11 +102,7 @@ static int maxtcptw; * queue pointers in each tcptw structure, are protected using the global * tcbinfo lock, which must be held over queue iteration and modification. 
*/ -static VNET_DEFINE(uma_zone_t, tcptw_zone); static VNET_DEFINE(TAILQ_HEAD(, tcptw), twq_2msl); -VNET_DEFINE(int, nolocaltimewait); - -#define V_tcptw_zone VNET(tcptw_zone) #define V_twq_2msl VNET(twq_2msl) static void tcp_tw_2msl_reset(struct tcptw *, int); @@ -149,6 +147,8 @@ SYSCTL_PROC(_net_inet_tcp, OID_AUTO, maxtcptw, CTLTYPE_INT|CTLFLAG_RW, &maxtcptw, 0, sysctl_maxtcptw, "IU", "Maximum number of compressed TCP TIME_WAIT entries"); +VNET_DEFINE(int, nolocaltimewait) = 0; +#define V_nolocaltimewait VNET(nolocaltimewait) SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, nolocaltimewait, CTLFLAG_RW, &VNET_NAME(nolocaltimewait), 0, "Do not create compressed TCP TIME_WAIT entries for local connections"); diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h index 3a59eee6c2e..5811439dcd6 100644 --- a/sys/netinet/tcp_var.h +++ b/sys/netinet/tcp_var.h @@ -42,12 +42,12 @@ * Kernel variables for tcp. */ VNET_DECLARE(int, tcp_do_rfc1323); +#define V_tcp_do_rfc1323 VNET(tcp_do_rfc1323) + VNET_DECLARE(int, tcp_reass_qsize); VNET_DECLARE(struct uma_zone *, tcp_reass_zone); -#define V_tcp_do_rfc1323 VNET(tcp_do_rfc1323) #define V_tcp_reass_qsize VNET(tcp_reass_qsize) #define V_tcp_reass_zone VNET(tcp_reass_zone) - #endif /* _KERNEL */ /* TCP segment queue entry */ @@ -558,11 +558,10 @@ SYSCTL_DECL(_net_inet_tcp_sack); MALLOC_DECLARE(M_TCPLOG); #endif -extern int tcp_log_in_vain; - VNET_DECLARE(struct inpcbhead, tcb); /* queue of active tcpcb's */ VNET_DECLARE(struct inpcbinfo, tcbinfo); VNET_DECLARE(struct tcpstat, tcpstat); /* tcp statistics */ +extern int tcp_log_in_vain; VNET_DECLARE(int, tcp_mssdflt); /* XXX */ VNET_DECLARE(int, tcp_minmss); VNET_DECLARE(int, tcp_delack_enabled); @@ -570,7 +569,6 @@ VNET_DECLARE(int, tcp_do_newreno); VNET_DECLARE(int, path_mtu_discovery); VNET_DECLARE(int, ss_fltsz); VNET_DECLARE(int, ss_fltsz_local); - #define V_tcb VNET(tcb) #define V_tcbinfo VNET(tcbinfo) #define V_tcpstat VNET(tcpstat) @@ -582,55 +580,13 @@ VNET_DECLARE(int, 
ss_fltsz_local); #define V_ss_fltsz VNET(ss_fltsz) #define V_ss_fltsz_local VNET(ss_fltsz_local) -VNET_DECLARE(int, blackhole); -VNET_DECLARE(int, drop_synfin); -VNET_DECLARE(int, tcp_do_rfc3042); -VNET_DECLARE(int, tcp_do_rfc3390); -VNET_DECLARE(int, tcp_insecure_rst); -VNET_DECLARE(int, tcp_do_autorcvbuf); -VNET_DECLARE(int, tcp_autorcvbuf_inc); -VNET_DECLARE(int, tcp_autorcvbuf_max); -VNET_DECLARE(int, tcp_do_rfc3465); -VNET_DECLARE(int, tcp_abc_l_var); - -#define V_blackhole VNET(blackhole) -#define V_drop_synfin VNET(drop_synfin) -#define V_tcp_do_rfc3042 VNET(tcp_do_rfc3042) -#define V_tcp_do_rfc3390 VNET(tcp_do_rfc3390) -#define V_tcp_insecure_rst VNET(tcp_insecure_rst) -#define V_tcp_do_autorcvbuf VNET(tcp_do_autorcvbuf) -#define V_tcp_autorcvbuf_inc VNET(tcp_autorcvbuf_inc) -#define V_tcp_autorcvbuf_max VNET(tcp_autorcvbuf_max) -#define V_tcp_do_rfc3465 VNET(tcp_do_rfc3465) -#define V_tcp_abc_l_var VNET(tcp_abc_l_var) - -VNET_DECLARE(int, tcp_do_tso); -VNET_DECLARE(int, tcp_do_autosndbuf); -VNET_DECLARE(int, tcp_autosndbuf_inc); -VNET_DECLARE(int, tcp_autosndbuf_max); - -#define V_tcp_do_tso VNET(tcp_do_tso) -#define V_tcp_do_autosndbuf VNET(tcp_do_autosndbuf) -#define V_tcp_autosndbuf_inc VNET(tcp_autosndbuf_inc) -#define V_tcp_autosndbuf_max VNET(tcp_autosndbuf_max) - -VNET_DECLARE(int, nolocaltimewait); - -#define V_nolocaltimewait VNET(nolocaltimewait) - VNET_DECLARE(int, tcp_do_sack); /* SACK enabled/disabled */ -VNET_DECLARE(int, tcp_sack_maxholes); -VNET_DECLARE(int, tcp_sack_globalmaxholes); -VNET_DECLARE(int, tcp_sack_globalholes); VNET_DECLARE(int, tcp_sc_rst_sock_fail); /* RST on sock alloc failure */ +#define V_tcp_do_sack VNET(tcp_do_sack) +#define V_tcp_sc_rst_sock_fail VNET(tcp_sc_rst_sock_fail) + VNET_DECLARE(int, tcp_do_ecn); /* TCP ECN enabled/disabled */ VNET_DECLARE(int, tcp_ecn_maxretries); - -#define V_tcp_do_sack VNET(tcp_do_sack) -#define V_tcp_sack_maxholes VNET(tcp_sack_maxholes) -#define V_tcp_sack_globalmaxholes 
VNET(tcp_sack_globalmaxholes) -#define V_tcp_sack_globalholes VNET(tcp_sack_globalholes) -#define V_tcp_sc_rst_sock_fail VNET(tcp_sc_rst_sock_fail) #define V_tcp_do_ecn VNET(tcp_do_ecn) #define V_tcp_ecn_maxretries VNET(tcp_ecn_maxretries) diff --git a/sys/netinet/udp_usrreq.c b/sys/netinet/udp_usrreq.c index 0d8e04deaf6..45410387c17 100644 --- a/sys/netinet/udp_usrreq.c +++ b/sys/netinet/udp_usrreq.c @@ -94,8 +94,6 @@ __FBSDID("$FreeBSD$"); * Per RFC 768, August, 1980. */ -VNET_DEFINE(int, udp_blackhole); - /* * BSD 4.2 defaulted the udp checksum to be off. Turning off udp checksums * removes the only data integrity mechanism for packets and malformed @@ -110,6 +108,7 @@ int udp_log_in_vain = 0; SYSCTL_INT(_net_inet_udp, OID_AUTO, log_in_vain, CTLFLAG_RW, &udp_log_in_vain, 0, "Log all incoming UDP packets"); +VNET_DEFINE(int, udp_blackhole) = 0; SYSCTL_VNET_INT(_net_inet_udp, OID_AUTO, blackhole, CTLFLAG_RW, &VNET_NAME(udp_blackhole), 0, "Do not send port unreachables for refused connects"); @@ -133,14 +132,13 @@ SYSCTL_ULONG(_net_inet_udp, UDPCTL_RECVSPACE, recvspace, CTLFLAG_RW, VNET_DEFINE(struct inpcbhead, udb); /* from udp_var.h */ VNET_DEFINE(struct inpcbinfo, udbinfo); static VNET_DEFINE(uma_zone_t, udpcb_zone); -VNET_DEFINE(struct udpstat, udpstat); /* from udp_var.h */ - #define V_udpcb_zone VNET(udpcb_zone) #ifndef UDBHASHSIZE #define UDBHASHSIZE 128 #endif +VNET_DEFINE(struct udpstat, udpstat); /* from udp_var.h */ SYSCTL_VNET_STRUCT(_net_inet_udp, UDPCTL_STATS, stats, CTLFLAG_RW, &VNET_NAME(udpstat), udpstat, "UDP statistics (struct udpstat, netinet/udp_var.h)"); @@ -179,7 +177,6 @@ void udp_init(void) { - V_udp_blackhole = 0; in_pcbinfo_init(&V_udbinfo, "udp", &V_udb, UDBHASHSIZE, UDBHASHSIZE, "udp_inpcb", udp_inpcb_init, NULL, UMA_ZONE_NOFREE); V_udpcb_zone = uma_zcreate("udpcb", sizeof(struct udpcb), diff --git a/sys/netinet/udp_var.h b/sys/netinet/udp_var.h index b8d994cc3a6..aa7ee470c8b 100644 --- a/sys/netinet/udp_var.h +++ 
b/sys/netinet/udp_var.h @@ -129,19 +129,17 @@ void kmod_udpstat_inc(int statnum); SYSCTL_DECL(_net_inet_udp); extern struct pr_usrreqs udp_usrreqs; - VNET_DECLARE(struct inpcbhead, udb); VNET_DECLARE(struct inpcbinfo, udbinfo); -VNET_DECLARE(struct udpstat, udpstat); -VNET_DECLARE(int, udp_blackhole); - #define V_udb VNET(udb) #define V_udbinfo VNET(udbinfo) -#define V_udpstat VNET(udpstat) -#define V_udp_blackhole VNET(udp_blackhole) extern u_long udp_sendspace; extern u_long udp_recvspace; +VNET_DECLARE(struct udpstat, udpstat); +VNET_DECLARE(int, udp_blackhole); +#define V_udpstat VNET(udpstat) +#define V_udp_blackhole VNET(udp_blackhole) extern int udp_log_in_vain; int udp_newudpcb(struct inpcb *); diff --git a/sys/netinet6/frag6.c b/sys/netinet6/frag6.c index 8900f7d08b6..1523133d3db 100644 --- a/sys/netinet6/frag6.c +++ b/sys/netinet6/frag6.c @@ -106,16 +106,17 @@ void frag6_init(void) { - V_ip6q.ip6q_next = V_ip6q.ip6q_prev = &V_ip6q; V_ip6_maxfragpackets = nmbclusters / 4; V_ip6_maxfrags = nmbclusters / 4; + V_ip6q.ip6q_next = V_ip6q.ip6q_prev = &V_ip6q; if (!IS_DEFAULT_VNET(curvnet)) return; - IP6Q_LOCK_INIT(); EVENTHANDLER_REGISTER(nmbclusters_change, frag6_change, NULL, EVENTHANDLER_PRI_ANY); + + IP6Q_LOCK_INIT(); } /* diff --git a/sys/netinet6/icmp6.c b/sys/netinet6/icmp6.c index 57f8d32c59a..87df9c369c8 100644 --- a/sys/netinet6/icmp6.c +++ b/sys/netinet6/icmp6.c @@ -113,23 +113,22 @@ __FBSDID("$FreeBSD$"); extern struct domain inet6domain; +VNET_DEFINE(struct icmp6stat, icmp6stat); + VNET_DECLARE(struct inpcbinfo, ripcbinfo); VNET_DECLARE(struct inpcbhead, ripcb); VNET_DECLARE(int, icmp6errppslim); +static VNET_DEFINE(int, icmp6errpps_count) = 0; +static VNET_DEFINE(struct timeval, icmp6errppslim_last); VNET_DECLARE(int, icmp6_nodeinfo); #define V_ripcbinfo VNET(ripcbinfo) #define V_ripcb VNET(ripcb) #define V_icmp6errppslim VNET(icmp6errppslim) +#define V_icmp6errpps_count VNET(icmp6errpps_count) +#define V_icmp6errppslim_last 
VNET(icmp6errppslim_last) #define V_icmp6_nodeinfo VNET(icmp6_nodeinfo) -VNET_DEFINE(struct icmp6stat, icmp6stat); -static VNET_DEFINE(int, icmp6errpps_count); -static VNET_DEFINE(struct timeval, icmp6errppslim_last); - -#define V_icmp6errpps_count VNET(icmp6errpps_count) -#define V_icmp6errppslim_last VNET(icmp6errppslim_last) - static void icmp6_errcount(struct icmp6errstat *, int, int); static int icmp6_rip6_input(struct mbuf **, int); static int icmp6_ratelimit(const struct in6_addr *, const int, const int); @@ -144,14 +143,6 @@ static int ni6_store_addrs __P((struct icmp6_nodeinfo *, struct icmp6_nodeinfo * struct ifnet *, int)); static int icmp6_notify_error(struct mbuf **, int, int, int); - -void -icmp6_init(void) -{ - - V_icmp6errpps_count = 0; -} - /* * Kernel module interface for updating icmp6stat. The argument is an index * into icmp6stat treated as an array of u_quad_t. While this encodes the diff --git a/sys/netinet6/in6.c b/sys/netinet6/in6.c index 74c15d58a4a..15ff6b29bd1 100644 --- a/sys/netinet6/in6.c +++ b/sys/netinet6/in6.c @@ -63,6 +63,7 @@ #include __FBSDID("$FreeBSD$"); +#include "opt_compat.h" #include "opt_inet.h" #include "opt_inet6.h" @@ -176,6 +177,14 @@ in6_mask2len(struct in6_addr *mask, u_char *lim0) #define ifa2ia6(ifa) ((struct in6_ifaddr *)(ifa)) #define ia62ifa(ia6) (&((ia6)->ia_ifa)) +#ifdef COMPAT_FREEBSD32 +struct in6_ndifreq32 { + char ifname[IFNAMSIZ]; + uint32_t ifindex; +}; +#define SIOCGDEFIFACE32_IN6 _IOWR('i', 86, struct in6_ndifreq32) +#endif + int in6_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp, struct thread *td) @@ -226,6 +235,22 @@ in6_control(struct socket *so, u_long cmd, caddr_t data, case SIOCGNBRINFO_IN6: case SIOCGDEFIFACE_IN6: return (nd6_ioctl(cmd, data, ifp)); + +#ifdef COMPAT_FREEBSD32 + case SIOCGDEFIFACE32_IN6: + { + struct in6_ndifreq ndif; + struct in6_ndifreq32 *ndif32; + + error = nd6_ioctl(SIOCGDEFIFACE_IN6, (caddr_t)&ndif, + ifp); + if (error) + return (error); + ndif32 = 
(struct in6_ndifreq32 *)data; + ndif32->ifindex = ndif.ifindex; + return (0); + } +#endif } switch (cmd) { diff --git a/sys/netinet6/in6_gif.c b/sys/netinet6/in6_gif.c index a481706f53d..e786836459f 100644 --- a/sys/netinet6/in6_gif.c +++ b/sys/netinet6/in6_gif.c @@ -41,8 +41,10 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include +#include #include #include @@ -69,6 +71,13 @@ __FBSDID("$FreeBSD$"); #include +VNET_DEFINE(int, ip6_gif_hlim) = GIF_HLIM; +#define V_ip6_gif_hlim VNET(ip6_gif_hlim) + +SYSCTL_DECL(_net_inet6_ip6); +SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_GIF_HLIM, gifhlim, CTLFLAG_RW, + &VNET_NAME(ip6_gif_hlim), 0, ""); + static int gif_validate6(const struct ip6_hdr *, struct gif_softc *, struct ifnet *); diff --git a/sys/netinet6/in6_ifattach.c b/sys/netinet6/in6_ifattach.c index 5041ee2f233..363d7bedeb6 100644 --- a/sys/netinet6/in6_ifattach.c +++ b/sys/netinet6/in6_ifattach.c @@ -67,10 +67,15 @@ __FBSDID("$FreeBSD$"); #include #include -VNET_DEFINE(unsigned long, in6_maxmtu); -VNET_DEFINE(int, ip6_auto_linklocal); -VNET_DEFINE(struct callout, in6_tmpaddrtimer_ch); +VNET_DEFINE(unsigned long, in6_maxmtu) = 0; +#ifdef IP6_AUTO_LINKLOCAL +VNET_DEFINE(int, ip6_auto_linklocal) = IP6_AUTO_LINKLOCAL; +#else +VNET_DEFINE(int, ip6_auto_linklocal) = 1; /* enabled by default */ +#endif + +VNET_DEFINE(struct callout, in6_tmpaddrtimer_ch); #define V_in6_tmpaddrtimer_ch VNET(in6_tmpaddrtimer_ch) VNET_DECLARE(struct inpcbinfo, ripcbinfo); diff --git a/sys/netinet6/in6_proto.c b/sys/netinet6/in6_proto.c index 3289e57a274..51f5187598e 100644 --- a/sys/netinet6/in6_proto.c +++ b/sys/netinet6/in6_proto.c @@ -234,7 +234,6 @@ struct ip6protosw inet6sw[] = { .pr_output = rip6_output, .pr_ctlinput = rip6_ctlinput, .pr_ctloutput = rip6_ctloutput, - .pr_init = icmp6_init, .pr_fasttimo = icmp6_fasttimo, .pr_slowtimo = icmp6_slowtimo, .pr_usrreqs = &rip6_usrreqs @@ -378,25 +377,44 @@ VNET_DOMAIN_SET(inet6); /* * Internet configuration info */ 
-VNET_DEFINE(int, ip6_forwarding); -VNET_DEFINE(int, ip6_sendredirects); -VNET_DEFINE(int, ip6_defhlim); -VNET_DEFINE(int, ip6_defmcasthlim); -VNET_DEFINE(int, ip6_accept_rtadv); -VNET_DEFINE(int, ip6_maxfragpackets); -VNET_DEFINE(int, ip6_maxfrags); -VNET_DEFINE(int, ip6_log_interval); -VNET_DEFINE(int, ip6_hdrnestlimit); -VNET_DEFINE(int, ip6_dad_count); -VNET_DEFINE(int, ip6_auto_flowlabel); -VNET_DEFINE(int, ip6_use_deprecated); -VNET_DEFINE(int, ip6_rr_prune); -VNET_DEFINE(int, ip6_mcast_pmtu); -VNET_DEFINE(int, ip6_v6only); -VNET_DEFINE(int, ip6_keepfaith); -VNET_DEFINE(time_t, ip6_log_time); -VNET_DEFINE(int, ip6stealth); -VNET_DEFINE(int, nd6_onlink_ns_rfc4861); +#ifndef IPV6FORWARDING +#ifdef GATEWAY6 +#define IPV6FORWARDING 1 /* forward IP6 packets not for us */ +#else +#define IPV6FORWARDING 0 /* don't forward IP6 packets not for us */ +#endif /* GATEWAY6 */ +#endif /* !IPV6FORWARDING */ + +#ifndef IPV6_SENDREDIRECTS +#define IPV6_SENDREDIRECTS 1 +#endif + +VNET_DEFINE(int, ip6_forwarding) = IPV6FORWARDING; /* act as router? */ +VNET_DEFINE(int, ip6_sendredirects) = IPV6_SENDREDIRECTS; +VNET_DEFINE(int, ip6_defhlim) = IPV6_DEFHLIM; +VNET_DEFINE(int, ip6_defmcasthlim) = IPV6_DEFAULT_MULTICAST_HOPS; +VNET_DEFINE(int, ip6_accept_rtadv) = 0; +VNET_DEFINE(int, ip6_maxfragpackets); /* initialized in frag6.c:frag6_init() */ +VNET_DEFINE(int, ip6_maxfrags); /* initialized in frag6.c:frag6_init() */ +VNET_DEFINE(int, ip6_log_interval) = 5; +VNET_DEFINE(int, ip6_hdrnestlimit) = 15;/* How many header options will we + * process? */ +VNET_DEFINE(int, ip6_dad_count) = 1; /* DupAddrDetectionTransmits */ +VNET_DEFINE(int, ip6_auto_flowlabel) = 1; +VNET_DEFINE(int, ip6_use_deprecated) = 1;/* allow deprecated addr + * (RFC2462 5.5.4) */ +VNET_DEFINE(int, ip6_rr_prune) = 5; /* router renumbering prefix + * walk list every 5 sec. */ +VNET_DEFINE(int, ip6_mcast_pmtu) = 0; /* enable pMTU discovery for multicast? 
*/ +VNET_DEFINE(int, ip6_v6only) = 1; + +VNET_DEFINE(int, ip6_keepfaith) = 0; +VNET_DEFINE(time_t, ip6_log_time) = (time_t)0L; +#ifdef IPSTEALTH +VNET_DEFINE(int, ip6stealth) = 0; +#endif +VNET_DEFINE(int, nd6_onlink_ns_rfc4861) = 0;/* allow 'on-link' nd6 NS + * (RFC 4861) */ /* icmp6 */ /* @@ -404,26 +422,31 @@ VNET_DEFINE(int, nd6_onlink_ns_rfc4861); * XXX: what if we don't define INET? Should we define pmtu6_expire * or so? (jinmei@kame.net 19990310) */ -VNET_DEFINE(int, pmtu_expire); -VNET_DEFINE(int, pmtu_probe); +VNET_DEFINE(int, pmtu_expire) = 60*10; +VNET_DEFINE(int, pmtu_probe) = 60*2; /* raw IP6 parameters */ /* * Nominal space allocated to a raw ip socket. */ -VNET_DEFINE(u_long, rip6_sendspace); -VNET_DEFINE(u_long, rip6_recvspace); +#define RIPV6SNDQ 8192 +#define RIPV6RCVQ 8192 + +VNET_DEFINE(u_long, rip6_sendspace) = RIPV6SNDQ; +VNET_DEFINE(u_long, rip6_recvspace) = RIPV6RCVQ; /* ICMPV6 parameters */ -VNET_DEFINE(int, icmp6_rediraccept); -VNET_DEFINE(int, icmp6_redirtimeout); -VNET_DEFINE(int, icmp6errppslim); +VNET_DEFINE(int, icmp6_rediraccept) = 1;/* accept and process redirects */ +VNET_DEFINE(int, icmp6_redirtimeout) = 10 * 60; /* 10 minutes */ +VNET_DEFINE(int, icmp6errppslim) = 100; /* 100pps */ /* control how to respond to NI queries */ -VNET_DEFINE(int, icmp6_nodeinfo); +VNET_DEFINE(int, icmp6_nodeinfo) = + (ICMP6_NODEINFO_FQDNOK|ICMP6_NODEINFO_NODEADDROK); /* UDP on IP6 parameters */ -VNET_DEFINE(int, udp6_sendspace); -VNET_DEFINE(int, udp6_recvspace); +VNET_DEFINE(int, udp6_sendspace) = 9216;/* really max datagram size */ +VNET_DEFINE(int, udp6_recvspace) = 40 * (1024 + sizeof(struct sockaddr_in6)); + /* 40 1K datagrams */ /* * sysctl related items. 
@@ -571,7 +594,6 @@ SYSCTL_VNET_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_MAXNUDHINT, nd6_maxnudhint, CTLFLAG_RW, &VNET_NAME(nd6_maxnudhint), 0, ""); SYSCTL_VNET_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_DEBUG, nd6_debug, CTLFLAG_RW, &VNET_NAME(nd6_debug), 0, ""); - SYSCTL_VNET_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_ONLINKNSRFC4861, nd6_onlink_ns_rfc4861, CTLFLAG_RW, &VNET_NAME(nd6_onlink_ns_rfc4861), 0, "Accept 'on-link' nd6 NS in compliance with RFC 4861."); diff --git a/sys/netinet6/in6_rmx.c b/sys/netinet6/in6_rmx.c index 1ae04c3f112..8927a176df9 100644 --- a/sys/netinet6/in6_rmx.c +++ b/sys/netinet6/in6_rmx.c @@ -204,20 +204,21 @@ in6_matroute(void *v_arg, struct radix_node_head *head) SYSCTL_DECL(_net_inet6_ip6); -static VNET_DEFINE(int, rtq_reallyold6); -static VNET_DEFINE(int, rtq_minreallyold6); -static VNET_DEFINE(int, rtq_toomany6); - +static VNET_DEFINE(int, rtq_reallyold6) = 60*60; + /* one hour is ``really old'' */ #define V_rtq_reallyold6 VNET(rtq_reallyold6) -#define V_rtq_minreallyold6 VNET(rtq_minreallyold6) -#define V_rtq_toomany6 VNET(rtq_toomany6) - SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_RTEXPIRE, rtexpire, CTLFLAG_RW, &VNET_NAME(rtq_reallyold6) , 0, ""); +static VNET_DEFINE(int, rtq_minreallyold6) = 10; + /* never automatically crank down to less */ +#define V_rtq_minreallyold6 VNET(rtq_minreallyold6) SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_RTMINEXPIRE, rtminexpire, CTLFLAG_RW, &VNET_NAME(rtq_minreallyold6) , 0, ""); +static VNET_DEFINE(int, rtq_toomany6) = 128; + /* 128 cached routes is ``too many'' */ +#define V_rtq_toomany6 VNET(rtq_toomany6) SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_RTMAXCACHE, rtmaxcache, CTLFLAG_RW, &VNET_NAME(rtq_toomany6) , 0, ""); @@ -277,7 +278,7 @@ in6_rtqkill(struct radix_node *rn, void *rock) } #define RTQ_TIMEOUT 60*10 /* run no less than once every ten minutes */ -static VNET_DEFINE(int, rtq_timeout6); +static VNET_DEFINE(int, rtq_timeout6) = RTQ_TIMEOUT; static VNET_DEFINE(struct callout, rtq_timer6); #define V_rtq_timeout6 
VNET(rtq_timeout6) @@ -346,7 +347,6 @@ struct mtuex_arg { struct radix_node_head *rnh; time_t nextstop; }; - static VNET_DEFINE(struct callout, rtq_mtutimer); #define V_rtq_mtutimer VNET(rtq_mtutimer) @@ -422,11 +422,6 @@ in6_inithead(void **head, int off) if (off == 0) /* See above */ return 1; /* only do the rest for the real thing */ - V_rtq_reallyold6 = 60*60; /* one hour is ``really old'' */ - V_rtq_minreallyold6 = 10; /* never automatically crank down to less */ - V_rtq_toomany6 = 128; /* 128 cached routes is ``too many'' */ - V_rtq_timeout6 = RTQ_TIMEOUT; - rnh = *head; KASSERT(rnh == rt_tables_get_rnh(0, AF_INET6), ("rnh?")); rnh->rnh_addaddr = in6_addroute; diff --git a/sys/netinet6/in6_src.c b/sys/netinet6/in6_src.c index ea302a52fa4..49bc715452a 100644 --- a/sys/netinet6/in6_src.c +++ b/sys/netinet6/in6_src.c @@ -122,12 +122,11 @@ static struct sx addrsel_sxlock; #define ADDRSEL_XUNLOCK() sx_xunlock(&addrsel_sxlock) #define ADDR_LABEL_NOTAPP (-1) - static VNET_DEFINE(struct in6_addrpolicy, defaultaddrpolicy); -VNET_DEFINE(int, ip6_prefer_tempaddr); - #define V_defaultaddrpolicy VNET(defaultaddrpolicy) +VNET_DEFINE(int, ip6_prefer_tempaddr) = 0; + static int selectroute __P((struct sockaddr_in6 *, struct ip6_pktopts *, struct ip6_moptions *, struct route_in6 *, struct ifnet **, struct rtentry **, int)); @@ -182,7 +181,7 @@ in6_selectsrc(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, struct inpcb *inp, struct route_in6 *ro, struct ucred *cred, struct ifnet **ifpp, struct in6_addr *srcp) { - struct in6_addr dst; + struct in6_addr dst, tmp; struct ifnet *ifp = NULL; struct in6_ifaddr *ia = NULL, *ia_best = NULL; struct in6_pktinfo *pi = NULL; @@ -326,10 +325,9 @@ in6_selectsrc(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, if (!V_ip6_use_deprecated && IFA6_IS_DEPRECATED(ia)) continue; + /* If jailed only take addresses of the jail into account. 
*/ if (cred != NULL && - prison_local_ip6(cred, &ia->ia_addr.sin6_addr, - (inp != NULL && - (inp->inp_flags & IN6P_IPV6_V6ONLY) != 0)) != 0) + prison_check_ip6(cred, &ia->ia_addr.sin6_addr) != 0) continue; /* Rule 1: Prefer same address */ @@ -476,10 +474,26 @@ in6_selectsrc(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, return (EADDRNOTAVAIL); } + /* + * At this point at least one of the addresses belonged to the jail + * but it could still be, that we want to further restrict it, e.g. + * theoratically IN6_IS_ADDR_LOOPBACK. + * It must not be IN6_IS_ADDR_UNSPECIFIED anymore. + * prison_local_ip6() will fix an IN6_IS_ADDR_LOOPBACK but should + * let all others previously selected pass. + * Use tmp to not change ::1 on lo0 to the primary jail address. + */ + tmp = ia->ia_addr.sin6_addr; + if (cred != NULL && prison_local_ip6(cred, &tmp, (inp != NULL && + (inp->inp_flags & IN6P_IPV6_V6ONLY) != 0)) != 0) { + IN6_IFADDR_RUNLOCK(); + return (EADDRNOTAVAIL); + } + if (ifpp) *ifpp = ifp; - bcopy(&ia->ia_addr.sin6_addr, srcp, sizeof(*srcp)); + bcopy(&tmp, srcp, sizeof(*srcp)); IN6_IFADDR_RUNLOCK(); return (0); } @@ -937,8 +951,6 @@ void addrsel_policy_init(void) { - V_ip6_prefer_tempaddr = 0; - init_policy_queue(); /* initialize the "last resort" policy */ diff --git a/sys/netinet6/in6_var.h b/sys/netinet6/in6_var.h index d0a54e06a36..00342fde762 100644 --- a/sys/netinet6/in6_var.h +++ b/sys/netinet6/in6_var.h @@ -487,12 +487,7 @@ struct in6_rrenumreq { #ifdef _KERNEL VNET_DECLARE(struct in6_ifaddrhead, in6_ifaddrhead); -VNET_DECLARE(struct icmp6stat, icmp6stat); -VNET_DECLARE(unsigned long, in6_maxmtu); - #define V_in6_ifaddrhead VNET(in6_ifaddrhead) -#define V_icmp6stat VNET(icmp6stat) -#define V_in6_maxmtu VNET(in6_maxmtu) extern struct rwlock in6_ifaddr_lock; #define IN6_IFADDR_LOCK_ASSERT( ) rw_assert(&in6_ifaddr_lock, RA_LOCKED) @@ -503,6 +498,8 @@ extern struct rwlock in6_ifaddr_lock; #define IN6_IFADDR_WLOCK_ASSERT() rw_assert(&in6_ifaddr_lock, 
RA_WLOCKED) #define IN6_IFADDR_WUNLOCK() rw_wunlock(&in6_ifaddr_lock) +VNET_DECLARE(struct icmp6stat, icmp6stat); +#define V_icmp6stat VNET(icmp6stat) #define in6_ifstat_inc(ifp, tag) \ do { \ if (ifp) \ @@ -511,6 +508,8 @@ do { \ extern struct in6_addr zeroin6_addr; extern u_char inet6ctlerrmap[]; +VNET_DECLARE(unsigned long, in6_maxmtu); +#define V_in6_maxmtu VNET(in6_maxmtu) #endif /* _KERNEL */ /* diff --git a/sys/netinet6/ip6_input.c b/sys/netinet6/ip6_input.c index c244f374b92..42f935143aa 100644 --- a/sys/netinet6/ip6_input.c +++ b/sys/netinet6/ip6_input.c @@ -116,6 +116,7 @@ __FBSDID("$FreeBSD$"); extern struct domain inet6domain; u_char ip6_protox[IPPROTO_MAX]; +VNET_DEFINE(struct in6_ifaddrhead, in6_ifaddrhead); static struct netisr_handler ip6_nh = { .nh_name = "ip6", @@ -124,36 +125,16 @@ static struct netisr_handler ip6_nh = { .nh_policy = NETISR_POLICY_FLOW, }; -VNET_DEFINE(struct in6_ifaddrhead, in6_ifaddrhead); -VNET_DEFINE(struct ip6stat, ip6stat); - VNET_DECLARE(struct callout, in6_tmpaddrtimer_ch); -VNET_DECLARE(int, dad_init); -VNET_DECLARE(int, pmtu_expire); -VNET_DECLARE(int, pmtu_probe); -VNET_DECLARE(u_long, rip6_sendspace); -VNET_DECLARE(u_long, rip6_recvspace); -VNET_DECLARE(int, icmp6errppslim); -VNET_DECLARE(int, icmp6_nodeinfo); -VNET_DECLARE(int, udp6_sendspace); -VNET_DECLARE(int, udp6_recvspace); - #define V_in6_tmpaddrtimer_ch VNET(in6_tmpaddrtimer_ch) -#define V_dad_init VNET(dad_init) -#define V_pmtu_expire VNET(pmtu_expire) -#define V_pmtu_probe VNET(pmtu_probe) -#define V_rip6_sendspace VNET(rip6_sendspace) -#define V_rip6_recvspace VNET(rip6_recvspace) -#define V_icmp6errppslim VNET(icmp6errppslim) -#define V_icmp6_nodeinfo VNET(icmp6_nodeinfo) -#define V_udp6_sendspace VNET(udp6_sendspace) -#define V_udp6_recvspace VNET(udp6_recvspace) + +VNET_DEFINE(struct pfil_head, inet6_pfil_hook); + +VNET_DEFINE(struct ip6stat, ip6stat); struct rwlock in6_ifaddr_lock; RW_SYSINIT(in6_ifaddr_lock, &in6_ifaddr_lock, "in6_ifaddr_lock"); 
-VNET_DEFINE (struct pfil_head, inet6_pfil_hook); - static void ip6_init2(void *); static struct ip6aux *ip6_setdstifaddr(struct mbuf *, struct in6_ifaddr *); static int ip6_hopopts_input(u_int32_t *, u_int32_t *, struct mbuf **, int *); @@ -171,82 +152,11 @@ ip6_init(void) struct ip6protosw *pr; int i; - V_in6_maxmtu = 0; -#ifdef IP6_AUTO_LINKLOCAL - V_ip6_auto_linklocal = IP6_AUTO_LINKLOCAL; -#else - V_ip6_auto_linklocal = 1; /* enabled by default */ -#endif TUNABLE_INT_FETCH("net.inet6.ip6.auto_linklocal", &V_ip6_auto_linklocal); -#ifndef IPV6FORWARDING -#ifdef GATEWAY6 -#define IPV6FORWARDING 1 /* forward IP6 packets not for us */ -#else -#define IPV6FORWARDING 0 /* don't forward IP6 packets not for us */ -#endif /* GATEWAY6 */ -#endif /* !IPV6FORWARDING */ - -#ifndef IPV6_SENDREDIRECTS -#define IPV6_SENDREDIRECTS 1 -#endif - - V_ip6_forwarding = IPV6FORWARDING; /* act as router? */ - V_ip6_sendredirects = IPV6_SENDREDIRECTS; - V_ip6_defhlim = IPV6_DEFHLIM; - V_ip6_defmcasthlim = IPV6_DEFAULT_MULTICAST_HOPS; - V_ip6_accept_rtadv = 0; - V_ip6_log_interval = 5; - V_ip6_hdrnestlimit = 15; /* How many header options will we process? */ - V_ip6_dad_count = 1; /* DupAddrDetectionTransmits */ - V_ip6_auto_flowlabel = 1; - V_ip6_use_deprecated = 1;/* allow deprecated addr (RFC2462 5.5.4) */ - V_ip6_rr_prune = 5; /* router renumbering prefix - * walk list every 5 sec. */ - V_ip6_mcast_pmtu = 0; /* enable pMTU discovery for multicast? */ - V_ip6_v6only = 1; - V_ip6_keepfaith = 0; - V_ip6_log_time = (time_t)0L; -#ifdef IPSTEALTH - V_ip6stealth = 0; -#endif - V_nd6_onlink_ns_rfc4861 = 0; /* allow 'on-link' nd6 NS (RFC 4861) */ - - V_pmtu_expire = 60*10; - V_pmtu_probe = 60*2; - - /* raw IP6 parameters */ - /* - * Nominal space allocated to a raw ip socket. 
- */ -#define RIPV6SNDQ 8192 -#define RIPV6RCVQ 8192 - V_rip6_sendspace = RIPV6SNDQ; - V_rip6_recvspace = RIPV6RCVQ; - - /* ICMPV6 parameters */ - V_icmp6_rediraccept = 1; /* accept and process redirects */ - V_icmp6_redirtimeout = 10 * 60; /* 10 minutes */ - V_icmp6errppslim = 100; /* 100pps */ - /* control how to respond to NI queries */ - V_icmp6_nodeinfo = (ICMP6_NODEINFO_FQDNOK|ICMP6_NODEINFO_NODEADDROK); - - /* UDP on IP6 parameters */ - V_udp6_sendspace = 9216; /* really max datagram size */ - V_udp6_recvspace = 40 * (1024 + sizeof(struct sockaddr_in6)); - /* 40 1K datagrams */ - V_dad_init = 0; - TAILQ_INIT(&V_in6_ifaddrhead); - scope6_init(); - addrsel_policy_init(); - nd6_init(); - frag6_init(); - - V_ip6_desync_factor = arc4random() % MAX_TEMP_DESYNC_FACTOR; - /* Initialize packet filter hooks. */ V_inet6_pfil_hook.ph_type = PFIL_TYPE_AF; V_inet6_pfil_hook.ph_af = AF_INET6; @@ -254,6 +164,13 @@ ip6_init(void) printf("%s: WARNING: unable to register pfil hook, " "error %d\n", __func__, i); + scope6_init(); + addrsel_policy_init(); + nd6_init(); + frag6_init(); + + V_ip6_desync_factor = arc4random() % MAX_TEMP_DESYNC_FACTOR; + /* Skip global initialization stuff for non-default instances. 
*/ if (!IS_DEFAULT_VNET(curvnet)) return; diff --git a/sys/netinet6/ip6_mroute.c b/sys/netinet6/ip6_mroute.c index 54960620d28..0c1ff78bbda 100644 --- a/sys/netinet6/ip6_mroute.c +++ b/sys/netinet6/ip6_mroute.c @@ -130,7 +130,6 @@ static MALLOC_DEFINE(M_MRTABLE6, "mf6c", "multicast forwarding cache entry"); static int ip6_mdq(struct mbuf *, struct ifnet *, struct mf6c *); static void phyint_send(struct ip6_hdr *, struct mif6 *, struct mbuf *); -static void pim6_init(void); static int register_send(struct ip6_hdr *, struct mif6 *, struct mbuf *); static int set_pim6(int *); static int socket_send(struct socket *, struct mbuf *, @@ -148,12 +147,11 @@ static const struct ip6protosw in6_pim_protosw = { .pr_input = pim6_input, .pr_output = rip6_output, .pr_ctloutput = rip6_ctloutput, - .pr_init = pim6_init, .pr_usrreqs = &rip6_usrreqs }; static int pim6_encapcheck(const struct mbuf *, int, int, void *); -static VNET_DEFINE(int, ip6_mrouter_ver); +static VNET_DEFINE(int, ip6_mrouter_ver) = 0; #define V_ip6_mrouter_ver VNET(ip6_mrouter_ver) SYSCTL_DECL(_net_inet6); @@ -212,7 +210,7 @@ static struct mtx mif6_mtx; #define MIF6_LOCK_DESTROY() mtx_destroy(&mif6_mtx) #ifdef MRT6DEBUG -static VNET_DEFINE(u_int, mrt6debug); /* debug level */ +static VNET_DEFINE(u_int, mrt6debug) = 0; /* debug level */ #define V_mrt6debug VNET(mrt6debug) #define DEBUG_MFC 0x02 #define DEBUG_FORWARD 0x04 @@ -338,15 +336,6 @@ int X_ip6_mrouter_set(struct socket *, struct sockopt *); int X_ip6_mrouter_get(struct socket *, struct sockopt *); int X_mrt6_ioctl(u_long, caddr_t); -static void -pim6_init(void) -{ - -#ifdef MRT6DEBUG - V_mrt6debug = 0; /* debug level */ -#endif -} - /* * Handle MRT setsockopt commands to modify the multicast routing tables. 
*/ @@ -533,11 +522,7 @@ static int ip6_mrouter_init(struct socket *so, int v, int cmd) { - V_ip6_mrouter_ver = 0; - #ifdef MRT6DEBUG - V_mrt6debug = 0; - if (V_mrt6debug) log(LOG_DEBUG, "ip6_mrouter_init: so_type = %d, pr_protocol = %d\n", diff --git a/sys/netinet6/ip6_var.h b/sys/netinet6/ip6_var.h index a0a0f3a18d8..3e91a79d0b9 100644 --- a/sys/netinet6/ip6_var.h +++ b/sys/netinet6/ip6_var.h @@ -295,12 +295,20 @@ VNET_DECLARE(struct ip6stat, ip6stat); /* statistics */ VNET_DECLARE(int, ip6_defhlim); /* default hop limit */ VNET_DECLARE(int, ip6_defmcasthlim); /* default multicast hop limit */ VNET_DECLARE(int, ip6_forwarding); /* act as router? */ -VNET_DECLARE(int, ip6_gif_hlim); /* Hop limit for gif encap packet */ VNET_DECLARE(int, ip6_use_deprecated); /* allow deprecated addr as source */ VNET_DECLARE(int, ip6_rr_prune); /* router renumbering prefix * walk list every 5 sec. */ VNET_DECLARE(int, ip6_mcast_pmtu); /* enable pMTU discovery for multicast? */ VNET_DECLARE(int, ip6_v6only); +#define V_ip6stat VNET(ip6stat) +#define V_ip6_defhlim VNET(ip6_defhlim) +#define V_ip6_defmcasthlim VNET(ip6_defmcasthlim) +#define V_ip6_forwarding VNET(ip6_forwarding) +#define V_ip6_use_deprecated VNET(ip6_use_deprecated) +#define V_ip6_rr_prune VNET(ip6_rr_prune) +#define V_ip6_mcast_pmtu VNET(ip6_mcast_pmtu) +#define V_ip6_v6only VNET(ip6_v6only) + VNET_DECLARE(struct socket *, ip6_mrouter); /* multicast routing daemon */ VNET_DECLARE(int, ip6_sendredirects); /* send IP redirects when forwarding? 
*/ VNET_DECLARE(int, ip6_maxfragpackets); /* Maximum packets in reassembly @@ -314,31 +322,6 @@ VNET_DECLARE(time_t, ip6_log_time); VNET_DECLARE(int, ip6_hdrnestlimit); /* upper limit of # of extension * headers */ VNET_DECLARE(int, ip6_dad_count); /* DupAddrDetectionTransmits */ - -VNET_DECLARE(int, ip6_auto_flowlabel); -VNET_DECLARE(int, ip6_auto_linklocal); - -VNET_DECLARE(int, ip6_use_tempaddr); /* Whether to use temporary addresses */ -VNET_DECLARE(int, ip6_prefer_tempaddr); /* Whether to prefer temporary - * addresses in the source address - * selection */ - -#ifdef IPSTEALTH -VNET_DECLARE(int, ip6stealth); -#endif - -VNET_DECLARE(int, ip6_use_defzone); /* Whether to use the default scope - * zone when unspecified */ - -#define V_ip6stat VNET(ip6stat) -#define V_ip6_defhlim VNET(ip6_defhlim) -#define V_ip6_defmcasthlim VNET(ip6_defmcasthlim) -#define V_ip6_forwarding VNET(ip6_forwarding) -#define V_ip6_gif_hlim VNET(ip6_gif_hlim) -#define V_ip6_use_deprecated VNET(ip6_use_deprecated) -#define V_ip6_rr_prune VNET(ip6_rr_prune) -#define V_ip6_mcast_pmtu VNET(ip6_mcast_pmtu) -#define V_ip6_v6only VNET(ip6_v6only) #define V_ip6_mrouter VNET(ip6_mrouter) #define V_ip6_sendredirects VNET(ip6_sendredirects) #define V_ip6_maxfragpackets VNET(ip6_maxfragpackets) @@ -349,17 +332,29 @@ VNET_DECLARE(int, ip6_use_defzone); /* Whether to use the default scope #define V_ip6_log_time VNET(ip6_log_time) #define V_ip6_hdrnestlimit VNET(ip6_hdrnestlimit) #define V_ip6_dad_count VNET(ip6_dad_count) + +VNET_DECLARE(int, ip6_auto_flowlabel); +VNET_DECLARE(int, ip6_auto_linklocal); #define V_ip6_auto_flowlabel VNET(ip6_auto_flowlabel) #define V_ip6_auto_linklocal VNET(ip6_auto_linklocal) + +VNET_DECLARE(int, ip6_use_tempaddr); /* Whether to use temporary addresses */ +VNET_DECLARE(int, ip6_prefer_tempaddr); /* Whether to prefer temporary + * addresses in the source address + * selection */ #define V_ip6_use_tempaddr VNET(ip6_use_tempaddr) #define V_ip6_prefer_tempaddr 
VNET(ip6_prefer_tempaddr) -#ifdef IPSTEALTH -#define V_ip6stealth VNET(ip6stealth) -#endif + +VNET_DECLARE(int, ip6_use_defzone); /* Whether to use the default scope + * zone when unspecified */ #define V_ip6_use_defzone VNET(ip6_use_defzone) VNET_DECLARE (struct pfil_head, inet6_pfil_hook); /* packet filter hooks */ #define V_inet6_pfil_hook VNET(inet6_pfil_hook) +#ifdef IPSTEALTH +VNET_DECLARE(int, ip6stealth); +#define V_ip6stealth VNET(ip6stealth) +#endif extern struct pr_usrreqs rip6_usrreqs; struct sockopt; diff --git a/sys/netinet6/nd6.c b/sys/netinet6/nd6.c index a0ef2040de1..7a54226cc63 100644 --- a/sys/netinet6/nd6.c +++ b/sys/netinet6/nd6.c @@ -82,21 +82,31 @@ __FBSDID("$FreeBSD$"); #define SIN6(s) ((struct sockaddr_in6 *)s) -VNET_DEFINE(int, nd6_prune); -VNET_DEFINE(int, nd6_delay); -VNET_DEFINE(int, nd6_umaxtries); -VNET_DEFINE(int, nd6_mmaxtries); -VNET_DEFINE(int, nd6_useloopback); -VNET_DEFINE(int, nd6_gctimer); +/* timer values */ +VNET_DEFINE(int, nd6_prune) = 1; /* walk list every 1 seconds */ +VNET_DEFINE(int, nd6_delay) = 5; /* delay first probe time 5 second */ +VNET_DEFINE(int, nd6_umaxtries) = 3; /* maximum unicast query */ +VNET_DEFINE(int, nd6_mmaxtries) = 3; /* maximum multicast query */ +VNET_DEFINE(int, nd6_useloopback) = 1; /* use loopback interface for + * local traffic */ +VNET_DEFINE(int, nd6_gctimer) = (60 * 60 * 24); /* 1 day: garbage + * collection timer */ /* preventing too many loops in ND option parsing */ -static VNET_DEFINE(int, nd6_maxndopt); -VNET_DEFINE(int, nd6_maxnudhint); -static VNET_DEFINE(int, nd6_maxqueuelen); +static VNET_DEFINE(int, nd6_maxndopt) = 10; /* max # of ND options allowed */ + +VNET_DEFINE(int, nd6_maxnudhint) = 0; /* max # of subsequent upper + * layer hints */ +static VNET_DEFINE(int, nd6_maxqueuelen) = 1; /* max pkts cached in unresolved + * ND entries */ #define V_nd6_maxndopt VNET(nd6_maxndopt) #define V_nd6_maxqueuelen VNET(nd6_maxqueuelen) -VNET_DEFINE(int, nd6_debug); +#ifdef ND6_DEBUG 
+VNET_DEFINE(int, nd6_debug) = 1; +#else +VNET_DEFINE(int, nd6_debug) = 0; +#endif /* for debugging? */ #if 0 @@ -106,7 +116,7 @@ static int nd6_inuse, nd6_allocated; VNET_DEFINE(struct nd_drhead, nd_defrouter); VNET_DEFINE(struct nd_prhead, nd_prefix); -VNET_DEFINE(int, nd6_recalc_reachtm_interval); +VNET_DEFINE(int, nd6_recalc_reachtm_interval) = ND6_RECALC_REACHTM_INTERVAL; #define V_nd6_recalc_reachtm_interval VNET(nd6_recalc_reachtm_interval) static struct sockaddr_in6 all1_sa; @@ -125,56 +135,13 @@ static VNET_DEFINE(struct callout, nd6_slowtimo_ch); VNET_DEFINE(struct callout, nd6_timer_ch); -VNET_DECLARE(int, dad_ignore_ns); -VNET_DECLARE(int, dad_maxtry); -#define V_dad_ignore_ns VNET(dad_ignore_ns) -#define V_dad_maxtry VNET(dad_maxtry) - void nd6_init(void) { int i; - V_nd6_prune = 1; /* walk list every 1 seconds */ - V_nd6_delay = 5; /* delay first probe time 5 second */ - V_nd6_umaxtries = 3; /* maximum unicast query */ - V_nd6_mmaxtries = 3; /* maximum multicast query */ - V_nd6_useloopback = 1; /* use loopback interface for local traffic */ - V_nd6_gctimer = (60 * 60 * 24); /* 1 day: garbage collection timer */ - - /* preventing too many loops in ND option parsing */ - V_nd6_maxndopt = 10; /* max # of ND options allowed */ - - V_nd6_maxnudhint = 0; /* max # of subsequent upper layer hints */ - V_nd6_maxqueuelen = 1; /* max pkts cached in unresolved ND entries */ - -#ifdef ND6_DEBUG - V_nd6_debug = 1; -#else - V_nd6_debug = 0; -#endif - - V_nd6_recalc_reachtm_interval = ND6_RECALC_REACHTM_INTERVAL; - - V_dad_ignore_ns = 0; /* ignore NS in DAD - specwise incorrect*/ - V_dad_maxtry = 15; /* max # of *tries* to transmit DAD packet */ - - /* - * XXX just to get this to compile KMM - */ -#ifdef notyet - V_llinfo_nd6.ln_next = &V_llinfo_nd6; - V_llinfo_nd6.ln_prev = &V_llinfo_nd6; -#endif LIST_INIT(&V_nd_prefix); - V_ip6_use_tempaddr = 0; - V_ip6_temp_preferred_lifetime = DEF_TEMP_PREFERRED_LIFETIME; - V_ip6_temp_valid_lifetime = DEF_TEMP_VALID_LIFETIME; - 
V_ip6_temp_regen_advance = TEMPADDR_REGEN_ADVANCE; - - V_ip6_desync_factor = 0; - all1_sa.sin6_family = AF_INET6; all1_sa.sin6_len = sizeof(struct sockaddr_in6); for (i = 0; i < sizeof(all1_sa.sin6_addr); i++) @@ -182,13 +149,13 @@ nd6_init(void) /* initialization of the default router list */ TAILQ_INIT(&V_nd_defrouter); + /* start timer */ callout_init(&V_nd6_slowtimo_ch, 0); callout_reset(&V_nd6_slowtimo_ch, ND6_SLOWTIMER_INTERVAL * hz, nd6_slowtimo, curvnet); } - #ifdef VIMAGE void nd6_destroy() diff --git a/sys/netinet6/nd6.h b/sys/netinet6/nd6.h index ff8faa2acfa..abcfcb79803 100644 --- a/sys/netinet6/nd6.h +++ b/sys/netinet6/nd6.h @@ -330,8 +330,6 @@ VNET_DECLARE(struct nd_drhead, nd_defrouter); VNET_DECLARE(struct nd_prhead, nd_prefix); VNET_DECLARE(int, nd6_debug); VNET_DECLARE(int, nd6_onlink_ns_rfc4861); -VNET_DECLARE(struct callout, nd6_timer_ch); - #define V_nd6_prune VNET(nd6_prune) #define V_nd6_delay VNET(nd6_delay) #define V_nd6_umaxtries VNET(nd6_umaxtries) @@ -343,6 +341,10 @@ VNET_DECLARE(struct callout, nd6_timer_ch); #define V_nd_prefix VNET(nd_prefix) #define V_nd6_debug VNET(nd6_debug) #define V_nd6_onlink_ns_rfc4861 VNET(nd6_onlink_ns_rfc4861) + +#define nd6log(x) do { if (V_nd6_debug) log x; } while (/*CONSTCOND*/ 0) + +VNET_DECLARE(struct callout, nd6_timer_ch); #define V_nd6_timer_ch VNET(nd6_timer_ch) /* nd6_rtr.c */ @@ -351,15 +353,12 @@ VNET_DECLARE(int, ip6_desync_factor); /* seconds */ VNET_DECLARE(u_int32_t, ip6_temp_preferred_lifetime); /* seconds */ VNET_DECLARE(u_int32_t, ip6_temp_valid_lifetime); /* seconds */ VNET_DECLARE(int, ip6_temp_regen_advance); /* seconds */ - #define V_nd6_defifindex VNET(nd6_defifindex) #define V_ip6_desync_factor VNET(ip6_desync_factor) #define V_ip6_temp_preferred_lifetime VNET(ip6_temp_preferred_lifetime) #define V_ip6_temp_valid_lifetime VNET(ip6_temp_valid_lifetime) #define V_ip6_temp_regen_advance VNET(ip6_temp_regen_advance) -#define nd6log(x) do { if (V_nd6_debug) log x; } while (/*CONSTCOND*/ 
0) - union nd_opts { struct nd_opt_hdr *nd_opt_array[8]; /* max = target address list */ struct { diff --git a/sys/netinet6/nd6_nbr.c b/sys/netinet6/nd6_nbr.c index f9061d36f8f..a50925be5dc 100644 --- a/sys/netinet6/nd6_nbr.c +++ b/sys/netinet6/nd6_nbr.c @@ -89,9 +89,8 @@ static void nd6_dad_ns_output(struct dadq *, struct ifaddr *); static void nd6_dad_ns_input(struct ifaddr *); static void nd6_dad_na_input(struct ifaddr *); -VNET_DEFINE(int, dad_ignore_ns); -VNET_DEFINE(int, dad_maxtry); - +VNET_DEFINE(int, dad_ignore_ns) = 0; /* ignore NS in DAD - specwise incorrect*/ +VNET_DEFINE(int, dad_maxtry) = 15; /* max # of *tries* to transmit DAD packet */ #define V_dad_ignore_ns VNET(dad_ignore_ns) #define V_dad_maxtry VNET(dad_maxtry) @@ -1124,9 +1123,8 @@ struct dadq { }; static VNET_DEFINE(TAILQ_HEAD(, dadq), dadq); +VNET_DEFINE(int, dad_init) = 0; #define V_dadq VNET(dadq) - -VNET_DEFINE(int, dad_init); #define V_dad_init VNET(dad_init) static struct dadq * diff --git a/sys/netinet6/nd6_rtr.c b/sys/netinet6/nd6_rtr.c index 74f4954bf0f..19ec989fb22 100644 --- a/sys/netinet6/nd6_rtr.c +++ b/sys/netinet6/nd6_rtr.c @@ -90,14 +90,16 @@ VNET_DECLARE(int, nd6_recalc_reachtm_interval); #define V_nd6_recalc_reachtm_interval VNET(nd6_recalc_reachtm_interval) static VNET_DEFINE(struct ifnet *, nd6_defifp); +VNET_DEFINE(int, nd6_defifindex); #define V_nd6_defifp VNET(nd6_defifp) -VNET_DEFINE(int, nd6_defifindex); -VNET_DEFINE(int, ip6_use_tempaddr); +VNET_DEFINE(int, ip6_use_tempaddr) = 0; + VNET_DEFINE(int, ip6_desync_factor); -VNET_DEFINE(u_int32_t, ip6_temp_preferred_lifetime); -VNET_DEFINE(u_int32_t, ip6_temp_valid_lifetime); -VNET_DEFINE(int, ip6_temp_regen_advance); +VNET_DEFINE(u_int32_t, ip6_temp_preferred_lifetime) = DEF_TEMP_PREFERRED_LIFETIME; +VNET_DEFINE(u_int32_t, ip6_temp_valid_lifetime) = DEF_TEMP_VALID_LIFETIME; + +VNET_DEFINE(int, ip6_temp_regen_advance) = TEMPADDR_REGEN_ADVANCE; /* RTPREF_MEDIUM has to be 0! 
*/ #define RTPREF_HIGH 1 diff --git a/sys/netinet6/raw_ip6.c b/sys/netinet6/raw_ip6.c index 9f1236ab1bd..939aa6a5f98 100644 --- a/sys/netinet6/raw_ip6.c +++ b/sys/netinet6/raw_ip6.c @@ -119,11 +119,11 @@ VNET_DECLARE(struct inpcbinfo, ripcbinfo); #define V_ripcb VNET(ripcb) #define V_ripcbinfo VNET(ripcbinfo) -VNET_DEFINE(struct rip6stat, rip6stat); - extern u_long rip_sendspace; extern u_long rip_recvspace; +VNET_DEFINE(struct rip6stat, rip6stat); + /* * Hooks for multicast routing. They all default to NULL, so leave them not * initialized and rely on BSS being set to 0. @@ -465,7 +465,7 @@ rip6_output(m, va_alist) &oifp, &in6a); if (error) goto bad; - error = prison_get_ip6(in6p->inp_cred, &in6a); + error = prison_check_ip6(in6p->inp_cred, &in6a); if (error != 0) goto bad; ip6->ip6_src = in6a; diff --git a/sys/netinet6/scope6.c b/sys/netinet6/scope6.c index cced5e304ac..8189d87c5bc 100644 --- a/sys/netinet6/scope6.c +++ b/sys/netinet6/scope6.c @@ -50,6 +50,11 @@ __FBSDID("$FreeBSD$"); #include #include +#ifdef ENABLE_DEFAULT_SCOPE +VNET_DEFINE(int, ip6_use_defzone) = 1; +#else +VNET_DEFINE(int, ip6_use_defzone) = 0; +#endif /* * The scope6_lock protects the global sid default stored in @@ -62,8 +67,6 @@ static struct mtx scope6_lock; #define SCOPE6_LOCK_ASSERT() mtx_assert(&scope6_lock, MA_OWNED) static VNET_DEFINE(struct scope6_id, sid_default); -VNET_DEFINE(int, ip6_use_defzone); - #define V_sid_default VNET(sid_default) #define SID(ifp) \ @@ -73,11 +76,6 @@ void scope6_init(void) { -#ifdef ENABLE_DEFAULT_SCOPE - V_ip6_use_defzone = 1; -#else - V_ip6_use_defzone = 0; -#endif bzero(&V_sid_default, sizeof(V_sid_default)); if (!IS_DEFAULT_VNET(curvnet)) diff --git a/sys/netipsec/ah_var.h b/sys/netipsec/ah_var.h index b031a987b60..6145dba9997 100644 --- a/sys/netipsec/ah_var.h +++ b/sys/netipsec/ah_var.h @@ -72,10 +72,11 @@ struct ahstat { #ifdef _KERNEL VNET_DECLARE(int, ah_enable); -#define V_ah_enable VNET(ah_enable) VNET_DECLARE(int, ah_cleartos); -#define 
V_ah_cleartos VNET(ah_cleartos) VNET_DECLARE(struct ahstat, ahstat); + +#define V_ah_enable VNET(ah_enable) +#define V_ah_cleartos VNET(ah_cleartos) #define V_ahstat VNET(ahstat) #endif /* _KERNEL */ #endif /*_NETIPSEC_AH_VAR_H_*/ diff --git a/sys/netipsec/esp_var.h b/sys/netipsec/esp_var.h index dc1aad4ca44..477dcbf5b39 100644 --- a/sys/netipsec/esp_var.h +++ b/sys/netipsec/esp_var.h @@ -73,8 +73,9 @@ struct espstat { #ifdef _KERNEL VNET_DECLARE(int, esp_enable); -#define V_esp_enable VNET(esp_enable) VNET_DECLARE(struct espstat, espstat); + +#define V_esp_enable VNET(esp_enable) #define V_espstat VNET(espstat) #endif /* _KERNEL */ #endif /*_NETIPSEC_ESP_VAR_H_*/ diff --git a/sys/netipsec/ipcomp_var.h b/sys/netipsec/ipcomp_var.h index 78f18488221..c99a3be7d90 100644 --- a/sys/netipsec/ipcomp_var.h +++ b/sys/netipsec/ipcomp_var.h @@ -66,8 +66,9 @@ struct ipcompstat { #ifdef _KERNEL VNET_DECLARE(int, ipcomp_enable); -#define V_ipcomp_enable VNET(ipcomp_enable) VNET_DECLARE(struct ipcompstat, ipcompstat); + +#define V_ipcomp_enable VNET(ipcomp_enable) #define V_ipcompstat VNET(ipcompstat) #endif /* _KERNEL */ #endif /*_NETIPSEC_IPCOMP_VAR_H_*/ diff --git a/sys/netipsec/ipip_var.h b/sys/netipsec/ipip_var.h index a6e33e8b435..3c8c3974d60 100644 --- a/sys/netipsec/ipip_var.h +++ b/sys/netipsec/ipip_var.h @@ -60,8 +60,9 @@ struct ipipstat #ifdef _KERNEL VNET_DECLARE(int, ipip_allow); -#define V_ipip_allow VNET(ipip_allow) VNET_DECLARE(struct ipipstat, ipipstat); + +#define V_ipip_allow VNET(ipip_allow) #define V_ipipstat VNET(ipipstat) #endif /* _KERNEL */ #endif /* _NETINET_IPIP_H_ */ diff --git a/sys/netipsec/ipsec.c b/sys/netipsec/ipsec.c index 5ee4bbb8f40..1f48dd6c3da 100644 --- a/sys/netipsec/ipsec.c +++ b/sys/netipsec/ipsec.c @@ -102,6 +102,7 @@ VNET_DEFINE(int, ipsec_debug) = 1; #else VNET_DEFINE(int, ipsec_debug) = 0; #endif + /* NB: name changed so netstat doesn't use it. 
*/ VNET_DEFINE(struct ipsecstat, ipsec4stat); VNET_DEFINE(int, ip4_ah_offsetmask) = 0; /* maybe IP_DF? */ diff --git a/sys/netipsec/ipsec.h b/sys/netipsec/ipsec.h index 92539b9cf32..090255aafb2 100644 --- a/sys/netipsec/ipsec.h +++ b/sys/netipsec/ipsec.h @@ -334,39 +334,39 @@ struct ipsec_history { VNET_DECLARE(int, ipsec_debug); #define V_ipsec_debug VNET(ipsec_debug) -VNET_DECLARE(struct ipsecstat, ipsec4stat); -#define V_ipsec4stat VNET(ipsec4stat) -VNET_DECLARE(int, ip4_ah_offsetmask); -#define V_ip4_ah_offsetmask VNET(ip4_ah_offsetmask) -VNET_DECLARE(int, ip4_ipsec_dfbit); -#define V_ip4_ipsec_dfbit VNET(ip4_ipsec_dfbit) -VNET_DECLARE(int, ip4_esp_trans_deflev); -#define V_ip4_esp_trans_deflev VNET(ip4_esp_trans_deflev) -VNET_DECLARE(int, ip4_esp_net_deflev); -#define V_ip4_esp_net_deflev VNET(ip4_esp_net_deflev) -VNET_DECLARE(int, ip4_ah_trans_deflev); -#define V_ip4_ah_trans_deflev VNET(ip4_ah_trans_deflev) -VNET_DECLARE(int, ip4_ah_net_deflev); -#define V_ip4_ah_net_deflev VNET(ip4_ah_net_deflev) -VNET_DECLARE(struct secpolicy, ip4_def_policy); -#define V_ip4_def_policy VNET(ip4_def_policy) -VNET_DECLARE(int, ip4_ipsec_ecn); -#define V_ip4_ipsec_ecn VNET(ip4_ipsec_ecn) -VNET_DECLARE(int, ip4_esp_randpad); -#define V_ip4_esp_randpad VNET(ip4_esp_randpad) - -VNET_DECLARE(int, crypto_support); -#define V_crypto_support VNET(crypto_support) - -extern int ip4_ah_cleartos; #ifdef REGRESSION VNET_DECLARE(int, ipsec_replay); -#define V_ipsec_replay VNET(ipsec_replay) VNET_DECLARE(int, ipsec_integrity); + +#define V_ipsec_replay VNET(ipsec_replay) #define V_ipsec_integrity VNET(ipsec_integrity) #endif +VNET_DECLARE(struct ipsecstat, ipsec4stat); +VNET_DECLARE(struct secpolicy, ip4_def_policy); +VNET_DECLARE(int, ip4_esp_trans_deflev); +VNET_DECLARE(int, ip4_esp_net_deflev); +VNET_DECLARE(int, ip4_ah_trans_deflev); +VNET_DECLARE(int, ip4_ah_net_deflev); +VNET_DECLARE(int, ip4_ah_offsetmask); +VNET_DECLARE(int, ip4_ipsec_dfbit); +VNET_DECLARE(int, ip4_ipsec_ecn); 
+VNET_DECLARE(int, ip4_esp_randpad); +VNET_DECLARE(int, crypto_support); + +#define V_ipsec4stat VNET(ipsec4stat) +#define V_ip4_def_policy VNET(ip4_def_policy) +#define V_ip4_esp_trans_deflev VNET(ip4_esp_trans_deflev) +#define V_ip4_esp_net_deflev VNET(ip4_esp_net_deflev) +#define V_ip4_ah_trans_deflev VNET(ip4_ah_trans_deflev) +#define V_ip4_ah_net_deflev VNET(ip4_ah_net_deflev) +#define V_ip4_ah_offsetmask VNET(ip4_ah_offsetmask) +#define V_ip4_ipsec_dfbit VNET(ip4_ipsec_dfbit) +#define V_ip4_ipsec_ecn VNET(ip4_ipsec_ecn) +#define V_ip4_esp_randpad VNET(ip4_esp_randpad) +#define V_crypto_support VNET(crypto_support) + #define ipseclog(x) do { if (V_ipsec_debug) log x; } while (0) /* for openbsd compatibility */ #define DPRINTF(x) do { if (V_ipsec_debug) printf x; } while (0) diff --git a/sys/netipsec/ipsec6.h b/sys/netipsec/ipsec6.h index 030113f1413..c004220e2bf 100644 --- a/sys/netipsec/ipsec6.h +++ b/sys/netipsec/ipsec6.h @@ -42,16 +42,17 @@ #ifdef _KERNEL VNET_DECLARE(struct ipsecstat, ipsec6stat); -#define V_ipsec6stat VNET(ipsec6stat) VNET_DECLARE(int, ip6_esp_trans_deflev); -#define V_ip6_esp_trans_deflev VNET(ip6_esp_trans_deflev) VNET_DECLARE(int, ip6_esp_net_deflev); -#define V_ip6_esp_net_deflev VNET(ip6_esp_net_deflev) VNET_DECLARE(int, ip6_ah_trans_deflev); -#define V_ip6_ah_trans_deflev VNET(ip6_ah_trans_deflev) VNET_DECLARE(int, ip6_ah_net_deflev); -#define V_ip6_ah_net_deflev VNET(ip6_ah_net_deflev) VNET_DECLARE(int, ip6_ipsec_ecn); + +#define V_ipsec6stat VNET(ipsec6stat) +#define V_ip6_esp_trans_deflev VNET(ip6_esp_trans_deflev) +#define V_ip6_esp_net_deflev VNET(ip6_esp_net_deflev) +#define V_ip6_ah_trans_deflev VNET(ip6_ah_trans_deflev) +#define V_ip6_ah_net_deflev VNET(ip6_ah_net_deflev) #define V_ip6_ipsec_ecn VNET(ip6_ipsec_ecn) struct inpcb; diff --git a/sys/netipsec/key.c b/sys/netipsec/key.c index e3a61aced00..d00489db907 100644 --- a/sys/netipsec/key.c +++ b/sys/netipsec/key.c @@ -114,27 +114,27 @@ VNET_DEFINE(u_int32_t, 
key_debug_level) = 0; static VNET_DEFINE(u_int, key_spi_trycnt) = 1000; -#define V_key_spi_trycnt VNET(key_spi_trycnt) static VNET_DEFINE(u_int32_t, key_spi_minval) = 0x100; -#define V_key_spi_minval VNET(key_spi_minval) static VNET_DEFINE(u_int32_t, key_spi_maxval) = 0x0fffffff; /* XXX */ -#define V_key_spi_maxval VNET(key_spi_maxval) static VNET_DEFINE(u_int32_t, policy_id) = 0; -#define V_policy_id VNET(policy_id) /*interval to initialize randseed,1(m)*/ static VNET_DEFINE(u_int, key_int_random) = 60; -#define V_key_int_random VNET(key_int_random) /* interval to expire acquiring, 30(s)*/ static VNET_DEFINE(u_int, key_larval_lifetime) = 30; -#define V_key_larval_lifetime VNET(key_larval_lifetime) /* counter for blocking SADB_ACQUIRE.*/ static VNET_DEFINE(int, key_blockacq_count) = 10; -#define V_key_blockacq_count VNET(key_blockacq_count) /* lifetime for blocking SADB_ACQUIRE.*/ static VNET_DEFINE(int, key_blockacq_lifetime) = 20; -#define V_key_blockacq_lifetime VNET(key_blockacq_lifetime) /* preferred old sa rather than new sa.*/ static VNET_DEFINE(int, key_preferred_oldsa) = 1; +#define V_key_spi_trycnt VNET(key_spi_trycnt) +#define V_key_spi_minval VNET(key_spi_minval) +#define V_key_spi_maxval VNET(key_spi_maxval) +#define V_policy_id VNET(policy_id) +#define V_key_int_random VNET(key_int_random) +#define V_key_larval_lifetime VNET(key_larval_lifetime) +#define V_key_blockacq_count VNET(key_blockacq_count) +#define V_key_blockacq_lifetime VNET(key_blockacq_lifetime) #define V_key_preferred_oldsa VNET(key_preferred_oldsa) static VNET_DEFINE(u_int32_t, acq_seq) = 0; @@ -270,10 +270,11 @@ static const int maxsize[] = { }; static VNET_DEFINE(int, ipsec_esp_keymin) = 256; -#define V_ipsec_esp_keymin VNET(ipsec_esp_keymin) static VNET_DEFINE(int, ipsec_esp_auth) = 0; -#define V_ipsec_esp_auth VNET(ipsec_esp_auth) static VNET_DEFINE(int, ipsec_ah_keymin) = 128; + +#define V_ipsec_esp_keymin VNET(ipsec_esp_keymin) +#define V_ipsec_esp_auth VNET(ipsec_esp_auth) 
#define V_ipsec_ah_keymin VNET(ipsec_ah_keymin) #ifdef SYSCTL_DECL @@ -1882,7 +1883,9 @@ key_spdadd(so, m, mhp) newsp = key_getsp(&spidx); if (mhp->msg->sadb_msg_type == SADB_X_SPDUPDATE) { if (newsp) { + SPTREE_LOCK(); newsp->state = IPSEC_SPSTATE_DEAD; + SPTREE_UNLOCK(); KEY_FREESP(&newsp); } } else { @@ -2117,7 +2120,9 @@ key_spddelete(so, m, mhp) /* save policy id to buffer to be returned. */ xpl0->sadb_x_policy_id = sp->id; + SPTREE_LOCK(); sp->state = IPSEC_SPSTATE_DEAD; + SPTREE_UNLOCK(); KEY_FREESP(&sp); { @@ -2184,7 +2189,9 @@ key_spddelete2(so, m, mhp) return key_senderror(so, m, EINVAL); } + SPTREE_LOCK(); sp->state = IPSEC_SPSTATE_DEAD; + SPTREE_UNLOCK(); KEY_FREESP(&sp); { @@ -5149,12 +5156,6 @@ key_update(so, m, mhp) return key_senderror(so, m, error); } - /* check SA values to be mature. */ - if ((mhp->msg->sadb_msg_errno = key_mature(sav)) != 0) { - KEY_FREESAV(&sav); - return key_senderror(so, m, 0); - } - #ifdef IPSEC_NAT_T /* * Handle more NAT-T info if present, @@ -5181,6 +5182,12 @@ key_update(so, m, mhp) #endif #endif + /* check SA values to be mature. */ + if ((mhp->msg->sadb_msg_errno = key_mature(sav)) != 0) { + KEY_FREESAV(&sav); + return key_senderror(so, m, 0); + } + { struct mbuf *n; @@ -5415,12 +5422,6 @@ key_add(so, m, mhp) return key_senderror(so, m, error); } - /* check SA values to be mature. */ - if ((error = key_mature(newsav)) != 0) { - KEY_FREESAV(&newsav); - return key_senderror(so, m, error); - } - #ifdef IPSEC_NAT_T /* * Handle more NAT-T info if present, @@ -5440,6 +5441,12 @@ key_add(so, m, mhp) #endif #endif + /* check SA values to be mature. */ + if ((error = key_mature(newsav)) != 0) { + KEY_FREESAV(&newsav); + return key_senderror(so, m, error); + } + /* * don't call key_freesav() here, as we would like to keep the SA * in the database on success. 
diff --git a/sys/netipsec/xform_esp.c b/sys/netipsec/xform_esp.c index dbacd771475..6186b5961a5 100644 --- a/sys/netipsec/xform_esp.c +++ b/sys/netipsec/xform_esp.c @@ -85,8 +85,7 @@ SYSCTL_VNET_INT(_net_inet_esp, OID_AUTO, SYSCTL_VNET_STRUCT(_net_inet_esp, IPSECCTL_STATS, stats, CTLFLAG_RD, &VNET_NAME(espstat), espstat, ""); -/* max iv length over all algorithms */ -static VNET_DEFINE(int, esp_max_ivlen) = 0; +static VNET_DEFINE(int, esp_max_ivlen); /* max iv length over all algorithms */ #define V_esp_max_ivlen VNET(esp_max_ivlen) static int esp_input_cb(struct cryptop *op); diff --git a/sys/nfsclient/nfs_bio.c b/sys/nfsclient/nfs_bio.c index cec0220b7f1..e85fab8975d 100644 --- a/sys/nfsclient/nfs_bio.c +++ b/sys/nfsclient/nfs_bio.c @@ -45,8 +45,6 @@ __FBSDID("$FreeBSD$"); #include #include #include -#include -#include #include #include @@ -131,12 +129,13 @@ nfs_getpages(struct vop_getpages_args *ap) */ VM_OBJECT_LOCK(object); if (pages[ap->a_reqpage]->valid != 0) { - vm_page_lock_queues(); for (i = 0; i < npages; ++i) { - if (i != ap->a_reqpage) + if (i != ap->a_reqpage) { + vm_page_lock(pages[i]); vm_page_free(pages[i]); + vm_page_unlock(pages[i]); + } } - vm_page_unlock_queues(); VM_OBJECT_UNLOCK(object); return (0); } @@ -171,12 +170,13 @@ nfs_getpages(struct vop_getpages_args *ap) if (error && (uio.uio_resid == count)) { nfs_printf("nfs_getpages: error %d\n", error); VM_OBJECT_LOCK(object); - vm_page_lock_queues(); for (i = 0; i < npages; ++i) { - if (i != ap->a_reqpage) + if (i != ap->a_reqpage) { + vm_page_lock(pages[i]); vm_page_free(pages[i]); + vm_page_unlock(pages[i]); + } } - vm_page_unlock_queues(); VM_OBJECT_UNLOCK(object); return (VM_PAGER_ERROR); } @@ -189,7 +189,6 @@ nfs_getpages(struct vop_getpages_args *ap) size = count - uio.uio_resid; VM_OBJECT_LOCK(object); - vm_page_lock_queues(); for (i = 0, toff = 0; i < npages; i++, toff = nextoff) { vm_page_t m; nextoff = toff + PAGE_SIZE; @@ -232,17 +231,23 @@ nfs_getpages(struct vop_getpages_args *ap) 
* now tell them that it is ok to use. */ if (!error) { - if (m->oflags & VPO_WANTED) + if (m->oflags & VPO_WANTED) { + vm_page_lock(m); vm_page_activate(m); - else + vm_page_unlock(m); + } else { + vm_page_lock(m); vm_page_deactivate(m); + vm_page_unlock(m); + } vm_page_wakeup(m); } else { + vm_page_lock(m); vm_page_free(m); + vm_page_unlock(m); } } } - vm_page_unlock_queues(); VM_OBJECT_UNLOCK(object); return (0); } @@ -869,7 +874,6 @@ nfs_write(struct vop_write_args *ap) daddr_t lbn; int bcount; int n, on, error = 0; - struct proc *p = td?td->td_proc:NULL; #ifdef DIAGNOSTIC if (uio->uio_rw != UIO_WRITE) @@ -950,16 +954,8 @@ flush_and_restart: * Maybe this should be above the vnode op call, but so long as * file servers have no limits, i don't think it matters */ - if (p != NULL) { - PROC_LOCK(p); - if (uio->uio_offset + uio->uio_resid > - lim_cur(p, RLIMIT_FSIZE)) { - psignal(p, SIGXFSZ); - PROC_UNLOCK(p); - return (EFBIG); - } - PROC_UNLOCK(p); - } + if (vn_rlimit_fsize(vp, uio, td)) + return (EFBIG); biosize = vp->v_mount->mnt_stat.f_iosize; /* diff --git a/sys/pc98/cbus/clock.c b/sys/pc98/cbus/clock.c index 3028c074d15..8c5a7d2af75 100644 --- a/sys/pc98/cbus/clock.c +++ b/sys/pc98/cbus/clock.c @@ -175,8 +175,8 @@ clkintr(struct trapframe *frame) * timers. 
*/ int cpu = PCPU_GET(cpuid); - if (lapic_cyclic_clock_func[cpu] != NULL) - (*lapic_cyclic_clock_func[cpu])(frame); + if (cyclic_clock_func[cpu] != NULL) + (*cyclic_clock_func[cpu])(frame); #endif #ifdef SMP diff --git a/sys/pc98/conf/GENERIC b/sys/pc98/conf/GENERIC index 85239cbdcc3..699091ecd75 100644 --- a/sys/pc98/conf/GENERIC +++ b/sys/pc98/conf/GENERIC @@ -247,6 +247,7 @@ device firmware # firmware assist module device bpf # Berkeley packet filter # USB support +#options USB_DEBUG # enable debug msgs #device uhci # UHCI PCI->USB interface #device ohci # OHCI PCI->USB interface #device ehci # EHCI PCI->USB interface (USB 2.0) diff --git a/sys/pc98/pc98/machdep.c b/sys/pc98/pc98/machdep.c index f470b5ef5ef..e0482e2198e 100644 --- a/sys/pc98/pc98/machdep.c +++ b/sys/pc98/pc98/machdep.c @@ -874,7 +874,8 @@ freebsd4_sigreturn(td, uap) * one less debugger trap, so allowing it is fairly harmless. */ if (!EFL_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) { - printf("freebsd4_sigreturn: eflags = 0x%x\n", eflags); + uprintf("pid %d (%s): freebsd4_sigreturn eflags = 0x%x\n", + td->td_proc->p_pid, td->td_name, eflags); return (EINVAL); } @@ -885,7 +886,8 @@ freebsd4_sigreturn(td, uap) */ cs = ucp->uc_mcontext.mc_cs; if (!CS_SECURE(cs)) { - printf("freebsd4_sigreturn: cs = 0x%x\n", cs); + uprintf("pid %d (%s): freebsd4_sigreturn cs = 0x%x\n", + td->td_proc->p_pid, td->td_name, cs); ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGBUS; ksi.ksi_code = BUS_OBJERR; @@ -986,7 +988,8 @@ sigreturn(td, uap) * one less debugger trap, so allowing it is fairly harmless. 
*/ if (!EFL_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) { - printf("sigreturn: eflags = 0x%x\n", eflags); + uprintf("pid %d (%s): sigreturn eflags = 0x%x\n", + td->td_proc->p_pid, td->td_name, eflags); return (EINVAL); } @@ -997,7 +1000,8 @@ sigreturn(td, uap) */ cs = ucp->uc_mcontext.mc_cs; if (!CS_SECURE(cs)) { - printf("sigreturn: cs = 0x%x\n", cs); + uprintf("pid %d (%s): sigreturn cs = 0x%x\n", + td->td_proc->p_pid, td->td_name, cs); ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGBUS; ksi.ksi_code = BUS_OBJERR; diff --git a/sys/pci/if_rl.c b/sys/pci/if_rl.c index 8b225169a9c..cd7609db697 100644 --- a/sys/pci/if_rl.c +++ b/sys/pci/if_rl.c @@ -942,8 +942,8 @@ rl_attach(device_t dev) #ifdef DEVICE_POLLING ifp->if_capabilities |= IFCAP_POLLING; #endif - IFQ_SET_MAXLEN(&ifp->if_snd, IFQ_MAXLEN); - ifp->if_snd.ifq_drv_maxlen = IFQ_MAXLEN; + IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen); + ifp->if_snd.ifq_drv_maxlen = ifqmaxlen; IFQ_SET_READY(&ifp->if_snd); /* diff --git a/sys/powerpc/aim/mmu_oea.c b/sys/powerpc/aim/mmu_oea.c index 8357929e4f2..95936ed01b1 100644 --- a/sys/powerpc/aim/mmu_oea.c +++ b/sys/powerpc/aim/mmu_oea.c @@ -305,6 +305,7 @@ vm_paddr_t moea_extract(mmu_t, pmap_t, vm_offset_t); vm_page_t moea_extract_and_hold(mmu_t, pmap_t, vm_offset_t, vm_prot_t); void moea_init(mmu_t); boolean_t moea_is_modified(mmu_t, vm_page_t); +boolean_t moea_is_referenced(mmu_t, vm_page_t); boolean_t moea_ts_referenced(mmu_t, vm_page_t); vm_offset_t moea_map(mmu_t, vm_offset_t *, vm_offset_t, vm_offset_t, int); boolean_t moea_page_exists_quick(mmu_t, pmap_t, vm_page_t); @@ -344,6 +345,7 @@ static mmu_method_t moea_methods[] = { MMUMETHOD(mmu_extract_and_hold, moea_extract_and_hold), MMUMETHOD(mmu_init, moea_init), MMUMETHOD(mmu_is_modified, moea_is_modified), + MMUMETHOD(mmu_is_referenced, moea_is_referenced), MMUMETHOD(mmu_ts_referenced, moea_ts_referenced), MMUMETHOD(mmu_map, moea_map), MMUMETHOD(mmu_page_exists_quick,moea_page_exists_quick), @@ -1239,18 +1241,22 @@ 
moea_extract_and_hold(mmu_t mmu, pmap_t pmap, vm_offset_t va, vm_prot_t prot) { struct pvo_entry *pvo; vm_page_t m; - + vm_paddr_t pa; + m = NULL; - vm_page_lock_queues(); + pa = 0; PMAP_LOCK(pmap); +retry: pvo = moea_pvo_find_va(pmap, va & ~ADDR_POFF, NULL); if (pvo != NULL && (pvo->pvo_pte.pte.pte_hi & PTE_VALID) && ((pvo->pvo_pte.pte.pte_lo & PTE_PP) == PTE_RW || (prot & VM_PROT_WRITE) == 0)) { + if (vm_page_pa_tryrelock(pmap, pvo->pvo_pte.pte.pte_lo & PTE_RPGN, &pa)) + goto retry; m = PHYS_TO_VM_PAGE(pvo->pvo_pte.pte.pte_lo & PTE_RPGN); vm_page_hold(m); } - vm_page_unlock_queues(); + PA_UNLOCK_COND(pa); PMAP_UNLOCK(pmap); return (m); } @@ -1268,6 +1274,15 @@ moea_init(mmu_t mmu) moea_initialized = TRUE; } +boolean_t +moea_is_referenced(mmu_t mmu, vm_page_t m) +{ + + if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0) + return (FALSE); + return (moea_query_bit(m, PTE_REF)); +} + boolean_t moea_is_modified(mmu_t mmu, vm_page_t m) { diff --git a/sys/powerpc/aim/mmu_oea64.c b/sys/powerpc/aim/mmu_oea64.c index 97fff631277..0483b41c4ba 100644 --- a/sys/powerpc/aim/mmu_oea64.c +++ b/sys/powerpc/aim/mmu_oea64.c @@ -379,6 +379,7 @@ vm_paddr_t moea64_extract(mmu_t, pmap_t, vm_offset_t); vm_page_t moea64_extract_and_hold(mmu_t, pmap_t, vm_offset_t, vm_prot_t); void moea64_init(mmu_t); boolean_t moea64_is_modified(mmu_t, vm_page_t); +boolean_t moea64_is_referenced(mmu_t, vm_page_t); boolean_t moea64_ts_referenced(mmu_t, vm_page_t); vm_offset_t moea64_map(mmu_t, vm_offset_t *, vm_offset_t, vm_offset_t, int); boolean_t moea64_page_exists_quick(mmu_t, pmap_t, vm_page_t); @@ -416,6 +417,7 @@ static mmu_method_t moea64_bridge_methods[] = { MMUMETHOD(mmu_extract_and_hold, moea64_extract_and_hold), MMUMETHOD(mmu_init, moea64_init), MMUMETHOD(mmu_is_modified, moea64_is_modified), + MMUMETHOD(mmu_is_referenced, moea64_is_referenced), MMUMETHOD(mmu_ts_referenced, moea64_ts_referenced), MMUMETHOD(mmu_map, moea64_map), MMUMETHOD(mmu_page_exists_quick,moea64_page_exists_quick), @@ 
-1372,18 +1374,23 @@ moea64_extract_and_hold(mmu_t mmu, pmap_t pmap, vm_offset_t va, vm_prot_t prot) { struct pvo_entry *pvo; vm_page_t m; + vm_paddr_t pa; m = NULL; - vm_page_lock_queues(); + pa = 0; PMAP_LOCK(pmap); +retry: pvo = moea64_pvo_find_va(pmap, va & ~ADDR_POFF, NULL); if (pvo != NULL && (pvo->pvo_pte.lpte.pte_hi & LPTE_VALID) && ((pvo->pvo_pte.lpte.pte_lo & LPTE_PP) == LPTE_RW || (prot & VM_PROT_WRITE) == 0)) { + if (vm_page_pa_tryrelock(pmap, + pvo->pvo_pte.lpte.pte_lo & LPTE_RPGN, &pa)) + goto retry; m = PHYS_TO_VM_PAGE(pvo->pvo_pte.lpte.pte_lo & LPTE_RPGN); vm_page_hold(m); } - vm_page_unlock_queues(); + PA_UNLOCK_COND(pa); PMAP_UNLOCK(pmap); return (m); } @@ -1462,6 +1469,15 @@ moea64_init(mmu_t mmu) moea64_initialized = TRUE; } +boolean_t +moea64_is_referenced(mmu_t mmu, vm_page_t m) +{ + + if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0) + return (FALSE); + return (moea64_query_bit(m, PTE_REF)); +} + boolean_t moea64_is_modified(mmu_t mmu, vm_page_t m) { diff --git a/sys/powerpc/booke/pmap.c b/sys/powerpc/booke/pmap.c index 13e637ce603..78e9afb5530 100644 --- a/sys/powerpc/booke/pmap.c +++ b/sys/powerpc/booke/pmap.c @@ -288,6 +288,7 @@ static vm_page_t mmu_booke_extract_and_hold(mmu_t, pmap_t, vm_offset_t, static void mmu_booke_init(mmu_t); static boolean_t mmu_booke_is_modified(mmu_t, vm_page_t); static boolean_t mmu_booke_is_prefaultable(mmu_t, pmap_t, vm_offset_t); +static boolean_t mmu_booke_is_referenced(mmu_t, vm_page_t); static boolean_t mmu_booke_ts_referenced(mmu_t, vm_page_t); static vm_offset_t mmu_booke_map(mmu_t, vm_offset_t *, vm_offset_t, vm_offset_t, int); @@ -342,6 +343,7 @@ static mmu_method_t mmu_booke_methods[] = { MMUMETHOD(mmu_init, mmu_booke_init), MMUMETHOD(mmu_is_modified, mmu_booke_is_modified), MMUMETHOD(mmu_is_prefaultable, mmu_booke_is_prefaultable), + MMUMETHOD(mmu_is_referenced, mmu_booke_is_referenced), MMUMETHOD(mmu_ts_referenced, mmu_booke_ts_referenced), MMUMETHOD(mmu_map, mmu_booke_map), 
MMUMETHOD(mmu_mincore, mmu_booke_mincore), @@ -1913,16 +1915,11 @@ mmu_booke_protect(mmu_t mmu, pmap_t pmap, vm_offset_t sva, vm_offset_t eva, tlb_miss_lock(); /* Handle modified pages. */ - if (PTE_ISMODIFIED(pte)) + if (PTE_ISMODIFIED(pte) && PTE_ISMANAGED(pte)) vm_page_dirty(m); - /* Referenced pages. */ - if (PTE_ISREFERENCED(pte)) - vm_page_flag_set(m, PG_REFERENCED); - tlb0_flush_entry(va); - pte->flags &= ~(PTE_UW | PTE_SW | PTE_MODIFIED | - PTE_REFERENCED); + pte->flags &= ~(PTE_UW | PTE_SW | PTE_MODIFIED); tlb_miss_unlock(); mtx_unlock_spin(&tlbivax_mutex); @@ -1960,13 +1957,8 @@ mmu_booke_remove_write(mmu_t mmu, vm_page_t m) if (PTE_ISMODIFIED(pte)) vm_page_dirty(m); - /* Referenced pages. */ - if (PTE_ISREFERENCED(pte)) - vm_page_flag_set(m, PG_REFERENCED); - /* Flush mapping from TLB0. */ - pte->flags &= ~(PTE_UW | PTE_SW | PTE_MODIFIED | - PTE_REFERENCED); + pte->flags &= ~(PTE_UW | PTE_SW | PTE_MODIFIED); tlb_miss_unlock(); mtx_unlock_spin(&tlbivax_mutex); @@ -2032,11 +2024,12 @@ mmu_booke_extract_and_hold(mmu_t mmu, pmap_t pmap, vm_offset_t va, pte_t *pte; vm_page_t m; uint32_t pte_wbit; - + vm_paddr_t pa; + m = NULL; - vm_page_lock_queues(); + pa = 0; PMAP_LOCK(pmap); - +retry: pte = pte_find(mmu, pmap, va); if ((pte != NULL) && PTE_ISVALID(pte)) { if (pmap == kernel_pmap) @@ -2045,12 +2038,14 @@ mmu_booke_extract_and_hold(mmu_t mmu, pmap_t pmap, vm_offset_t va, pte_wbit = PTE_UW; if ((pte->flags & pte_wbit) || ((prot & VM_PROT_WRITE) == 0)) { + if (vm_page_pa_tryrelock(pmap, PTE_PA(pte), &pa)) + goto retry; m = PHYS_TO_VM_PAGE(PTE_PA(pte)); vm_page_hold(m); } } - vm_page_unlock_queues(); + PA_UNLOCK_COND(pa); PMAP_UNLOCK(pmap); return (m); } @@ -2180,6 +2175,33 @@ mmu_booke_is_prefaultable(mmu_t mmu, pmap_t pmap, vm_offset_t addr) return (FALSE); } +/* + * Return whether or not the specified physical page was referenced + * in any physical maps. 
+ */ +static boolean_t +mmu_booke_is_referenced(mmu_t mmu, vm_page_t m) +{ + pte_t *pte; + pv_entry_t pv; + boolean_t rv; + + mtx_assert(&vm_page_queue_mtx, MA_OWNED); + rv = FALSE; + if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0) + return (rv); + TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) { + PMAP_LOCK(pv->pv_pmap); + if ((pte = pte_find(mmu, pv->pv_pmap, pv->pv_va)) != NULL && + PTE_ISVALID(pte)) + rv = PTE_ISREFERENCED(pte) ? TRUE : FALSE; + PMAP_UNLOCK(pv->pv_pmap); + if (rv) + break; + } + return (rv); +} + /* * Clear the modify bits on the specified physical page. */ diff --git a/sys/powerpc/conf/GENERIC b/sys/powerpc/conf/GENERIC index 5c76272f1e0..86f56cc1dc7 100644 --- a/sys/powerpc/conf/GENERIC +++ b/sys/powerpc/conf/GENERIC @@ -144,6 +144,7 @@ device firmware # firmware assist module device bpf #Berkeley packet filter # USB support +options USB_DEBUG # enable debug msgs device uhci # UHCI PCI->USB interface device ohci # OHCI PCI->USB interface device ehci # EHCI PCI->USB interface diff --git a/sys/powerpc/conf/MPC85XX b/sys/powerpc/conf/MPC85XX index 51d4e8d5342..a2a8de4a9a3 100644 --- a/sys/powerpc/conf/MPC85XX +++ b/sys/powerpc/conf/MPC85XX @@ -79,6 +79,7 @@ device sec device tsec device tun device uart +options USB_DEBUG # enable debug msgs #device uhci device umass device usb diff --git a/sys/powerpc/include/pmap.h b/sys/powerpc/include/pmap.h index a23052ea975..a49bd6ce8ae 100644 --- a/sys/powerpc/include/pmap.h +++ b/sys/powerpc/include/pmap.h @@ -88,6 +88,8 @@ struct pmap { struct mtx pm_mtx; u_int pm_sr[16]; u_int pm_active; + uint32_t pm_gen_count; /* generation count (pmap lock dropped) */ + u_int pm_retries; u_int pm_context; struct pmap *pmap_phys; diff --git a/sys/powerpc/include/proc.h b/sys/powerpc/include/proc.h index c958fb76f0f..d51116bb013 100644 --- a/sys/powerpc/include/proc.h +++ b/sys/powerpc/include/proc.h @@ -46,4 +46,6 @@ struct mdthread { struct mdproc { }; +#define KINFO_PROC_SIZE 768 + #endif /* !_MACHINE_PROC_H_ */ 
diff --git a/sys/powerpc/powerpc/mmu_if.m b/sys/powerpc/powerpc/mmu_if.m index 5b8ba14d689..a87e5d8c89d 100644 --- a/sys/powerpc/powerpc/mmu_if.m +++ b/sys/powerpc/powerpc/mmu_if.m @@ -345,6 +345,20 @@ METHOD boolean_t is_prefaultable { } DEFAULT mmu_null_is_prefaultable; +/** + * @brief Return whether or not the specified physical page was referenced + * in any physical maps. + * + * @params _pg physical page + * + * @retval boolean TRUE if page has been referenced + */ +METHOD boolean_t is_referenced { + mmu_t _mmu; + vm_page_t _pg; +}; + + /** * @brief Return a count of referenced bits for a page, clearing those bits. * Not all referenced bits need to be cleared, but it is necessary that 0 diff --git a/sys/powerpc/powerpc/pmap_dispatch.c b/sys/powerpc/powerpc/pmap_dispatch.c index 2b45e17c6f3..c16360f20f1 100644 --- a/sys/powerpc/powerpc/pmap_dispatch.c +++ b/sys/powerpc/powerpc/pmap_dispatch.c @@ -194,6 +194,14 @@ pmap_is_prefaultable(pmap_t pmap, vm_offset_t va) return (MMU_IS_PREFAULTABLE(mmu_obj, pmap, va)); } +boolean_t +pmap_is_referenced(vm_page_t m) +{ + + CTR2(KTR_PMAP, "%s(%p)", __func__, m); + return (MMU_IS_REFERENCED(mmu_obj, m)); +} + boolean_t pmap_ts_referenced(vm_page_t m) { diff --git a/sys/security/audit/audit_bsm.c b/sys/security/audit/audit_bsm.c index faa45835318..b4713cc8333 100644 --- a/sys/security/audit/audit_bsm.c +++ b/sys/security/audit/audit_bsm.c @@ -740,6 +740,7 @@ kaudit_to_bsm(struct kaudit_record *kar, struct au_record **pau) case AUE_LUTIMES: case AUE_NFS_GETFH: case AUE_LSTAT: + case AUE_LPATHCONF: case AUE_PATHCONF: case AUE_READLINK: case AUE_REVOKE: diff --git a/sys/sparc64/conf/GENERIC b/sys/sparc64/conf/GENERIC index be3cfd3f9ab..9ce2d6c4cb8 100644 --- a/sys/sparc64/conf/GENERIC +++ b/sys/sparc64/conf/GENERIC @@ -226,6 +226,7 @@ device firmware # firmware assist module device bpf # Berkeley packet filter # USB support +options USB_DEBUG # enable debug msgs device uhci # UHCI PCI->USB interface device ohci # OHCI PCI->USB 
interface device ehci # EHCI PCI->USB interface (USB 2.0) diff --git a/sys/sparc64/include/asi.h b/sys/sparc64/include/asi.h index 90dc3f394e1..e5769c125bd 100644 --- a/sys/sparc64/include/asi.h +++ b/sys/sparc64/include/asi.h @@ -82,7 +82,10 @@ #define ASI_DCACHE_SNOOP_TAG 0x44 /* US-III Cu */ /* Named ASI_DCUCR on US-III, but is mostly identical except for added bits. */ -#define ASI_LSU_CTL_REG 0x45 +#define ASI_LSU_CTL_REG 0x45 /* US only */ + +#define ASI_MCNTL 0x45 /* SPARC64 only */ +#define AA_MCNTL 0x08 #define ASI_DCACHE_DATA 0x46 #define ASI_DCACHE_TAG 0x47 @@ -167,6 +170,8 @@ #define ASI_ICACHE_PRE_DECODE 0x6e /* US-I, II */ #define ASI_ICACHE_PRE_NEXT_FIELD 0x6f /* US-I, II */ +#define ASI_FLUSH_L1I 0x67 /* SPARC64 only */ + #define ASI_BLK_AUIP 0x70 #define ASI_BLK_AIUS 0x71 diff --git a/sys/sparc64/include/cache.h b/sys/sparc64/include/cache.h index 9c4804b9776..29bff69d937 100644 --- a/sys/sparc64/include/cache.h +++ b/sys/sparc64/include/cache.h @@ -113,6 +113,10 @@ extern cache_flush_t *cache_flush; extern dcache_page_inval_t *dcache_page_inval; extern icache_page_inval_t *icache_page_inval; +cache_flush_t zeus_cache_flush; +dcache_page_inval_t zeus_dcache_page_inval; +icache_page_inval_t zeus_icache_page_inval; + #endif /* KERNEL */ #endif /* !LOCORE */ diff --git a/sys/sparc64/include/mcntl.h b/sys/sparc64/include/mcntl.h new file mode 100644 index 00000000000..e9483c05477 --- /dev/null +++ b/sys/sparc64/include/mcntl.h @@ -0,0 +1,62 @@ +/*- + * Copyright (c) 2010 Marius Strobl + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _MACHINE_MCNTL_H +#define _MACHINE_MCNTL_H + +/* + * Definitions for the SPARC64 V, VI, VII and VIIIfx Memory Control Register + */ +#define MCNTL_JPS1_TSBP (1UL << 8) + +#define MCNTL_RMD_SHIFT 12 +#define MCNTL_RMD_BITS 2 +#define MCNTL_RMD_MASK \ + (((1UL << MCNTL_RMD_BITS) - 1) << MCNTL_RMD_SHIFT) +#define MCNTL_RMD_FULL (0UL << MCNTL_RMD_SHIFT) +#define MCNTL_RMD_1024 (2UL << MCNTL_RMD_SHIFT) +#define MCNTL_RMD_512 (3UL << MCNTL_RMD_SHIFT) + +#define MCNTL_FW_FDTLB (1UL << 14) +#define MCNTL_FW_FITLB (1UL << 15) +#define MCNTL_NC_CACHE (1UL << 16) + +/* The following bits are valid for the SPARC64 VI, VII and VIIIfx only. */ +#define MCNTL_MPG_SDTLB (1UL << 6) +#define MCNTL_MPG_SITLB (1UL << 7) + +/* The following bits are valid for the SPARC64 VIIIfx only. 
*/ +#define MCNTL_HPF_SHIFT 18 +#define MCNTL_HPF_BITS 2 +#define MCNTL_HPF_MASK \ + (((1UL << MCNTL_HPF_BITS) - 1) << MCNTL_HPF_SHIFT) +#define MCNTL_HPF_STRONG (0UL << MCNTL_HPF_SHIFT) +#define MCNTL_HPF_NOT (1UL << MCNTL_HPF_SHIFT) +#define MCNTL_HPF_WEAK (2UL << MCNTL_HPF_SHIFT) + +#endif /* _MACHINE_MCNTL_H */ diff --git a/sys/sparc64/include/ofw_machdep.h b/sys/sparc64/include/ofw_machdep.h index 625b13137ad..658d9c7b0d2 100644 --- a/sys/sparc64/include/ofw_machdep.h +++ b/sys/sparc64/include/ofw_machdep.h @@ -36,6 +36,7 @@ typedef uint64_t cell_t; int OF_decode_addr(phandle_t, int, int *, bus_addr_t *); void OF_getetheraddr(device_t, u_char *); +u_int OF_getscsinitid(device_t); void cpu_shutdown(void *); int ofw_entry(void *); void ofw_exit(void *); diff --git a/sys/sparc64/include/pmap.h b/sys/sparc64/include/pmap.h index 83c81904b50..388f7512dd4 100644 --- a/sys/sparc64/include/pmap.h +++ b/sys/sparc64/include/pmap.h @@ -62,6 +62,8 @@ struct pmap { struct tte *pm_tsb; vm_object_t pm_tsb_obj; u_int pm_active; + uint32_t pm_gen_count; /* generation count (pmap lock dropped) */ + u_int pm_retries; u_int pm_context[MAXCPU]; struct pmap_statistics pm_stats; }; diff --git a/sys/sparc64/include/proc.h b/sys/sparc64/include/proc.h index bfd1268e433..3c236637fb9 100644 --- a/sys/sparc64/include/proc.h +++ b/sys/sparc64/include/proc.h @@ -51,4 +51,6 @@ struct mdproc { void *md_sigtramp; }; +#define KINFO_PROC_SIZE 1088 + #endif /* !_MACHINE_PROC_H_ */ diff --git a/sys/sparc64/isa/isa.c b/sys/sparc64/isa/isa.c index 724b4a81f5d..9159cdac942 100644 --- a/sys/sparc64/isa/isa.c +++ b/sys/sparc64/isa/isa.c @@ -116,7 +116,7 @@ isa_init(device_t dev) } static const struct { - const char *name; + const char *const name; uint32_t id; } const ofw_isa_pnp_map[] = { { "SUNW,lomh", 0x0000ae4e }, /* SUN0000 */ @@ -126,6 +126,7 @@ static const struct { { "flashprom", 0x0100ae4e }, /* SUN0001 */ { "parallel", 0x0104d041 }, /* PNP0401 */ { "serial", 0x0105d041 }, /* PNP0501 */ + { 
"su", 0x0105d041 }, /* PNP0501 */ { "i2c", 0x0200ae4e }, /* SUN0002 */ { "rmc-comm", 0x0300ae4e }, /* SUN0003 */ { "kb_ps2", 0x0303d041 }, /* PNP0303 */ diff --git a/sys/sparc64/sparc64/cache.c b/sys/sparc64/sparc64/cache.c index 8cb8dee8a20..a3c6119ab97 100644 --- a/sys/sparc64/sparc64/cache.c +++ b/sys/sparc64/sparc64/cache.c @@ -141,7 +141,12 @@ cache_init(struct pcpu *pcpu) if ((pcpu->pc_cache.dc_size & ~(1UL << (ffs(pcpu->pc_cache.dc_size) - 1))) != 0) panic("cache_init: D$ size not a power of 2"); - if (((pcpu->pc_cache.dc_size / pcpu->pc_cache.dc_assoc) / + /* + * For CPUs which don't support unaliasing in hardware ensure that + * the data cache doesn't have too many virtual colors. + */ + if (pcpu->pc_impl != CPU_IMPL_SPARC64V && + ((pcpu->pc_cache.dc_size / pcpu->pc_cache.dc_assoc) / PAGE_SIZE) != DCACHE_COLORS) panic("cache_init: too many D$ colors"); set = pcpu->pc_cache.ec_size / pcpu->pc_cache.ec_assoc; @@ -155,12 +160,21 @@ cache_init(struct pcpu *pcpu) icache_page_inval = cheetah_icache_page_inval; tlb_flush_nonlocked = cheetah_tlb_flush_nonlocked; tlb_flush_user = cheetah_tlb_flush_user; - } else { + } else if (pcpu->pc_impl == CPU_IMPL_SPARC64V) { + cache_enable = cheetah_cache_enable; + cache_flush = zeus_cache_flush; + dcache_page_inval = zeus_dcache_page_inval; + icache_page_inval = zeus_icache_page_inval; + tlb_flush_nonlocked = cheetah_tlb_flush_nonlocked; + tlb_flush_user = cheetah_tlb_flush_user; + } else if (pcpu->pc_impl >= CPU_IMPL_ULTRASPARCI && + pcpu->pc_impl < CPU_IMPL_ULTRASPARCIII) { cache_enable = spitfire_cache_enable; cache_flush = spitfire_cache_flush; dcache_page_inval = spitfire_dcache_page_inval; icache_page_inval = spitfire_icache_page_inval; tlb_flush_nonlocked = spitfire_tlb_flush_nonlocked; tlb_flush_user = spitfire_tlb_flush_user; - } + } else + panic("cache_init: unknown CPU"); } diff --git a/sys/sparc64/sparc64/cheetah.c b/sys/sparc64/sparc64/cheetah.c index a99a70ca39b..99d38c966ff 100644 --- 
a/sys/sparc64/sparc64/cheetah.c +++ b/sys/sparc64/sparc64/cheetah.c @@ -39,11 +39,13 @@ __FBSDID("$FreeBSD$"); #include #include +#include #include #include #include #include #include +#include #include #include #include @@ -52,19 +54,13 @@ __FBSDID("$FreeBSD$"); #define CHEETAH_ICACHE_TAG_LOWER 0x30 /* - * CPU-specific initialization + * CPU-specific initialization - this is used for both the Sun Cheetah and + * later as well as the Fujitsu Zeus and later CPUs. */ void cheetah_init(u_int cpu_impl) { u_long val; - register_t s; - - /* - * Disable interrupts for safety, this shouldn't be actually - * necessary though. - */ - s = intr_disable(); /* Ensure the TSB Extension Registers hold 0 as TSB_Base. */ @@ -82,6 +78,14 @@ cheetah_init(u_int cpu_impl) stxa(AA_IMMU_TSB_NEXT_REG, ASI_IMMU, 0); membar(Sync); + if (cpu_impl == CPU_IMPL_SPARC64V) { + /* Ensure MCNTL_JPS1_TSBP is 0. */ + val = ldxa(AA_MCNTL, ASI_MCNTL); + val &= ~MCNTL_JPS1_TSBP; + stxa(AA_MCNTL, ASI_MCNTL, val); + return; + } + /* * Configure the first large dTLB to hold 4MB pages (e.g. for direct * mappings) for all three contexts and ensure the second one is set @@ -134,8 +138,6 @@ cheetah_init(u_int cpu_impl) val &= ~DCR_DTPE; } wr(asr18, val, 0); - - intr_restore(s); } /* @@ -216,7 +218,7 @@ cheetah_dcache_page_inval(vm_paddr_t spa) * consistency is maintained by hardware. 
*/ void -cheetah_icache_page_inval(vm_paddr_t pa) +cheetah_icache_page_inval(vm_paddr_t pa __unused) { } diff --git a/sys/sparc64/sparc64/identcpu.c b/sys/sparc64/sparc64/identcpu.c index 0b731515f16..455aa6719bc 100644 --- a/sys/sparc64/sparc64/identcpu.c +++ b/sys/sparc64/sparc64/identcpu.c @@ -41,7 +41,7 @@ cpu_identify(u_long vers, u_int freq, u_int id) switch (VER_MANUF(vers)) { case 0x04: - manus = "HAL"; + manus = "HAL/Fujitsu"; break; case 0x13: case 0x17: @@ -57,6 +57,27 @@ cpu_identify(u_long vers, u_int freq, u_int id) case CPU_IMPL_SPARC64: impls = "SPARC64"; break; + case CPU_IMPL_SPARC64II: + impls = "SPARC64-II"; + break; + case CPU_IMPL_SPARC64III: + impls = "SPARC64-III"; + break; + case CPU_IMPL_SPARC64IV: + impls = "SPARC64-IV"; + break; + case CPU_IMPL_SPARC64V: + impls = "SPARC64-V"; + break; + case CPU_IMPL_SPARC64VI: + impls = "SPARC64-VI"; + break; + case CPU_IMPL_SPARC64VII: + impls = "SPARC64-VII"; + break; + case CPU_IMPL_SPARC64VIIIfx: + impls = "SPARC64-VIIIfx"; + break; case CPU_IMPL_ULTRASPARCI: impls = "UltraSparc-I"; break; @@ -67,7 +88,6 @@ cpu_identify(u_long vers, u_int freq, u_int id) impls = "UltraSparc-IIi"; break; case CPU_IMPL_ULTRASPARCIIe: - /* V9 Manual says `UltraSparc-e'. I assume this is wrong. */ impls = "UltraSparc-IIe"; break; case CPU_IMPL_ULTRASPARCIII: diff --git a/sys/sparc64/sparc64/interrupt.S b/sys/sparc64/sparc64/interrupt.S index 75a97275301..b71a05da98d 100644 --- a/sys/sparc64/sparc64/interrupt.S +++ b/sys/sparc64/sparc64/interrupt.S @@ -83,8 +83,11 @@ ENTRY(intr_vector) * The 2nd word points to code to execute and the 3rd is an argument * to pass. Jump to it. */ - brnz,a,pt %g3, 1f - nop + brnz,pt %g3, 1f + /* + * NB: Zeus CPUs set some undocumented bits in the first data word. 
+ */ + and %g3, IV_MAX - 1, %g3 jmpl %g4, %g0 nop /* NOTREACHED */ diff --git a/sys/sparc64/sparc64/locore.S b/sys/sparc64/sparc64/locore.S index a084a3884d2..41f55a53a8b 100644 --- a/sys/sparc64/sparc64/locore.S +++ b/sys/sparc64/sparc64/locore.S @@ -29,6 +29,7 @@ __FBSDID("$FreeBSD$"); #include #include +#include #include #include @@ -47,7 +48,7 @@ ENTRY(btext) ENTRY(_start) /* * Initialize misc. state to known values: interrupts disabled, normal - * globals, windows flushed (cr = 0, cs = nwindows - 1), PIL 0 and + * globals, windows flushed (cr = 0, cs = nwindows - 1), PIL_TICK and * floating point disabled. * Note that some firmware versions don't implement a clean window * trap handler so we unfortunately can't clear the windows by setting @@ -55,7 +56,7 @@ ENTRY(_start) */ wrpr %g0, PSTATE_NORMAL, %pstate flushw - wrpr %g0, 0, %pil + wrpr %g0, PIL_TICK, %pil wr %g0, 0, %fprs /* @@ -65,11 +66,6 @@ ENTRY(_start) SET(pcpu0 + (PCPU_PAGES * PAGE_SIZE) - PC_SIZEOF, %l1, %l0) sub %l0, SPOFF + CCFSZ, %sp - /* - * Enable interrupts. - */ - wrpr %g0, PSTATE_KERNEL, %pstate - /* * Do initial bootstrap to setup pmap and thread0. */ @@ -97,7 +93,7 @@ ENTRY(cpu_setregs) ldx [%o0 + PC_CURPCB], %o1 /* - * Disable interrupts, normal globals. + * Ensure we are on normal globals. */ wrpr %g0, PSTATE_NORMAL, %pstate @@ -147,11 +143,6 @@ ENTRY(cpu_setregs) wrpr %o1, 0, %tba stw %o3, [%o2] - /* - * Re-enable interrupts. 
- */ - wrpr %g0, PSTATE_KERNEL, %pstate - retl nop END(cpu_setregs) diff --git a/sys/sparc64/sparc64/machdep.c b/sys/sparc64/sparc64/machdep.c index a020fb8038f..eda2e01670a 100644 --- a/sys/sparc64/sparc64/machdep.c +++ b/sys/sparc64/sparc64/machdep.c @@ -276,6 +276,7 @@ cpu_cpuid_prop(u_int cpu_impl) switch (cpu_impl) { case CPU_IMPL_SPARC64: + case CPU_IMPL_SPARC64V: case CPU_IMPL_ULTRASPARCI: case CPU_IMPL_ULTRASPARCII: case CPU_IMPL_ULTRASPARCIIi: @@ -300,6 +301,7 @@ cpu_get_mid(u_int cpu_impl) switch (cpu_impl) { case CPU_IMPL_SPARC64: + case CPU_IMPL_SPARC64V: case CPU_IMPL_ULTRASPARCI: case CPU_IMPL_ULTRASPARCII: case CPU_IMPL_ULTRASPARCIIi: @@ -341,9 +343,10 @@ sparc64_init(caddr_t mdp, u_long o1, u_long o2, u_long o3, ofw_vec_t *vec) cpu_impl = VER_IMPL(rdpr(ver)); /* - * Do CPU-specific Initialization. + * Do CPU-specific initialization. */ - if (cpu_impl >= CPU_IMPL_ULTRASPARCIII) + if (cpu_impl == CPU_IMPL_SPARC64V || + cpu_impl >= CPU_IMPL_ULTRASPARCIII) cheetah_init(cpu_impl); /* @@ -477,6 +480,10 @@ sparc64_init(caddr_t mdp, u_long o1, u_long o2, u_long o3, ofw_vec_t *vec) sizeof(itlb_slots)) == -1) panic("sparc64_init: cannot determine number of iTLB slots"); + /* + * Initialize and enable the caches. Note that this may include + * applying workarounds. + */ cache_init(pc); cache_enable(cpu_impl); uma_set_align(pc->pc_cache.dc_linesize - 1); @@ -487,6 +494,7 @@ sparc64_init(caddr_t mdp, u_long o1, u_long o2, u_long o3, ofw_vec_t *vec) if (cpu_use_vis) { switch (cpu_impl) { case CPU_IMPL_SPARC64: + case CPU_IMPL_SPARC64V: case CPU_IMPL_ULTRASPARCI: case CPU_IMPL_ULTRASPARCII: case CPU_IMPL_ULTRASPARCIIi: @@ -568,8 +576,18 @@ sparc64_init(caddr_t mdp, u_long o1, u_long o2, u_long o3, ofw_vec_t *vec) dpcpu_init(dpcpu0, 0); msgbufinit(msgbufp, MSGBUF_SIZE); + /* + * Initialize mutexes. + */ mutex_init(); + + /* + * Finish the interrupt initialization now that mutexes work and + * enable interrupts.
+ */ intr_init2(); + wrpr(pil, 0, PIL_TICK); + wrpr(pstate, 0, PSTATE_KERNEL); /* * Finish pmap initialization now that we're ready for mutexes. diff --git a/sys/sparc64/sparc64/mp_locore.S b/sys/sparc64/sparc64/mp_locore.S index 17dc444c7d9..e78b4b9afb3 100644 --- a/sys/sparc64/sparc64/mp_locore.S +++ b/sys/sparc64/sparc64/mp_locore.S @@ -30,6 +30,7 @@ __FBSDID("$FreeBSD$"); #include #include +#include #include #include #include @@ -44,14 +45,14 @@ __FBSDID("$FreeBSD$"); _ALIGN_TEXT /* * Initialize misc. state to known values: interrupts disabled, normal - * globals, windows flushed (cr = 0, cs = nwindows - 1), PIL 0 and + * globals, windows flushed (cr = 0, cs = nwindows - 1), PIL_TICK and * floating point disabled. * Note that some firmware versions don't implement a clean window * trap handler so we unfortunately can't clear the windows by setting * %cleanwin to zero here. */ 1: wrpr %g0, PSTATE_NORMAL, %pstate - wrpr %g0, 0, %pil + wrpr %g0, PIL_TICK, %pil wr %g0, 0, %fprs rdpr %ver, %l7 @@ -199,19 +200,25 @@ ENTRY(mp_startup) srlx %l1, VER_IMPL_SHIFT, %l1 sll %l1, VER_IMPL_SIZE, %l1 srl %l1, VER_IMPL_SIZE, %l1 + cmp %l1, CPU_IMPL_SPARC64V + bl %icc, 4f + nop + cmp %l1, CPU_IMPL_ULTRASPARCI + bl %icc, 2f + nop cmp %l1, CPU_IMPL_ULTRASPARCIII bl %icc, 3f nop - mov CPU_STICKSYNC, %l2 +2: mov CPU_STICKSYNC, %l2 membar #StoreLoad stw %l2, [%l0 + CSA_STATE] -2: ldx [%l0 + CSA_STICK], %l2 - brz %l2, 2b +3: ldx [%l0 + CSA_STICK], %l2 + brz %l2, 3b nop wr %l2, 0, %asr24 -3: call cpu_get_mid +4: call cpu_get_mid mov %l1, %o0 /* @@ -224,9 +231,9 @@ ENTRY(mp_startup) /* * Wait till its our turn to bootstrap. */ -4: lduw [%l0 + CSA_MID], %l1 +5: lduw [%l0 + CSA_MID], %l1 cmp %l1, %o0 - bne %xcc, 4b + bne %xcc, 5b nop add %l0, CSA_TTES, %l1 @@ -235,7 +242,7 @@ ENTRY(mp_startup) /* * Map the per-CPU pages. 
*/ -5: sllx %l2, TTE_SHIFT, %l3 +6: sllx %l2, TTE_SHIFT, %l3 add %l1, %l3, %l3 ldx [%l3 + TTE_VPN], %l4 @@ -250,7 +257,7 @@ ENTRY(mp_startup) add %l2, 1, %l2 cmp %l2, PCPU_PAGES - bne %xcc, 5b + bne %xcc, 6b nop /* @@ -262,11 +269,6 @@ ENTRY(mp_startup) add %l1, %l2, %l1 sub %l1, SPOFF + CCFSZ, %sp - /* - * Enable interrupts. - */ - wrpr %g0, PSTATE_KERNEL, %pstate - #if KTR_COMPILE & KTR_SMP CATR(KTR_SMP, "mp_startup: bootstrap cpuid=%d mid=%d pcpu=%#lx data=%#lx sp=%#lx" diff --git a/sys/sparc64/sparc64/mp_machdep.c b/sys/sparc64/sparc64/mp_machdep.c index 8ea72f3954a..428120437bc 100644 --- a/sys/sparc64/sparc64/mp_machdep.c +++ b/sys/sparc64/sparc64/mp_machdep.c @@ -164,7 +164,8 @@ mp_init(u_int cpu_impl) if (cpu_impl == CPU_IMPL_ULTRASPARCIIIi || cpu_impl == CPU_IMPL_ULTRASPARCIIIip) isjbus = 1; - if (cpu_impl >= CPU_IMPL_ULTRASPARCIII) + if (cpu_impl == CPU_IMPL_SPARC64V || + cpu_impl >= CPU_IMPL_ULTRASPARCIII) cpu_ipi_selected = cheetah_ipi_selected; else cpu_ipi_selected = spitfire_ipi_selected; @@ -315,7 +316,8 @@ ap_start(phandle_t node, u_int mid, u_int cpu_impl) ; membar(StoreLoad); csa->csa_tick = rd(tick); - if (cpu_impl >= CPU_IMPL_ULTRASPARCIII) { + if (cpu_impl == CPU_IMPL_SPARC64V || + cpu_impl >= CPU_IMPL_ULTRASPARCIII) { while (csa->csa_state != CPU_STICKSYNC) ; membar(StoreLoad); @@ -409,16 +411,33 @@ cpu_mp_bootstrap(struct pcpu *pc) volatile struct cpu_start_args *csa; csa = &cpu_start_args; - if (pc->pc_impl >= CPU_IMPL_ULTRASPARCIII) + + /* Do CPU-specific initialization. */ + if (pc->pc_impl == CPU_IMPL_SPARC64V || + pc->pc_impl >= CPU_IMPL_ULTRASPARCIII) cheetah_init(pc->pc_impl); + /* + * Enable the caches. Note that this may include applying workarounds. + */ cache_enable(pc->pc_impl); + + /* Lock the kernel TSB in the TLB. */ pmap_map_tsb(); + /* * Flush all non-locked TLB entries possibly left over by the * firmware. */ tlb_flush_nonlocked(); + + /* Initialize global registers. */ cpu_setregs(pc); + + /* Enable interrupts.
*/ + wrpr(pil, 0, PIL_TICK); + wrpr(pstate, 0, PSTATE_KERNEL); + + /* Start the (S)TICK interrupts. */ tick_start(); smp_cpus++; diff --git a/sys/sparc64/sparc64/nexus.c b/sys/sparc64/sparc64/nexus.c index ee01aa86ae0..192251db30d 100644 --- a/sys/sparc64/sparc64/nexus.c +++ b/sys/sparc64/sparc64/nexus.c @@ -153,6 +153,7 @@ EARLY_DRIVER_MODULE(nexus, root, nexus_driver, nexus_devclass, 0, 0, MODULE_VERSION(nexus, 1); static const char *const nexus_excl_name[] = { + "FJSV,system", "aliases", "associations", "chosen", @@ -163,6 +164,7 @@ static const char *const nexus_excl_name[] = { "openprom", "options", "packages", + "physical-memory", "rsc", "sgcn", "todsg", diff --git a/sys/sparc64/sparc64/ofw_machdep.c b/sys/sparc64/sparc64/ofw_machdep.c index 07f95e20e51..88ee0726725 100644 --- a/sys/sparc64/sparc64/ofw_machdep.c +++ b/sys/sparc64/sparc64/ofw_machdep.c @@ -1,6 +1,6 @@ /*- * Copyright (c) 2001 by Thomas Moestl . - * Copyright (c) 2005 - 2009 by Marius Strobl . + * Copyright (c) 2005 - 2010 by Marius Strobl . * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -68,6 +68,19 @@ OF_getetheraddr(device_t dev, u_char *addr) bcopy(&idp.id_ether, addr, ETHER_ADDR_LEN); } +u_int +OF_getscsinitid(device_t dev) +{ + phandle_t node; + uint32_t id; + + for (node = ofw_bus_get_node(dev); node != 0; node = OF_parent(node)) + if (OF_getprop(node, "scsi-initiator-id", &id, + sizeof(id)) > 0) + return (id); + return (7); +} + static __inline uint32_t phys_hi_mask_space(const char *bus, uint32_t phys_hi) { diff --git a/sys/sparc64/sparc64/pmap.c b/sys/sparc64/sparc64/pmap.c index e46455915c2..0a90b4032ab 100644 --- a/sys/sparc64/sparc64/pmap.c +++ b/sys/sparc64/sparc64/pmap.c @@ -528,7 +528,8 @@ pmap_bootstrap(u_int cpu_impl) tp->tte_data = ((translations[i].om_tte & ~((TD_SOFT2_MASK << TD_SOFT2_SHIFT) | - (cpu_impl < CPU_IMPL_ULTRASPARCIII ? + (cpu_impl >= CPU_IMPL_ULTRASPARCI && + cpu_impl < CPU_IMPL_ULTRASPARCIII ? 
(TD_DIAG_SF_MASK << TD_DIAG_SF_SHIFT) : (TD_RSVD_CH_MASK << TD_RSVD_CH_SHIFT)) | (TD_SOFT_MASK << TD_SOFT_SHIFT))) | TD_EXEC) + @@ -694,31 +695,35 @@ pmap_extract_and_hold(pmap_t pm, vm_offset_t va, vm_prot_t prot) { struct tte *tp; vm_page_t m; + vm_paddr_t pa; m = NULL; - vm_page_lock_queues(); + pa = 0; + PMAP_LOCK(pm); +retry: if (pm == kernel_pmap) { if (va >= VM_MIN_DIRECT_ADDRESS) { tp = NULL; m = PHYS_TO_VM_PAGE(TLB_DIRECT_TO_PHYS(va)); + (void)vm_page_pa_tryrelock(pm, TLB_DIRECT_TO_PHYS(va), + &pa); vm_page_hold(m); } else { tp = tsb_kvtotte(va); if ((tp->tte_data & TD_V) == 0) tp = NULL; } - } else { - PMAP_LOCK(pm); + } else tp = tsb_tte_lookup(pm, va); - } if (tp != NULL && ((tp->tte_data & TD_SW) || (prot & VM_PROT_WRITE) == 0)) { + if (vm_page_pa_tryrelock(pm, TTE_GET_PA(tp), &pa)) + goto retry; m = PHYS_TO_VM_PAGE(TTE_GET_PA(tp)); vm_page_hold(m); } - vm_page_unlock_queues(); - if (pm != kernel_pmap) - PMAP_UNLOCK(pm); + PA_UNLOCK_COND(pa); + PMAP_UNLOCK(pm); return (m); } @@ -1086,7 +1091,7 @@ pmap_pinit(pmap_t pm) * Allocate an object for it. 
*/ if (pm->pm_tsb_obj == NULL) - pm->pm_tsb_obj = vm_object_allocate(OBJT_DEFAULT, TSB_PAGES); + pm->pm_tsb_obj = vm_object_allocate(OBJT_PHYS, TSB_PAGES); VM_OBJECT_LOCK(pm->pm_tsb_obj); for (i = 0; i < TSB_PAGES; i++) { @@ -1147,16 +1152,10 @@ pmap_release(pmap_t pm) KASSERT(obj->ref_count == 1, ("pmap_release: tsbobj ref count != 1")); while (!TAILQ_EMPTY(&obj->memq)) { m = TAILQ_FIRST(&obj->memq); - vm_page_lock_queues(); - if (vm_page_sleep_if_busy(m, FALSE, "pmaprl")) - continue; - KASSERT(m->hold_count == 0, - ("pmap_release: freeing held tsb page")); m->md.pmap = NULL; m->wire_count--; atomic_subtract_int(&cnt.v_wire_count, 1); vm_page_free_zero(m); - vm_page_unlock_queues(); } VM_OBJECT_UNLOCK(obj); pmap_qremove((vm_offset_t)pm->pm_tsb, TSB_PAGES); @@ -1273,13 +1272,10 @@ pmap_protect_tte(struct pmap *pm, struct pmap *pm2, struct tte *tp, u_long data; vm_page_t m; - data = atomic_clear_long(&tp->tte_data, TD_REF | TD_SW | TD_W); - if ((data & TD_PV) != 0) { + data = atomic_clear_long(&tp->tte_data, TD_SW | TD_W); + if ((data & (TD_PV | TD_W)) == (TD_PV | TD_W)) { m = PHYS_TO_VM_PAGE(TD_PA(data)); - if ((data & TD_REF) != 0) - vm_page_flag_set(m, PG_REFERENCED); - if ((data & TD_W) != 0) - vm_page_dirty(m); + vm_page_dirty(m); } return (1); } @@ -1838,14 +1834,19 @@ boolean_t pmap_page_is_mapped(vm_page_t m) { struct tte *tp; + boolean_t rv; + rv = FALSE; if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0) - return (FALSE); - mtx_assert(&vm_page_queue_mtx, MA_OWNED); + return (rv); + vm_page_lock_queues(); TAILQ_FOREACH(tp, &m->md.tte_list, tte_link) - if ((tp->tte_data & TD_PV) != 0) - return (TRUE); - return (FALSE); + if ((tp->tte_data & TD_PV) != 0) { + rv = TRUE; + break; + } + vm_page_unlock_queues(); + return (rv); } /* @@ -1917,6 +1918,27 @@ pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr) return (FALSE); } +/* + * Return whether or not the specified physical page was referenced + * in any physical maps. 
+ */ +boolean_t +pmap_is_referenced(vm_page_t m) +{ + struct tte *tp; + + mtx_assert(&vm_page_queue_mtx, MA_OWNED); + if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0) + return (FALSE); + TAILQ_FOREACH(tp, &m->md.tte_list, tte_link) { + if ((tp->tte_data & TD_PV) == 0) + continue; + if ((tp->tte_data & TD_REF) != 0) + return (TRUE); + } + return (FALSE); +} + void pmap_clear_modify(vm_page_t m) { diff --git a/sys/sparc64/sparc64/tick.c b/sys/sparc64/sparc64/tick.c index 1a38013e884..041faa0ab04 100644 --- a/sys/sparc64/sparc64/tick.c +++ b/sys/sparc64/sparc64/tick.c @@ -120,7 +120,8 @@ cpu_initclocks(void) */ } else { clock = PCPU_GET(clock); - intr_setup(PIL_TICK, PCPU_GET(impl) < CPU_IMPL_ULTRASPARCIII ? + intr_setup(PIL_TICK, PCPU_GET(impl) >= CPU_IMPL_ULTRASPARCI && + PCPU_GET(impl) < CPU_IMPL_ULTRASPARCIII ? tick_hardclock_bbwar : tick_hardclock, -1, NULL, NULL); set_cputicker(tick_cputicks, clock, 0); } @@ -325,7 +326,8 @@ void tick_clear(u_int cpu_impl) { - if (cpu_impl >= CPU_IMPL_ULTRASPARCIII) + if (cpu_impl == CPU_IMPL_SPARC64V || + cpu_impl >= CPU_IMPL_ULTRASPARCIII) wrstick(0, 0); wrpr(tick, 0, 0); } @@ -334,7 +336,8 @@ void tick_stop(u_int cpu_impl) { - if (cpu_impl >= CPU_IMPL_ULTRASPARCIII) + if (cpu_impl == CPU_IMPL_SPARC64V || + cpu_impl >= CPU_IMPL_ULTRASPARCIII) wrstickcmpr(1L << 63, 0); wrtickcmpr(1L << 63, 0); } diff --git a/sys/mips/include/psl.h b/sys/sparc64/sparc64/zeus.c similarity index 53% rename from sys/mips/include/psl.h rename to sys/sparc64/sparc64/zeus.c index f02a1a95fa9..a3dc4e56e92 100644 --- a/sys/mips/include/psl.h +++ b/sys/sparc64/sparc64/zeus.c @@ -1,11 +1,6 @@ -/* $OpenBSD: psl.h,v 1.2 1998/01/28 13:46:25 pefo Exp $ */ - /*- - * Copyright (c) 1992, 1993 - * The Regents of the University of California. All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * Ralph Campbell. + * Copyright (c) 2010 Marius Strobl + * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -15,14 +10,11 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) @@ -30,21 +22,44 @@ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. - * - * from: @(#)psl.h 8.1 (Berkeley) 6/10/93 - * JNPR: psl.h,v 1.1 2006/08/07 05:38:57 katta - * $FreeBSD$ */ -#ifndef _MACHINE_PSL_H_ -#define _MACHINE_PSL_H_ +#include +__FBSDID("$FreeBSD$"); -#include +#include +#include + +#include +#include +#include /* - * Macros to decode processor status word. + * Flush all lines from the level 1 caches. 
*/ -#define USERMODE(ps) (((ps) & SR_KSU_MASK) == SR_KSU_USER) -#define BASEPRI(ps) (((ps) & (INT_MASK | SR_INT_ENA_PREV)) \ - == (INT_MASK | SR_INT_ENA_PREV)) -#endif /* _MACHINE_PSL_H_ */ +void +zeus_cache_flush(void) +{ + + stxa_sync(0, ASI_FLUSH_L1I, 0); +} + +/* + * Flush a physical page from the data cache. Data cache consistency is + * maintained by hardware. + */ +void +zeus_dcache_page_inval(vm_paddr_t spa __unused) +{ + +} + +/* + * Flush a physical page from the instruction cache. Instruction cache + * consistency is maintained by hardware. + */ +void +zeus_icache_page_inval(vm_paddr_t pa __unused) +{ + +} diff --git a/sys/sun4v/conf/GENERIC b/sys/sun4v/conf/GENERIC index 4c7cd5bf0c0..26ec3afa4ce 100644 --- a/sys/sun4v/conf/GENERIC +++ b/sys/sun4v/conf/GENERIC @@ -183,6 +183,7 @@ device faith # IPv6-to-IPv4 relaying (translation) device bpf # Berkeley packet filter # USB support +options USB_DEBUG # enable debug msgs #device uhci # UHCI PCI->USB interface #device ohci # OHCI PCI->USB interface device usb # USB Bus (required) diff --git a/sys/sun4v/include/ofw_machdep.h b/sys/sun4v/include/ofw_machdep.h index 625b13137ad..658d9c7b0d2 100644 --- a/sys/sun4v/include/ofw_machdep.h +++ b/sys/sun4v/include/ofw_machdep.h @@ -36,6 +36,7 @@ typedef uint64_t cell_t; int OF_decode_addr(phandle_t, int, int *, bus_addr_t *); void OF_getetheraddr(device_t, u_char *); +u_int OF_getscsinitid(device_t); void cpu_shutdown(void *); int ofw_entry(void *); void ofw_exit(void *); diff --git a/sys/sun4v/include/pmap.h b/sys/sun4v/include/pmap.h index 90ae4c45560..6acebb40d62 100644 --- a/sys/sun4v/include/pmap.h +++ b/sys/sun4v/include/pmap.h @@ -75,6 +75,8 @@ struct pmap { struct tte_hash *pm_hash; TAILQ_HEAD(,pv_entry) pm_pvlist; /* list of mappings in pmap */ struct hv_tsb_info pm_tsb; + uint32_t pm_gen_count; /* generation count (pmap lock dropped) */ + u_int pm_retries; pmap_cpumask_t pm_active; /* mask of cpus currently using pmap */ pmap_cpumask_t pm_tlbactive; /* mask
of cpus that have used this pmap */ struct pmap_statistics pm_stats; diff --git a/sys/sun4v/include/proc.h b/sys/sun4v/include/proc.h index bfd1268e433..3c236637fb9 100644 --- a/sys/sun4v/include/proc.h +++ b/sys/sun4v/include/proc.h @@ -51,4 +51,6 @@ struct mdproc { void *md_sigtramp; }; +#define KINFO_PROC_SIZE 1088 + #endif /* !_MACHINE_PROC_H_ */ diff --git a/sys/sun4v/sun4v/pmap.c b/sys/sun4v/sun4v/pmap.c index d3b8c79467e..0c8442133b2 100644 --- a/sys/sun4v/sun4v/pmap.c +++ b/sys/sun4v/sun4v/pmap.c @@ -1275,17 +1275,21 @@ pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot) { tte_t tte_data; vm_page_t m; + vm_paddr_t pa; m = NULL; - vm_page_lock_queues(); + pa = 0; PMAP_LOCK(pmap); +retry: tte_data = tte_hash_lookup(pmap->pm_hash, va); if (tte_data != 0 && ((tte_data & VTD_SW_W) || (prot & VM_PROT_WRITE) == 0)) { + if (vm_page_pa_tryrelock(pmap, TTE_GET_PA(tte_data), &pa)) + goto retry; m = PHYS_TO_VM_PAGE(TTE_GET_PA(tte_data)); vm_page_hold(m); } - vm_page_unlock_queues(); + PA_UNLOCK_COND(pa); PMAP_UNLOCK(pmap); return (m); @@ -1591,6 +1595,17 @@ pmap_is_prefaultable(pmap_t pmap, vm_offset_t va) return (tte_hash_lookup(pmap->pm_hash, va) == 0); } +/* + * Return whether or not the specified physical page was referenced + * in any physical maps. + */ +boolean_t +pmap_is_referenced(vm_page_t m) +{ + + return (tte_get_phys_bit(m, VTD_REF)); +} + /* * Extract the physical page address associated with the given kernel virtual * address. 
@@ -1813,17 +1828,10 @@ pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) if (!anychanged && (otte_data & VTD_W)) anychanged = 1; - if (otte_data & VTD_MANAGED) { - m = NULL; - - if (otte_data & VTD_REF) { - m = PHYS_TO_VM_PAGE(TTE_GET_PA(otte_data)); - vm_page_flag_set(m, PG_REFERENCED); - } - if (otte_data & VTD_W) { - m = PHYS_TO_VM_PAGE(TTE_GET_PA(otte_data)); - vm_page_dirty(m); - } + if ((otte_data & (VTD_MANAGED | VTD_W)) == (VTD_MANAGED | + VTD_W)) { + m = PHYS_TO_VM_PAGE(TTE_GET_PA(otte_data)); + vm_page_dirty(m); } } diff --git a/sys/sys/_task.h b/sys/sys/_task.h index 2a51e1b07b9..781cd48cf65 100644 --- a/sys/sys/_task.h +++ b/sys/sys/_task.h @@ -36,15 +36,21 @@ * taskqueue_run(). The first argument is taken from the 'ta_context' * field of struct task and the second argument is a count of how many * times the task was enqueued before the call to taskqueue_run(). + * + * List of locks + * (c) const after init + * (q) taskqueue lock */ typedef void task_fn_t(void *context, int pending); struct task { - STAILQ_ENTRY(task) ta_link; /* link for queue */ - u_short ta_pending; /* count times queued */ - u_short ta_priority; /* Priority */ - task_fn_t *ta_func; /* task handler */ - void *ta_context; /* argument for handler */ + STAILQ_ENTRY(task) ta_link; /* (q) link for queue */ + u_int ta_flags; /* (q) state of this task */ +#define TA_FLAGS_RUNNING 0x01 + u_short ta_pending; /* (q) count times queued */ + u_short ta_priority; /* (c) Priority */ + task_fn_t *ta_func; /* (c) task handler */ + void *ta_context; /* (c) argument for handler */ }; #endif /* !_SYS__TASK_H_ */ diff --git a/sys/sys/alq.h b/sys/sys/alq.h index bc4961e094c..4a502d29684 100644 --- a/sys/sys/alq.h +++ b/sys/sys/alq.h @@ -1,7 +1,13 @@ /*- * Copyright (c) 2002, Jeffrey Roberson + * Copyright (c) 2008-2009, Lawrence Stewart + * Copyright (c) 2010, The FreeBSD Foundation * All rights reserved. 
* + * Portions of this software were developed at the Centre for Advanced + * Internet Architectures, Swinburne University of Technology, Melbourne, + * Australia by Lawrence Stewart under sponsorship from the FreeBSD Foundation. + * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: @@ -41,46 +47,47 @@ extern struct thread *ald_thread; * Async. Logging Entry */ struct ale { - struct ale *ae_next; /* Next Entry */ - char *ae_data; /* Entry buffer */ - int ae_flags; /* Entry flags */ + intptr_t ae_bytesused; /* # bytes written to ALE. */ + char *ae_data; /* Write ptr. */ + int ae_pad; /* Unused, compat. */ }; -#define AE_VALID 0x0001 /* Entry has valid data */ - - -/* waitok options */ -#define ALQ_NOWAIT 0x0001 -#define ALQ_WAITOK 0x0002 +/* Flag options. */ +#define ALQ_NOWAIT 0x0001 /* ALQ may not sleep. */ +#define ALQ_WAITOK 0x0002 /* ALQ may sleep. */ +#define ALQ_NOACTIVATE 0x0004 /* Don't activate ALQ after write. */ +#define ALQ_ORDERED 0x0010 /* Maintain write ordering between threads. */ /* Suggested mode for file creation. */ #define ALQ_DEFAULT_CMODE 0600 /* - * alq_open: Creates a new queue + * alq_open_flags: Creates a new queue * * Arguments: * alq Storage for a pointer to the newly created queue. * file The filename to open for logging. * cred Credential to authorize open and I/O with. * cmode Creation mode for file, if new. - * size The size of each entry in the queue. - * count The number of items in the buffer, this should be large enough - * to store items over the period of a disk write. + * size The size of the queue in bytes. 
+ * flags ALQ_ORDERED * Returns: * error from open or 0 on success */ struct ucred; -int alq_open(struct alq **, const char *file, struct ucred *cred, int cmode, +int alq_open_flags(struct alq **alqp, const char *file, struct ucred *cred, int cmode, + int size, int flags); +int alq_open(struct alq **alqp, const char *file, struct ucred *cred, int cmode, int size, int count); /* - * alq_write: Write data into the queue + * alq_writen: Write data into the queue * * Arguments: * alq The queue we're writing to * data The entry to be recorded - * waitok Are we permitted to wait? + * len The number of bytes to write from *data + * flags (ALQ_NOWAIT || ALQ_WAITOK), ALQ_NOACTIVATE * * Returns: * EWOULDBLOCK if: @@ -88,7 +95,8 @@ int alq_open(struct alq **, const char *file, struct ucred *cred, int cmode, * The system is shutting down. * 0 on success. */ -int alq_write(struct alq *alq, void *data, int waitok); +int alq_writen(struct alq *alq, void *data, int len, int flags); +int alq_write(struct alq *alq, void *data, int flags); /* * alq_flush: Flush the queue out to disk @@ -101,27 +109,36 @@ void alq_flush(struct alq *alq); void alq_close(struct alq *alq); /* - * alq_get: Return an entry for direct access + * alq_getn: Return an entry for direct access * * Arguments: * alq The queue to retrieve an entry from - * waitok Are we permitted to wait? + * len Max number of bytes required + * flags (ALQ_NOWAIT || ALQ_WAITOK) * * Returns: * The next available ale on success. * NULL if: - * Waitok is ALQ_NOWAIT and the queue is full. + * flags is ALQ_NOWAIT and the queue is full. * The system is shutting down. * * This leaves the queue locked until a subsequent alq_post. */ -struct ale *alq_get(struct alq *alq, int waitok); +struct ale *alq_getn(struct alq *alq, int len, int flags); +struct ale *alq_get(struct alq *alq, int flags); /* - * alq_post: Schedule the ale retrieved by alq_get for writing. + * alq_post_flags: Schedule the ale retrieved by alq_get/alq_getn for writing. 
* alq The queue to post the entry to. * ale An asynch logging entry returned by alq_get. + * flags ALQ_NOACTIVATE */ -void alq_post(struct alq *, struct ale *); +void alq_post_flags(struct alq *alq, struct ale *ale, int flags); + +static __inline void +alq_post(struct alq *alq, struct ale *ale) +{ + alq_post_flags(alq, ale, 0); +} #endif /* _SYS_ALQ_H_ */ diff --git a/sys/sys/buf.h b/sys/sys/buf.h index 8f3b1b2ab3f..137f90f6d72 100644 --- a/sys/sys/buf.h +++ b/sys/sys/buf.h @@ -215,7 +215,7 @@ struct buf { #define B_DIRTY 0x00200000 /* Needs writing later (in EXT2FS). */ #define B_RELBUF 0x00400000 /* Release VMIO buffer. */ #define B_00800000 0x00800000 /* Available flag. */ -#define B_01000000 0x01000000 /* Available flag. */ +#define B_NOCOPY 0x01000000 /* Don't copy-on-write this buf. */ #define B_NEEDSGIANT 0x02000000 /* Buffer's vnode needs giant. */ #define B_PAGING 0x04000000 /* volatile paging I/O -- bypass VMIO */ #define B_MANAGED 0x08000000 /* Managed by FS. */ @@ -493,6 +493,7 @@ int bufwait(struct buf *); int bufwrite(struct buf *); void bufdone(struct buf *); void bufdone_finish(struct buf *); +void bd_speedup(void); int cluster_read(struct vnode *, u_quad_t, daddr_t, long, struct ucred *, long, int, struct buf **); diff --git a/sys/sys/buf_ring.h b/sys/sys/buf_ring.h index efa667de189..57e42e5c189 100644 --- a/sys/sys/buf_ring.h +++ b/sys/sys/buf_ring.h @@ -1,33 +1,31 @@ -/************************************************************************** - * - * Copyright (c) 2007-2009 Kip Macy kmacy@freebsd.org +/*- + * Copyright (c) 2007-2009 Kip Macy * All rights reserved. * * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. The name of Kip Macy nor the names of other - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. * * $FreeBSD$ * - ***************************************************************************/ + */ #ifndef _SYS_BUF_RING_H_ #define _SYS_BUF_RING_H_ diff --git a/sys/sys/conf.h b/sys/sys/conf.h index a18d6996a6c..937414d7b65 100644 --- a/sys/sys/conf.h +++ b/sys/sys/conf.h @@ -262,8 +262,10 @@ struct cdev *make_dev(struct cdevsw *_devsw, int _unit, uid_t _uid, gid_t _gid, struct cdev *make_dev_cred(struct cdevsw *_devsw, int _unit, struct ucred *_cr, uid_t _uid, gid_t _gid, int _perms, const char *_fmt, ...) __printflike(7, 8); -#define MAKEDEV_REF 0x1 -#define MAKEDEV_WHTOUT 0x2 +#define MAKEDEV_REF 0x1 +#define MAKEDEV_WHTOUT 0x2 +#define MAKEDEV_NOWAIT 0x4 +#define MAKEDEV_WAITOK 0x8 struct cdev *make_dev_credf(int _flags, struct cdevsw *_devsw, int _unit, struct ucred *_cr, uid_t _uid, gid_t _gid, int _mode, diff --git a/sys/sys/dtrace_bsd.h b/sys/sys/dtrace_bsd.h index f3232841fa0..a14a1a160f3 100644 --- a/sys/sys/dtrace_bsd.h +++ b/sys/sys/dtrace_bsd.h @@ -50,7 +50,7 @@ typedef void (*cyclic_clock_func_t)(struct trapframe *); * * Defining them here avoids a proliferation of header files. */ -extern cyclic_clock_func_t lapic_cyclic_clock_func[]; +extern cyclic_clock_func_t cyclic_clock_func[]; /* * The dtrace module handles traps that occur during a DTrace probe. 
diff --git a/sys/sys/mbuf.h b/sys/sys/mbuf.h index 158edb4fdc2..95a44a4aecf 100644 --- a/sys/sys/mbuf.h +++ b/sys/sys/mbuf.h @@ -523,6 +523,9 @@ m_getjcl(int how, short type, int flags, int size) struct mbuf *m, *n; uma_zone_t zone; + if (size == MCLBYTES) + return m_getcl(how, type, flags); + args.flags = flags; args.type = type; diff --git a/sys/sys/mount.h b/sys/sys/mount.h index b8264130a17..20dcf641c7b 100644 --- a/sys/sys/mount.h +++ b/sys/sys/mount.h @@ -275,7 +275,8 @@ void __mnt_vnode_markerfree(struct vnode **mvp, struct mount *mp); MNT_ROOTFS | MNT_NOATIME | MNT_NOCLUSTERR| \ MNT_NOCLUSTERW | MNT_SUIDDIR | MNT_SOFTDEP | \ MNT_IGNORE | MNT_EXPUBLIC | MNT_NOSYMFOLLOW | \ - MNT_GJOURNAL | MNT_MULTILABEL | MNT_ACLS | MNT_NFS4ACLS) + MNT_GJOURNAL | MNT_MULTILABEL | MNT_ACLS | \ + MNT_NFS4ACLS) /* Mask of flags that can be updated. */ #define MNT_UPDATEMASK (MNT_NOSUID | MNT_NOEXEC | \ @@ -324,6 +325,7 @@ void __mnt_vnode_markerfree(struct vnode **mvp, struct mount *mp); #define MNTK_REFEXPIRE 0x00000020 /* refcount expiring is happening */ #define MNTK_EXTENDED_SHARED 0x00000040 /* Allow shared locking for more ops */ #define MNTK_SHARED_WRITES 0x00000080 /* Allow shared locking for writes */ +#define MNTK_SUJ 0x00000100 /* Softdep journaling enabled */ #define MNTK_UNMOUNT 0x01000000 /* unmount in progress */ #define MNTK_MWAIT 0x02000000 /* waiting for unmount to finish */ #define MNTK_SUSPEND 0x08000000 /* request write suspension */ diff --git a/sys/sys/param.h b/sys/sys/param.h index 9f87ee2c21c..11c15ab1e84 100644 --- a/sys/sys/param.h +++ b/sys/sys/param.h @@ -58,7 +58,7 @@ * in the range 5 to 9. 
*/ #undef __FreeBSD_version -#define __FreeBSD_version 900010 /* Master, propagated to newvers */ +#define __FreeBSD_version 900011 /* Master, propagated to newvers */ #ifndef LOCORE #include diff --git a/sys/sys/proc.h b/sys/sys/proc.h index fb31cfc1318..e32e494f00f 100644 --- a/sys/sys/proc.h +++ b/sys/sys/proc.h @@ -172,6 +172,27 @@ struct kdtrace_proc; struct kdtrace_thread; struct cpuset; +/* + * XXX: Does this belong in resource.h or resourcevar.h instead? + * Resource usage extension. The times in rusage structs in the kernel are + * never up to date. The actual times are kept as runtimes and tick counts + * (with control info in the "previous" times), and are converted when + * userland asks for rusage info. Backwards compatibility prevents putting + * this directly in the user-visible rusage struct. + * + * Locking for p_rux: (cj) means (j) for p_rux and (c) for p_crux. + * Locking for td_rux: (t) for all fields. + */ +struct rusage_ext { + u_int64_t rux_runtime; /* (cj) Real time. */ + u_int64_t rux_uticks; /* (cj) Statclock hits in user mode. */ + u_int64_t rux_sticks; /* (cj) Statclock hits in sys mode. */ + u_int64_t rux_iticks; /* (cj) Statclock hits in intr mode. */ + u_int64_t rux_uu; /* (c) Previous user time in usec. */ + u_int64_t rux_su; /* (c) Previous sys time in usec. */ + u_int64_t rux_tu; /* (c) Previous total time in usec. */ +}; + /* * Kernel runnable context (thread). * This is what is put to sleep and reactivated. @@ -219,7 +240,8 @@ struct thread { u_int td_estcpu; /* (t) estimated cpu utilization */ int td_slptick; /* (t) Time at sleep. */ int td_blktick; /* (t) Time spent blocked. */ - struct rusage td_ru; /* (t) rusage information */ + struct rusage td_ru; /* (t) rusage information. */ + struct rusage_ext td_rux; /* (t) Internal rusage information. */ uint64_t td_incruntime; /* (t) Cpu ticks to transfer to proc. */ uint64_t td_runtime; /* (t) How many cpu ticks we've run. 
*/ u_int td_pticks; /* (t) Statclock hits for profiling */ @@ -425,26 +447,6 @@ do { \ #define TD_SET_RUNQ(td) (td)->td_state = TDS_RUNQ #define TD_SET_CAN_RUN(td) (td)->td_state = TDS_CAN_RUN -/* - * XXX: Does this belong in resource.h or resourcevar.h instead? - * Resource usage extension. The times in rusage structs in the kernel are - * never up to date. The actual times are kept as runtimes and tick counts - * (with control info in the "previous" times), and are converted when - * userland asks for rusage info. Backwards compatibility prevents putting - * this directly in the user-visible rusage struct. - * - * Locking: (cj) means (j) for p_rux and (c) for p_crux. - */ -struct rusage_ext { - u_int64_t rux_runtime; /* (cj) Real time. */ - u_int64_t rux_uticks; /* (cj) Statclock hits in user mode. */ - u_int64_t rux_sticks; /* (cj) Statclock hits in sys mode. */ - u_int64_t rux_iticks; /* (cj) Statclock hits in intr mode. */ - u_int64_t rux_uu; /* (c) Previous user time in usec. */ - u_int64_t rux_su; /* (c) Previous sys time in usec. */ - u_int64_t rux_tu; /* (c) Previous total time in usec. */ -}; - /* * Process structure. 
*/ diff --git a/sys/sys/resource.h b/sys/sys/resource.h index 9af96afe0e0..e7037443246 100644 --- a/sys/sys/resource.h +++ b/sys/sys/resource.h @@ -56,6 +56,7 @@ #define RUSAGE_SELF 0 #define RUSAGE_CHILDREN -1 +#define RUSAGE_THREAD 1 struct rusage { struct timeval ru_utime; /* user time used */ diff --git a/sys/sys/resourcevar.h b/sys/sys/resourcevar.h index 21728aa67e4..95a9b49e1a6 100644 --- a/sys/sys/resourcevar.h +++ b/sys/sys/resourcevar.h @@ -131,7 +131,7 @@ void rucollect(struct rusage *ru, struct rusage *ru2); void rufetch(struct proc *p, struct rusage *ru); void rufetchcalc(struct proc *p, struct rusage *ru, struct timeval *up, struct timeval *sp); -void ruxagg(struct rusage_ext *rux, struct thread *td); +void ruxagg_locked(struct rusage_ext *rux, struct thread *td); int suswintr(void *base, int word); struct uidinfo *uifind(uid_t uid); diff --git a/sys/sys/taskqueue.h b/sys/sys/taskqueue.h index bf2e4ee31d0..fa8897d6dce 100644 --- a/sys/sys/taskqueue.h +++ b/sys/sys/taskqueue.h @@ -75,6 +75,7 @@ void taskqueue_thread_enqueue(void *context); (task)->ta_priority = (priority); \ (task)->ta_func = (func); \ (task)->ta_context = (context); \ + (task)->ta_flags = 0; \ } while (0) /* diff --git a/sys/sys/thr.h b/sys/sys/thr.h index ae4a65d368f..7ccc8724ffb 100644 --- a/sys/sys/thr.h +++ b/sys/sys/thr.h @@ -30,6 +30,7 @@ #ifndef _SYS_THR_H_ #define _SYS_THR_H_ +#include #include #include @@ -68,6 +69,7 @@ typedef __pid_t pid_t; #define _PID_T_DECLARED #endif +__BEGIN_DECLS int thr_create(ucontext_t *ctx, long *id, int flags); int thr_new(struct thr_param *param, int param_size); int thr_self(long *id); @@ -77,7 +79,7 @@ int thr_kill2(pid_t pid, long id, int sig); int thr_suspend(const struct timespec *timeout); int thr_wake(long id); int thr_set_name(long id, const char *name); - +__END_DECLS #endif /* !_KERNEL */ #endif /* ! 
_SYS_THR_H_ */ diff --git a/sys/sys/user.h b/sys/sys/user.h index d5e46feb58f..34635e3dc18 100644 --- a/sys/sys/user.h +++ b/sys/sys/user.h @@ -87,34 +87,11 @@ #define KI_NSPARE_LONG 12 #define KI_NSPARE_PTR 7 -#ifdef __amd64__ -#define KINFO_PROC_SIZE 1088 -#endif -#ifdef __arm__ -#define KINFO_PROC_SIZE 792 -#endif -#ifdef __ia64__ -#define KINFO_PROC_SIZE 1088 -#endif -#ifdef __i386__ -#define KINFO_PROC_SIZE 768 -#endif -#ifdef __mips__ -#ifdef __mips_n64 -#define KINFO_PROC_SIZE 1088 -#else -#define KINFO_PROC_SIZE 816 -#endif -#endif -#ifdef __powerpc__ -#define KINFO_PROC_SIZE 768 -#endif -#ifdef __sparc64__ -#define KINFO_PROC_SIZE 1088 -#endif +#ifndef _KERNEL #ifndef KINFO_PROC_SIZE #error "Unknown architecture" #endif +#endif /* !_KERNEL */ #define WMESGLEN 8 /* size of returned wchan message */ #define LOCKNAMELEN 8 /* size of returned lock name */ diff --git a/sys/sys/vmmeter.h b/sys/sys/vmmeter.h index 6c866c7f1dc..9f5756d987b 100644 --- a/sys/sys/vmmeter.h +++ b/sys/sys/vmmeter.h @@ -74,7 +74,7 @@ struct vmmeter { u_int v_tcached; /* (q) total pages cached */ u_int v_dfree; /* (q) pages freed by daemon */ - u_int v_pfree; /* (q) pages freed by exiting processes */ + u_int v_pfree; /* (p) pages freed by exiting processes */ u_int v_tfree; /* (p) total pages freed */ /* * Distribution of page usages. 
diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h index b5784e4a75a..64b3ce4e89e 100644 --- a/sys/sys/vnode.h +++ b/sys/sys/vnode.h @@ -655,6 +655,8 @@ int vn_rdwr_inchunks(enum uio_rw rw, struct vnode *vp, void *base, size_t len, off_t offset, enum uio_seg segflg, int ioflg, struct ucred *active_cred, struct ucred *file_cred, size_t *aresid, struct thread *td); +int vn_rlimit_fsize(const struct vnode *vn, const struct uio *uio, + const struct thread *td); int vn_stat(struct vnode *vp, struct stat *sb, struct ucred *active_cred, struct ucred *file_cred, struct thread *td); int vn_start_write(struct vnode *vp, struct mount **mpp, int flags); @@ -670,6 +672,7 @@ int vn_extattr_rm(struct vnode *vp, int ioflg, int attrnamespace, int vn_vget_ino(struct vnode *vp, ino_t ino, int lkflags, struct vnode **rvp); + int vfs_cache_lookup(struct vop_lookup_args *ap); void vfs_timestamp(struct timespec *); void vfs_write_resume(struct mount *mp); diff --git a/sys/ufs/ffs/ffs_alloc.c b/sys/ufs/ffs/ffs_alloc.c index 7bf11771972..b1f7ba0127f 100644 --- a/sys/ufs/ffs/ffs_alloc.c +++ b/sys/ufs/ffs/ffs_alloc.c @@ -94,24 +94,24 @@ __FBSDID("$FreeBSD$"); #include typedef ufs2_daddr_t allocfcn_t(struct inode *ip, u_int cg, ufs2_daddr_t bpref, - int size); + int size, int rsize); -static ufs2_daddr_t ffs_alloccg(struct inode *, u_int, ufs2_daddr_t, int); +static ufs2_daddr_t ffs_alloccg(struct inode *, u_int, ufs2_daddr_t, int, int); static ufs2_daddr_t - ffs_alloccgblk(struct inode *, struct buf *, ufs2_daddr_t); + ffs_alloccgblk(struct inode *, struct buf *, ufs2_daddr_t, int); #ifdef INVARIANTS static int ffs_checkblk(struct inode *, ufs2_daddr_t, long); #endif -static ufs2_daddr_t ffs_clusteralloc(struct inode *, u_int, ufs2_daddr_t, int); -static void ffs_clusteracct(struct ufsmount *, struct fs *, struct cg *, - ufs1_daddr_t, int); +static ufs2_daddr_t ffs_clusteralloc(struct inode *, u_int, ufs2_daddr_t, int, + int); static ino_t ffs_dirpref(struct inode *); static ufs2_daddr_t 
ffs_fragextend(struct inode *, u_int, ufs2_daddr_t, int, int); static void ffs_fserr(struct fs *, ino_t, char *); static ufs2_daddr_t ffs_hashalloc - (struct inode *, u_int, ufs2_daddr_t, int, allocfcn_t *); -static ufs2_daddr_t ffs_nodealloccg(struct inode *, u_int, ufs2_daddr_t, int); + (struct inode *, u_int, ufs2_daddr_t, int, int, allocfcn_t *); +static ufs2_daddr_t ffs_nodealloccg(struct inode *, u_int, ufs2_daddr_t, int, + int); static ufs1_daddr_t ffs_mapsearch(struct fs *, struct cg *, ufs2_daddr_t, int); static int ffs_reallocblks_ufs1(struct vop_reallocblks_args *); static int ffs_reallocblks_ufs2(struct vop_reallocblks_args *); @@ -188,7 +188,7 @@ retry: cg = ino_to_cg(fs, ip->i_number); else cg = dtog(fs, bpref); - bno = ffs_hashalloc(ip, cg, bpref, size, ffs_alloccg); + bno = ffs_hashalloc(ip, cg, bpref, size, size, ffs_alloccg); if (bno > 0) { delta = btodb(size); if (ip->i_flag & IN_SPACECOUNTED) { @@ -387,16 +387,12 @@ retry: panic("ffs_realloccg: bad optim"); /* NOTREACHED */ } - bno = ffs_hashalloc(ip, cg, bpref, request, ffs_alloccg); + bno = ffs_hashalloc(ip, cg, bpref, request, nsize, ffs_alloccg); if (bno > 0) { bp->b_blkno = fsbtodb(fs, bno); if (!DOINGSOFTDEP(vp)) ffs_blkfree(ump, fs, ip->i_devvp, bprev, (long)osize, - ip->i_number); - if (nsize < request) - ffs_blkfree(ump, fs, ip->i_devvp, - bno + numfrags(fs, nsize), - (long)(request - nsize), ip->i_number); + ip->i_number, NULL); delta = btodb(nsize - osize); if (ip->i_flag & IN_SPACECOUNTED) { UFS_LOCK(ump); @@ -487,6 +483,14 @@ ffs_reallocblks(ap) if (doreallocblks == 0) return (ENOSPC); + /* + * We can't wait in softdep prealloc as it may fsync and recurse + * here. Instead we simply fail to reallocate blocks if this + * rare condition arises. 
+ */ + if (DOINGSOFTDEP(ap->a_vp)) + if (softdep_prealloc(ap->a_vp, MNT_NOWAIT) != 0) + return (ENOSPC); if (VTOI(ap->a_vp)->i_ump->um_fstype == UFS1) return (ffs_reallocblks_ufs1(ap)); return (ffs_reallocblks_ufs2(ap)); @@ -587,7 +591,7 @@ ffs_reallocblks_ufs1(ap) * Search the block map looking for an allocation of the desired size. */ if ((newblk = ffs_hashalloc(ip, dtog(fs, pref), pref, - len, ffs_clusteralloc)) == 0) { + len, len, ffs_clusteralloc)) == 0) { UFS_UNLOCK(ump); goto fail; } @@ -673,7 +677,7 @@ ffs_reallocblks_ufs1(ap) if (!DOINGSOFTDEP(vp)) ffs_blkfree(ump, fs, ip->i_devvp, dbtofsb(fs, buflist->bs_children[i]->b_blkno), - fs->fs_bsize, ip->i_number); + fs->fs_bsize, ip->i_number, NULL); buflist->bs_children[i]->b_blkno = fsbtodb(fs, blkno); #ifdef INVARIANTS if (!ffs_checkblk(ip, @@ -795,7 +799,7 @@ ffs_reallocblks_ufs2(ap) * Search the block map looking for an allocation of the desired size. */ if ((newblk = ffs_hashalloc(ip, dtog(fs, pref), pref, - len, ffs_clusteralloc)) == 0) { + len, len, ffs_clusteralloc)) == 0) { UFS_UNLOCK(ump); goto fail; } @@ -881,7 +885,7 @@ ffs_reallocblks_ufs2(ap) if (!DOINGSOFTDEP(vp)) ffs_blkfree(ump, fs, ip->i_devvp, dbtofsb(fs, buflist->bs_children[i]->b_blkno), - fs->fs_bsize, ip->i_number); + fs->fs_bsize, ip->i_number, NULL); buflist->bs_children[i]->b_blkno = fsbtodb(fs, blkno); #ifdef INVARIANTS if (!ffs_checkblk(ip, @@ -969,7 +973,7 @@ ffs_valloc(pvp, mode, cred, vpp) if (fs->fs_contigdirs[cg] > 0) fs->fs_contigdirs[cg]--; } - ino = (ino_t)ffs_hashalloc(pip, cg, ipref, mode, + ino = (ino_t)ffs_hashalloc(pip, cg, ipref, mode, 0, (allocfcn_t *)ffs_nodealloccg); if (ino == 0) goto noinodes; @@ -1278,11 +1282,12 @@ ffs_blkpref_ufs2(ip, lbn, indx, bap) */ /*VARARGS5*/ static ufs2_daddr_t -ffs_hashalloc(ip, cg, pref, size, allocator) +ffs_hashalloc(ip, cg, pref, size, rsize, allocator) struct inode *ip; u_int cg; ufs2_daddr_t pref; - int size; /* size for data blocks, mode for inodes */ + int size; /* Search size 
for data blocks, mode for inodes */ + int rsize; /* Real allocated size. */ allocfcn_t *allocator; { struct fs *fs; @@ -1298,7 +1303,7 @@ ffs_hashalloc(ip, cg, pref, size, allocator) /* * 1: preferred cylinder group */ - result = (*allocator)(ip, cg, pref, size); + result = (*allocator)(ip, cg, pref, size, rsize); if (result) return (result); /* @@ -1308,7 +1313,7 @@ ffs_hashalloc(ip, cg, pref, size, allocator) cg += i; if (cg >= fs->fs_ncg) cg -= fs->fs_ncg; - result = (*allocator)(ip, cg, 0, size); + result = (*allocator)(ip, cg, 0, size, rsize); if (result) return (result); } @@ -1319,7 +1324,7 @@ ffs_hashalloc(ip, cg, pref, size, allocator) */ cg = (icg + 2) % fs->fs_ncg; for (i = 2; i < fs->fs_ncg; i++) { - result = (*allocator)(ip, cg, 0, size); + result = (*allocator)(ip, cg, 0, size, rsize); if (result) return (result); cg++; @@ -1401,7 +1406,8 @@ ffs_fragextend(ip, cg, bprev, osize, nsize) ACTIVECLEAR(fs, cg); UFS_UNLOCK(ump); if (DOINGSOFTDEP(ITOV(ip))) - softdep_setup_blkmapdep(bp, UFSTOVFS(ump), bprev); + softdep_setup_blkmapdep(bp, UFSTOVFS(ump), bprev, + frags, numfrags(fs, osize)); bdwrite(bp); return (bprev); @@ -1419,11 +1425,12 @@ fail: * and if it is, allocate it. 
*/ static ufs2_daddr_t -ffs_alloccg(ip, cg, bpref, size) +ffs_alloccg(ip, cg, bpref, size, rsize) struct inode *ip; u_int cg; ufs2_daddr_t bpref; int size; + int rsize; { struct fs *fs; struct cg *cgp; @@ -1451,7 +1458,7 @@ ffs_alloccg(ip, cg, bpref, size) cgp->cg_old_time = cgp->cg_time = time_second; if (size == fs->fs_bsize) { UFS_LOCK(ump); - blkno = ffs_alloccgblk(ip, bp, bpref); + blkno = ffs_alloccgblk(ip, bp, bpref, rsize); ACTIVECLEAR(fs, cg); UFS_UNLOCK(ump); bdwrite(bp); @@ -1475,21 +1482,14 @@ ffs_alloccg(ip, cg, bpref, size) if (cgp->cg_cs.cs_nbfree == 0) goto fail; UFS_LOCK(ump); - blkno = ffs_alloccgblk(ip, bp, bpref); - bno = dtogd(fs, blkno); - for (i = frags; i < fs->fs_frag; i++) - setbit(blksfree, bno + i); - i = fs->fs_frag - frags; - cgp->cg_cs.cs_nffree += i; - fs->fs_cstotal.cs_nffree += i; - fs->fs_cs(fs, cg).cs_nffree += i; - fs->fs_fmod = 1; - cgp->cg_frsum[i]++; + blkno = ffs_alloccgblk(ip, bp, bpref, rsize); ACTIVECLEAR(fs, cg); UFS_UNLOCK(ump); bdwrite(bp); return (blkno); } + KASSERT(size == rsize, + ("ffs_alloccg: size(%d) != rsize(%d)", size, rsize)); bno = ffs_mapsearch(fs, cgp, bpref, allocsiz); if (bno < 0) goto fail; @@ -1507,7 +1507,7 @@ ffs_alloccg(ip, cg, bpref, size) ACTIVECLEAR(fs, cg); UFS_UNLOCK(ump); if (DOINGSOFTDEP(ITOV(ip))) - softdep_setup_blkmapdep(bp, UFSTOVFS(ump), blkno); + softdep_setup_blkmapdep(bp, UFSTOVFS(ump), blkno, frags, 0); bdwrite(bp); return (blkno); @@ -1529,10 +1529,11 @@ fail: * blocks may be fragmented by the routine that allocates them. 
*/ static ufs2_daddr_t -ffs_alloccgblk(ip, bp, bpref) +ffs_alloccgblk(ip, bp, bpref, size) struct inode *ip; struct buf *bp; ufs2_daddr_t bpref; + int size; { struct fs *fs; struct cg *cgp; @@ -1540,6 +1541,7 @@ ffs_alloccgblk(ip, bp, bpref) ufs1_daddr_t bno; ufs2_daddr_t blkno; u_int8_t *blksfree; + int i; fs = ip->i_fs; ump = ip->i_ump; @@ -1567,16 +1569,32 @@ ffs_alloccgblk(ip, bp, bpref) gotit: blkno = fragstoblks(fs, bno); ffs_clrblock(fs, blksfree, (long)blkno); - ffs_clusteracct(ump, fs, cgp, blkno, -1); + ffs_clusteracct(fs, cgp, blkno, -1); cgp->cg_cs.cs_nbfree--; fs->fs_cstotal.cs_nbfree--; fs->fs_cs(fs, cgp->cg_cgx).cs_nbfree--; fs->fs_fmod = 1; blkno = cgbase(fs, cgp->cg_cgx) + bno; + /* + * If the caller didn't want the whole block free the frags here. + */ + size = numfrags(fs, size); + if (size != fs->fs_frag) { + bno = dtogd(fs, blkno); + for (i = size; i < fs->fs_frag; i++) + setbit(blksfree, bno + i); + i = fs->fs_frag - size; + cgp->cg_cs.cs_nffree += i; + fs->fs_cstotal.cs_nffree += i; + fs->fs_cs(fs, cgp->cg_cgx).cs_nffree += i; + fs->fs_fmod = 1; + cgp->cg_frsum[i]++; + } /* XXX Fixme. */ UFS_UNLOCK(ump); if (DOINGSOFTDEP(ITOV(ip))) - softdep_setup_blkmapdep(bp, UFSTOVFS(ump), blkno); + softdep_setup_blkmapdep(bp, UFSTOVFS(ump), blkno, + size, 0); UFS_LOCK(ump); return (blkno); } @@ -1589,11 +1607,12 @@ gotit: * take the first one that we find following bpref. 
*/ static ufs2_daddr_t -ffs_clusteralloc(ip, cg, bpref, len) +ffs_clusteralloc(ip, cg, bpref, len, unused) struct inode *ip; u_int cg; ufs2_daddr_t bpref; int len; + int unused; { struct fs *fs; struct cg *cgp; @@ -1689,7 +1708,7 @@ ffs_clusteralloc(ip, cg, bpref, len) len = blkstofrags(fs, len); UFS_LOCK(ump); for (i = 0; i < len; i += fs->fs_frag) - if (ffs_alloccgblk(ip, bp, bno + i) != bno + i) + if (ffs_alloccgblk(ip, bp, bno + i, fs->fs_bsize) != bno + i) panic("ffs_clusteralloc: lost block"); ACTIVECLEAR(fs, cg); UFS_UNLOCK(ump); @@ -1713,11 +1732,12 @@ fail: * inode in the specified cylinder group. */ static ufs2_daddr_t -ffs_nodealloccg(ip, cg, ipref, mode) +ffs_nodealloccg(ip, cg, ipref, mode, unused) struct inode *ip; u_int cg; ufs2_daddr_t ipref; int mode; + int unused; { struct fs *fs; struct cg *cgp; @@ -1819,28 +1839,6 @@ gotit: return ((ino_t)(cg * fs->fs_ipg + ipref)); } -/* - * check if a block is free - */ -static int -ffs_isfreeblock(struct fs *fs, u_char *cp, ufs1_daddr_t h) -{ - - switch ((int)fs->fs_frag) { - case 8: - return (cp[h] == 0); - case 4: - return ((cp[h >> 1] & (0x0f << ((h & 0x1) << 2))) == 0); - case 2: - return ((cp[h >> 2] & (0x03 << ((h & 0x3) << 1))) == 0); - case 1: - return ((cp[h >> 3] & (0x01 << (h & 0x7))) == 0); - default: - panic("ffs_isfreeblock"); - } - return (0); -} - /* * Free a block or fragment. * @@ -1849,14 +1847,16 @@ ffs_isfreeblock(struct fs *fs, u_char *cp, ufs1_daddr_t h) * block reassembly is checked. 
*/ void -ffs_blkfree(ump, fs, devvp, bno, size, inum) +ffs_blkfree(ump, fs, devvp, bno, size, inum, dephd) struct ufsmount *ump; struct fs *fs; struct vnode *devvp; ufs2_daddr_t bno; long size; ino_t inum; + struct workhead *dephd; { + struct mount *mp; struct cg *cgp; struct buf *bp; ufs1_daddr_t fragno, cgbno; @@ -1923,7 +1923,7 @@ ffs_blkfree(ump, fs, devvp, bno, size, inum) panic("ffs_blkfree: freeing free block"); } ffs_setblock(fs, blksfree, fragno); - ffs_clusteracct(ump, fs, cgp, fragno, 1); + ffs_clusteracct(fs, cgp, fragno, 1); cgp->cg_cs.cs_nbfree++; fs->fs_cstotal.cs_nbfree++; fs->fs_cs(fs, cg).cs_nbfree++; @@ -1963,7 +1963,7 @@ ffs_blkfree(ump, fs, devvp, bno, size, inum) cgp->cg_cs.cs_nffree -= fs->fs_frag; fs->fs_cstotal.cs_nffree -= fs->fs_frag; fs->fs_cs(fs, cg).cs_nffree -= fs->fs_frag; - ffs_clusteracct(ump, fs, cgp, fragno, 1); + ffs_clusteracct(fs, cgp, fragno, 1); cgp->cg_cs.cs_nbfree++; fs->fs_cstotal.cs_nbfree++; fs->fs_cs(fs, cg).cs_nbfree++; @@ -1972,6 +1972,10 @@ ffs_blkfree(ump, fs, devvp, bno, size, inum) fs->fs_fmod = 1; ACTIVECLEAR(fs, cg); UFS_UNLOCK(ump); + mp = UFSTOVFS(ump); + if (mp->mnt_flag & MNT_SOFTDEP && devvp->v_type != VREG) + softdep_setup_blkfree(UFSTOVFS(ump), bp, bno, + numfrags(fs, size), dephd); bdwrite(bp); } @@ -2042,7 +2046,8 @@ ffs_vfree(pvp, ino, mode) return (0); } ip = VTOI(pvp); - return (ffs_freefile(ip->i_ump, ip->i_fs, ip->i_devvp, ino, mode)); + return (ffs_freefile(ip->i_ump, ip->i_fs, ip->i_devvp, ino, mode, + NULL)); } /* @@ -2050,12 +2055,13 @@ ffs_vfree(pvp, ino, mode) * The specified inode is placed back in the free map. 
*/ int -ffs_freefile(ump, fs, devvp, ino, mode) +ffs_freefile(ump, fs, devvp, ino, mode, wkhd) struct ufsmount *ump; struct fs *fs; struct vnode *devvp; ino_t ino; int mode; + struct workhead *wkhd; { struct cg *cgp; struct buf *bp; @@ -2112,6 +2118,9 @@ ffs_freefile(ump, fs, devvp, ino, mode) fs->fs_fmod = 1; ACTIVECLEAR(fs, cg); UFS_UNLOCK(ump); + if (UFSTOVFS(ump)->mnt_flag & MNT_SOFTDEP && devvp->v_type != VREG) + softdep_setup_inofree(UFSTOVFS(ump), bp, + ino + cg * fs->fs_ipg, wkhd); bdwrite(bp); return (0); } @@ -2225,101 +2234,6 @@ ffs_mapsearch(fs, cgp, bpref, allocsiz) return (-1); } -/* - * Update the cluster map because of an allocation or free. - * - * Cnt == 1 means free; cnt == -1 means allocating. - */ -void -ffs_clusteracct(ump, fs, cgp, blkno, cnt) - struct ufsmount *ump; - struct fs *fs; - struct cg *cgp; - ufs1_daddr_t blkno; - int cnt; -{ - int32_t *sump; - int32_t *lp; - u_char *freemapp, *mapp; - int i, start, end, forw, back, map, bit; - - mtx_assert(UFS_MTX(ump), MA_OWNED); - - if (fs->fs_contigsumsize <= 0) - return; - freemapp = cg_clustersfree(cgp); - sump = cg_clustersum(cgp); - /* - * Allocate or clear the actual block. - */ - if (cnt > 0) - setbit(freemapp, blkno); - else - clrbit(freemapp, blkno); - /* - * Find the size of the cluster going forward. - */ - start = blkno + 1; - end = start + fs->fs_contigsumsize; - if (end >= cgp->cg_nclusterblks) - end = cgp->cg_nclusterblks; - mapp = &freemapp[start / NBBY]; - map = *mapp++; - bit = 1 << (start % NBBY); - for (i = start; i < end; i++) { - if ((map & bit) == 0) - break; - if ((i & (NBBY - 1)) != (NBBY - 1)) { - bit <<= 1; - } else { - map = *mapp++; - bit = 1; - } - } - forw = i - start; - /* - * Find the size of the cluster going backward. 
- */ - start = blkno - 1; - end = start - fs->fs_contigsumsize; - if (end < 0) - end = -1; - mapp = &freemapp[start / NBBY]; - map = *mapp--; - bit = 1 << (start % NBBY); - for (i = start; i > end; i--) { - if ((map & bit) == 0) - break; - if ((i & (NBBY - 1)) != 0) { - bit >>= 1; - } else { - map = *mapp--; - bit = 1 << (NBBY - 1); - } - } - back = start - i; - /* - * Account for old cluster and the possibly new forward and - * back clusters. - */ - i = back + forw + 1; - if (i > fs->fs_contigsumsize) - i = fs->fs_contigsumsize; - sump[i] += cnt; - if (back > 0) - sump[back] -= cnt; - if (forw > 0) - sump[forw] -= cnt; - /* - * Update cluster summary information. - */ - lp = &sump[fs->fs_contigsumsize]; - for (i = fs->fs_contigsumsize; i > 0; i--) - if (*lp-- > 0) - break; - fs->fs_maxcluster[cgp->cg_cgx] = i; -} - /* * Fserr prints the name of a filesystem with an error diagnostic. * @@ -2540,7 +2454,7 @@ sysctl_ffs_fsck(SYSCTL_HANDLER_ARGS) #endif /* DEBUG */ while (cmd.size > 0) { if ((error = ffs_freefile(ump, fs, ump->um_devvp, - cmd.value, filetype))) + cmd.value, filetype, NULL))) break; cmd.size -= 1; cmd.value += 1; @@ -2568,7 +2482,7 @@ sysctl_ffs_fsck(SYSCTL_HANDLER_ARGS) if (blksize > blkcnt) blksize = blkcnt; ffs_blkfree(ump, fs, ump->um_devvp, blkno, - blksize * fs->fs_fsize, ROOTINO); + blksize * fs->fs_fsize, ROOTINO, NULL); blkno += blksize; blkcnt -= blksize; blksize = fs->fs_frag; diff --git a/sys/ufs/ffs/ffs_balloc.c b/sys/ufs/ffs/ffs_balloc.c index a12f96e60d0..6d5f27c1f30 100644 --- a/sys/ufs/ffs/ffs_balloc.c +++ b/sys/ufs/ffs/ffs_balloc.c @@ -120,6 +120,8 @@ ffs_balloc_ufs1(struct vnode *vp, off_t startoffset, int size, if (lbn < 0) return (EFBIG); + if (DOINGSOFTDEP(vp)) + softdep_prealloc(vp, MNT_WAIT); /* * If the next write will extend the file into a new block, * and the file is currently composed of a fragment @@ -418,6 +420,8 @@ fail: * slow, running out of disk space is not expected to be a common * occurence. 
The error return from fsync is ignored as we already * have an error to return to the user. + * + * XXX Still have to journal the free below */ (void) ffs_syncvnode(vp, MNT_WAIT); for (deallocated = 0, blkp = allociblk, lbns_remfree = lbns; @@ -473,7 +477,7 @@ fail: */ for (blkp = allociblk; blkp < allocblk; blkp++) { ffs_blkfree(ump, fs, ip->i_devvp, *blkp, fs->fs_bsize, - ip->i_number); + ip->i_number, NULL); } return (error); } @@ -515,6 +519,9 @@ ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size, if (lbn < 0) return (EFBIG); + if (DOINGSOFTDEP(vp)) + softdep_prealloc(vp, MNT_WAIT); + /* * Check for allocating external data. */ @@ -930,6 +937,8 @@ fail: * slow, running out of disk space is not expected to be a common * occurence. The error return from fsync is ignored as we already * have an error to return to the user. + * + * XXX Still have to journal the free below */ (void) ffs_syncvnode(vp, MNT_WAIT); for (deallocated = 0, blkp = allociblk, lbns_remfree = lbns; @@ -985,7 +994,7 @@ fail: */ for (blkp = allociblk; blkp < allocblk; blkp++) { ffs_blkfree(ump, fs, ip->i_devvp, *blkp, fs->fs_bsize, - ip->i_number); + ip->i_number, NULL); } return (error); } diff --git a/sys/ufs/ffs/ffs_extern.h b/sys/ufs/ffs/ffs_extern.h index 7e32ced2ebe..7011623749b 100644 --- a/sys/ufs/ffs/ffs_extern.h +++ b/sys/ufs/ffs/ffs_extern.h @@ -47,6 +47,7 @@ struct ucred; struct vnode; struct vop_fsync_args; struct vop_reallocblks_args; +struct workhead; int ffs_alloc(struct inode *, ufs2_daddr_t, ufs2_daddr_t, int, int, struct ucred *, ufs2_daddr_t *); @@ -56,20 +57,23 @@ int ffs_balloc_ufs2(struct vnode *a_vp, off_t a_startoffset, int a_size, struct ucred *a_cred, int a_flags, struct buf **a_bpp); int ffs_blkatoff(struct vnode *, off_t, char **, struct buf **); void ffs_blkfree(struct ufsmount *, struct fs *, struct vnode *, - ufs2_daddr_t, long, ino_t); + ufs2_daddr_t, long, ino_t, struct workhead *); ufs2_daddr_t ffs_blkpref_ufs1(struct inode *, ufs_lbn_t, int, 
ufs1_daddr_t *); ufs2_daddr_t ffs_blkpref_ufs2(struct inode *, ufs_lbn_t, int, ufs2_daddr_t *); int ffs_checkfreefile(struct fs *, struct vnode *, ino_t); void ffs_clrblock(struct fs *, u_char *, ufs1_daddr_t); +void ffs_clusteracct(struct fs *, struct cg *, ufs1_daddr_t, int); void ffs_bdflush(struct bufobj *, struct buf *); int ffs_copyonwrite(struct vnode *, struct buf *); int ffs_flushfiles(struct mount *, int, struct thread *); void ffs_fragacct(struct fs *, int, int32_t [], int); int ffs_freefile(struct ufsmount *, struct fs *, struct vnode *, ino_t, - int); + int, struct workhead *); int ffs_isblock(struct fs *, u_char *, ufs1_daddr_t); +int ffs_isfreeblock(struct fs *, u_char *, ufs1_daddr_t); void ffs_load_inode(struct buf *, struct inode *, struct fs *, ino_t); int ffs_mountroot(void); +void ffs_oldfscompat_write(struct fs *, struct ufsmount *); int ffs_reallocblks(struct vop_reallocblks_args *); int ffs_realloccg(struct inode *, ufs2_daddr_t, ufs2_daddr_t, ufs2_daddr_t, int, int, int, struct ucred *, struct buf **); @@ -103,12 +107,14 @@ extern struct vop_vector ffs_fifoops2; int softdep_check_suspend(struct mount *, struct vnode *, int, int, int, int); +int softdep_complete_trunc(struct vnode *, void *); void softdep_get_depcounts(struct mount *, int *, int *); void softdep_initialize(void); void softdep_uninitialize(void); int softdep_mount(struct vnode *, struct mount *, struct fs *, struct ucred *); -void softdep_move_dependencies(struct buf *, struct buf *); +void softdep_unmount(struct mount *); +int softdep_move_dependencies(struct buf *, struct buf *); int softdep_flushworklist(struct mount *, int *, struct thread *); int softdep_flushfiles(struct mount *, int, struct thread *); void softdep_update_inodeblock(struct inode *, struct buf *, int); @@ -117,7 +123,8 @@ void softdep_freefile(struct vnode *, ino_t, int); int softdep_request_cleanup(struct fs *, struct vnode *); void softdep_setup_freeblocks(struct inode *, off_t, int); void 
softdep_setup_inomapdep(struct buf *, struct inode *, ino_t); -void softdep_setup_blkmapdep(struct buf *, struct mount *, ufs2_daddr_t); +void softdep_setup_blkmapdep(struct buf *, struct mount *, ufs2_daddr_t, + int, int); void softdep_setup_allocdirect(struct inode *, ufs_lbn_t, ufs2_daddr_t, ufs2_daddr_t, long, long, struct buf *); void softdep_setup_allocext(struct inode *, ufs_lbn_t, ufs2_daddr_t, @@ -126,11 +133,20 @@ void softdep_setup_allocindir_meta(struct buf *, struct inode *, struct buf *, int, ufs2_daddr_t); void softdep_setup_allocindir_page(struct inode *, ufs_lbn_t, struct buf *, int, ufs2_daddr_t, ufs2_daddr_t, struct buf *); +void softdep_setup_blkfree(struct mount *, struct buf *, ufs2_daddr_t, int, + struct workhead *); +void softdep_setup_inofree(struct mount *, struct buf *, ino_t, + struct workhead *); +void softdep_setup_sbupdate(struct ufsmount *, struct fs *, struct buf *); +void *softdep_setup_trunc(struct vnode *vp, off_t length, int flags); void softdep_fsync_mountdev(struct vnode *); int softdep_sync_metadata(struct vnode *); int softdep_process_worklist(struct mount *, int); int softdep_fsync(struct vnode *); int softdep_waitidle(struct mount *); +int softdep_prealloc(struct vnode *, int); +int softdep_journal_lookup(struct mount *, struct vnode **); + int ffs_rdonly(struct inode *); diff --git a/sys/ufs/ffs/ffs_inode.c b/sys/ufs/ffs/ffs_inode.c index b2f90673012..3b6983258b9 100644 --- a/sys/ufs/ffs/ffs_inode.c +++ b/sys/ufs/ffs/ffs_inode.c @@ -92,15 +92,6 @@ ffs_update(vp, waitfor) fs = ip->i_fs; if (fs->fs_ronly) return (0); - /* - * Ensure that uid and gid are correct. This is a temporary - * fix until fsck has been changed to do the update. 
- */ - if (fs->fs_magic == FS_UFS1_MAGIC && /* XXX */ - fs->fs_old_inodefmt < FS_44INODEFMT) { /* XXX */ - ip->i_din1->di_ouid = ip->i_uid; /* XXX */ - ip->i_din1->di_ogid = ip->i_gid; /* XXX */ - } /* XXX */ error = bread(ip->i_devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)), (int)fs->fs_bsize, NOCRED, &bp); if (error) { @@ -160,6 +151,7 @@ ffs_truncate(vp, length, flags, cred, td) ufs2_daddr_t bn, lbn, lastblock, lastiblock[NIADDR], indir_lbn[NIADDR]; ufs2_daddr_t oldblks[NDADDR + NIADDR], newblks[NDADDR + NIADDR]; ufs2_daddr_t count, blocksreleased = 0, datablocks; + void *cookie; struct bufobj *bo; struct fs *fs; struct buf *bp; @@ -173,11 +165,14 @@ ffs_truncate(vp, length, flags, cred, td) fs = ip->i_fs; ump = ip->i_ump; bo = &vp->v_bufobj; + cookie = NULL; ASSERT_VOP_LOCKED(vp, "ffs_truncate"); if (length < 0) return (EINVAL); + if (length > fs->fs_maxfilesize) + return (EFBIG); /* * Historically clients did not have to specify which data * they were truncating. So, if not specified, we assume @@ -192,6 +187,7 @@ ffs_truncate(vp, length, flags, cred, td) * (e.g., the file is being unlinked), then pick it off with * soft updates below. 
*/ + allerror = 0; needextclean = 0; softdepslowdown = DOINGSOFTDEP(vp) && softdep_slowdown(vp); extblocks = 0; @@ -212,6 +208,8 @@ ffs_truncate(vp, length, flags, cred, td) panic("ffs_truncate: partial trunc of extdata"); if ((error = ffs_syncvnode(vp, MNT_WAIT)) != 0) return (error); + if (DOINGSUJ(vp)) + cookie = softdep_setup_trunc(vp, length, flags); osize = ip->i_din2->di_extsize; ip->i_din2->di_blocks -= extblocks; #ifdef QUOTA @@ -227,19 +225,19 @@ ffs_truncate(vp, length, flags, cred, td) } ip->i_flag |= IN_CHANGE; if ((error = ffs_update(vp, 1))) - return (error); + goto out; for (i = 0; i < NXADDR; i++) { if (oldblks[i] == 0) continue; ffs_blkfree(ump, fs, ip->i_devvp, oldblks[i], - sblksize(fs, osize, i), ip->i_number); + sblksize(fs, osize, i), ip->i_number, NULL); } } } - if ((flags & IO_NORMAL) == 0) - return (0); - if (length > fs->fs_maxfilesize) - return (EFBIG); + if ((flags & IO_NORMAL) == 0) { + error = 0; + goto out; + } if (vp->v_type == VLNK && (ip->i_size < vp->v_mount->mnt_maxsymlinklen || datablocks == 0)) { @@ -253,24 +251,52 @@ ffs_truncate(vp, length, flags, cred, td) ip->i_flag |= IN_CHANGE | IN_UPDATE; if (needextclean) softdep_setup_freeblocks(ip, length, IO_EXT); - return (ffs_update(vp, 1)); + error = ffs_update(vp, 1); + goto out; } if (ip->i_size == length) { ip->i_flag |= IN_CHANGE | IN_UPDATE; if (needextclean) softdep_setup_freeblocks(ip, length, IO_EXT); - return (ffs_update(vp, 0)); + error = ffs_update(vp, 0); + goto out; } if (fs->fs_ronly) panic("ffs_truncate: read-only filesystem"); #ifdef QUOTA error = getinoquota(ip); if (error) - return (error); + goto out; #endif if ((ip->i_flags & SF_SNAPSHOT) != 0) ffs_snapremove(vp); vp->v_lasta = vp->v_clen = vp->v_cstart = vp->v_lastw = 0; + osize = ip->i_size; + /* + * Lengthen the size of the file. We must ensure that the + * last byte of the file is allocated. Since the smallest + * value of osize is 0, length will be at least 1. 
+ */ + if (osize < length) { + vnode_pager_setsize(vp, length); + flags |= BA_CLRBUF; + error = UFS_BALLOC(vp, length - 1, 1, cred, flags, &bp); + if (error) { + vnode_pager_setsize(vp, osize); + goto out; + } + ip->i_size = length; + DIP_SET(ip, i_size, length); + if (bp->b_bufsize == fs->fs_bsize) + bp->b_flags |= B_CLUSTEROK; + if (flags & IO_SYNC) + bwrite(bp); + else + bawrite(bp); + ip->i_flag |= IN_CHANGE | IN_UPDATE; + error = ffs_update(vp, 1); + goto out; + } if (DOINGSOFTDEP(vp)) { if (length > 0 || softdepslowdown) { /* @@ -283,11 +309,18 @@ ffs_truncate(vp, length, flags, cred, td) * so that it will have no data structures left. */ if ((error = ffs_syncvnode(vp, MNT_WAIT)) != 0) - return (error); + goto out; UFS_LOCK(ump); if (ip->i_flag & IN_SPACECOUNTED) fs->fs_pendingblocks -= datablocks; UFS_UNLOCK(ump); + /* + * We have to journal the truncation before we change + * any blocks so we don't leave the file partially + * truncated. + */ + if (DOINGSUJ(vp) && cookie == NULL) + cookie = softdep_setup_trunc(vp, length, flags); } else { #ifdef QUOTA (void) chkdq(ip, -datablocks, NOCRED, 0); @@ -301,34 +334,10 @@ ffs_truncate(vp, length, flags, cred, td) OFF_TO_IDX(lblktosize(fs, -extblocks))); vnode_pager_setsize(vp, 0); ip->i_flag |= IN_CHANGE | IN_UPDATE; - return (ffs_update(vp, 0)); + error = ffs_update(vp, 0); + goto out; } } - osize = ip->i_size; - /* - * Lengthen the size of the file. We must ensure that the - * last byte of the file is allocated. Since the smallest - * value of osize is 0, length will be at least 1. 
- */ - if (osize < length) { - vnode_pager_setsize(vp, length); - flags |= BA_CLRBUF; - error = UFS_BALLOC(vp, length - 1, 1, cred, flags, &bp); - if (error) { - vnode_pager_setsize(vp, osize); - return (error); - } - ip->i_size = length; - DIP_SET(ip, i_size, length); - if (bp->b_bufsize == fs->fs_bsize) - bp->b_flags |= B_CLUSTEROK; - if (flags & IO_SYNC) - bwrite(bp); - else - bawrite(bp); - ip->i_flag |= IN_CHANGE | IN_UPDATE; - return (ffs_update(vp, 1)); - } /* * Shorten the size of the file. If the file is not being * truncated to a block boundary, the contents of the @@ -345,9 +354,8 @@ ffs_truncate(vp, length, flags, cred, td) lbn = lblkno(fs, length); flags |= BA_CLRBUF; error = UFS_BALLOC(vp, length - 1, 1, cred, flags, &bp); - if (error) { - return (error); - } + if (error) + goto out; /* * When we are doing soft updates and the UFS_BALLOC * above fills in a direct block hole with a full sized @@ -359,7 +367,7 @@ ffs_truncate(vp, length, flags, cred, td) if (DOINGSOFTDEP(vp) && lbn < NDADDR && fragroundup(fs, blkoff(fs, length)) < fs->fs_bsize && (error = ffs_syncvnode(vp, MNT_WAIT)) != 0) - return (error); + goto out; ip->i_size = length; DIP_SET(ip, i_size, length); size = blksize(fs, ip, lbn); @@ -405,7 +413,13 @@ ffs_truncate(vp, length, flags, cred, td) DIP_SET(ip, i_db[i], 0); } ip->i_flag |= IN_CHANGE | IN_UPDATE; - allerror = ffs_update(vp, 1); + /* + * When doing softupdate journaling we must preserve the size along + * with the old pointers until they are freed or we might not + * know how many fragments remain. 
+ */ + if (!DOINGSUJ(vp)) + allerror = ffs_update(vp, 1); /* * Having written the new inode to disk, save its new configuration @@ -445,7 +459,7 @@ ffs_truncate(vp, length, flags, cred, td) if (lastiblock[level] < 0) { DIP_SET(ip, i_ib[level], 0); ffs_blkfree(ump, fs, ip->i_devvp, bn, - fs->fs_bsize, ip->i_number); + fs->fs_bsize, ip->i_number, NULL); blocksreleased += nblocks; } } @@ -464,7 +478,8 @@ ffs_truncate(vp, length, flags, cred, td) continue; DIP_SET(ip, i_db[i], 0); bsize = blksize(fs, ip, i); - ffs_blkfree(ump, fs, ip->i_devvp, bn, bsize, ip->i_number); + ffs_blkfree(ump, fs, ip->i_devvp, bn, bsize, ip->i_number, + NULL); blocksreleased += btodb(bsize); } if (lastblock < 0) @@ -496,7 +511,7 @@ ffs_truncate(vp, length, flags, cred, td) */ bn += numfrags(fs, newspace); ffs_blkfree(ump, fs, ip->i_devvp, bn, - oldspace - newspace, ip->i_number); + oldspace - newspace, ip->i_number, NULL); blocksreleased += btodb(oldspace - newspace); } } @@ -528,7 +543,14 @@ done: #ifdef QUOTA (void) chkdq(ip, -blocksreleased, NOCRED, 0); #endif - return (allerror); + error = allerror; +out: + if (cookie) { + allerror = softdep_complete_trunc(vp, cookie); + if (allerror != 0 && error == 0) + error = allerror; + } + return (error); } /* @@ -638,7 +660,7 @@ ffs_indirtrunc(ip, lbn, dbn, lastbn, level, countp) blocksreleased += blkcount; } ffs_blkfree(ip->i_ump, fs, ip->i_devvp, nb, fs->fs_bsize, - ip->i_number); + ip->i_number, NULL); blocksreleased += nblocks; } diff --git a/sys/ufs/ffs/ffs_snapshot.c b/sys/ufs/ffs/ffs_snapshot.c index b36cb58808b..f6548a385eb 100644 --- a/sys/ufs/ffs/ffs_snapshot.c +++ b/sys/ufs/ffs/ffs_snapshot.c @@ -142,7 +142,7 @@ MTX_SYSINIT(ffs_snapfree, &snapfree_lock, "snapdata free list", MTX_DEF); static int cgaccount(int, struct vnode *, struct buf *, int); static int expunge_ufs1(struct vnode *, struct inode *, struct fs *, int (*)(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *, struct fs *, - ufs_lbn_t, int), int); + ufs_lbn_t, int), int, int); 
static int indiracct_ufs1(struct vnode *, struct vnode *, int, ufs1_daddr_t, ufs_lbn_t, ufs_lbn_t, ufs_lbn_t, ufs_lbn_t, struct fs *, int (*)(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *, struct fs *, @@ -155,7 +155,7 @@ static int mapacct_ufs1(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *, struct fs *, ufs_lbn_t, int); static int expunge_ufs2(struct vnode *, struct inode *, struct fs *, int (*)(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *, struct fs *, - ufs_lbn_t, int), int); + ufs_lbn_t, int), int, int); static int indiracct_ufs2(struct vnode *, struct vnode *, int, ufs2_daddr_t, ufs_lbn_t, ufs_lbn_t, ufs_lbn_t, ufs_lbn_t, struct fs *, int (*)(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *, struct fs *, @@ -582,7 +582,8 @@ loop: len = fragroundup(fs, blkoff(fs, xp->i_size)); if (len != 0 && len < fs->fs_bsize) { ffs_blkfree(ump, copy_fs, vp, - DIP(xp, i_db[loc]), len, xp->i_number); + DIP(xp, i_db[loc]), len, xp->i_number, + NULL); blkno = DIP(xp, i_db[loc]); DIP_SET(xp, i_db[loc], 0); } @@ -590,15 +591,15 @@ loop: snaplistsize += 1; if (xp->i_ump->um_fstype == UFS1) error = expunge_ufs1(vp, xp, copy_fs, fullacct_ufs1, - BLK_NOCOPY); + BLK_NOCOPY, 1); else error = expunge_ufs2(vp, xp, copy_fs, fullacct_ufs2, - BLK_NOCOPY); + BLK_NOCOPY, 1); if (blkno) DIP_SET(xp, i_db[loc], blkno); if (!error) error = ffs_freefile(ump, copy_fs, vp, xp->i_number, - xp->i_mode); + xp->i_mode, NULL); VOP_UNLOCK(xvp, 0); vdrop(xvp); if (error) { @@ -611,6 +612,26 @@ loop: MNT_ILOCK(mp); } MNT_IUNLOCK(mp); + /* + * Erase the journal file from the snapshot. 
+ */ + if (fs->fs_flags & FS_SUJ) { + error = softdep_journal_lookup(mp, &xvp); + if (error) { + free(copy_fs->fs_csp, M_UFSMNT); + bawrite(sbp); + sbp = NULL; + goto out1; + } + xp = VTOI(xvp); + if (xp->i_ump->um_fstype == UFS1) + error = expunge_ufs1(vp, xp, copy_fs, fullacct_ufs1, + BLK_NOCOPY, 0); + else + error = expunge_ufs2(vp, xp, copy_fs, fullacct_ufs2, + BLK_NOCOPY, 0); + vput(xvp); + } /* * Acquire a lock on the snapdata structure, creating it if necessary. */ @@ -691,16 +712,16 @@ out1: break; if (xp->i_ump->um_fstype == UFS1) error = expunge_ufs1(vp, xp, fs, snapacct_ufs1, - BLK_SNAP); + BLK_SNAP, 0); else error = expunge_ufs2(vp, xp, fs, snapacct_ufs2, - BLK_SNAP); + BLK_SNAP, 0); if (error == 0 && xp->i_effnlink == 0) { error = ffs_freefile(ump, copy_fs, vp, xp->i_number, - xp->i_mode); + xp->i_mode, NULL); } if (error) { fs->fs_snapinum[snaploc] = 0; @@ -719,9 +740,11 @@ out1: * the list of allocated blocks in i_snapblklist. */ if (ip->i_ump->um_fstype == UFS1) - error = expunge_ufs1(vp, ip, copy_fs, mapacct_ufs1, BLK_SNAP); + error = expunge_ufs1(vp, ip, copy_fs, mapacct_ufs1, + BLK_SNAP, 0); else - error = expunge_ufs2(vp, ip, copy_fs, mapacct_ufs2, BLK_SNAP); + error = expunge_ufs2(vp, ip, copy_fs, mapacct_ufs2, + BLK_SNAP, 0); if (error) { fs->fs_snapinum[snaploc] = 0; free(snapblklist, M_UFSMNT); @@ -954,13 +977,14 @@ cgaccount(cg, vp, nbp, passno) * is reproduced once each for UFS1 and UFS2. 
*/ static int -expunge_ufs1(snapvp, cancelip, fs, acctfunc, expungetype) +expunge_ufs1(snapvp, cancelip, fs, acctfunc, expungetype, clearmode) struct vnode *snapvp; struct inode *cancelip; struct fs *fs; int (*acctfunc)(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *, struct fs *, ufs_lbn_t, int); int expungetype; + int clearmode; { int i, error, indiroff; ufs_lbn_t lbn, rlbn; @@ -978,6 +1002,8 @@ expunge_ufs1(snapvp, cancelip, fs, acctfunc, expungetype) if (lbn < NDADDR) { blkno = VTOI(snapvp)->i_din1->di_db[lbn]; } else { + if (DOINGSOFTDEP(snapvp)) + softdep_prealloc(snapvp, MNT_WAIT); td->td_pflags |= TDP_COWINPROGRESS; error = ffs_balloc_ufs1(snapvp, lblktosize(fs, (off_t)lbn), fs->fs_bsize, KERNCRED, BA_METAONLY, &bp); @@ -1005,7 +1031,7 @@ expunge_ufs1(snapvp, cancelip, fs, acctfunc, expungetype) */ dip = (struct ufs1_dinode *)bp->b_data + ino_to_fsbo(fs, cancelip->i_number); - if (expungetype == BLK_NOCOPY || cancelip->i_effnlink == 0) + if (clearmode || cancelip->i_effnlink == 0) dip->di_mode = 0; dip->di_size = 0; dip->di_blocks = 0; @@ -1220,7 +1246,7 @@ mapacct_ufs1(vp, oldblkp, lastblkp, fs, lblkno, expungetype) *ip->i_snapblklist++ = lblkno; if (blkno == BLK_SNAP) blkno = blkstofrags(fs, lblkno); - ffs_blkfree(ip->i_ump, fs, vp, blkno, fs->fs_bsize, inum); + ffs_blkfree(ip->i_ump, fs, vp, blkno, fs->fs_bsize, inum, NULL); } return (0); } @@ -1234,13 +1260,14 @@ mapacct_ufs1(vp, oldblkp, lastblkp, fs, lblkno, expungetype) * is reproduced once each for UFS1 and UFS2. 
*/ static int -expunge_ufs2(snapvp, cancelip, fs, acctfunc, expungetype) +expunge_ufs2(snapvp, cancelip, fs, acctfunc, expungetype, clearmode) struct vnode *snapvp; struct inode *cancelip; struct fs *fs; int (*acctfunc)(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *, struct fs *, ufs_lbn_t, int); int expungetype; + int clearmode; { int i, error, indiroff; ufs_lbn_t lbn, rlbn; @@ -1258,6 +1285,8 @@ expunge_ufs2(snapvp, cancelip, fs, acctfunc, expungetype) if (lbn < NDADDR) { blkno = VTOI(snapvp)->i_din2->di_db[lbn]; } else { + if (DOINGSOFTDEP(snapvp)) + softdep_prealloc(snapvp, MNT_WAIT); td->td_pflags |= TDP_COWINPROGRESS; error = ffs_balloc_ufs2(snapvp, lblktosize(fs, (off_t)lbn), fs->fs_bsize, KERNCRED, BA_METAONLY, &bp); @@ -1285,7 +1314,7 @@ expunge_ufs2(snapvp, cancelip, fs, acctfunc, expungetype) */ dip = (struct ufs2_dinode *)bp->b_data + ino_to_fsbo(fs, cancelip->i_number); - if (expungetype == BLK_NOCOPY) + if (clearmode || cancelip->i_effnlink == 0) dip->di_mode = 0; dip->di_size = 0; dip->di_blocks = 0; @@ -1500,7 +1529,7 @@ mapacct_ufs2(vp, oldblkp, lastblkp, fs, lblkno, expungetype) *ip->i_snapblklist++ = lblkno; if (blkno == BLK_SNAP) blkno = blkstofrags(fs, lblkno); - ffs_blkfree(ip->i_ump, fs, vp, blkno, fs->fs_bsize, inum); + ffs_blkfree(ip->i_ump, fs, vp, blkno, fs->fs_bsize, inum, NULL); } return (0); } @@ -1657,6 +1686,13 @@ ffs_snapremove(vp) ip->i_flags &= ~SF_SNAPSHOT; DIP_SET(ip, i_flags, ip->i_flags); ip->i_flag |= IN_CHANGE | IN_UPDATE; + /* + * The dirtied indirects must be written out before + * softdep_setup_freeblocks() is called. Otherwise indir_trunc() + * may find indirect pointers using the magic BLK_* values. + */ + if (DOINGSOFTDEP(vp)) + ffs_syncvnode(vp, MNT_WAIT); #ifdef QUOTA /* * Reenable disk quotas for ex-snapshot file. @@ -1714,6 +1750,8 @@ retry: goto retry; TAILQ_FOREACH(ip, &sn->sn_head, i_nextsnap) { vp = ITOV(ip); + if (DOINGSOFTDEP(vp)) + softdep_prealloc(vp, MNT_WAIT); /* * Lookup block being written. 
*/ @@ -2236,6 +2274,8 @@ ffs_copyonwrite(devvp, bp) } TAILQ_FOREACH(ip, &sn->sn_head, i_nextsnap) { vp = ITOV(ip); + if (DOINGSOFTDEP(vp)) + softdep_prealloc(vp, MNT_WAIT); /* * We ensure that everything of our own that needs to be * copied will be done at the time that ffs_snapshot is diff --git a/sys/ufs/ffs/ffs_softdep.c b/sys/ufs/ffs/ffs_softdep.c index 4d652c114dd..49510a71402 100644 --- a/sys/ufs/ffs/ffs_softdep.c +++ b/sys/ufs/ffs/ffs_softdep.c @@ -1,5 +1,7 @@ /*- - * Copyright 1998, 2000 Marshall Kirk McKusick. All Rights Reserved. + * Copyright 1998, 2000 Marshall Kirk McKusick. + * Copyright 2009, 2010 Jeffrey W. Roberson + * All rights reserved. * * The soft updates code is derived from the appendix of a University * of Michigan technical report (Gregory R. Ganger and Yale N. Patt, @@ -23,17 +25,16 @@ * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * - * THIS SOFTWARE IS PROVIDED BY MARSHALL KIRK MCKUSICK ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL MARSHALL KIRK MCKUSICK BE LIABLE FOR - * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS + * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR + * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE + * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * from: @(#)ffs_softdep.c 9.59 (McKusick) 6/21/00 */ @@ -50,6 +51,7 @@ __FBSDID("$FreeBSD$"); #ifndef DEBUG #define DEBUG #endif +#define SUJ_DEBUG #include #include @@ -62,6 +64,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include @@ -119,6 +122,21 @@ softdep_uninitialize() return; } +void +softdep_unmount(mp) + struct mount *mp; +{ + +} + +void +softdep_setup_sbupdate(ump, fs, bp) + struct ufsmount *ump; + struct fs *fs; + struct buf *bp; +{ +} + void softdep_setup_inomapdep(bp, ip, newinum) struct buf *bp; @@ -130,10 +148,12 @@ softdep_setup_inomapdep(bp, ip, newinum) } void -softdep_setup_blkmapdep(bp, mp, newblkno) +softdep_setup_blkmapdep(bp, mp, newblkno, frags, oldfrags) struct buf *bp; struct mount *mp; ufs2_daddr_t newblkno; + int frags; + int oldfrags; { panic("softdep_setup_blkmapdep called"); @@ -227,7 +247,8 @@ softdep_setup_directory_add(bp, dp, diroffset, newinum, newdirbp, isnewblk) } void -softdep_change_directoryentry_offset(dp, base, oldloc, newloc, entrysize) +softdep_change_directoryentry_offset(bp, dp, base, oldloc, newloc, entrysize) + struct buf *bp; struct inode *dp; caddr_t base; caddr_t oldloc; @@ -261,6 +282,162 @@ softdep_setup_directory_change(bp, dp, ip, newinum, isrmdir) panic("softdep_setup_directory_change called"); } +void * +softdep_setup_trunc(vp, length, flags) + struct vnode *vp; + off_t length; + int flags; +{ + + panic("%s called", __FUNCTION__); + + return (NULL); +} + +int 
+softdep_complete_trunc(vp, cookie) + struct vnode *vp; + void *cookie; +{ + + panic("%s called", __FUNCTION__); + + return (0); +} + +void +softdep_setup_blkfree(mp, bp, blkno, frags, wkhd) + struct mount *mp; + struct buf *bp; + ufs2_daddr_t blkno; + int frags; + struct workhead *wkhd; +{ + + panic("%s called", __FUNCTION__); +} + +void +softdep_setup_inofree(mp, bp, ino, wkhd) + struct mount *mp; + struct buf *bp; + ino_t ino; + struct workhead *wkhd; +{ + + panic("%s called", __FUNCTION__); +} + +void +softdep_setup_unlink(dp, ip) + struct inode *dp; + struct inode *ip; +{ + + panic("%s called", __FUNCTION__); +} + +void +softdep_setup_link(dp, ip) + struct inode *dp; + struct inode *ip; +{ + + panic("%s called", __FUNCTION__); +} + +void +softdep_revert_link(dp, ip) + struct inode *dp; + struct inode *ip; +{ + + panic("%s called", __FUNCTION__); +} + +void +softdep_setup_rmdir(dp, ip) + struct inode *dp; + struct inode *ip; +{ + + panic("%s called", __FUNCTION__); +} + +void +softdep_revert_rmdir(dp, ip) + struct inode *dp; + struct inode *ip; +{ + + panic("%s called", __FUNCTION__); +} + +void +softdep_setup_create(dp, ip) + struct inode *dp; + struct inode *ip; +{ + + panic("%s called", __FUNCTION__); +} + +void +softdep_revert_create(dp, ip) + struct inode *dp; + struct inode *ip; +{ + + panic("%s called", __FUNCTION__); +} + +void +softdep_setup_mkdir(dp, ip) + struct inode *dp; + struct inode *ip; +{ + + panic("%s called", __FUNCTION__); +} + +void +softdep_revert_mkdir(dp, ip) + struct inode *dp; + struct inode *ip; +{ + + panic("%s called", __FUNCTION__); +} + +void +softdep_setup_dotdot_link(dp, ip) + struct inode *dp; + struct inode *ip; +{ + + panic("%s called", __FUNCTION__); +} + +int +softdep_prealloc(vp, waitok) + struct vnode *vp; + int waitok; +{ + + panic("%s called", __FUNCTION__); + + return (0); +} + +int +softdep_journal_lookup(mp, vpp) + struct mount *mp; + struct vnode **vpp; +{ + + return (ENOENT); +} + void softdep_change_linkcnt(ip) 
struct inode *ip; @@ -403,31 +580,13 @@ softdep_get_depcounts(struct mount *mp, * These definitions need to be adapted to the system to which * this file is being ported. */ -/* - * malloc types defined for the softdep system. - */ -static MALLOC_DEFINE(M_PAGEDEP, "pagedep","File page dependencies"); -static MALLOC_DEFINE(M_INODEDEP, "inodedep","Inode dependencies"); -static MALLOC_DEFINE(M_NEWBLK, "newblk","New block allocation"); -static MALLOC_DEFINE(M_BMSAFEMAP, "bmsafemap","Block or frag allocated from cyl group map"); -static MALLOC_DEFINE(M_ALLOCDIRECT, "allocdirect","Block or frag dependency for an inode"); -static MALLOC_DEFINE(M_INDIRDEP, "indirdep","Indirect block dependencies"); -static MALLOC_DEFINE(M_ALLOCINDIR, "allocindir","Block dependency for an indirect block"); -static MALLOC_DEFINE(M_FREEFRAG, "freefrag","Previously used frag for an inode"); -static MALLOC_DEFINE(M_FREEBLKS, "freeblks","Blocks freed from an inode"); -static MALLOC_DEFINE(M_FREEFILE, "freefile","Inode deallocated"); -static MALLOC_DEFINE(M_DIRADD, "diradd","New directory entry"); -static MALLOC_DEFINE(M_MKDIR, "mkdir","New directory"); -static MALLOC_DEFINE(M_DIRREM, "dirrem","Directory entry deleted"); -static MALLOC_DEFINE(M_NEWDIRBLK, "newdirblk","Unclaimed new directory block"); -static MALLOC_DEFINE(M_SAVEDINO, "savedino","Saved inodes"); #define M_SOFTDEP_FLAGS (M_WAITOK | M_USE_RESERVE) #define D_PAGEDEP 0 #define D_INODEDEP 1 -#define D_NEWBLK 2 -#define D_BMSAFEMAP 3 +#define D_BMSAFEMAP 2 +#define D_NEWBLK 3 #define D_ALLOCDIRECT 4 #define D_INDIRDEP 5 #define D_ALLOCINDIR 6 @@ -438,7 +597,67 @@ static MALLOC_DEFINE(M_SAVEDINO, "savedino","Saved inodes"); #define D_MKDIR 11 #define D_DIRREM 12 #define D_NEWDIRBLK 13 -#define D_LAST D_NEWDIRBLK +#define D_FREEWORK 14 +#define D_FREEDEP 15 +#define D_JADDREF 16 +#define D_JREMREF 17 +#define D_JMVREF 18 +#define D_JNEWBLK 19 +#define D_JFREEBLK 20 +#define D_JFREEFRAG 21 +#define D_JSEG 22 +#define D_JSEGDEP 23 +#define 
D_SBDEP 24 +#define D_JTRUNC 25 +#define D_LAST D_JTRUNC + +unsigned long dep_current[D_LAST + 1]; +unsigned long dep_total[D_LAST + 1]; + + +SYSCTL_NODE(_debug, OID_AUTO, softdep, CTLFLAG_RW, 0, "soft updates stats"); +SYSCTL_NODE(_debug_softdep, OID_AUTO, total, CTLFLAG_RW, 0, + "total dependencies allocated"); +SYSCTL_NODE(_debug_softdep, OID_AUTO, current, CTLFLAG_RW, 0, + "current dependencies allocated"); + +#define SOFTDEP_TYPE(type, str, long) \ + static MALLOC_DEFINE(M_ ## type, #str, long); \ + SYSCTL_LONG(_debug_softdep_total, OID_AUTO, str, CTLFLAG_RD, \ + &dep_total[D_ ## type], 0, ""); \ + SYSCTL_LONG(_debug_softdep_current, OID_AUTO, str, CTLFLAG_RD, \ + &dep_current[D_ ## type], 0, ""); + +SOFTDEP_TYPE(PAGEDEP, pagedep, "File page dependencies"); +SOFTDEP_TYPE(INODEDEP, inodedep, "Inode dependencies"); +SOFTDEP_TYPE(BMSAFEMAP, bmsafemap, + "Block or frag allocated from cyl group map"); +SOFTDEP_TYPE(NEWBLK, newblk, "New block or frag allocation dependency"); +SOFTDEP_TYPE(ALLOCDIRECT, allocdirect, "Block or frag dependency for an inode"); +SOFTDEP_TYPE(INDIRDEP, indirdep, "Indirect block dependencies"); +SOFTDEP_TYPE(ALLOCINDIR, allocindir, "Block dependency for an indirect block"); +SOFTDEP_TYPE(FREEFRAG, freefrag, "Previously used frag for an inode"); +SOFTDEP_TYPE(FREEBLKS, freeblks, "Blocks freed from an inode"); +SOFTDEP_TYPE(FREEFILE, freefile, "Inode deallocated"); +SOFTDEP_TYPE(DIRADD, diradd, "New directory entry"); +SOFTDEP_TYPE(MKDIR, mkdir, "New directory"); +SOFTDEP_TYPE(DIRREM, dirrem, "Directory entry deleted"); +SOFTDEP_TYPE(NEWDIRBLK, newdirblk, "Unclaimed new directory block"); +SOFTDEP_TYPE(FREEWORK, freework, "free an inode block"); +SOFTDEP_TYPE(FREEDEP, freedep, "track a block free"); +SOFTDEP_TYPE(JADDREF, jaddref, "Journal inode ref add"); +SOFTDEP_TYPE(JREMREF, jremref, "Journal inode ref remove"); +SOFTDEP_TYPE(JMVREF, jmvref, "Journal inode ref move"); +SOFTDEP_TYPE(JNEWBLK, jnewblk, "Journal new block"); 
+SOFTDEP_TYPE(JFREEBLK, jfreeblk, "Journal free block"); +SOFTDEP_TYPE(JFREEFRAG, jfreefrag, "Journal free frag"); +SOFTDEP_TYPE(JSEG, jseg, "Journal segment"); +SOFTDEP_TYPE(JSEGDEP, jsegdep, "Journal segment complete"); +SOFTDEP_TYPE(SBDEP, sbdep, "Superblock write dependency"); +SOFTDEP_TYPE(JTRUNC, jtrunc, "Journal inode truncation"); + +static MALLOC_DEFINE(M_SAVEDINO, "savedino", "Saved inodes"); +static MALLOC_DEFINE(M_JBLOCKS, "jblocks", "Journal block locations"); /* * translate from workitem type to memory type @@ -447,8 +666,8 @@ static MALLOC_DEFINE(M_SAVEDINO, "savedino","Saved inodes"); static struct malloc_type *memtype[] = { M_PAGEDEP, M_INODEDEP, - M_NEWBLK, M_BMSAFEMAP, + M_NEWBLK, M_ALLOCDIRECT, M_INDIRDEP, M_ALLOCINDIR, @@ -458,7 +677,19 @@ static struct malloc_type *memtype[] = { M_DIRADD, M_MKDIR, M_DIRREM, - M_NEWDIRBLK + M_NEWDIRBLK, + M_FREEWORK, + M_FREEDEP, + M_JADDREF, + M_JREMREF, + M_JMVREF, + M_JNEWBLK, + M_JFREEBLK, + M_JFREEFRAG, + M_JSEG, + M_JSEGDEP, + M_SBDEP, + M_JTRUNC }; #define DtoM(type) (memtype[type]) @@ -467,17 +698,21 @@ static struct malloc_type *memtype[] = { * Names of malloc types. */ #define TYPENAME(type) \ - ((unsigned)(type) < D_LAST ? memtype[type]->ks_shortdesc : "???") + ((unsigned)(type) <= D_LAST ? memtype[type]->ks_shortdesc : "???") /* * End system adaptation definitions. */ +#define DOTDOT_OFFSET offsetof(struct dirtemplate, dotdot_ino) +#define DOT_OFFSET offsetof(struct dirtemplate, dot_ino) + /* * Forward declarations. */ struct inodedep_hashhead; struct newblk_hashhead; struct pagedep_hashhead; +struct bmsafemap_hashhead; /* * Internal function prototypes. 
@@ -487,59 +722,172 @@ static void drain_output(struct vnode *); static struct buf *getdirtybuf(struct buf *, struct mtx *, int); static void clear_remove(struct thread *); static void clear_inodedeps(struct thread *); +static void unlinked_inodedep(struct mount *, struct inodedep *); +static void clear_unlinked_inodedep(struct inodedep *); +static struct inodedep *first_unlinked_inodedep(struct ufsmount *); static int flush_pagedep_deps(struct vnode *, struct mount *, struct diraddhd *); +static void free_pagedep(struct pagedep *); +static int flush_newblk_dep(struct vnode *, struct mount *, ufs_lbn_t); static int flush_inodedep_deps(struct mount *, ino_t); static int flush_deplist(struct allocdirectlst *, int, int *); static int handle_written_filepage(struct pagedep *, struct buf *); +static int handle_written_sbdep(struct sbdep *, struct buf *); +static void initiate_write_sbdep(struct sbdep *); static void diradd_inode_written(struct diradd *, struct inodedep *); +static int handle_written_indirdep(struct indirdep *, struct buf *, + struct buf**); static int handle_written_inodeblock(struct inodedep *, struct buf *); -static void handle_allocdirect_partdone(struct allocdirect *); +static int handle_written_bmsafemap(struct bmsafemap *, struct buf *); +static void handle_written_jaddref(struct jaddref *); +static void handle_written_jremref(struct jremref *); +static void handle_written_jseg(struct jseg *, struct buf *); +static void handle_written_jnewblk(struct jnewblk *); +static void handle_written_jfreeblk(struct jfreeblk *); +static void handle_written_jfreefrag(struct jfreefrag *); +static void complete_jseg(struct jseg *); +static void jseg_write(struct fs *, struct jblocks *, struct jseg *, + uint8_t *); +static void jaddref_write(struct jaddref *, struct jseg *, uint8_t *); +static void jremref_write(struct jremref *, struct jseg *, uint8_t *); +static void jmvref_write(struct jmvref *, struct jseg *, uint8_t *); +static void jtrunc_write(struct 
jtrunc *, struct jseg *, uint8_t *); +static void jnewblk_write(struct jnewblk *, struct jseg *, uint8_t *); +static void jfreeblk_write(struct jfreeblk *, struct jseg *, uint8_t *); +static void jfreefrag_write(struct jfreefrag *, struct jseg *, uint8_t *); +static inline void inoref_write(struct inoref *, struct jseg *, + struct jrefrec *); +static void handle_allocdirect_partdone(struct allocdirect *, + struct workhead *); +static void cancel_newblk(struct newblk *, struct workhead *); +static void indirdep_complete(struct indirdep *); static void handle_allocindir_partdone(struct allocindir *); static void initiate_write_filepage(struct pagedep *, struct buf *); +static void initiate_write_indirdep(struct indirdep*, struct buf *); static void handle_written_mkdir(struct mkdir *, int); +static void initiate_write_bmsafemap(struct bmsafemap *, struct buf *); static void initiate_write_inodeblock_ufs1(struct inodedep *, struct buf *); static void initiate_write_inodeblock_ufs2(struct inodedep *, struct buf *); static void handle_workitem_freefile(struct freefile *); static void handle_workitem_remove(struct dirrem *, struct vnode *); static struct dirrem *newdirrem(struct buf *, struct inode *, struct inode *, int, struct dirrem **); -static void free_diradd(struct diradd *); -static void free_allocindir(struct allocindir *, struct inodedep *); +static void cancel_indirdep(struct indirdep *, struct buf *, struct inodedep *, + struct freeblks *); +static void free_indirdep(struct indirdep *); +static void free_diradd(struct diradd *, struct workhead *); +static void merge_diradd(struct inodedep *, struct diradd *); +static void complete_diradd(struct diradd *); +static struct diradd *diradd_lookup(struct pagedep *, int); +static struct jremref *cancel_diradd_dotdot(struct inode *, struct dirrem *, + struct jremref *); +static struct jremref *cancel_mkdir_dotdot(struct inode *, struct dirrem *, + struct jremref *); +static void cancel_diradd(struct diradd *, struct 
dirrem *, struct jremref *, + struct jremref *, struct jremref *); +static void dirrem_journal(struct dirrem *, struct jremref *, struct jremref *, + struct jremref *); +static void cancel_allocindir(struct allocindir *, struct inodedep *, + struct freeblks *); +static void complete_mkdir(struct mkdir *); static void free_newdirblk(struct newdirblk *); -static int indir_trunc(struct freeblks *, ufs2_daddr_t, int, ufs_lbn_t, - ufs2_daddr_t *); -static void deallocate_dependencies(struct buf *, struct inodedep *); -static void free_allocdirect(struct allocdirectlst *, - struct allocdirect *, int); +static void free_jremref(struct jremref *); +static void free_jaddref(struct jaddref *); +static void free_jsegdep(struct jsegdep *); +static void free_jseg(struct jseg *); +static void free_jnewblk(struct jnewblk *); +static void free_jfreeblk(struct jfreeblk *); +static void free_jfreefrag(struct jfreefrag *); +static void free_freedep(struct freedep *); +static void journal_jremref(struct dirrem *, struct jremref *, + struct inodedep *); +static void cancel_jnewblk(struct jnewblk *, struct workhead *); +static int cancel_jaddref(struct jaddref *, struct inodedep *, + struct workhead *); +static void cancel_jfreefrag(struct jfreefrag *); +static void indir_trunc(struct freework *, ufs2_daddr_t, ufs_lbn_t); +static int deallocate_dependencies(struct buf *, struct inodedep *, + struct freeblks *); +static void free_newblk(struct newblk *); +static void cancel_allocdirect(struct allocdirectlst *, + struct allocdirect *, struct freeblks *, int); static int check_inode_unwritten(struct inodedep *); static int free_inodedep(struct inodedep *); +static void freework_freeblock(struct freework *); static void handle_workitem_freeblocks(struct freeblks *, int); +static void handle_complete_freeblocks(struct freeblks *); +static void handle_workitem_indirblk(struct freework *); +static void handle_written_freework(struct freework *); static void merge_inode_lists(struct 
allocdirectlst *,struct allocdirectlst *); static void setup_allocindir_phase2(struct buf *, struct inode *, - struct allocindir *); + struct inodedep *, struct allocindir *, ufs_lbn_t); static struct allocindir *newallocindir(struct inode *, int, ufs2_daddr_t, - ufs2_daddr_t); + ufs2_daddr_t, ufs_lbn_t); static void handle_workitem_freefrag(struct freefrag *); -static struct freefrag *newfreefrag(struct inode *, ufs2_daddr_t, long); +static struct freefrag *newfreefrag(struct inode *, ufs2_daddr_t, long, + ufs_lbn_t); static void allocdirect_merge(struct allocdirectlst *, struct allocdirect *, struct allocdirect *); -static struct bmsafemap *bmsafemap_lookup(struct mount *, struct buf *); -static int newblk_find(struct newblk_hashhead *, struct fs *, ufs2_daddr_t, - struct newblk **); -static int newblk_lookup(struct fs *, ufs2_daddr_t, int, struct newblk **); +static struct freefrag *allocindir_merge(struct allocindir *, + struct allocindir *); +static int bmsafemap_find(struct bmsafemap_hashhead *, struct mount *, int, + struct bmsafemap **); +static struct bmsafemap *bmsafemap_lookup(struct mount *, struct buf *, + int cg); +static int newblk_find(struct newblk_hashhead *, struct mount *, ufs2_daddr_t, + int, struct newblk **); +static int newblk_lookup(struct mount *, ufs2_daddr_t, int, struct newblk **); static int inodedep_find(struct inodedep_hashhead *, struct fs *, ino_t, struct inodedep **); static int inodedep_lookup(struct mount *, ino_t, int, struct inodedep **); -static int pagedep_lookup(struct inode *, ufs_lbn_t, int, struct pagedep **); +static int pagedep_lookup(struct mount *, ino_t, ufs_lbn_t, int, + struct pagedep **); static int pagedep_find(struct pagedep_hashhead *, ino_t, ufs_lbn_t, struct mount *mp, int, struct pagedep **); static void pause_timer(void *); static int request_cleanup(struct mount *, int); static int process_worklist_item(struct mount *, int); -static void add_to_worklist(struct worklist *); +static void 
process_removes(struct vnode *); +static void jwork_move(struct workhead *, struct workhead *); +static void add_to_worklist(struct worklist *, int); +static void remove_from_worklist(struct worklist *); static void softdep_flush(void); static int softdep_speedup(void); +static void worklist_speedup(void); +static int journal_mount(struct mount *, struct fs *, struct ucred *); +static void journal_unmount(struct mount *); +static int journal_space(struct ufsmount *, int); +static void journal_suspend(struct ufsmount *); +static void softdep_prelink(struct vnode *, struct vnode *); +static void add_to_journal(struct worklist *); +static void remove_from_journal(struct worklist *); +static void softdep_process_journal(struct mount *, int); +static struct jremref *newjremref(struct dirrem *, struct inode *, + struct inode *ip, off_t, nlink_t); +static struct jaddref *newjaddref(struct inode *, ino_t, off_t, int16_t, + uint16_t); +static inline void newinoref(struct inoref *, ino_t, ino_t, off_t, nlink_t, + uint16_t); +static inline struct jsegdep *inoref_jseg(struct inoref *); +static struct jmvref *newjmvref(struct inode *, ino_t, off_t, off_t); +static struct jfreeblk *newjfreeblk(struct freeblks *, ufs_lbn_t, + ufs2_daddr_t, int); +static struct jfreefrag *newjfreefrag(struct freefrag *, struct inode *, + ufs2_daddr_t, long, ufs_lbn_t); +static struct freework *newfreework(struct freeblks *, struct freework *, + ufs_lbn_t, ufs2_daddr_t, int, int); +static void jwait(struct worklist *wk); +static struct inodedep *inodedep_lookup_ip(struct inode *); +static int bmsafemap_rollbacks(struct bmsafemap *); +static struct freefile *handle_bufwait(struct inodedep *, struct workhead *); +static void handle_jwork(struct workhead *); +static struct mkdir *setup_newdir(struct diradd *, ino_t, ino_t, struct buf *, + struct mkdir **); +static struct jblocks *jblocks_create(void); +static ufs2_daddr_t jblocks_alloc(struct jblocks *, int, int *); +static void jblocks_free(struct 
jblocks *, struct mount *, int); +static void jblocks_destroy(struct jblocks *); +static void jblocks_add(struct jblocks *, ufs2_daddr_t, int); /* * Exported softdep operations. @@ -572,39 +920,127 @@ MTX_SYSINIT(softdep_lock, &lk, "Softdep Lock", MTX_DEF); (item)->wk_state &= ~ONWORKLIST; \ LIST_REMOVE(item, wk_list); \ } while (0) -#else /* DEBUG */ -static void worklist_insert(struct workhead *, struct worklist *); -static void worklist_remove(struct worklist *); +#define WORKLIST_INSERT_UNLOCKED WORKLIST_INSERT +#define WORKLIST_REMOVE_UNLOCKED WORKLIST_REMOVE -#define WORKLIST_INSERT(head, item) worklist_insert(head, item) -#define WORKLIST_REMOVE(item) worklist_remove(item) +#else /* DEBUG */ +static void worklist_insert(struct workhead *, struct worklist *, int); +static void worklist_remove(struct worklist *, int); + +#define WORKLIST_INSERT(head, item) worklist_insert(head, item, 1) +#define WORKLIST_INSERT_UNLOCKED(head, item) worklist_insert(head, item, 0) +#define WORKLIST_REMOVE(item) worklist_remove(item, 1) +#define WORKLIST_REMOVE_UNLOCKED(item) worklist_remove(item, 0) static void -worklist_insert(head, item) +worklist_insert(head, item, locked) struct workhead *head; struct worklist *item; + int locked; { - mtx_assert(&lk, MA_OWNED); + if (locked) + mtx_assert(&lk, MA_OWNED); if (item->wk_state & ONWORKLIST) - panic("worklist_insert: already on list"); + panic("worklist_insert: %p %s(0x%X) already on list", + item, TYPENAME(item->wk_type), item->wk_state); item->wk_state |= ONWORKLIST; LIST_INSERT_HEAD(head, item, wk_list); } static void -worklist_remove(item) +worklist_remove(item, locked) struct worklist *item; + int locked; { - mtx_assert(&lk, MA_OWNED); + if (locked) + mtx_assert(&lk, MA_OWNED); if ((item->wk_state & ONWORKLIST) == 0) - panic("worklist_remove: not on list"); + panic("worklist_remove: %p %s(0x%X) not on list", + item, TYPENAME(item->wk_type), item->wk_state); item->wk_state &= ~ONWORKLIST; LIST_REMOVE(item, wk_list); } #endif 
/* DEBUG */ +/* + * Merge two jsegdeps keeping only the oldest one as newer references + * can't be discarded until after older references. + */ +static inline struct jsegdep * +jsegdep_merge(struct jsegdep *one, struct jsegdep *two) +{ + struct jsegdep *swp; + + if (two == NULL) + return (one); + + if (one->jd_seg->js_seq > two->jd_seg->js_seq) { + swp = one; + one = two; + two = swp; + } + WORKLIST_REMOVE(&two->jd_list); + free_jsegdep(two); + + return (one); +} + +/* + * If two freedeps are compatible free one to reduce list size. + */ +static inline struct freedep * +freedep_merge(struct freedep *one, struct freedep *two) +{ + if (two == NULL) + return (one); + + if (one->fd_freework == two->fd_freework) { + WORKLIST_REMOVE(&two->fd_list); + free_freedep(two); + } + return (one); +} + +/* + * Move journal work from one list to another. Duplicate freedeps and + * jsegdeps are coalesced to keep the lists as small as possible. + */ +static void +jwork_move(dst, src) + struct workhead *dst; + struct workhead *src; +{ + struct freedep *freedep; + struct jsegdep *jsegdep; + struct worklist *wkn; + struct worklist *wk; + + KASSERT(dst != src, + ("jwork_move: dst == src")); + freedep = NULL; + jsegdep = NULL; + LIST_FOREACH_SAFE(wk, dst, wk_list, wkn) { + if (wk->wk_type == D_JSEGDEP) + jsegdep = jsegdep_merge(WK_JSEGDEP(wk), jsegdep); + if (wk->wk_type == D_FREEDEP) + freedep = freedep_merge(WK_FREEDEP(wk), freedep); + } + + mtx_assert(&lk, MA_OWNED); + while ((wk = LIST_FIRST(src)) != NULL) { + WORKLIST_REMOVE(wk); + WORKLIST_INSERT(dst, wk); + if (wk->wk_type == D_JSEGDEP) { + jsegdep = jsegdep_merge(WK_JSEGDEP(wk), jsegdep); + continue; + } + if (wk->wk_type == D_FREEDEP) + freedep = freedep_merge(WK_FREEDEP(wk), freedep); + } +} + /* * Routines for tracking and managing workitems. 
*/ @@ -623,13 +1059,16 @@ workitem_free(item, type) #ifdef DEBUG if (item->wk_state & ONWORKLIST) - panic("workitem_free: still on list"); + panic("workitem_free: %s(0x%X) still on list", + TYPENAME(item->wk_type), item->wk_state); if (item->wk_type != type) - panic("workitem_free: type mismatch"); + panic("workitem_free: type mismatch %s != %s", + TYPENAME(item->wk_type), TYPENAME(type)); #endif ump = VFSTOUFS(item->wk_mp); if (--ump->softdep_deps == 0 && ump->softdep_req) wakeup(&ump->softdep_deps); + dep_current[type]--; free(item, DtoM(type)); } @@ -643,6 +1082,8 @@ workitem_alloc(item, type, mp) item->wk_mp = mp; item->wk_state = 0; ACQUIRE_LOCK(&lk); + dep_current[type]++; + dep_total[type]++; VFSTOUFS(mp)->softdep_deps++; VFSTOUFS(mp)->softdep_accdeps++; FREE_LOCK(&lk); @@ -678,24 +1119,66 @@ static int stat_indir_blk_ptrs; /* bufs redirtied as indir ptrs not written */ static int stat_inode_bitmap; /* bufs redirtied as inode bitmap not written */ static int stat_direct_blk_ptrs;/* bufs redirtied as direct ptrs not written */ static int stat_dir_entry; /* bufs redirtied as dir entry cannot write */ +static int stat_jaddref; /* bufs redirtied as ino bitmap can not write */ +static int stat_jnewblk; /* bufs redirtied as blk bitmap can not write */ +static int stat_journal_min; /* Times hit journal min threshold */ +static int stat_journal_low; /* Times hit journal low threshold */ +static int stat_journal_wait; /* Times blocked in jwait(). */ +static int stat_jwait_filepage; /* Times blocked in jwait() for filepage. */ +static int stat_jwait_freeblks; /* Times blocked in jwait() for freeblks. */ +static int stat_jwait_inode; /* Times blocked in jwait() for inodes. */ +static int stat_jwait_newblk; /* Times blocked in jwait() for newblks. 
*/ -SYSCTL_INT(_debug, OID_AUTO, max_softdeps, CTLFLAG_RW, &max_softdeps, 0, ""); -SYSCTL_INT(_debug, OID_AUTO, tickdelay, CTLFLAG_RW, &tickdelay, 0, ""); -SYSCTL_INT(_debug, OID_AUTO, maxindirdeps, CTLFLAG_RW, &maxindirdeps, 0, ""); -SYSCTL_INT(_debug, OID_AUTO, worklist_push, CTLFLAG_RW, &stat_worklist_push, 0,""); -SYSCTL_INT(_debug, OID_AUTO, blk_limit_push, CTLFLAG_RW, &stat_blk_limit_push, 0,""); -SYSCTL_INT(_debug, OID_AUTO, ino_limit_push, CTLFLAG_RW, &stat_ino_limit_push, 0,""); -SYSCTL_INT(_debug, OID_AUTO, blk_limit_hit, CTLFLAG_RW, &stat_blk_limit_hit, 0, ""); -SYSCTL_INT(_debug, OID_AUTO, ino_limit_hit, CTLFLAG_RW, &stat_ino_limit_hit, 0, ""); -SYSCTL_INT(_debug, OID_AUTO, sync_limit_hit, CTLFLAG_RW, &stat_sync_limit_hit, 0, ""); -SYSCTL_INT(_debug, OID_AUTO, indir_blk_ptrs, CTLFLAG_RW, &stat_indir_blk_ptrs, 0, ""); -SYSCTL_INT(_debug, OID_AUTO, inode_bitmap, CTLFLAG_RW, &stat_inode_bitmap, 0, ""); -SYSCTL_INT(_debug, OID_AUTO, direct_blk_ptrs, CTLFLAG_RW, &stat_direct_blk_ptrs, 0, ""); -SYSCTL_INT(_debug, OID_AUTO, dir_entry, CTLFLAG_RW, &stat_dir_entry, 0, ""); -/* SYSCTL_INT(_debug, OID_AUTO, worklist_num, CTLFLAG_RD, &softdep_on_worklist, 0, ""); */ +SYSCTL_INT(_debug_softdep, OID_AUTO, max_softdeps, CTLFLAG_RW, + &max_softdeps, 0, ""); +SYSCTL_INT(_debug_softdep, OID_AUTO, tickdelay, CTLFLAG_RW, + &tickdelay, 0, ""); +SYSCTL_INT(_debug_softdep, OID_AUTO, maxindirdeps, CTLFLAG_RW, + &maxindirdeps, 0, ""); +SYSCTL_INT(_debug_softdep, OID_AUTO, worklist_push, CTLFLAG_RW, + &stat_worklist_push, 0,""); +SYSCTL_INT(_debug_softdep, OID_AUTO, blk_limit_push, CTLFLAG_RW, + &stat_blk_limit_push, 0,""); +SYSCTL_INT(_debug_softdep, OID_AUTO, ino_limit_push, CTLFLAG_RW, + &stat_ino_limit_push, 0,""); +SYSCTL_INT(_debug_softdep, OID_AUTO, blk_limit_hit, CTLFLAG_RW, + &stat_blk_limit_hit, 0, ""); +SYSCTL_INT(_debug_softdep, OID_AUTO, ino_limit_hit, CTLFLAG_RW, + &stat_ino_limit_hit, 0, ""); +SYSCTL_INT(_debug_softdep, OID_AUTO, sync_limit_hit, CTLFLAG_RW, + 
&stat_sync_limit_hit, 0, ""); +SYSCTL_INT(_debug_softdep, OID_AUTO, indir_blk_ptrs, CTLFLAG_RW, + &stat_indir_blk_ptrs, 0, ""); +SYSCTL_INT(_debug_softdep, OID_AUTO, inode_bitmap, CTLFLAG_RW, + &stat_inode_bitmap, 0, ""); +SYSCTL_INT(_debug_softdep, OID_AUTO, direct_blk_ptrs, CTLFLAG_RW, + &stat_direct_blk_ptrs, 0, ""); +SYSCTL_INT(_debug_softdep, OID_AUTO, dir_entry, CTLFLAG_RW, + &stat_dir_entry, 0, ""); +SYSCTL_INT(_debug_softdep, OID_AUTO, jaddref_rollback, CTLFLAG_RW, + &stat_jaddref, 0, ""); +SYSCTL_INT(_debug_softdep, OID_AUTO, jnewblk_rollback, CTLFLAG_RW, + &stat_jnewblk, 0, ""); +SYSCTL_INT(_debug_softdep, OID_AUTO, journal_low, CTLFLAG_RW, + &stat_journal_low, 0, ""); +SYSCTL_INT(_debug_softdep, OID_AUTO, journal_min, CTLFLAG_RW, + &stat_journal_min, 0, ""); +SYSCTL_INT(_debug_softdep, OID_AUTO, journal_wait, CTLFLAG_RW, + &stat_journal_wait, 0, ""); +SYSCTL_INT(_debug_softdep, OID_AUTO, jwait_filepage, CTLFLAG_RW, + &stat_jwait_filepage, 0, ""); +SYSCTL_INT(_debug_softdep, OID_AUTO, jwait_freeblks, CTLFLAG_RW, + &stat_jwait_freeblks, 0, ""); +SYSCTL_INT(_debug_softdep, OID_AUTO, jwait_inode, CTLFLAG_RW, + &stat_jwait_inode, 0, ""); +SYSCTL_INT(_debug_softdep, OID_AUTO, jwait_newblk, CTLFLAG_RW, + &stat_jwait_newblk, 0, ""); SYSCTL_DECL(_vfs_ffs); +LIST_HEAD(bmsafemap_hashhead, bmsafemap) *bmsafemap_hashtbl; +static u_long bmsafemap_hash; /* size of hash table - 1 */ + static int compute_summary_at_mount = 0; /* Whether to recompute the summary at mount time */ SYSCTL_INT(_vfs_ffs, OID_AUTO, compute_summary_at_mount, CTLFLAG_RW, &compute_summary_at_mount, 0, "Recompute summary at mount"); @@ -770,16 +1253,22 @@ softdep_flush(void) } } -static int -softdep_speedup(void) +static void +worklist_speedup(void) { - mtx_assert(&lk, MA_OWNED); if (req_pending == 0) { req_pending = 1; wakeup(&req_pending); } +} +static int +softdep_speedup(void) +{ + + worklist_speedup(); + bd_speedup(); return speedup_syncer(); } @@ -791,15 +1280,17 @@ softdep_speedup(void) * 
and does so in order from first to last. */ static void -add_to_worklist(wk) +add_to_worklist(wk, nodelay) struct worklist *wk; + int nodelay; { struct ufsmount *ump; mtx_assert(&lk, MA_OWNED); ump = VFSTOUFS(wk->wk_mp); if (wk->wk_state & ONWORKLIST) - panic("add_to_worklist: already on list"); + panic("add_to_worklist: %s(0x%X) already on list", + TYPENAME(wk->wk_type), wk->wk_state); wk->wk_state |= ONWORKLIST; if (LIST_EMPTY(&ump->softdep_workitem_pending)) LIST_INSERT_HEAD(&ump->softdep_workitem_pending, wk, wk_list); @@ -807,6 +1298,30 @@ add_to_worklist(wk) LIST_INSERT_AFTER(ump->softdep_worklist_tail, wk, wk_list); ump->softdep_worklist_tail = wk; ump->softdep_on_worklist += 1; + if (nodelay) + worklist_speedup(); +} + +/* + * Remove the item to be processed. If we are removing the last + * item on the list, we need to recalculate the tail pointer. + */ +static void +remove_from_worklist(wk) + struct worklist *wk; +{ + struct ufsmount *ump; + struct worklist *wkend; + + ump = VFSTOUFS(wk->wk_mp); + WORKLIST_REMOVE(wk); + if (wk == ump->softdep_worklist_tail) { + LIST_FOREACH(wkend, &ump->softdep_workitem_pending, wk_list) + if (LIST_NEXT(wkend, wk_list) == NULL) + break; + ump->softdep_worklist_tail = wkend; + } + ump->softdep_on_worklist -= 1; } /* @@ -838,8 +1353,9 @@ softdep_process_worklist(mp, full) ACQUIRE_LOCK(&lk); loopcount = 1; starttime = time_second; + softdep_process_journal(mp, full?MNT_WAIT:0); while (ump->softdep_on_worklist > 0) { - if ((cnt = process_worklist_item(mp, 0)) == -1) + if ((cnt = process_worklist_item(mp, LK_NOWAIT)) == -1) break; else matchcnt += cnt; @@ -871,15 +1387,60 @@ softdep_process_worklist(mp, full) * second. Otherwise the other mountpoints may get * excessively backlogged. 
*/ - if (!full && starttime != time_second) { - matchcnt = -1; + if (!full && starttime != time_second) break; - } } FREE_LOCK(&lk); return (matchcnt); } +/* + * Process all removes associated with a vnode if we are running out of + * journal space. Any other process which attempts to flush these will + * be unable as we have the vnodes locked. + */ +static void +process_removes(vp) + struct vnode *vp; +{ + struct inodedep *inodedep; + struct dirrem *dirrem; + struct mount *mp; + ino_t inum; + + mtx_assert(&lk, MA_OWNED); + + mp = vp->v_mount; + inum = VTOI(vp)->i_number; + for (;;) { + if (inodedep_lookup(mp, inum, 0, &inodedep) == 0) + return; + LIST_FOREACH(dirrem, &inodedep->id_dirremhd, dm_inonext) + if ((dirrem->dm_state & (COMPLETE | ONWORKLIST)) == + (COMPLETE | ONWORKLIST)) + break; + if (dirrem == NULL) + return; + /* + * If another thread is trying to lock this vnode it will + * fail but we must wait for it to do so before we can + * proceed. + */ + if (dirrem->dm_state & INPROGRESS) { + dirrem->dm_state |= IOWAITING; + msleep(&dirrem->dm_list, &lk, PVM, "pwrwait", 0); + continue; + } + remove_from_worklist(&dirrem->dm_list); + FREE_LOCK(&lk); + if (vn_start_secondary_write(NULL, &mp, V_NOWAIT)) + panic("process_removes: suspended filesystem"); + handle_workitem_remove(dirrem, vp); + vn_finished_secondary_write(mp); + ACQUIRE_LOCK(&lk); + } +} + /* * Process one item on the worklist. */ @@ -888,7 +1449,7 @@ process_worklist_item(mp, flags) struct mount *mp; int flags; { - struct worklist *wk, *wkend; + struct worklist *wk, *wkXXX; struct ufsmount *ump; struct vnode *vp; int matchcnt = 0; @@ -908,11 +1469,14 @@ process_worklist_item(mp, flags) * inodes, we have to skip over any dirrem requests whose * vnodes are resident and locked. 
*/ - ump = VFSTOUFS(mp); vp = NULL; + ump = VFSTOUFS(mp); LIST_FOREACH(wk, &ump->softdep_workitem_pending, wk_list) { - if (wk->wk_state & INPROGRESS) + if (wk->wk_state & INPROGRESS) { + wkXXX = wk; continue; + } + wkXXX = wk; /* Record the last valid wk pointer. */ if ((flags & LK_NOWAIT) == 0 || wk->wk_type != D_DIRREM) break; wk->wk_state |= INPROGRESS; @@ -921,6 +1485,10 @@ process_worklist_item(mp, flags) ffs_vgetf(mp, WK_DIRREM(wk)->dm_oldinum, LK_NOWAIT | LK_EXCLUSIVE, &vp, FFSV_FORCEINSMQ); ACQUIRE_LOCK(&lk); + if (wk->wk_state & IOWAITING) { + wk->wk_state &= ~IOWAITING; + wakeup(wk); + } wk->wk_state &= ~INPROGRESS; ump->softdep_on_worklist_inprogress--; if (vp != NULL) @@ -928,21 +1496,7 @@ process_worklist_item(mp, flags) } if (wk == 0) return (-1); - /* - * Remove the item to be processed. If we are removing the last - * item on the list, we need to recalculate the tail pointer. - * As this happens rarely and usually when the list is short, - * we just run down the list to find it rather than tracking it - * in the above loop. - */ - WORKLIST_REMOVE(wk); - if (wk == ump->softdep_worklist_tail) { - LIST_FOREACH(wkend, &ump->softdep_workitem_pending, wk_list) - if (LIST_NEXT(wkend, wk_list) == NULL) - break; - ump->softdep_worklist_tail = wkend; - } - ump->softdep_on_worklist -= 1; + remove_from_worklist(wk); FREE_LOCK(&lk); if (vn_start_secondary_write(NULL, &mp, V_NOWAIT)) panic("process_worklist_item: suspended filesystem"); @@ -952,6 +1506,8 @@ process_worklist_item(mp, flags) case D_DIRREM: /* removal of a directory entry */ handle_workitem_remove(WK_DIRREM(wk), vp); + if (vp) + vput(vp); break; case D_FREEBLKS: @@ -969,6 +1525,11 @@ process_worklist_item(mp, flags) handle_workitem_freefile(WK_FREEFILE(wk)); break; + case D_FREEWORK: + /* Final block in an indirect was freed. 
*/ + handle_workitem_indirblk(WK_FREEWORK(wk)); + break; + default: panic("%s_process_worklist: Unknown type %s", "softdep", TYPENAME(wk->wk_type)); @@ -982,19 +1543,22 @@ process_worklist_item(mp, flags) /* * Move dependencies from one buffer to another. */ -void +int softdep_move_dependencies(oldbp, newbp) struct buf *oldbp; struct buf *newbp; { struct worklist *wk, *wktail; + int dirty; - if (!LIST_EMPTY(&newbp->b_dep)) - panic("softdep_move_dependencies: need merge code"); - wktail = 0; + dirty = 0; + wktail = NULL; ACQUIRE_LOCK(&lk); while ((wk = LIST_FIRST(&oldbp->b_dep)) != NULL) { LIST_REMOVE(wk, wk_list); + if (wk->wk_type == D_BMSAFEMAP && + bmsafemap_rollbacks(WK_BMSAFEMAP(wk))) + dirty = 1; if (wktail == 0) LIST_INSERT_HEAD(&newbp->b_dep, wk, wk_list); else @@ -1002,6 +1566,8 @@ softdep_move_dependencies(oldbp, newbp) wktail = wk; } FREE_LOCK(&lk); + + return (dirty); } /* @@ -1198,23 +1764,22 @@ pagedep_find(pagedephd, ino, lbn, mp, flags, pagedeppp) * This routine must be called with splbio interrupts blocked. 
*/ static int -pagedep_lookup(ip, lbn, flags, pagedeppp) - struct inode *ip; +pagedep_lookup(mp, ino, lbn, flags, pagedeppp) + struct mount *mp; + ino_t ino; ufs_lbn_t lbn; int flags; struct pagedep **pagedeppp; { struct pagedep *pagedep; struct pagedep_hashhead *pagedephd; - struct mount *mp; int ret; int i; mtx_assert(&lk, MA_OWNED); - mp = ITOV(ip)->v_mount; - pagedephd = PAGEDEP_HASH(mp, ip->i_number, lbn); + pagedephd = PAGEDEP_HASH(mp, ino, lbn); - ret = pagedep_find(pagedephd, ip->i_number, lbn, mp, flags, pagedeppp); + ret = pagedep_find(pagedephd, ino, lbn, mp, flags, pagedeppp); if (*pagedeppp || (flags & DEPALLOC) == 0) return (ret); FREE_LOCK(&lk); @@ -1222,12 +1787,12 @@ pagedep_lookup(ip, lbn, flags, pagedeppp) M_PAGEDEP, M_SOFTDEP_FLAGS|M_ZERO); workitem_alloc(&pagedep->pd_list, D_PAGEDEP, mp); ACQUIRE_LOCK(&lk); - ret = pagedep_find(pagedephd, ip->i_number, lbn, mp, flags, pagedeppp); + ret = pagedep_find(pagedephd, ino, lbn, mp, flags, pagedeppp); if (*pagedeppp) { WORKITEM_FREE(pagedep, D_PAGEDEP); return (ret); } - pagedep->pd_ino = ip->i_number; + pagedep->pd_ino = ino; pagedep->pd_lbn = lbn; LIST_INIT(&pagedep->pd_dirremhd); LIST_INIT(&pagedep->pd_pendinghd); @@ -1314,10 +1879,14 @@ inodedep_lookup(mp, inum, flags, inodedeppp) inodedep->id_savedino1 = NULL; inodedep->id_savedsize = -1; inodedep->id_savedextsize = -1; - inodedep->id_buf = NULL; + inodedep->id_savednlink = -1; + inodedep->id_bmsafemap = NULL; + inodedep->id_mkdiradd = NULL; + LIST_INIT(&inodedep->id_dirremhd); LIST_INIT(&inodedep->id_pendinghd); LIST_INIT(&inodedep->id_inowait); LIST_INIT(&inodedep->id_bufwait); + TAILQ_INIT(&inodedep->id_inoreflst); TAILQ_INIT(&inodedep->id_inoupdt); TAILQ_INIT(&inodedep->id_newinoupdt); TAILQ_INIT(&inodedep->id_extupdt); @@ -1336,17 +1905,29 @@ u_long newblk_hash; /* size of hash table - 1 */ (&newblk_hashtbl[((((register_t)(fs)) >> 13) + (inum)) & newblk_hash]) static int -newblk_find(newblkhd, fs, newblkno, newblkpp) +newblk_find(newblkhd, 
mp, newblkno, flags, newblkpp) struct newblk_hashhead *newblkhd; - struct fs *fs; + struct mount *mp; ufs2_daddr_t newblkno; + int flags; struct newblk **newblkpp; { struct newblk *newblk; - LIST_FOREACH(newblk, newblkhd, nb_hash) - if (newblkno == newblk->nb_newblkno && fs == newblk->nb_fs) - break; + LIST_FOREACH(newblk, newblkhd, nb_hash) { + if (newblkno != newblk->nb_newblkno) + continue; + if (mp != newblk->nb_list.wk_mp) + continue; + /* + * If we're creating a new dependency don't match those that + * have already been converted to allocdirects. This is for + * a frag extend. + */ + if ((flags & DEPALLOC) && newblk->nb_list.wk_type != D_NEWBLK) + continue; + break; + } if (newblk) { *newblkpp = newblk; return (1); @@ -1361,8 +1942,8 @@ newblk_find(newblkhd, fs, newblkno, newblkpp) * Found or allocated entry is returned in newblkpp. */ static int -newblk_lookup(fs, newblkno, flags, newblkpp) - struct fs *fs; +newblk_lookup(mp, newblkno, flags, newblkpp) + struct mount *mp; ufs2_daddr_t newblkno; int flags; struct newblk **newblkpp; @@ -1370,21 +1951,25 @@ newblk_lookup(fs, newblkno, flags, newblkpp) struct newblk *newblk; struct newblk_hashhead *newblkhd; - newblkhd = NEWBLK_HASH(fs, newblkno); - if (newblk_find(newblkhd, fs, newblkno, newblkpp)) + newblkhd = NEWBLK_HASH(VFSTOUFS(mp)->um_fs, newblkno); + if (newblk_find(newblkhd, mp, newblkno, flags, newblkpp)) return (1); if ((flags & DEPALLOC) == 0) return (0); FREE_LOCK(&lk); - newblk = malloc(sizeof(struct newblk), - M_NEWBLK, M_SOFTDEP_FLAGS); + newblk = malloc(sizeof(union allblk), M_NEWBLK, + M_SOFTDEP_FLAGS | M_ZERO); + workitem_alloc(&newblk->nb_list, D_NEWBLK, mp); ACQUIRE_LOCK(&lk); - if (newblk_find(newblkhd, fs, newblkno, newblkpp)) { - free(newblk, M_NEWBLK); + if (newblk_find(newblkhd, mp, newblkno, flags, newblkpp)) { + WORKITEM_FREE(newblk, D_NEWBLK); return (1); } - newblk->nb_state = 0; - newblk->nb_fs = fs; + newblk->nb_freefrag = NULL; + LIST_INIT(&newblk->nb_indirdeps); + 
LIST_INIT(&newblk->nb_newdirblk); + LIST_INIT(&newblk->nb_jwork); + newblk->nb_state = ATTACHED; newblk->nb_newblkno = newblkno; LIST_INSERT_HEAD(newblkhd, newblk, nb_hash); *newblkpp = newblk; @@ -1401,10 +1986,10 @@ softdep_initialize() LIST_INIT(&mkdirlisthd); max_softdeps = desiredvnodes * 4; - pagedep_hashtbl = hashinit(desiredvnodes / 5, M_PAGEDEP, - &pagedep_hash); + pagedep_hashtbl = hashinit(desiredvnodes / 5, M_PAGEDEP, &pagedep_hash); inodedep_hashtbl = hashinit(desiredvnodes, M_INODEDEP, &inodedep_hash); - newblk_hashtbl = hashinit(64, M_NEWBLK, &newblk_hash); + newblk_hashtbl = hashinit(desiredvnodes / 5, M_NEWBLK, &newblk_hash); + bmsafemap_hashtbl = hashinit(1024, M_BMSAFEMAP, &bmsafemap_hash); /* initialise bioops hack */ bioops.io_start = softdep_disk_io_initiation; @@ -1428,6 +2013,7 @@ softdep_uninitialize() hashdestroy(pagedep_hashtbl, M_PAGEDEP, pagedep_hash); hashdestroy(inodedep_hashtbl, M_INODEDEP, inodedep_hash); hashdestroy(newblk_hashtbl, M_NEWBLK, newblk_hash); + hashdestroy(bmsafemap_hashtbl, M_BMSAFEMAP, bmsafemap_hash); } /* @@ -1457,9 +2043,16 @@ softdep_mount(devvp, mp, fs, cred) MNT_IUNLOCK(mp); ump = VFSTOUFS(mp); LIST_INIT(&ump->softdep_workitem_pending); + LIST_INIT(&ump->softdep_journal_pending); + TAILQ_INIT(&ump->softdep_unlinked); ump->softdep_worklist_tail = NULL; ump->softdep_on_worklist = 0; ump->softdep_deps = 0; + if ((fs->fs_flags & FS_SUJ) && + (error = journal_mount(mp, fs, cred)) != 0) { + printf("Failed to start journal: %d\n", error); + return (error); + } /* * When doing soft updates, the counters in the * superblock may have gotten out of sync. Recomputation @@ -1493,6 +2086,2021 @@ softdep_mount(devvp, mp, fs, cred) return (0); } +void +softdep_unmount(mp) + struct mount *mp; +{ + + if (mp->mnt_kern_flag & MNTK_SUJ) + journal_unmount(mp); +} + +struct jblocks { + struct jseglst jb_segs; /* TAILQ of current segments. */ + struct jseg *jb_writeseg; /* Next write to complete. 
*/ + struct jextent *jb_extent; /* Extent array. */ + uint64_t jb_nextseq; /* Next sequence number. */ + uint64_t jb_oldestseq; /* Oldest active sequence number. */ + int jb_avail; /* Available extents. */ + int jb_used; /* Last used extent. */ + int jb_head; /* Allocator head. */ + int jb_off; /* Allocator extent offset. */ + int jb_blocks; /* Total disk blocks covered. */ + int jb_free; /* Total disk blocks free. */ + int jb_min; /* Minimum free space. */ + int jb_low; /* Low on space. */ + int jb_age; /* Insertion time of oldest rec. */ + int jb_suspended; /* Did journal suspend writes? */ +}; + +struct jextent { + ufs2_daddr_t je_daddr; /* Disk block address. */ + int je_blocks; /* Disk block count. */ +}; + +static struct jblocks * +jblocks_create(void) +{ + struct jblocks *jblocks; + + jblocks = malloc(sizeof(*jblocks), M_JBLOCKS, M_WAITOK | M_ZERO); + TAILQ_INIT(&jblocks->jb_segs); + jblocks->jb_avail = 10; + jblocks->jb_extent = malloc(sizeof(struct jextent) * jblocks->jb_avail, + M_JBLOCKS, M_WAITOK | M_ZERO); + + return (jblocks); +} + +static ufs2_daddr_t +jblocks_alloc(jblocks, bytes, actual) + struct jblocks *jblocks; + int bytes; + int *actual; +{ + ufs2_daddr_t daddr; + struct jextent *jext; + int freecnt; + int blocks; + + blocks = bytes / DEV_BSIZE; + jext = &jblocks->jb_extent[jblocks->jb_head]; + freecnt = jext->je_blocks - jblocks->jb_off; + if (freecnt == 0) { + jblocks->jb_off = 0; + if (++jblocks->jb_head > jblocks->jb_used) + jblocks->jb_head = 0; + jext = &jblocks->jb_extent[jblocks->jb_head]; + freecnt = jext->je_blocks; + } + if (freecnt > blocks) + freecnt = blocks; + *actual = freecnt * DEV_BSIZE; + daddr = jext->je_daddr + jblocks->jb_off; + jblocks->jb_off += freecnt; + jblocks->jb_free -= freecnt; + + return (daddr); +} + +static void +jblocks_free(jblocks, mp, bytes) + struct jblocks *jblocks; + struct mount *mp; + int bytes; +{ + + jblocks->jb_free += bytes / DEV_BSIZE; + if (jblocks->jb_suspended) + worklist_speedup(); + 
wakeup(jblocks); +} + +static void +jblocks_destroy(jblocks) + struct jblocks *jblocks; +{ + + if (jblocks->jb_extent) + free(jblocks->jb_extent, M_JBLOCKS); + free(jblocks, M_JBLOCKS); +} + +static void +jblocks_add(jblocks, daddr, blocks) + struct jblocks *jblocks; + ufs2_daddr_t daddr; + int blocks; +{ + struct jextent *jext; + + jblocks->jb_blocks += blocks; + jblocks->jb_free += blocks; + jext = &jblocks->jb_extent[jblocks->jb_used]; + /* Adding the first block. */ + if (jext->je_daddr == 0) { + jext->je_daddr = daddr; + jext->je_blocks = blocks; + return; + } + /* Extending the last extent. */ + if (jext->je_daddr + jext->je_blocks == daddr) { + jext->je_blocks += blocks; + return; + } + /* Adding a new extent. */ + if (++jblocks->jb_used == jblocks->jb_avail) { + jblocks->jb_avail *= 2; + jext = malloc(sizeof(struct jextent) * jblocks->jb_avail, + M_JBLOCKS, M_WAITOK | M_ZERO); + memcpy(jext, jblocks->jb_extent, + sizeof(struct jextent) * jblocks->jb_used); + free(jblocks->jb_extent, M_JBLOCKS); + jblocks->jb_extent = jext; + } + jext = &jblocks->jb_extent[jblocks->jb_used]; + jext->je_daddr = daddr; + jext->je_blocks = blocks; + return; +} + +int +softdep_journal_lookup(mp, vpp) + struct mount *mp; + struct vnode **vpp; +{ + struct componentname cnp; + struct vnode *dvp; + ino_t sujournal; + int error; + + error = VFS_VGET(mp, ROOTINO, LK_EXCLUSIVE, &dvp); + if (error) + return (error); + bzero(&cnp, sizeof(cnp)); + cnp.cn_nameiop = LOOKUP; + cnp.cn_flags = ISLASTCN; + cnp.cn_thread = curthread; + cnp.cn_cred = curthread->td_ucred; + cnp.cn_pnbuf = SUJ_FILE; + cnp.cn_nameptr = SUJ_FILE; + cnp.cn_namelen = strlen(SUJ_FILE); + error = ufs_lookup_ino(dvp, NULL, &cnp, &sujournal); + vput(dvp); + if (error != 0) + return (error); + error = VFS_VGET(mp, sujournal, LK_EXCLUSIVE, vpp); + return (error); +} + +/* + * Open and verify the journal file. 
+ */ +static int +journal_mount(mp, fs, cred) + struct mount *mp; + struct fs *fs; + struct ucred *cred; +{ + struct jblocks *jblocks; + struct vnode *vp; + struct inode *ip; + ufs2_daddr_t blkno; + int bcount; + int error; + int i; + + mp->mnt_kern_flag |= MNTK_SUJ; + error = softdep_journal_lookup(mp, &vp); + if (error != 0) { + printf("Failed to find journal. Use tunefs to create one\n"); + return (error); + } + ip = VTOI(vp); + if (ip->i_size < SUJ_MIN) { + error = ENOSPC; + goto out; + } + bcount = lblkno(fs, ip->i_size); /* Only use whole blocks. */ + jblocks = jblocks_create(); + for (i = 0; i < bcount; i++) { + error = ufs_bmaparray(vp, i, &blkno, NULL, NULL, NULL); + if (error) + break; + jblocks_add(jblocks, blkno, fsbtodb(fs, fs->fs_frag)); + } + if (error) { + jblocks_destroy(jblocks); + goto out; + } + jblocks->jb_low = jblocks->jb_free / 3; /* Reserve 33%. */ + jblocks->jb_min = jblocks->jb_free / 10; /* Suspend at 10%. */ + /* + * Only validate the journal contents if the filesystem is clean, + * otherwise we write the logs but they'll never be used. If the + * filesystem was still dirty when we mounted it the journal is + * invalid and a new journal can only be valid if it starts from a + * clean mount. + */ + if (fs->fs_clean) { + DIP_SET(ip, i_modrev, fs->fs_mtime); + ip->i_flags |= IN_MODIFIED; + ffs_update(vp, 1); + } + VFSTOUFS(mp)->softdep_jblocks = jblocks; +out: + vput(vp); + return (error); +} + +static void +journal_unmount(mp) + struct mount *mp; +{ + struct ufsmount *ump; + + ump = VFSTOUFS(mp); + if (ump->softdep_jblocks) + jblocks_destroy(ump->softdep_jblocks); + ump->softdep_jblocks = NULL; +} + +/* + * Called when a journal record is ready to be written. Space is allocated + * and the journal entry is created when the journal is flushed to stable + * store. 
+ */ +static void +add_to_journal(wk) + struct worklist *wk; +{ + struct ufsmount *ump; + + mtx_assert(&lk, MA_OWNED); + ump = VFSTOUFS(wk->wk_mp); + if (wk->wk_state & ONWORKLIST) + panic("add_to_journal: %s(0x%X) already on list", + TYPENAME(wk->wk_type), wk->wk_state); + wk->wk_state |= ONWORKLIST | DEPCOMPLETE; + if (LIST_EMPTY(&ump->softdep_journal_pending)) { + ump->softdep_jblocks->jb_age = ticks; + LIST_INSERT_HEAD(&ump->softdep_journal_pending, wk, wk_list); + } else + LIST_INSERT_AFTER(ump->softdep_journal_tail, wk, wk_list); + ump->softdep_journal_tail = wk; + ump->softdep_on_journal += 1; +} + +/* + * Remove an arbitrary item for the journal worklist maintain the tail + * pointer. This happens when a new operation obviates the need to + * journal an old operation. + */ +static void +remove_from_journal(wk) + struct worklist *wk; +{ + struct ufsmount *ump; + + mtx_assert(&lk, MA_OWNED); + ump = VFSTOUFS(wk->wk_mp); +#ifdef DEBUG /* XXX Expensive, temporary. */ + { + struct worklist *wkn; + + LIST_FOREACH(wkn, &ump->softdep_journal_pending, wk_list) + if (wkn == wk) + break; + if (wkn == NULL) + panic("remove_from_journal: %p is not in journal", wk); + } +#endif + /* + * We emulate a TAILQ to save space in most structures which do not + * require TAILQ semantics. Here we must update the tail position + * when removing the tail which is not the final entry. + */ + if (ump->softdep_journal_tail == wk) + ump->softdep_journal_tail = + (struct worklist *)wk->wk_list.le_prev; + + WORKLIST_REMOVE(wk); + ump->softdep_on_journal -= 1; +} + +/* + * Check for journal space as well as dependency limits so the prelink + * code can throttle both journaled and non-journaled filesystems. + * Threshold is 0 for low and 1 for min. 
+ */ +static int +journal_space(ump, thresh) + struct ufsmount *ump; + int thresh; +{ + struct jblocks *jblocks; + int avail; + + /* + * We use a tighter restriction here to prevent request_cleanup() + * running in threads from running into locks we currently hold. + */ + if (num_inodedep > (max_softdeps / 10) * 9) + return (0); + + jblocks = ump->softdep_jblocks; + if (jblocks == NULL) + return (1); + if (thresh) + thresh = jblocks->jb_min; + else + thresh = jblocks->jb_low; + avail = (ump->softdep_on_journal * JREC_SIZE) / DEV_BSIZE; + avail = jblocks->jb_free - avail; + + return (avail > thresh); +} + +static void +journal_suspend(ump) + struct ufsmount *ump; +{ + struct jblocks *jblocks; + struct mount *mp; + + mp = UFSTOVFS(ump); + jblocks = ump->softdep_jblocks; + MNT_ILOCK(mp); + if ((mp->mnt_kern_flag & MNTK_SUSPEND) == 0) { + stat_journal_min++; + mp->mnt_kern_flag |= MNTK_SUSPEND; + mp->mnt_susp_owner = FIRST_THREAD_IN_PROC(softdepproc); + } + jblocks->jb_suspended = 1; + MNT_IUNLOCK(mp); +} + +/* + * Called before any allocation function to be certain that there is + * sufficient space in the journal prior to creating any new records. + * Since in the case of block allocation we may have multiple locked + * buffers at the time of the actual allocation we can not block + * when the journal records are created. Doing so would create a deadlock + * if any of these buffers needed to be flushed to reclaim space. Instead + * we require a sufficiently large amount of available space such that + * each thread in the system could have passed this allocation check and + * still have sufficient free space. With 20% of a minimum journal size + * of 1MB we have 6553 records available. 
+ */ +int +softdep_prealloc(vp, waitok) + struct vnode *vp; + int waitok; +{ + struct ufsmount *ump; + + if (DOINGSUJ(vp) == 0) + return (0); + ump = VFSTOUFS(vp->v_mount); + ACQUIRE_LOCK(&lk); + if (journal_space(ump, 0)) { + FREE_LOCK(&lk); + return (0); + } + stat_journal_low++; + FREE_LOCK(&lk); + if (waitok == MNT_NOWAIT) + return (ENOSPC); + /* + * Attempt to sync this vnode once to flush any journal + * work attached to it. + */ + if ((curthread->td_pflags & TDP_COWINPROGRESS) == 0) + ffs_syncvnode(vp, waitok); + ACQUIRE_LOCK(&lk); + process_removes(vp); + if (journal_space(ump, 0) == 0) { + softdep_speedup(); + if (journal_space(ump, 1) == 0) + journal_suspend(ump); + } + FREE_LOCK(&lk); + + return (0); +} + +/* + * Before adjusting a link count on a vnode verify that we have sufficient + * journal space. If not, process operations that depend on the currently + * locked pair of vnodes to try to flush space as the syncer, buf daemon, + * and softdep flush threads can not acquire these locks to reclaim space. + */ +static void +softdep_prelink(dvp, vp) + struct vnode *dvp; + struct vnode *vp; +{ + struct ufsmount *ump; + + ump = VFSTOUFS(dvp->v_mount); + mtx_assert(&lk, MA_OWNED); + if (journal_space(ump, 0)) + return; + stat_journal_low++; + FREE_LOCK(&lk); + if (vp) + ffs_syncvnode(vp, MNT_NOWAIT); + ffs_syncvnode(dvp, MNT_WAIT); + ACQUIRE_LOCK(&lk); + /* Process vp before dvp as it may create .. removes. 
*/ + if (vp) + process_removes(vp); + process_removes(dvp); + softdep_speedup(); + process_worklist_item(UFSTOVFS(ump), LK_NOWAIT); + process_worklist_item(UFSTOVFS(ump), LK_NOWAIT); + if (journal_space(ump, 0) == 0) { + softdep_speedup(); + if (journal_space(ump, 1) == 0) + journal_suspend(ump); + } +} + +static void +jseg_write(fs, jblocks, jseg, data) + struct fs *fs; + struct jblocks *jblocks; + struct jseg *jseg; + uint8_t *data; +{ + struct jsegrec *rec; + + rec = (struct jsegrec *)data; + rec->jsr_seq = jseg->js_seq; + rec->jsr_oldest = jblocks->jb_oldestseq; + rec->jsr_cnt = jseg->js_cnt; + rec->jsr_blocks = jseg->js_size / DEV_BSIZE; + rec->jsr_crc = 0; + rec->jsr_time = fs->fs_mtime; +} + +static inline void +inoref_write(inoref, jseg, rec) + struct inoref *inoref; + struct jseg *jseg; + struct jrefrec *rec; +{ + + inoref->if_jsegdep->jd_seg = jseg; + rec->jr_ino = inoref->if_ino; + rec->jr_parent = inoref->if_parent; + rec->jr_nlink = inoref->if_nlink; + rec->jr_mode = inoref->if_mode; + rec->jr_diroff = inoref->if_diroff; +} + +static void +jaddref_write(jaddref, jseg, data) + struct jaddref *jaddref; + struct jseg *jseg; + uint8_t *data; +{ + struct jrefrec *rec; + + rec = (struct jrefrec *)data; + rec->jr_op = JOP_ADDREF; + inoref_write(&jaddref->ja_ref, jseg, rec); +} + +static void +jremref_write(jremref, jseg, data) + struct jremref *jremref; + struct jseg *jseg; + uint8_t *data; +{ + struct jrefrec *rec; + + rec = (struct jrefrec *)data; + rec->jr_op = JOP_REMREF; + inoref_write(&jremref->jr_ref, jseg, rec); +} + +static void +jmvref_write(jmvref, jseg, data) + struct jmvref *jmvref; + struct jseg *jseg; + uint8_t *data; +{ + struct jmvrec *rec; + + rec = (struct jmvrec *)data; + rec->jm_op = JOP_MVREF; + rec->jm_ino = jmvref->jm_ino; + rec->jm_parent = jmvref->jm_parent; + rec->jm_oldoff = jmvref->jm_oldoff; + rec->jm_newoff = jmvref->jm_newoff; +} + +static void +jnewblk_write(jnewblk, jseg, data) + struct jnewblk *jnewblk; + struct jseg *jseg; 
+	uint8_t *data;
+{
+	struct jblkrec *rec;
+
+	jnewblk->jn_jsegdep->jd_seg = jseg;
+	rec = (struct jblkrec *)data;
+	rec->jb_op = JOP_NEWBLK;
+	rec->jb_ino = jnewblk->jn_ino;
+	rec->jb_blkno = jnewblk->jn_blkno;
+	rec->jb_lbn = jnewblk->jn_lbn;
+	rec->jb_frags = jnewblk->jn_frags;
+	rec->jb_oldfrags = jnewblk->jn_oldfrags;
+}
+
+static void
+jfreeblk_write(jfreeblk, jseg, data)
+	struct jfreeblk *jfreeblk;	/* Source of the record fields. */
+	struct jseg *jseg;		/* Stored in the jsegdep's jd_seg. */
+	uint8_t *data;			/* Filled in as a struct jblkrec. */
+{
+	struct jblkrec *rec;
+
+	jfreeblk->jf_jsegdep->jd_seg = jseg;
+	rec = (struct jblkrec *)data;
+	rec->jb_op = JOP_FREEBLK;
+	rec->jb_ino = jfreeblk->jf_ino;
+	rec->jb_blkno = jfreeblk->jf_blkno;
+	rec->jb_lbn = jfreeblk->jf_lbn;
+	rec->jb_frags = jfreeblk->jf_frags;
+	rec->jb_oldfrags = 0;
+}
+
+static void
+jfreefrag_write(jfreefrag, jseg, data)
+	struct jfreefrag *jfreefrag;	/* Source of the record fields. */
+	struct jseg *jseg;		/* Stored in the jsegdep's jd_seg. */
+	uint8_t *data;			/* Filled in as a struct jblkrec. */
+{
+	struct jblkrec *rec;
+
+	jfreefrag->fr_jsegdep->jd_seg = jseg;
+	rec = (struct jblkrec *)data;
+	rec->jb_op = JOP_FREEBLK;	/* Same opcode jfreeblk_write() uses. */
+	rec->jb_ino = jfreefrag->fr_ino;
+	rec->jb_blkno = jfreefrag->fr_blkno;
+	rec->jb_lbn = jfreefrag->fr_lbn;
+	rec->jb_frags = jfreefrag->fr_frags;
+	rec->jb_oldfrags = 0;
+}
+
+static void
+jtrunc_write(jtrunc, jseg, data)
+	struct jtrunc *jtrunc;	/* Source of the record fields. */
+	struct jseg *jseg;	/* Not referenced by this routine. */
+	uint8_t *data;		/* Filled in as a struct jtrncrec. */
+{
+	struct jtrncrec *rec;
+
+	rec = (struct jtrncrec *)data;
+	rec->jt_op = JOP_TRUNC;
+	rec->jt_ino = jtrunc->jt_ino;
+	rec->jt_size = jtrunc->jt_size;
+	rec->jt_extsize = jtrunc->jt_extsize;
+}
+
+/*
+ * Flush some journal records to disk.
+ *
+ * Nothing is done unless MNTK_SUJ is set in mp->mnt_kern_flag.  Each pass
+ * of the outer loop builds one jseg from the entries on
+ * softdep_journal_pending and issues it with bawrite() when flags is
+ * MNT_NOWAIT and with bwrite() otherwise.  lk is released around the
+ * allocations and around the write and is re-acquired afterwards, so
+ * this routine appears to expect lk to be held on entry and leaves it
+ * held on return -- confirm against the callers.
+ */
+static void
+softdep_process_journal(mp, flags)
+	struct mount *mp;
+	int flags;
+{
+	struct jblocks *jblocks;
+	struct ufsmount *ump;
+	struct worklist *wk;
+	struct jseg *jseg;
+	struct buf *bp;
+	uint8_t *data;
+	struct fs *fs;
+	int segwritten;
+	int jrecmin;	/* Minimum records per block. */
+	int jrecmax;	/* Maximum records per block. */
+	int size;
+	int cnt;
+	int off;
+
+	if ((mp->mnt_kern_flag & MNTK_SUJ) == 0)
+		return;
+	ump = VFSTOUFS(mp);
+	fs = ump->um_fs;
+	jblocks = ump->softdep_jblocks;
+	/*
+	 * We write anywhere between a disk block and fs block.  The upper
+	 * bound is picked to prevent buffer cache fragmentation and limit
+	 * processing time per I/O.
+	 */
+	jrecmin = (DEV_BSIZE / JREC_SIZE) - 1; /* -1 for seg header */
+	jrecmax = (fs->fs_bsize / DEV_BSIZE) * jrecmin;
+	segwritten = 0;
+	while ((cnt = ump->softdep_on_journal) != 0) {
+		/*
+		 * Create a new segment to hold as many as 'cnt' journal
+		 * entries and add them to the segment.  Notice cnt is
+		 * off by one to account for the space required by the
+		 * jsegrec.  If we don't have a full block to log skip it
+		 * unless we haven't written anything.
+		 *
+		 * NOTE(review): segwritten is initialized to 0 above and is
+		 * not assigned anywhere else in this function, so the early
+		 * exit below cannot trigger as written -- confirm intent.
+		 */
+		cnt++;
+		if (cnt < jrecmax && segwritten)
+			break;
+		/*
+		 * Verify some free journal space.  softdep_prealloc() should
+		 * guarantee that we don't run out so this is indicative of
+		 * a problem with the flow control.  Try to recover
+		 * gracefully in any event.
+		 */
+		while (jblocks->jb_free == 0) {
+			if (flags != MNT_WAIT)
+				break;
+			printf("softdep: Out of journal space!\n");
+			softdep_speedup();
+			msleep(jblocks, &lk, PRIBIO, "jblocks", 1);
+		}
+		FREE_LOCK(&lk);
+		jseg = malloc(sizeof(*jseg), M_JSEG, M_SOFTDEP_FLAGS);
+		workitem_alloc(&jseg->js_list, D_JSEG, mp);
+		LIST_INIT(&jseg->js_entries);
+		jseg->js_state = ATTACHED;
+		jseg->js_jblocks = jblocks;
+		bp = geteblk(fs->fs_bsize, 0);
+		ACQUIRE_LOCK(&lk);
+		/*
+		 * If there was a race while we were allocating the block
+		 * and jseg the entry we care about was likely written.
+		 * We bail out in both the WAIT and NOWAIT case and assume
+		 * the caller will loop if the entry it cares about is
+		 * not written.
+		 *
+		 * The same path is taken when no journal space is free
+		 * (jb_free == 0); the unused buffer and jseg are discarded.
+		 */
+		if (ump->softdep_on_journal == 0 || jblocks->jb_free == 0) {
+			bp->b_flags |= B_INVAL | B_NOCACHE;
+			WORKITEM_FREE(jseg, D_JSEG);
+			FREE_LOCK(&lk);
+			brelse(bp);
+			ACQUIRE_LOCK(&lk);
+			break;
+		}
+		/*
+		 * Calculate the disk block size required for the available
+		 * records rounded to the min size.
+		 */
+		cnt = ump->softdep_on_journal;
+		if (cnt < jrecmax)
+			size = howmany(cnt, jrecmin) * DEV_BSIZE;
+		else
+			size = fs->fs_bsize;
+		/*
+		 * Allocate a disk block for this journal data and account
+		 * for truncation of the requested size if enough contiguous
+		 * space was not available.
+		 */
+		bp->b_blkno = jblocks_alloc(jblocks, size, &size);
+		bp->b_lblkno = bp->b_blkno;
+		bp->b_offset = bp->b_blkno * DEV_BSIZE;
+		bp->b_bcount = size;
+		bp->b_bufobj = &ump->um_devvp->v_bufobj;
+		bp->b_flags &= ~B_INVAL;
+		bp->b_flags |= B_VALIDSUSPWRT | B_NOCOPY;
+		/*
+		 * Initialize our jseg with cnt records.  Assign the next
+		 * sequence number to it and link it in-order.
+		 *
+		 * cnt is recomputed here because size may have been reduced
+		 * by jblocks_alloc() above.
+		 */
+		cnt = MIN(ump->softdep_on_journal,
+		    (size / DEV_BSIZE) * jrecmin);
+		jseg->js_buf = bp;
+		jseg->js_cnt = cnt;
+		jseg->js_refs = cnt + 1;	/* Self ref. */
+		jseg->js_size = size;
+		jseg->js_seq = jblocks->jb_nextseq++;
+		if (TAILQ_EMPTY(&jblocks->jb_segs))
+			jblocks->jb_oldestseq = jseg->js_seq;
+		TAILQ_INSERT_TAIL(&jblocks->jb_segs, jseg, js_next);
+		if (jblocks->jb_writeseg == NULL)
+			jblocks->jb_writeseg = jseg;
+		/*
+		 * Start filling in records from the pending list.
+		 */
+		data = bp->b_data;
+		off = 0;
+		while ((wk = LIST_FIRST(&ump->softdep_journal_pending))
+		    != NULL) {
+			/* Place a segment header on every device block. */
+			if ((off % DEV_BSIZE) == 0) {
+				jseg_write(fs, jblocks, jseg, data);
+				off += JREC_SIZE;
+				data = bp->b_data + off;
+			}
+			remove_from_journal(wk);
+			wk->wk_state |= IOSTARTED;
+			WORKLIST_INSERT(&jseg->js_entries, wk);
+			switch (wk->wk_type) {
+			case D_JADDREF:
+				jaddref_write(WK_JADDREF(wk), jseg, data);
+				break;
+			case D_JREMREF:
+				jremref_write(WK_JREMREF(wk), jseg, data);
+				break;
+			case D_JMVREF:
+				jmvref_write(WK_JMVREF(wk), jseg, data);
+				break;
+			case D_JNEWBLK:
+				jnewblk_write(WK_JNEWBLK(wk), jseg, data);
+				break;
+			case D_JFREEBLK:
+				jfreeblk_write(WK_JFREEBLK(wk), jseg, data);
+				break;
+			case D_JFREEFRAG:
+				jfreefrag_write(WK_JFREEFRAG(wk), jseg, data);
+				break;
+			case D_JTRUNC:
+				jtrunc_write(WK_JTRUNC(wk), jseg, data);
+				break;
+			default:
+				panic("process_journal: Unknown type %s",
+				    TYPENAME(wk->wk_type));
+				/* NOTREACHED */
+			}
+			if (--cnt == 0)	/* Segment holds js_cnt records now. */
+				break;
+			off += JREC_SIZE;
+			data = bp->b_data + off;
+		}
+		/*
+		 * Write this one buffer and continue.
+		 *
+		 * The jseg is hung off the buffer's b_dep list before lk is
+		 * dropped for the write.
+		 */
+		WORKLIST_INSERT(&bp->b_dep, &jseg->js_list);
+		FREE_LOCK(&lk);
+		BO_LOCK(bp->b_bufobj);
+		bgetvp(ump->um_devvp, bp);
+		BO_UNLOCK(bp->b_bufobj);
+		if (flags == MNT_NOWAIT)
+			bawrite(bp);
+		else
+			bwrite(bp);
+		ACQUIRE_LOCK(&lk);
+	}
+	/*
+	 * If we've suspended the filesystem because we ran out of journal
+	 * space either try to sync it here to make some progress or
+	 * unsuspend it if we already have.
+	 *
+	 * This is only attempted when flags is 0.
+	 */
+	if (flags == 0 && jblocks && jblocks->jb_suspended) {
+		if (journal_space(ump, jblocks->jb_min)) {
+			FREE_LOCK(&lk);
+			jblocks->jb_suspended = 0;
+			mp->mnt_susp_owner = curthread;
+			vfs_write_resume(mp);
+			ACQUIRE_LOCK(&lk);
+			return;
+		}
+		FREE_LOCK(&lk);
+		VFS_SYNC(mp, MNT_NOWAIT);
+		ffs_sbupdate(ump, MNT_WAIT, 0);
+		ACQUIRE_LOCK(&lk);
+	}
+}
+
+/*
+ * Complete a jseg, allowing all dependencies awaiting journal writes
+ * to proceed.  Each journal dependency also attaches a jsegdep to dependent
+ * structures so that the journal segment can be freed to reclaim space.
+ */ +static void +complete_jseg(jseg) + struct jseg *jseg; +{ + struct worklist *wk; + struct jmvref *jmvref; + int waiting; + int i; + + i = 0; + while ((wk = LIST_FIRST(&jseg->js_entries)) != NULL) { + WORKLIST_REMOVE(wk); + waiting = wk->wk_state & IOWAITING; + wk->wk_state &= ~(IOSTARTED | IOWAITING); + wk->wk_state |= COMPLETE; + KASSERT(i < jseg->js_cnt, + ("handle_written_jseg: overflow %d >= %d", + i, jseg->js_cnt)); + switch (wk->wk_type) { + case D_JADDREF: + handle_written_jaddref(WK_JADDREF(wk)); + break; + case D_JREMREF: + handle_written_jremref(WK_JREMREF(wk)); + break; + case D_JMVREF: + /* No jsegdep here. */ + free_jseg(jseg); + jmvref = WK_JMVREF(wk); + LIST_REMOVE(jmvref, jm_deps); + free_pagedep(jmvref->jm_pagedep); + WORKITEM_FREE(jmvref, D_JMVREF); + break; + case D_JNEWBLK: + handle_written_jnewblk(WK_JNEWBLK(wk)); + break; + case D_JFREEBLK: + handle_written_jfreeblk(WK_JFREEBLK(wk)); + break; + case D_JFREEFRAG: + handle_written_jfreefrag(WK_JFREEFRAG(wk)); + break; + case D_JTRUNC: + WK_JTRUNC(wk)->jt_jsegdep->jd_seg = jseg; + WORKITEM_FREE(wk, D_JTRUNC); + break; + default: + panic("handle_written_jseg: Unknown type %s", + TYPENAME(wk->wk_type)); + /* NOTREACHED */ + } + if (waiting) + wakeup(wk); + } + /* Release the self reference so the structure may be freed. */ + free_jseg(jseg); +} + +/* + * Mark a jseg as DEPCOMPLETE and throw away the buffer. Handle jseg + * completions in order only. + */ +static void +handle_written_jseg(jseg, bp) + struct jseg *jseg; + struct buf *bp; +{ + struct jblocks *jblocks; + struct jseg *jsegn; + + if (jseg->js_refs == 0) + panic("handle_written_jseg: No self-reference on %p", jseg); + jseg->js_state |= DEPCOMPLETE; + /* + * We'll never need this buffer again, set flags so it will be + * discarded. + */ + bp->b_flags |= B_INVAL | B_NOCACHE; + jblocks = jseg->js_jblocks; + /* + * Don't allow out of order completions. If this isn't the first + * block wait for it to write before we're done. 
+ */ + if (jseg != jblocks->jb_writeseg) + return; + /* Iterate through available jsegs processing their entries. */ + do { + jsegn = TAILQ_NEXT(jseg, js_next); + complete_jseg(jseg); + jseg = jsegn; + } while (jseg && jseg->js_state & DEPCOMPLETE); + jblocks->jb_writeseg = jseg; +} + +static inline struct jsegdep * +inoref_jseg(inoref) + struct inoref *inoref; +{ + struct jsegdep *jsegdep; + + jsegdep = inoref->if_jsegdep; + inoref->if_jsegdep = NULL; + + return (jsegdep); +} + +/* + * Called once a jremref has made it to stable store. The jremref is marked + * complete and we attempt to free it. Any pagedeps writes sleeping waiting + * for the jremref to complete will be awoken by free_jremref. + */ +static void +handle_written_jremref(jremref) + struct jremref *jremref; +{ + struct inodedep *inodedep; + struct jsegdep *jsegdep; + struct dirrem *dirrem; + + /* Grab the jsegdep. */ + jsegdep = inoref_jseg(&jremref->jr_ref); + /* + * Remove us from the inoref list. + */ + if (inodedep_lookup(jremref->jr_list.wk_mp, jremref->jr_ref.if_ino, + 0, &inodedep) == 0) + panic("handle_written_jremref: Lost inodedep"); + TAILQ_REMOVE(&inodedep->id_inoreflst, &jremref->jr_ref, if_deps); + /* + * Complete the dirrem. + */ + dirrem = jremref->jr_dirrem; + jremref->jr_dirrem = NULL; + LIST_REMOVE(jremref, jr_deps); + jsegdep->jd_state |= jremref->jr_state & MKDIR_PARENT; + WORKLIST_INSERT(&dirrem->dm_jwork, &jsegdep->jd_list); + if (LIST_EMPTY(&dirrem->dm_jremrefhd) && + (dirrem->dm_state & COMPLETE) != 0) + add_to_worklist(&dirrem->dm_list, 0); + free_jremref(jremref); +} + +/* + * Called once a jaddref has made it to stable store. The dependency is + * marked complete and any dependent structures are added to the inode + * bufwait list to be completed as soon as it is written. If a bitmap write + * depends on this entry we move the inode into the inodedephd of the + * bmsafemap dependency and attempt to remove the jaddref from the bmsafemap. 
+ */
+static void
+handle_written_jaddref(jaddref)
+	struct jaddref *jaddref;
+{
+	struct jsegdep *jsegdep;
+	struct inodedep *inodedep;
+	struct diradd *diradd;
+	struct mkdir *mkdir;
+
+	/* Grab the jsegdep; inoref_jseg() also clears if_jsegdep. */
+	jsegdep = inoref_jseg(&jaddref->ja_ref);
+	mkdir = NULL;
+	diradd = NULL;
+	if (inodedep_lookup(jaddref->ja_list.wk_mp, jaddref->ja_ino,
+	    0, &inodedep) == 0)
+		panic("handle_written_jaddref: Lost inodedep.");
+	if (jaddref->ja_diradd == NULL)
+		panic("handle_written_jaddref: No dependency");
+	if (jaddref->ja_diradd->da_list.wk_type == D_DIRADD) {
+		diradd = jaddref->ja_diradd;
+		WORKLIST_INSERT(&inodedep->id_bufwait, &diradd->da_list);
+	} else if (jaddref->ja_state & MKDIR_PARENT) {
+		mkdir = jaddref->ja_mkdir;
+		WORKLIST_INSERT(&inodedep->id_bufwait, &mkdir->md_list);
+	} else if (jaddref->ja_state & MKDIR_BODY)
+		mkdir = jaddref->ja_mkdir;	/* Not put on id_bufwait. */
+	else
+		panic("handle_written_jaddref: Unknown dependency %p",
+		    jaddref->ja_diradd);
+	jaddref->ja_diradd = NULL;	/* also clears ja_mkdir */
+	/*
+	 * Remove us from the inode list.
+	 */
+	TAILQ_REMOVE(&inodedep->id_inoreflst, &jaddref->ja_ref, if_deps);
+	/*
+	 * The mkdir may be waiting on the jaddref to clear before freeing.
+	 * In the mkdir cases diradd is reloaded from mkdir->md_diradd so
+	 * that the jsegdep can be queued on its da_jwork list below.
+	 */
+	if (mkdir) {
+		KASSERT(mkdir->md_list.wk_type == D_MKDIR,
+		    ("handle_written_jaddref: Incorrect type for mkdir %s",
+		    TYPENAME(mkdir->md_list.wk_type)));
+		mkdir->md_jaddref = NULL;
+		diradd = mkdir->md_diradd;
+		mkdir->md_state |= DEPCOMPLETE;
+		complete_mkdir(mkdir);
+	}
+	WORKLIST_INSERT(&diradd->da_jwork, &jsegdep->jd_list);
+	if (jaddref->ja_state & NEWBLOCK) {
+		inodedep->id_state |= ONDEPLIST;
+		LIST_INSERT_HEAD(&inodedep->id_bmsafemap->sm_inodedephd,
+		    inodedep, id_deps);
+	}
+	free_jaddref(jaddref);	/* Frees only when ALLCOMPLETE is set. */
+}
+
+/*
+ * Called once a jnewblk journal is written.  The allocdirect or allocindir
+ * is placed in the bmsafemap to await notification of a written bitmap.
+ */
+static void
+handle_written_jnewblk(jnewblk)
+	struct jnewblk *jnewblk;
+{
+	struct bmsafemap *bmsafemap;
+	struct jsegdep *jsegdep;
+	struct newblk *newblk;
+
+	/* Grab the jsegdep and detach it from the jnewblk. */
+	jsegdep = jnewblk->jn_jsegdep;
+	jnewblk->jn_jsegdep = NULL;
+	/*
+	 * Add the written block to the bmsafemap so it can be notified when
+	 * the bitmap is on disk.  The jsegdep moves to the newblk's
+	 * nb_jwork list and the two structures are unlinked from each other.
+	 */
+	newblk = jnewblk->jn_newblk;
+	jnewblk->jn_newblk = NULL;
+	if (newblk == NULL)
+		panic("handle_written_jnewblk: No dependency for the segdep.");
+
+	newblk->nb_jnewblk = NULL;
+	bmsafemap = newblk->nb_bmsafemap;
+	WORKLIST_INSERT(&newblk->nb_jwork, &jsegdep->jd_list);
+	newblk->nb_state |= ONDEPLIST;
+	LIST_INSERT_HEAD(&bmsafemap->sm_newblkhd, newblk, nb_deps);
+	free_jnewblk(jnewblk);
+}
+
+/*
+ * Cancel a jfreefrag that won't be needed, probably due to colliding with
+ * an in-flight allocation that has not yet been committed.  Divorce us
+ * from the freefrag and mark it DEPCOMPLETE so that it may be added
+ * to the worklist.
+ */
+static void
+cancel_jfreefrag(jfreefrag)
+	struct jfreefrag *jfreefrag;
+{
+	struct freefrag *freefrag;
+
+	if (jfreefrag->fr_jsegdep) {
+		free_jsegdep(jfreefrag->fr_jsegdep);
+		jfreefrag->fr_jsegdep = NULL;
+	}
+	freefrag = jfreefrag->fr_freefrag;
+	jfreefrag->fr_freefrag = NULL;	/* free_jfreefrag() panics if set. */
+	freefrag->ff_jfreefrag = NULL;
+	free_jfreefrag(jfreefrag);
+	freefrag->ff_state |= DEPCOMPLETE;
+}
+
+/*
+ * Free a jfreefrag when the parent freefrag is rendered obsolete.
+ *
+ * The item is first taken off whichever list it is on: a plain worklist
+ * removal when IOSTARTED is set, otherwise remove_from_journal() when
+ * ONWORKLIST is set.  fr_freefrag must already be NULL.
+ */
+static void
+free_jfreefrag(jfreefrag)
+	struct jfreefrag *jfreefrag;
+{
+
+	if (jfreefrag->fr_state & IOSTARTED)
+		WORKLIST_REMOVE(&jfreefrag->fr_list);
+	else if (jfreefrag->fr_state & ONWORKLIST)
+		remove_from_journal(&jfreefrag->fr_list);
+	if (jfreefrag->fr_freefrag != NULL)
+		panic("free_jfreefrag: Still attached to a freefrag.");
+	WORKITEM_FREE(jfreefrag, D_JFREEFRAG);
+}
+
+/*
+ * Called when the journal write for a jfreefrag completes.  The parent
+ * freefrag is added to the worklist if this completes its dependencies.
+ */
+static void
+handle_written_jfreefrag(jfreefrag)
+	struct jfreefrag *jfreefrag;
+{
+	struct jsegdep *jsegdep;
+	struct freefrag *freefrag;
+
+	/* Grab the jsegdep and detach it from the jfreefrag. */
+	jsegdep = jfreefrag->fr_jsegdep;
+	jfreefrag->fr_jsegdep = NULL;
+	freefrag = jfreefrag->fr_freefrag;
+	if (freefrag == NULL)
+		panic("handle_written_jfreefrag: No freefrag.");
+	freefrag->ff_state |= DEPCOMPLETE;
+	freefrag->ff_jfreefrag = NULL;
+	WORKLIST_INSERT(&freefrag->ff_jwork, &jsegdep->jd_list);
+	if ((freefrag->ff_state & ALLCOMPLETE) == ALLCOMPLETE)
+		add_to_worklist(&freefrag->ff_list, 0);
+	jfreefrag->fr_freefrag = NULL;	/* Required by free_jfreefrag(). */
+	free_jfreefrag(jfreefrag);
+}
+
+/*
+ * Called when the journal write for a jfreeblk completes.  The jfreeblk
+ * is removed from the freeblks list of pending journal writes and the
+ * jsegdep is moved to the freeblks jwork to be completed when all blocks
+ * have been reclaimed.
+ */
+static void
+handle_written_jfreeblk(jfreeblk)
+	struct jfreeblk *jfreeblk;
+{
+	struct freeblks *freeblks;
+	struct jsegdep *jsegdep;
+
+	/* Grab the jsegdep and detach it from the jfreeblk. */
+	jsegdep = jfreeblk->jf_jsegdep;
+	jfreeblk->jf_jsegdep = NULL;
+	freeblks = jfreeblk->jf_freeblks;
+	LIST_REMOVE(jfreeblk, jf_deps);
+	WORKLIST_INSERT(&freeblks->fb_jwork, &jsegdep->jd_list);
+	/*
+	 * If the freeblks is all journaled, we can add it to the worklist.
+	 * "All journaled" here means fb_jfreeblkhd is empty and every
+	 * ALLCOMPLETE bit is set in fb_state.
+	 */
+	if (LIST_EMPTY(&freeblks->fb_jfreeblkhd) &&
+	    (freeblks->fb_state & ALLCOMPLETE) == ALLCOMPLETE) {
+		/* Remove from the b_dep that is waiting on this write. */
+		if (freeblks->fb_state & ONWORKLIST)
+			WORKLIST_REMOVE(&freeblks->fb_list);
+		add_to_worklist(&freeblks->fb_list, 1);
+	}
+
+	free_jfreeblk(jfreeblk);
+}
+
+static struct jsegdep *
+newjsegdep(struct worklist *wk)	/* wk supplies the mount via wk_mp. */
+{
+	struct jsegdep *jsegdep;
+
+	jsegdep = malloc(sizeof(*jsegdep), M_JSEGDEP, M_SOFTDEP_FLAGS);
+	workitem_alloc(&jsegdep->jd_list, D_JSEGDEP, wk->wk_mp);
+	jsegdep->jd_seg = NULL;	/* Not yet tied to a journal segment. */
+
+	return (jsegdep);
+}
+
+static struct jmvref *
+newjmvref(dp, ino, oldoff, newoff)
+	struct inode *dp;	/* Recorded as jm_parent (its i_number). */
+	ino_t ino;		/* Recorded as jm_ino. */
+	off_t oldoff;		/* Recorded as jm_oldoff. */
+	off_t newoff;		/* Recorded as jm_newoff. */
+{
+	struct jmvref *jmvref;
+
+	jmvref = malloc(sizeof(*jmvref), M_JMVREF, M_SOFTDEP_FLAGS);
+	workitem_alloc(&jmvref->jm_list, D_JMVREF, UFSTOVFS(dp->i_ump));
+	jmvref->jm_list.wk_state = ATTACHED | DEPCOMPLETE;
+	jmvref->jm_parent = dp->i_number;
+	jmvref->jm_ino = ino;
+	jmvref->jm_oldoff = oldoff;
+	jmvref->jm_newoff = newoff;
+
+	return (jmvref);
+}
+
+/*
+ * Allocate a new jremref that tracks the removal of ip from dp with the
+ * directory entry offset of diroff.  Mark the entry as ATTACHED and
+ * DEPCOMPLETE as we have all the information required for the journal write
+ * and the directory entry has already been removed from the buffer.  The
+ * caller is responsible for linking the jremref into the pagedep and adding
+ * it to the journal to write.  The MKDIR_PARENT flag is set if we're doing
+ * a DOTDOT addition so handle_workitem_remove() can properly assign
+ * the jsegdep when we're done.
+ *
+ * NOTE(review): newjremref() itself stores only ATTACHED in jr_state;
+ * confirm where DEPCOMPLETE is expected to be applied.
+ */ +static struct jremref * +newjremref(dirrem, dp, ip, diroff, nlink) + struct dirrem *dirrem; + struct inode *dp; + struct inode *ip; + off_t diroff; + nlink_t nlink; +{ + struct jremref *jremref; + + jremref = malloc(sizeof(*jremref), M_JREMREF, M_SOFTDEP_FLAGS); + workitem_alloc(&jremref->jr_list, D_JREMREF, UFSTOVFS(dp->i_ump)); + jremref->jr_state = ATTACHED; + newinoref(&jremref->jr_ref, ip->i_number, dp->i_number, diroff, + nlink, ip->i_mode); + jremref->jr_dirrem = dirrem; + + return (jremref); +} + +static inline void +newinoref(inoref, ino, parent, diroff, nlink, mode) + struct inoref *inoref; + ino_t ino; + ino_t parent; + off_t diroff; + nlink_t nlink; + uint16_t mode; +{ + + inoref->if_jsegdep = newjsegdep(&inoref->if_list); + inoref->if_diroff = diroff; + inoref->if_ino = ino; + inoref->if_parent = parent; + inoref->if_nlink = nlink; + inoref->if_mode = mode; +} + +/* + * Allocate a new jaddref to track the addition of ino to dp at diroff. The + * directory offset may not be known until later. The caller is responsible + * adding the entry to the journal when this information is available. nlink + * should be the link count prior to the addition and mode is only required + * to have the correct FMT. + */ +static struct jaddref * +newjaddref(dp, ino, diroff, nlink, mode) + struct inode *dp; + ino_t ino; + off_t diroff; + int16_t nlink; + uint16_t mode; +{ + struct jaddref *jaddref; + + jaddref = malloc(sizeof(*jaddref), M_JADDREF, M_SOFTDEP_FLAGS); + workitem_alloc(&jaddref->ja_list, D_JADDREF, UFSTOVFS(dp->i_ump)); + jaddref->ja_state = ATTACHED; + jaddref->ja_mkdir = NULL; + newinoref(&jaddref->ja_ref, ino, dp->i_number, diroff, nlink, mode); + + return (jaddref); +} + +/* + * Create a new free dependency for a freework. The caller is responsible + * for adjusting the reference count when it has the lock held. The freedep + * will track an outstanding bitmap write that will ultimately clear the + * freework to continue. 
+ */ +static struct freedep * +newfreedep(struct freework *freework) +{ + struct freedep *freedep; + + freedep = malloc(sizeof(*freedep), M_FREEDEP, M_SOFTDEP_FLAGS); + workitem_alloc(&freedep->fd_list, D_FREEDEP, freework->fw_list.wk_mp); + freedep->fd_freework = freework; + + return (freedep); +} + +/* + * Free a freedep structure once the buffer it is linked to is written. If + * this is the last reference to the freework schedule it for completion. + */ +static void +free_freedep(freedep) + struct freedep *freedep; +{ + + if (--freedep->fd_freework->fw_ref == 0) + add_to_worklist(&freedep->fd_freework->fw_list, 1); + WORKITEM_FREE(freedep, D_FREEDEP); +} + +/* + * Allocate a new freework structure that may be a level in an indirect + * when parent is not NULL or a top level block when it is. The top level + * freework structures are allocated without lk held and before the freeblks + * is visible outside of softdep_setup_freeblocks(). + */ +static struct freework * +newfreework(freeblks, parent, lbn, nb, frags, journal) + struct freeblks *freeblks; + struct freework *parent; + ufs_lbn_t lbn; + ufs2_daddr_t nb; + int frags; + int journal; +{ + struct freework *freework; + + freework = malloc(sizeof(*freework), M_FREEWORK, M_SOFTDEP_FLAGS); + workitem_alloc(&freework->fw_list, D_FREEWORK, freeblks->fb_list.wk_mp); + freework->fw_freeblks = freeblks; + freework->fw_parent = parent; + freework->fw_lbn = lbn; + freework->fw_blkno = nb; + freework->fw_frags = frags; + freework->fw_ref = 0; + freework->fw_off = 0; + LIST_INIT(&freework->fw_jwork); + + if (parent == NULL) { + WORKLIST_INSERT_UNLOCKED(&freeblks->fb_freeworkhd, + &freework->fw_list); + freeblks->fb_ref++; + } + if (journal) + newjfreeblk(freeblks, lbn, nb, frags); + + return (freework); +} + +/* + * Allocate a new jfreeblk to journal top level block pointer when truncating + * a file. The caller must add this to the worklist when lk is held. 
+ */
+static struct jfreeblk *
+newjfreeblk(freeblks, lbn, blkno, frags)
+	struct freeblks *freeblks;	/* Owner; also supplies mount and inum. */
+	ufs_lbn_t lbn;
+	ufs2_daddr_t blkno;
+	int frags;
+{
+	struct jfreeblk *jfreeblk;
+
+	jfreeblk = malloc(sizeof(*jfreeblk), M_JFREEBLK, M_SOFTDEP_FLAGS);
+	workitem_alloc(&jfreeblk->jf_list, D_JFREEBLK, freeblks->fb_list.wk_mp);
+	jfreeblk->jf_jsegdep = newjsegdep(&jfreeblk->jf_list);
+	jfreeblk->jf_state = ATTACHED | DEPCOMPLETE;
+	jfreeblk->jf_ino = freeblks->fb_previousinum;
+	jfreeblk->jf_lbn = lbn;
+	jfreeblk->jf_blkno = blkno;
+	jfreeblk->jf_frags = frags;
+	jfreeblk->jf_freeblks = freeblks;
+	LIST_INSERT_HEAD(&freeblks->fb_jfreeblkhd, jfreeblk, jf_deps);
+
+	return (jfreeblk);
+}
+
+static void move_newblock_dep(struct jaddref *, struct inodedep *);
+/*
+ * If we're canceling a new bitmap we have to search for another ref
+ * to move into the bmsafemap dep.  This might be better expressed
+ * with another structure.
+ *
+ * The first D_JADDREF that follows jaddref on the inoref list inherits
+ * jaddref's ATTACHED, UNDONE and NEWBLOCK bits and takes its place on the
+ * bmsafemap's sm_jaddrefhd list; jaddref is left with only ATTACHED of
+ * those three.  Nothing is changed when no such entry is found.
+ */
+static void
+move_newblock_dep(jaddref, inodedep)
+	struct jaddref *jaddref;
+	struct inodedep *inodedep;
+{
+	struct inoref *inoref;
+	struct jaddref *jaddrefn;
+
+	jaddrefn = NULL;
+	for (inoref = TAILQ_NEXT(&jaddref->ja_ref, if_deps); inoref;
+	    inoref = TAILQ_NEXT(inoref, if_deps)) {
+		if ((jaddref->ja_state & NEWBLOCK) &&
+		    inoref->if_list.wk_type == D_JADDREF) {
+			jaddrefn = (struct jaddref *)inoref;
+			break;
+		}
+	}
+	if (jaddrefn == NULL)
+		return;
+	jaddrefn->ja_state &= ~(ATTACHED | UNDONE);
+	jaddrefn->ja_state |= jaddref->ja_state &
+	    (ATTACHED | UNDONE | NEWBLOCK);
+	jaddref->ja_state &= ~(ATTACHED | UNDONE | NEWBLOCK);
+	jaddref->ja_state |= ATTACHED;
+	LIST_REMOVE(jaddref, ja_bmdeps);
+	LIST_INSERT_HEAD(&inodedep->id_bmsafemap->sm_jaddrefhd, jaddrefn,
+	    ja_bmdeps);
+}
+
+/*
+ * Cancel a jaddref either before it has been written or while it is being
+ * written.  This happens when a link is removed before the add reaches
+ * the disk.  The jaddref dependency is kept linked into the bmsafemap
+ * and inode to prevent the link count or bitmap from reaching the disk
+ * until handle_workitem_remove() re-adjusts the counts and bitmaps as
+ * required.
+ *
+ * inodedep may be passed as NULL, in which case it is looked up from the
+ * jaddref.  wkhd receives the jsegdep when the journal write has already
+ * started, and the jaddref itself when it carries NEWBLOCK.
+ *
+ * Returns 1 if the canceled addref requires journaling of the remove and
+ * 0 otherwise.
+ */
+static int
+cancel_jaddref(jaddref, inodedep, wkhd)
+	struct jaddref *jaddref;
+	struct inodedep *inodedep;
+	struct workhead *wkhd;
+{
+	struct inoref *inoref;
+	struct jsegdep *jsegdep;
+	int needsj;
+
+	KASSERT((jaddref->ja_state & COMPLETE) == 0,
+	    ("cancel_jaddref: Canceling complete jaddref"));
+	if (jaddref->ja_state & (IOSTARTED | COMPLETE))
+		needsj = 1;
+	else
+		needsj = 0;
+	if (inodedep == NULL)
+		if (inodedep_lookup(jaddref->ja_list.wk_mp, jaddref->ja_ino,
+		    0, &inodedep) == 0)
+			panic("cancel_jaddref: Lost inodedep");
+	/*
+	 * We must adjust the nlink of any reference operation that follows
+	 * us so that it is consistent with the in-memory reference.  This
+	 * ensures that inode nlink rollbacks always have the correct link.
+	 * This is only done when the add will not be journaled (needsj == 0).
+	 */
+	if (needsj == 0)
+		for (inoref = TAILQ_NEXT(&jaddref->ja_ref, if_deps); inoref;
+		    inoref = TAILQ_NEXT(inoref, if_deps))
+			inoref->if_nlink--;
+	jsegdep = inoref_jseg(&jaddref->ja_ref);
+	if (jaddref->ja_state & NEWBLOCK)
+		move_newblock_dep(jaddref, inodedep);
+	if (jaddref->ja_state & IOWAITING) {
+		jaddref->ja_state &= ~IOWAITING;
+		wakeup(&jaddref->ja_list);
+	}
+	jaddref->ja_mkdir = NULL;
+	if (jaddref->ja_state & IOSTARTED) {
+		/* Write in flight: keep the jsegdep, parked on wkhd. */
+		jaddref->ja_state &= ~IOSTARTED;
+		WORKLIST_REMOVE(&jaddref->ja_list);
+		WORKLIST_INSERT(wkhd, &jsegdep->jd_list);
+	} else {
+		free_jsegdep(jsegdep);
+		if (jaddref->ja_state & DEPCOMPLETE)
+			remove_from_journal(&jaddref->ja_list);
+	}
+	/*
+	 * Leave NEWBLOCK jaddrefs on the inodedep so handle_workitem_remove
+	 * can arrange for them to be freed with the bitmap.  Otherwise we
+	 * no longer need this addref attached to the inoreflst and it
+	 * will incorrectly adjust nlink if we leave it.
+	 */
+	if ((jaddref->ja_state & NEWBLOCK) == 0) {
+		TAILQ_REMOVE(&inodedep->id_inoreflst, &jaddref->ja_ref,
+		    if_deps);
+		jaddref->ja_state |= COMPLETE;
+		free_jaddref(jaddref);
+		return (needsj);
+	}
+	jaddref->ja_state |= GOINGAWAY;
+	/*
+	 * Leave the head of the list for jsegdeps for fast merging.
+	 * On a non-empty list the jaddref is therefore linked in second.
+	 */
+	if (LIST_FIRST(wkhd) != NULL) {
+		jaddref->ja_state |= ONWORKLIST;
+		LIST_INSERT_AFTER(LIST_FIRST(wkhd), &jaddref->ja_list, wk_list);
+	} else
+		WORKLIST_INSERT(wkhd, &jaddref->ja_list);
+
+	return (needsj);
+}
+
+/*
+ * Attempt to free a jaddref structure when some work completes.  This
+ * should only succeed once the entry is written and all dependencies have
+ * been notified.
+ *
+ * Returns without doing anything unless every ALLCOMPLETE bit is set.
+ * Panics if a jsegdep is still attached, if IOSTARTED or ONWORKLIST is
+ * still set, or if ja_mkdir is still non-NULL.
+ */
+static void
+free_jaddref(jaddref)
+	struct jaddref *jaddref;
+{
+
+	if ((jaddref->ja_state & ALLCOMPLETE) != ALLCOMPLETE)
+		return;
+	if (jaddref->ja_ref.if_jsegdep)
+		panic("free_jaddref: segdep attached to jaddref %p(0x%X)\n",
+		    jaddref, jaddref->ja_state);
+	if (jaddref->ja_state & NEWBLOCK)
+		LIST_REMOVE(jaddref, ja_bmdeps);
+	if (jaddref->ja_state & (IOSTARTED | ONWORKLIST))
+		panic("free_jaddref: Bad state %p(0x%X)",
+		    jaddref, jaddref->ja_state);
+	if (jaddref->ja_mkdir != NULL)
+		panic("free_jaddref: Work pending, 0x%X\n", jaddref->ja_state);
+	WORKITEM_FREE(jaddref, D_JADDREF);
+}
+
+/*
+ * Free a jremref structure once it has been written or discarded.
+ * Any jsegdep still attached to the embedded inoref is released first.
+ */
+static void
+free_jremref(jremref)
+	struct jremref *jremref;
+{
+
+	if (jremref->jr_ref.if_jsegdep)
+		free_jsegdep(jremref->jr_ref.if_jsegdep);
+	if (jremref->jr_state & IOSTARTED)
+		panic("free_jremref: IO still pending");
+	WORKITEM_FREE(jremref, D_JREMREF);
+}
+
+/*
+ * Free a jnewblk structure.
+ */
+static void
+free_jnewblk(jnewblk)
+	struct jnewblk *jnewblk;
+{
+
+	/* Nothing is freed until every ALLCOMPLETE bit is set. */
+	if ((jnewblk->jn_state & ALLCOMPLETE) != ALLCOMPLETE)
+		return;
+	LIST_REMOVE(jnewblk, jn_deps);
+	if (jnewblk->jn_newblk != NULL)
+		panic("free_jnewblk: Dependency still attached.");
+	WORKITEM_FREE(jnewblk, D_JNEWBLK);
+}
+
+/*
+ * Cancel a jnewblk which has been superseded by a freeblk.  The jnewblk
+ * is kept linked into the bmsafemap until the free completes, thus
+ * preventing the modified state from ever reaching disk.  The free
+ * routine must pass this structure via ffs_blkfree() to
+ * softdep_setup_freeblks() so there is no race in releasing the space.
+ *
+ * The jsegdep is released here, the jnewblk is marked GOINGAWAY and moved
+ * from its current list onto wkhd, and any thread that set IOWAITING on
+ * it is woken.
+ */
+static void
+cancel_jnewblk(jnewblk, wkhd)
+	struct jnewblk *jnewblk;
+	struct workhead *wkhd;
+{
+	struct jsegdep *jsegdep;
+
+	jsegdep = jnewblk->jn_jsegdep;
+	jnewblk->jn_jsegdep = NULL;
+	free_jsegdep(jsegdep);
+	jnewblk->jn_newblk = NULL;
+	jnewblk->jn_state |= GOINGAWAY;
+	if (jnewblk->jn_state & IOSTARTED) {
+		jnewblk->jn_state &= ~IOSTARTED;
+		WORKLIST_REMOVE(&jnewblk->jn_list);
+	} else
+		remove_from_journal(&jnewblk->jn_list);
+	/*
+	 * Leave the head of the list for jsegdeps for fast merging.
+	 * On a non-empty list the jnewblk is therefore linked in second.
+	 */
+	if (LIST_FIRST(wkhd) != NULL) {
+		jnewblk->jn_state |= ONWORKLIST;
+		LIST_INSERT_AFTER(LIST_FIRST(wkhd), &jnewblk->jn_list, wk_list);
+	} else
+		WORKLIST_INSERT(wkhd, &jnewblk->jn_list);
+	if (jnewblk->jn_state & IOWAITING) {
+		jnewblk->jn_state &= ~IOWAITING;
+		wakeup(&jnewblk->jn_list);
+	}
+}
+
+static void
+free_jfreeblk(jfreeblk)
+	struct jfreeblk *jfreeblk;	/* Released unconditionally. */
+{
+
+	WORKITEM_FREE(jfreeblk, D_JFREEBLK);
+}
+
+/*
+ * Release one reference to a jseg and free it if the count reaches 0.  This
+ * should eventually reclaim journal space as well.
+ */ +static void +free_jseg(jseg) + struct jseg *jseg; +{ + struct jblocks *jblocks; + + KASSERT(jseg->js_refs > 0, + ("free_jseg: Invalid refcnt %d", jseg->js_refs)); + if (--jseg->js_refs != 0) + return; + /* + * Free only those jsegs which have none allocated before them to + * preserve the journal space ordering. + */ + jblocks = jseg->js_jblocks; + while ((jseg = TAILQ_FIRST(&jblocks->jb_segs)) != NULL) { + jblocks->jb_oldestseq = jseg->js_seq; + if (jseg->js_refs != 0) + break; + TAILQ_REMOVE(&jblocks->jb_segs, jseg, js_next); + jblocks_free(jblocks, jseg->js_list.wk_mp, jseg->js_size); + KASSERT(LIST_EMPTY(&jseg->js_entries), + ("free_jseg: Freed jseg has valid entries.")); + WORKITEM_FREE(jseg, D_JSEG); + } +} + +/* + * Release a jsegdep and decrement the jseg count. + */ +static void +free_jsegdep(jsegdep) + struct jsegdep *jsegdep; +{ + + if (jsegdep->jd_seg) + free_jseg(jsegdep->jd_seg); + WORKITEM_FREE(jsegdep, D_JSEGDEP); +} + +/* + * Wait for a journal item to make it to disk. Initiate journal processing + * if required. + */ +static void +jwait(wk) + struct worklist *wk; +{ + + stat_journal_wait++; + /* + * If IO has not started we process the journal. We can't mark the + * worklist item as IOWAITING because we drop the lock while + * processing the journal and the worklist entry may be freed after + * this point. The caller may call back in and re-issue the request. + */ + if ((wk->wk_state & IOSTARTED) == 0) { + softdep_process_journal(wk->wk_mp, MNT_WAIT); + return; + } + wk->wk_state |= IOWAITING; + msleep(wk, &lk, PRIBIO, "jwait", 0); +} + +/* + * Lookup an inodedep based on an inode pointer and set the nlinkdelta as + * appropriate. This is a convenience function to reduce duplicate code + * for the setup and revert functions below. 
+ */ +static struct inodedep * +inodedep_lookup_ip(ip) + struct inode *ip; +{ + struct inodedep *inodedep; + + KASSERT(ip->i_nlink >= ip->i_effnlink, + ("inodedep_lookup_ip: bad delta")); + (void) inodedep_lookup(UFSTOVFS(ip->i_ump), ip->i_number, + DEPALLOC, &inodedep); + inodedep->id_nlinkdelta = ip->i_nlink - ip->i_effnlink; + + return (inodedep); +} + +/* + * Create a journal entry that describes a truncate that we're about to + * perform. The inode allocations and frees between here and the completion + * of the operation are done asynchronously and without journaling. At + * the end of the operation the vnode is sync'd and the journal space + * is released. Recovery will discover the partially completed truncate + * and complete it. + */ +void * +softdep_setup_trunc(vp, length, flags) + struct vnode *vp; + off_t length; + int flags; +{ + struct jsegdep *jsegdep; + struct jtrunc *jtrunc; + struct ufsmount *ump; + struct inode *ip; + + softdep_prealloc(vp, MNT_WAIT); + ip = VTOI(vp); + ump = VFSTOUFS(vp->v_mount); + jtrunc = malloc(sizeof(*jtrunc), M_JTRUNC, M_SOFTDEP_FLAGS); + workitem_alloc(&jtrunc->jt_list, D_JTRUNC, vp->v_mount); + jsegdep = jtrunc->jt_jsegdep = newjsegdep(&jtrunc->jt_list); + jtrunc->jt_ino = ip->i_number; + jtrunc->jt_extsize = 0; + jtrunc->jt_size = length; + if ((flags & IO_EXT) == 0 && ump->um_fstype == UFS2) + jtrunc->jt_extsize = ip->i_din2->di_extsize; + if ((flags & IO_NORMAL) == 0) + jtrunc->jt_size = DIP(ip, i_size); + ACQUIRE_LOCK(&lk); + add_to_journal(&jtrunc->jt_list); + while (jsegdep->jd_seg == NULL) { + stat_jwait_freeblks++; + jwait(&jtrunc->jt_list); + } + FREE_LOCK(&lk); + + return (jsegdep); +} + +/* + * After synchronous truncation is complete we free sync the vnode and + * release the jsegdep so the journal space can be freed. 
+ */ +int +softdep_complete_trunc(vp, cookie) + struct vnode *vp; + void *cookie; +{ + int error; + + error = ffs_syncvnode(vp, MNT_WAIT); + ACQUIRE_LOCK(&lk); + free_jsegdep((struct jsegdep *)cookie); + FREE_LOCK(&lk); + + return (error); +} + +/* + * Called prior to creating a new inode and linking it to a directory. The + * jaddref structure must already be allocated by softdep_setup_inomapdep + * and it is discovered here so we can initialize the mode and update + * nlinkdelta. + */ +void +softdep_setup_create(dp, ip) + struct inode *dp; + struct inode *ip; +{ + struct inodedep *inodedep; + struct jaddref *jaddref; + struct vnode *dvp; + + KASSERT(ip->i_nlink == 1, + ("softdep_setup_create: Invalid link count.")); + dvp = ITOV(dp); + ACQUIRE_LOCK(&lk); + inodedep = inodedep_lookup_ip(ip); + if (DOINGSUJ(dvp)) { + jaddref = (struct jaddref *)TAILQ_LAST(&inodedep->id_inoreflst, + inoreflst); + KASSERT(jaddref != NULL && jaddref->ja_parent == dp->i_number, + ("softdep_setup_create: No addref structure present.")); + jaddref->ja_mode = ip->i_mode; + } + softdep_prelink(dvp, NULL); + FREE_LOCK(&lk); +} + +/* + * Create a jaddref structure to track the addition of a DOTDOT link when + * we are reparenting an inode as part of a rename. This jaddref will be + * found by softdep_setup_directory_change. Adjusts nlinkdelta for + * non-journaling softdep. + */ +void +softdep_setup_dotdot_link(dp, ip) + struct inode *dp; + struct inode *ip; +{ + struct inodedep *inodedep; + struct jaddref *jaddref; + struct vnode *dvp; + struct vnode *vp; + + dvp = ITOV(dp); + vp = ITOV(ip); + jaddref = NULL; + /* + * We don't set MKDIR_PARENT as this is not tied to a mkdir and + * is used as a normal link would be. 
+ */ + if (DOINGSUJ(dvp)) + jaddref = newjaddref(ip, dp->i_number, DOTDOT_OFFSET, + dp->i_effnlink - 1, dp->i_mode); + ACQUIRE_LOCK(&lk); + inodedep = inodedep_lookup_ip(dp); + if (jaddref) + TAILQ_INSERT_TAIL(&inodedep->id_inoreflst, &jaddref->ja_ref, + if_deps); + softdep_prelink(dvp, ITOV(ip)); + FREE_LOCK(&lk); +} + +/* + * Create a jaddref structure to track a new link to an inode. The directory + * offset is not known until softdep_setup_directory_add or + * softdep_setup_directory_change. Adjusts nlinkdelta for non-journaling + * softdep. + */ +void +softdep_setup_link(dp, ip) + struct inode *dp; + struct inode *ip; +{ + struct inodedep *inodedep; + struct jaddref *jaddref; + struct vnode *dvp; + + dvp = ITOV(dp); + jaddref = NULL; + if (DOINGSUJ(dvp)) + jaddref = newjaddref(dp, ip->i_number, 0, ip->i_effnlink - 1, + ip->i_mode); + ACQUIRE_LOCK(&lk); + inodedep = inodedep_lookup_ip(ip); + if (jaddref) + TAILQ_INSERT_TAIL(&inodedep->id_inoreflst, &jaddref->ja_ref, + if_deps); + softdep_prelink(dvp, ITOV(ip)); + FREE_LOCK(&lk); +} + +/* + * Called to create the jaddref structures to track . and .. references as + * well as lookup and further initialize the incomplete jaddref created + * by softdep_setup_inomapdep when the inode was allocated. Adjusts + * nlinkdelta for non-journaling softdep. 
+ */ +void +softdep_setup_mkdir(dp, ip) + struct inode *dp; + struct inode *ip; +{ + struct inodedep *inodedep; + struct jaddref *dotdotaddref; + struct jaddref *dotaddref; + struct jaddref *jaddref; + struct vnode *dvp; + + dvp = ITOV(dp); + dotaddref = dotdotaddref = NULL; + if (DOINGSUJ(dvp)) { + dotaddref = newjaddref(ip, ip->i_number, DOT_OFFSET, 1, + ip->i_mode); + dotaddref->ja_state |= MKDIR_BODY; + dotdotaddref = newjaddref(ip, dp->i_number, DOTDOT_OFFSET, + dp->i_effnlink - 1, dp->i_mode); + dotdotaddref->ja_state |= MKDIR_PARENT; + } + ACQUIRE_LOCK(&lk); + inodedep = inodedep_lookup_ip(ip); + if (DOINGSUJ(dvp)) { + jaddref = (struct jaddref *)TAILQ_LAST(&inodedep->id_inoreflst, + inoreflst); + KASSERT(jaddref != NULL, + ("softdep_setup_mkdir: No addref structure present.")); + KASSERT(jaddref->ja_parent == dp->i_number, + ("softdep_setup_mkdir: bad parent %d", + jaddref->ja_parent)); + jaddref->ja_mode = ip->i_mode; + TAILQ_INSERT_BEFORE(&jaddref->ja_ref, &dotaddref->ja_ref, + if_deps); + } + inodedep = inodedep_lookup_ip(dp); + if (DOINGSUJ(dvp)) + TAILQ_INSERT_TAIL(&inodedep->id_inoreflst, + &dotdotaddref->ja_ref, if_deps); + softdep_prelink(ITOV(dp), NULL); + FREE_LOCK(&lk); +} + +/* + * Called to track nlinkdelta of the inode and parent directories prior to + * unlinking a directory. + */ +void +softdep_setup_rmdir(dp, ip) + struct inode *dp; + struct inode *ip; +{ + struct vnode *dvp; + + dvp = ITOV(dp); + ACQUIRE_LOCK(&lk); + (void) inodedep_lookup_ip(ip); + (void) inodedep_lookup_ip(dp); + softdep_prelink(dvp, ITOV(ip)); + FREE_LOCK(&lk); +} + +/* + * Called to track nlinkdelta of the inode and parent directories prior to + * unlink. 
+ */ +void +softdep_setup_unlink(dp, ip) + struct inode *dp; + struct inode *ip; +{ + struct vnode *dvp; + + dvp = ITOV(dp); + ACQUIRE_LOCK(&lk); + (void) inodedep_lookup_ip(ip); + (void) inodedep_lookup_ip(dp); + softdep_prelink(dvp, ITOV(ip)); + FREE_LOCK(&lk); +} + +/* + * Called to release the journal structures created by a failed non-directory + * creation. Adjusts nlinkdelta for non-journaling softdep. + */ +void +softdep_revert_create(dp, ip) + struct inode *dp; + struct inode *ip; +{ + struct inodedep *inodedep; + struct jaddref *jaddref; + struct vnode *dvp; + + dvp = ITOV(dp); + ACQUIRE_LOCK(&lk); + inodedep = inodedep_lookup_ip(ip); + if (DOINGSUJ(dvp)) { + jaddref = (struct jaddref *)TAILQ_LAST(&inodedep->id_inoreflst, + inoreflst); + KASSERT(jaddref->ja_parent == dp->i_number, + ("softdep_revert_create: addref parent mismatch")); + cancel_jaddref(jaddref, inodedep, &inodedep->id_inowait); + } + FREE_LOCK(&lk); +} + +/* + * Called to release the journal structures created by a failed dotdot link + * creation. Adjusts nlinkdelta for non-journaling softdep. + */ +void +softdep_revert_dotdot_link(dp, ip) + struct inode *dp; + struct inode *ip; +{ + struct inodedep *inodedep; + struct jaddref *jaddref; + struct vnode *dvp; + + dvp = ITOV(dp); + ACQUIRE_LOCK(&lk); + inodedep = inodedep_lookup_ip(dp); + if (DOINGSUJ(dvp)) { + jaddref = (struct jaddref *)TAILQ_LAST(&inodedep->id_inoreflst, + inoreflst); + KASSERT(jaddref->ja_parent == ip->i_number, + ("softdep_revert_dotdot_link: addref parent mismatch")); + cancel_jaddref(jaddref, inodedep, &inodedep->id_inowait); + } + FREE_LOCK(&lk); +} + +/* + * Called to release the journal structures created by a failed link + * addition. Adjusts nlinkdelta for non-journaling softdep. 
+ */ +void +softdep_revert_link(dp, ip) + struct inode *dp; + struct inode *ip; +{ + struct inodedep *inodedep; + struct jaddref *jaddref; + struct vnode *dvp; + + dvp = ITOV(dp); + ACQUIRE_LOCK(&lk); + inodedep = inodedep_lookup_ip(ip); + if (DOINGSUJ(dvp)) { + jaddref = (struct jaddref *)TAILQ_LAST(&inodedep->id_inoreflst, + inoreflst); + KASSERT(jaddref->ja_parent == dp->i_number, + ("softdep_revert_link: addref parent mismatch")); + cancel_jaddref(jaddref, inodedep, &inodedep->id_inowait); + } + FREE_LOCK(&lk); +} + +/* + * Called to release the journal structures created by a failed mkdir + * attempt. Adjusts nlinkdelta for non-journaling softdep. + */ +void +softdep_revert_mkdir(dp, ip) + struct inode *dp; + struct inode *ip; +{ + struct inodedep *inodedep; + struct jaddref *jaddref; + struct vnode *dvp; + + dvp = ITOV(dp); + + ACQUIRE_LOCK(&lk); + inodedep = inodedep_lookup_ip(dp); + if (DOINGSUJ(dvp)) { + jaddref = (struct jaddref *)TAILQ_LAST(&inodedep->id_inoreflst, + inoreflst); + KASSERT(jaddref->ja_parent == ip->i_number, + ("softdep_revert_mkdir: dotdot addref parent mismatch")); + cancel_jaddref(jaddref, inodedep, &inodedep->id_inowait); + } + inodedep = inodedep_lookup_ip(ip); + if (DOINGSUJ(dvp)) { + jaddref = (struct jaddref *)TAILQ_LAST(&inodedep->id_inoreflst, + inoreflst); + KASSERT(jaddref->ja_parent == dp->i_number, + ("softdep_revert_mkdir: addref parent mismatch")); + cancel_jaddref(jaddref, inodedep, &inodedep->id_inowait); + jaddref = (struct jaddref *)TAILQ_LAST(&inodedep->id_inoreflst, + inoreflst); + KASSERT(jaddref->ja_parent == ip->i_number, + ("softdep_revert_mkdir: dot addref parent mismatch")); + cancel_jaddref(jaddref, inodedep, &inodedep->id_inowait); + } + FREE_LOCK(&lk); +} + +/* + * Called to correct nlinkdelta after a failed rmdir. 
+ */ +void +softdep_revert_rmdir(dp, ip) + struct inode *dp; + struct inode *ip; +{ + + ACQUIRE_LOCK(&lk); + (void) inodedep_lookup_ip(ip); + (void) inodedep_lookup_ip(dp); + FREE_LOCK(&lk); +} + /* * Protecting the freemaps (or bitmaps). * @@ -1536,6 +4144,22 @@ softdep_setup_inomapdep(bp, ip, newinum) { struct inodedep *inodedep; struct bmsafemap *bmsafemap; + struct jaddref *jaddref; + struct mount *mp; + struct fs *fs; + + mp = UFSTOVFS(ip->i_ump); + fs = ip->i_ump->um_fs; + jaddref = NULL; + + /* + * Allocate the journal reference add structure so that the bitmap + * can be dependent on it. + */ + if (mp->mnt_kern_flag & MNTK_SUJ) { + jaddref = newjaddref(ip, newinum, 0, 0, 0); + jaddref->ja_state |= NEWBLOCK; + } /* * Create a dependency for the newly allocated inode. @@ -1544,14 +4168,20 @@ softdep_setup_inomapdep(bp, ip, newinum) * the cylinder group map from which it was allocated. */ ACQUIRE_LOCK(&lk); - if ((inodedep_lookup(UFSTOVFS(ip->i_ump), newinum, DEPALLOC|NODELAY, - &inodedep))) - panic("softdep_setup_inomapdep: dependency for new inode " - "already exists"); - inodedep->id_buf = bp; + if ((inodedep_lookup(mp, newinum, DEPALLOC|NODELAY, &inodedep))) + panic("softdep_setup_inomapdep: dependency %p for new " + "inode already exists", inodedep); + bmsafemap = bmsafemap_lookup(mp, bp, ino_to_cg(fs, newinum)); + if (jaddref) { + LIST_INSERT_HEAD(&bmsafemap->sm_jaddrefhd, jaddref, ja_bmdeps); + TAILQ_INSERT_TAIL(&inodedep->id_inoreflst, &jaddref->ja_ref, + if_deps); + } else { + inodedep->id_state |= ONDEPLIST; + LIST_INSERT_HEAD(&bmsafemap->sm_inodedephd, inodedep, id_deps); + } + inodedep->id_bmsafemap = bmsafemap; inodedep->id_state &= ~DEPCOMPLETE; - bmsafemap = bmsafemap_lookup(inodedep->id_list.wk_mp, bp); - LIST_INSERT_HEAD(&bmsafemap->sm_inodedephd, inodedep, id_deps); FREE_LOCK(&lk); } @@ -1560,29 +4190,98 @@ softdep_setup_inomapdep(bp, ip, newinum) * allocate block or fragment.
*/ void -softdep_setup_blkmapdep(bp, mp, newblkno) +softdep_setup_blkmapdep(bp, mp, newblkno, frags, oldfrags) struct buf *bp; /* buffer for cylgroup block with block map */ struct mount *mp; /* filesystem doing allocation */ ufs2_daddr_t newblkno; /* number of newly allocated block */ + int frags; /* Number of fragments. */ + int oldfrags; /* Previous number of fragments for extend. */ { struct newblk *newblk; struct bmsafemap *bmsafemap; + struct jnewblk *jnewblk; struct fs *fs; fs = VFSTOUFS(mp)->um_fs; + jnewblk = NULL; /* * Create a dependency for the newly allocated block. * Add it to the dependency list for the buffer holding * the cylinder group map from which it was allocated. */ + if (mp->mnt_kern_flag & MNTK_SUJ) { + jnewblk = malloc(sizeof(*jnewblk), M_JNEWBLK, M_SOFTDEP_FLAGS); + workitem_alloc(&jnewblk->jn_list, D_JNEWBLK, mp); + jnewblk->jn_jsegdep = newjsegdep(&jnewblk->jn_list); + jnewblk->jn_state = ATTACHED; + jnewblk->jn_blkno = newblkno; + jnewblk->jn_frags = frags; + jnewblk->jn_oldfrags = oldfrags; +#ifdef SUJ_DEBUG + { + struct cg *cgp; + uint8_t *blksfree; + long bno; + int i; + + cgp = (struct cg *)bp->b_data; + blksfree = cg_blksfree(cgp); + bno = dtogd(fs, jnewblk->jn_blkno); + for (i = jnewblk->jn_oldfrags; i < jnewblk->jn_frags; + i++) { + if (isset(blksfree, bno + i)) + panic("softdep_setup_blkmapdep: " + "free fragment %d from %d-%d " + "state 0x%X dep %p", i, + jnewblk->jn_oldfrags, + jnewblk->jn_frags, + jnewblk->jn_state, + jnewblk->jn_newblk); + } + } +#endif + } ACQUIRE_LOCK(&lk); - if (newblk_lookup(fs, newblkno, DEPALLOC, &newblk) != 0) + if (newblk_lookup(mp, newblkno, DEPALLOC, &newblk) != 0) panic("softdep_setup_blkmapdep: found block"); - newblk->nb_bmsafemap = bmsafemap = bmsafemap_lookup(mp, bp); - LIST_INSERT_HEAD(&bmsafemap->sm_newblkhd, newblk, nb_deps); + newblk->nb_bmsafemap = bmsafemap = bmsafemap_lookup(mp, bp, + dtog(fs, newblkno)); + if (jnewblk) { + jnewblk->jn_newblk = newblk; + 
LIST_INSERT_HEAD(&bmsafemap->sm_jnewblkhd, jnewblk, jn_deps); + } else { + newblk->nb_state |= ONDEPLIST; + LIST_INSERT_HEAD(&bmsafemap->sm_newblkhd, newblk, nb_deps); + } + newblk->nb_bmsafemap = bmsafemap; + newblk->nb_jnewblk = jnewblk; FREE_LOCK(&lk); } +#define BMSAFEMAP_HASH(fs, cg) \ + (&bmsafemap_hashtbl[((((register_t)(fs)) >> 13) + (cg)) & bmsafemap_hash]) + +static int +bmsafemap_find(bmsafemaphd, mp, cg, bmsafemapp) + struct bmsafemap_hashhead *bmsafemaphd; + struct mount *mp; + int cg; + struct bmsafemap **bmsafemapp; +{ + struct bmsafemap *bmsafemap; + + LIST_FOREACH(bmsafemap, bmsafemaphd, sm_hash) + if (bmsafemap->sm_list.wk_mp == mp && bmsafemap->sm_cg == cg) + break; + if (bmsafemap) { + *bmsafemapp = bmsafemap; + return (1); + } + *bmsafemapp = NULL; + + return (0); +} + /* * Find the bmsafemap associated with a cylinder group buffer. * If none exists, create one. The buffer must be locked when @@ -1590,27 +4289,43 @@ softdep_setup_blkmapdep(bp, mp, newblkno) * splbio interrupts blocked. 
*/ static struct bmsafemap * -bmsafemap_lookup(mp, bp) +bmsafemap_lookup(mp, bp, cg) struct mount *mp; struct buf *bp; + int cg; { - struct bmsafemap *bmsafemap; + struct bmsafemap_hashhead *bmsafemaphd; + struct bmsafemap *bmsafemap, *collision; struct worklist *wk; + struct fs *fs; mtx_assert(&lk, MA_OWNED); - LIST_FOREACH(wk, &bp->b_dep, wk_list) - if (wk->wk_type == D_BMSAFEMAP) - return (WK_BMSAFEMAP(wk)); + if (bp) + LIST_FOREACH(wk, &bp->b_dep, wk_list) + if (wk->wk_type == D_BMSAFEMAP) + return (WK_BMSAFEMAP(wk)); + fs = VFSTOUFS(mp)->um_fs; + bmsafemaphd = BMSAFEMAP_HASH(fs, cg); + if (bmsafemap_find(bmsafemaphd, mp, cg, &bmsafemap) == 1) + return (bmsafemap); FREE_LOCK(&lk); bmsafemap = malloc(sizeof(struct bmsafemap), M_BMSAFEMAP, M_SOFTDEP_FLAGS); workitem_alloc(&bmsafemap->sm_list, D_BMSAFEMAP, mp); bmsafemap->sm_buf = bp; - LIST_INIT(&bmsafemap->sm_allocdirecthd); - LIST_INIT(&bmsafemap->sm_allocindirhd); LIST_INIT(&bmsafemap->sm_inodedephd); + LIST_INIT(&bmsafemap->sm_inodedepwr); LIST_INIT(&bmsafemap->sm_newblkhd); + LIST_INIT(&bmsafemap->sm_newblkwr); + LIST_INIT(&bmsafemap->sm_jaddrefhd); + LIST_INIT(&bmsafemap->sm_jnewblkhd); ACQUIRE_LOCK(&lk); + if (bmsafemap_find(bmsafemaphd, mp, cg, &collision) == 1) { + WORKITEM_FREE(bmsafemap, D_BMSAFEMAP); + return (collision); + } + bmsafemap->sm_cg = cg; + LIST_INSERT_HEAD(bmsafemaphd, bmsafemap, sm_hash); WORKLIST_INSERT(&bp->b_dep, &bmsafemap->sm_list); return (bmsafemap); } @@ -1645,9 +4360,9 @@ bmsafemap_lookup(mp, bp) * unreferenced fragments. 
*/ void -softdep_setup_allocdirect(ip, lbn, newblkno, oldblkno, newsize, oldsize, bp) +softdep_setup_allocdirect(ip, off, newblkno, oldblkno, newsize, oldsize, bp) struct inode *ip; /* inode to which block is being added */ - ufs_lbn_t lbn; /* block pointer within inode */ + ufs_lbn_t off; /* block pointer within inode */ ufs2_daddr_t newblkno; /* disk block number being added */ ufs2_daddr_t oldblkno; /* previous block number, 0 unless frag */ long newsize; /* size of new block */ @@ -1656,34 +4371,33 @@ softdep_setup_allocdirect(ip, lbn, newblkno, oldblkno, newsize, oldsize, bp) { struct allocdirect *adp, *oldadp; struct allocdirectlst *adphead; - struct bmsafemap *bmsafemap; + struct freefrag *freefrag; struct inodedep *inodedep; struct pagedep *pagedep; + struct jnewblk *jnewblk; struct newblk *newblk; struct mount *mp; + ufs_lbn_t lbn; + lbn = bp->b_lblkno; mp = UFSTOVFS(ip->i_ump); - adp = malloc(sizeof(struct allocdirect), - M_ALLOCDIRECT, M_SOFTDEP_FLAGS|M_ZERO); - workitem_alloc(&adp->ad_list, D_ALLOCDIRECT, mp); - adp->ad_lbn = lbn; - adp->ad_newblkno = newblkno; - adp->ad_oldblkno = oldblkno; - adp->ad_newsize = newsize; - adp->ad_oldsize = oldsize; - adp->ad_state = ATTACHED; - LIST_INIT(&adp->ad_newdirblk); - if (newblkno == oldblkno) - adp->ad_freefrag = NULL; + if (oldblkno && oldblkno != newblkno) + freefrag = newfreefrag(ip, oldblkno, oldsize, lbn); else - adp->ad_freefrag = newfreefrag(ip, oldblkno, oldsize); + freefrag = NULL; ACQUIRE_LOCK(&lk); - if (lbn >= NDADDR) { + if (off >= NDADDR) { + if (lbn > 0) + panic("softdep_setup_allocdirect: bad lbn %jd, off %jd", + lbn, off); /* allocating an indirect block */ if (oldblkno != 0) panic("softdep_setup_allocdirect: non-zero indir"); } else { + if (off != lbn) + panic("softdep_setup_allocdirect: lbn %jd != off %jd", + lbn, off); /* * Allocating a direct block. * @@ -1692,26 +4406,39 @@ softdep_setup_allocdirect(ip, lbn, newblkno, oldblkno, newsize, oldsize, bp) * deletions. 
*/ if ((ip->i_mode & IFMT) == IFDIR && - pagedep_lookup(ip, lbn, DEPALLOC, &pagedep) == 0) + pagedep_lookup(mp, ip->i_number, off, DEPALLOC, + &pagedep) == 0) WORKLIST_INSERT(&bp->b_dep, &pagedep->pd_list); } - if (newblk_lookup(ip->i_fs, newblkno, 0, &newblk) == 0) + if (newblk_lookup(mp, newblkno, 0, &newblk) == 0) panic("softdep_setup_allocdirect: lost block"); - if (newblk->nb_state == DEPCOMPLETE) { - adp->ad_state |= DEPCOMPLETE; - adp->ad_buf = NULL; - } else { - bmsafemap = newblk->nb_bmsafemap; - adp->ad_buf = bmsafemap->sm_buf; - LIST_REMOVE(newblk, nb_deps); - LIST_INSERT_HEAD(&bmsafemap->sm_allocdirecthd, adp, ad_deps); - } - LIST_REMOVE(newblk, nb_hash); - free(newblk, M_NEWBLK); + KASSERT(newblk->nb_list.wk_type == D_NEWBLK, + ("softdep_setup_allocdirect: newblk already initialized")); + /* + * Convert the newblk to an allocdirect. + */ + newblk->nb_list.wk_type = D_ALLOCDIRECT; + adp = (struct allocdirect *)newblk; + newblk->nb_freefrag = freefrag; + adp->ad_offset = off; + adp->ad_oldblkno = oldblkno; + adp->ad_newsize = newsize; + adp->ad_oldsize = oldsize; + /* + * Finish initializing the journal. 
+ */ + if ((jnewblk = newblk->nb_jnewblk) != NULL) { + jnewblk->jn_ino = ip->i_number; + jnewblk->jn_lbn = lbn; + add_to_journal(&jnewblk->jn_list); + } + if (freefrag && freefrag->ff_jfreefrag != NULL) + add_to_journal(&freefrag->ff_jfreefrag->fr_list); inodedep_lookup(mp, ip->i_number, DEPALLOC | NODELAY, &inodedep); adp->ad_inodedep = inodedep; - WORKLIST_INSERT(&bp->b_dep, &adp->ad_list); + + WORKLIST_INSERT(&bp->b_dep, &newblk->nb_list); /* * The list of allocdirects must be kept in sorted and ascending * order so that the rollback routines can quickly determine the @@ -1726,24 +4453,25 @@ softdep_setup_allocdirect(ip, lbn, newblkno, oldblkno, newsize, oldsize, bp) */ adphead = &inodedep->id_newinoupdt; oldadp = TAILQ_LAST(adphead, allocdirectlst); - if (oldadp == NULL || oldadp->ad_lbn <= lbn) { + if (oldadp == NULL || oldadp->ad_offset <= off) { /* insert at end of list */ TAILQ_INSERT_TAIL(adphead, adp, ad_next); - if (oldadp != NULL && oldadp->ad_lbn == lbn) + if (oldadp != NULL && oldadp->ad_offset == off) allocdirect_merge(adphead, adp, oldadp); FREE_LOCK(&lk); return; } TAILQ_FOREACH(oldadp, adphead, ad_next) { - if (oldadp->ad_lbn >= lbn) + if (oldadp->ad_offset >= off) break; } if (oldadp == NULL) panic("softdep_setup_allocdirect: lost entry"); /* insert in middle of list */ TAILQ_INSERT_BEFORE(oldadp, adp, ad_next); - if (oldadp->ad_lbn == lbn) + if (oldadp->ad_offset == off) allocdirect_merge(adphead, adp, oldadp); + FREE_LOCK(&lk); } @@ -1761,10 +4489,11 @@ allocdirect_merge(adphead, newadp, oldadp) struct freefrag *freefrag; struct newdirblk *newdirblk; + freefrag = NULL; mtx_assert(&lk, MA_OWNED); if (newadp->ad_oldblkno != oldadp->ad_newblkno || newadp->ad_oldsize != oldadp->ad_newsize || - newadp->ad_lbn >= NDADDR) + newadp->ad_offset >= NDADDR) panic("%s %jd != new %jd || old size %ld != new %ld", "allocdirect_merge: old blkno", (intmax_t)newadp->ad_oldblkno, @@ -1779,7 +4508,7 @@ allocdirect_merge(adphead, newadp, oldadp) * This action is 
done by swapping the freefrag dependencies. * The new dependency gains the old one's freefrag, and the * old one gets the new one and then immediately puts it on - * the worklist when it is freed by free_allocdirect. It is + * the worklist when it is freed by free_newblk. It is * not possible to do this swap when the old dependency had a * non-zero size but no previous fragment to free. This condition * arises when the new block is an extension of the old block. @@ -1788,8 +4517,8 @@ allocdirect_merge(adphead, newadp, oldadp) * the old dependency, so cannot legitimately be freed until the * conditions for the new dependency are fulfilled. */ + freefrag = newadp->ad_freefrag; if (oldadp->ad_freefrag != NULL || oldadp->ad_oldblkno == 0) { - freefrag = newadp->ad_freefrag; newadp->ad_freefrag = oldadp->ad_freefrag; oldadp->ad_freefrag = freefrag; } @@ -1804,32 +4533,118 @@ allocdirect_merge(adphead, newadp, oldadp) panic("allocdirect_merge: extra newdirblk"); WORKLIST_INSERT(&newadp->ad_newdirblk, &newdirblk->db_list); } - free_allocdirect(adphead, oldadp, 0); + TAILQ_REMOVE(adphead, oldadp, ad_next); + /* + * We need to move any journal dependencies over to the freefrag + * that releases this block if it exists. Otherwise we are + * extending an existing block and we'll wait until that is + * complete to release the journal space and extend the + * new journal to cover this old space as well. + */ + if (freefrag == NULL) { + struct jnewblk *jnewblk; + struct jnewblk *njnewblk; + + if (oldadp->ad_newblkno != newadp->ad_newblkno) + panic("allocdirect_merge: %jd != %jd", + oldadp->ad_newblkno, newadp->ad_newblkno); + jnewblk = oldadp->ad_block.nb_jnewblk; + cancel_newblk(&oldadp->ad_block, &newadp->ad_block.nb_jwork); + /* + * We have an unwritten jnewblk, we need to merge the + * frag bits with our own. The newer adp's journal can not + * be written prior to the old one so no need to check for + * it here. 
+ */ + if (jnewblk) { + njnewblk = newadp->ad_block.nb_jnewblk; + if (njnewblk == NULL) + panic("allocdirect_merge: No jnewblk"); + if (jnewblk->jn_state & UNDONE) { + njnewblk->jn_state |= UNDONE | NEWBLOCK; + njnewblk->jn_state &= ~ATTACHED; + jnewblk->jn_state &= ~UNDONE; + } + njnewblk->jn_oldfrags = jnewblk->jn_oldfrags; + WORKLIST_REMOVE(&jnewblk->jn_list); + jnewblk->jn_state |= ATTACHED | COMPLETE; + free_jnewblk(jnewblk); + } + } else { + /* + * We can skip journaling for this freefrag and just complete + * any pending journal work for the allocdirect that is being + * removed after the freefrag completes. + */ + if (freefrag->ff_jfreefrag) + cancel_jfreefrag(freefrag->ff_jfreefrag); + cancel_newblk(&oldadp->ad_block, &freefrag->ff_jwork); + } + free_newblk(&oldadp->ad_block); } - + /* - * Allocate a new freefrag structure if needed. + * Allocate a jfreefrag structure to journal a single block free. */ -static struct freefrag * -newfreefrag(ip, blkno, size) +static struct jfreefrag * +newjfreefrag(freefrag, ip, blkno, size, lbn) + struct freefrag *freefrag; struct inode *ip; ufs2_daddr_t blkno; long size; + ufs_lbn_t lbn; +{ + struct jfreefrag *jfreefrag; + struct fs *fs; + + fs = ip->i_fs; + jfreefrag = malloc(sizeof(struct jfreefrag), M_JFREEFRAG, + M_SOFTDEP_FLAGS); + workitem_alloc(&jfreefrag->fr_list, D_JFREEFRAG, UFSTOVFS(ip->i_ump)); + jfreefrag->fr_jsegdep = newjsegdep(&jfreefrag->fr_list); + jfreefrag->fr_state = ATTACHED | DEPCOMPLETE; + jfreefrag->fr_ino = ip->i_number; + jfreefrag->fr_lbn = lbn; + jfreefrag->fr_blkno = blkno; + jfreefrag->fr_frags = numfrags(fs, size); + jfreefrag->fr_freefrag = freefrag; + + return (jfreefrag); +} + +/* + * Allocate a new freefrag structure. 
+ */ +static struct freefrag * +newfreefrag(ip, blkno, size, lbn) + struct inode *ip; + ufs2_daddr_t blkno; + long size; + ufs_lbn_t lbn; { struct freefrag *freefrag; struct fs *fs; - if (blkno == 0) - return (NULL); fs = ip->i_fs; if (fragnum(fs, blkno) + numfrags(fs, size) > fs->fs_frag) panic("newfreefrag: frag size"); freefrag = malloc(sizeof(struct freefrag), - M_FREEFRAG, M_SOFTDEP_FLAGS); + M_FREEFRAG, M_SOFTDEP_FLAGS); workitem_alloc(&freefrag->ff_list, D_FREEFRAG, UFSTOVFS(ip->i_ump)); + freefrag->ff_state = ATTACHED; + LIST_INIT(&freefrag->ff_jwork); freefrag->ff_inum = ip->i_number; freefrag->ff_blkno = blkno; freefrag->ff_fragsize = size; + + if (fs->fs_flags & FS_SUJ) { + freefrag->ff_jfreefrag = + newjfreefrag(freefrag, ip, blkno, size, lbn); + } else { + freefrag->ff_state |= DEPCOMPLETE; + freefrag->ff_jfreefrag = NULL; + } + return (freefrag); } @@ -1842,9 +4657,17 @@ handle_workitem_freefrag(freefrag) struct freefrag *freefrag; { struct ufsmount *ump = VFSTOUFS(freefrag->ff_list.wk_mp); + struct workhead wkhd; + /* + * It would be illegal to add new completion items to the + * freefrag after it was scheduled to be done so it must be + * safe to modify the list head here. + */ + LIST_INIT(&wkhd); + LIST_SWAP(&freefrag->ff_jwork, &wkhd, worklist, wk_list); ffs_blkfree(ump, ump->um_fs, ump->um_devvp, freefrag->ff_blkno, - freefrag->ff_fragsize, freefrag->ff_inum); + freefrag->ff_fragsize, freefrag->ff_inum, &wkhd); ACQUIRE_LOCK(&lk); WORKITEM_FREE(freefrag, D_FREEFRAG); FREE_LOCK(&lk); @@ -1856,9 +4679,9 @@ handle_workitem_freefrag(freefrag) * See the description of softdep_setup_allocdirect above for details.
*/ void -softdep_setup_allocext(ip, lbn, newblkno, oldblkno, newsize, oldsize, bp) +softdep_setup_allocext(ip, off, newblkno, oldblkno, newsize, oldsize, bp) struct inode *ip; - ufs_lbn_t lbn; + ufs_lbn_t off; ufs2_daddr_t newblkno; ufs2_daddr_t oldblkno; long newsize; @@ -1867,50 +4690,55 @@ softdep_setup_allocext(ip, lbn, newblkno, oldblkno, newsize, oldsize, bp) { struct allocdirect *adp, *oldadp; struct allocdirectlst *adphead; - struct bmsafemap *bmsafemap; + struct freefrag *freefrag; struct inodedep *inodedep; + struct jnewblk *jnewblk; struct newblk *newblk; struct mount *mp; + ufs_lbn_t lbn; + if (off >= NXADDR) + panic("softdep_setup_allocext: lbn %lld > NXADDR", + (long long)off); + + lbn = bp->b_lblkno; mp = UFSTOVFS(ip->i_ump); - adp = malloc(sizeof(struct allocdirect), - M_ALLOCDIRECT, M_SOFTDEP_FLAGS|M_ZERO); - workitem_alloc(&adp->ad_list, D_ALLOCDIRECT, mp); - adp->ad_lbn = lbn; - adp->ad_newblkno = newblkno; + if (oldblkno && oldblkno != newblkno) + freefrag = newfreefrag(ip, oldblkno, oldsize, lbn); + else + freefrag = NULL; + + ACQUIRE_LOCK(&lk); + if (newblk_lookup(mp, newblkno, 0, &newblk) == 0) + panic("softdep_setup_allocext: lost block"); + KASSERT(newblk->nb_list.wk_type == D_NEWBLK, + ("softdep_setup_allocext: newblk already initialized")); + /* + * Convert the newblk to an allocdirect. + */ + newblk->nb_list.wk_type = D_ALLOCDIRECT; + adp = (struct allocdirect *)newblk; + newblk->nb_freefrag = freefrag; + adp->ad_offset = off; adp->ad_oldblkno = oldblkno; adp->ad_newsize = newsize; adp->ad_oldsize = oldsize; - adp->ad_state = ATTACHED | EXTDATA; - LIST_INIT(&adp->ad_newdirblk); - if (newblkno == oldblkno) - adp->ad_freefrag = NULL; - else - adp->ad_freefrag = newfreefrag(ip, oldblkno, oldsize); - - ACQUIRE_LOCK(&lk); - if (newblk_lookup(ip->i_fs, newblkno, 0, &newblk) == 0) - panic("softdep_setup_allocext: lost block"); + adp->ad_state |= EXTDATA; + /* + * Finish initializing the journal. 
+ */ + if ((jnewblk = newblk->nb_jnewblk) != NULL) { + jnewblk->jn_ino = ip->i_number; + jnewblk->jn_lbn = lbn; + add_to_journal(&jnewblk->jn_list); + } + if (freefrag && freefrag->ff_jfreefrag != NULL) + add_to_journal(&freefrag->ff_jfreefrag->fr_list); inodedep_lookup(mp, ip->i_number, DEPALLOC | NODELAY, &inodedep); adp->ad_inodedep = inodedep; - if (newblk->nb_state == DEPCOMPLETE) { - adp->ad_state |= DEPCOMPLETE; - adp->ad_buf = NULL; - } else { - bmsafemap = newblk->nb_bmsafemap; - adp->ad_buf = bmsafemap->sm_buf; - LIST_REMOVE(newblk, nb_deps); - LIST_INSERT_HEAD(&bmsafemap->sm_allocdirecthd, adp, ad_deps); - } - LIST_REMOVE(newblk, nb_hash); - free(newblk, M_NEWBLK); - - WORKLIST_INSERT(&bp->b_dep, &adp->ad_list); - if (lbn >= NXADDR) - panic("softdep_setup_allocext: lbn %lld > NXADDR", - (long long)lbn); + WORKLIST_INSERT(&bp->b_dep, &newblk->nb_list); /* * The list of allocdirects must be kept in sorted and ascending * order so that the rollback routines can quickly determine the @@ -1925,23 +4753,23 @@ softdep_setup_allocext(ip, lbn, newblkno, oldblkno, newsize, oldsize, bp) */ adphead = &inodedep->id_newextupdt; oldadp = TAILQ_LAST(adphead, allocdirectlst); - if (oldadp == NULL || oldadp->ad_lbn <= lbn) { + if (oldadp == NULL || oldadp->ad_offset <= off) { /* insert at end of list */ TAILQ_INSERT_TAIL(adphead, adp, ad_next); - if (oldadp != NULL && oldadp->ad_lbn == lbn) + if (oldadp != NULL && oldadp->ad_offset == off) allocdirect_merge(adphead, adp, oldadp); FREE_LOCK(&lk); return; } TAILQ_FOREACH(oldadp, adphead, ad_next) { - if (oldadp->ad_lbn >= lbn) + if (oldadp->ad_offset >= off) break; } if (oldadp == NULL) panic("softdep_setup_allocext: lost entry"); /* insert in middle of list */ TAILQ_INSERT_BEFORE(oldadp, adp, ad_next); - if (oldadp->ad_lbn == lbn) + if (oldadp->ad_offset == off) allocdirect_merge(adphead, adp, oldadp); FREE_LOCK(&lk); } @@ -1975,22 +4803,39 @@ softdep_setup_allocext(ip, lbn, newblkno, oldblkno, newsize, oldsize, bp) * 
Allocate a new allocindir structure. */ static struct allocindir * -newallocindir(ip, ptrno, newblkno, oldblkno) +newallocindir(ip, ptrno, newblkno, oldblkno, lbn) struct inode *ip; /* inode for file being extended */ int ptrno; /* offset of pointer in indirect block */ ufs2_daddr_t newblkno; /* disk block number being added */ ufs2_daddr_t oldblkno; /* previous block number, 0 if none */ + ufs_lbn_t lbn; { + struct newblk *newblk; struct allocindir *aip; + struct freefrag *freefrag; + struct jnewblk *jnewblk; - aip = malloc(sizeof(struct allocindir), - M_ALLOCINDIR, M_SOFTDEP_FLAGS|M_ZERO); - workitem_alloc(&aip->ai_list, D_ALLOCINDIR, UFSTOVFS(ip->i_ump)); - aip->ai_state = ATTACHED; + if (oldblkno) + freefrag = newfreefrag(ip, oldblkno, ip->i_fs->fs_bsize, lbn); + else + freefrag = NULL; + ACQUIRE_LOCK(&lk); + if (newblk_lookup(UFSTOVFS(ip->i_ump), newblkno, 0, &newblk) == 0) + panic("newallocindir: lost block"); + KASSERT(newblk->nb_list.wk_type == D_NEWBLK, + ("newallocindir: newblk already initialized")); + newblk->nb_list.wk_type = D_ALLOCINDIR; + newblk->nb_freefrag = freefrag; + aip = (struct allocindir *)newblk; aip->ai_offset = ptrno; - aip->ai_newblkno = newblkno; aip->ai_oldblkno = oldblkno; - aip->ai_freefrag = newfreefrag(ip, oldblkno, ip->i_fs->fs_bsize); + if ((jnewblk = newblk->nb_jnewblk) != NULL) { + jnewblk->jn_ino = ip->i_number; + jnewblk->jn_lbn = lbn; + add_to_journal(&jnewblk->jn_list); + } + if (freefrag && freefrag->ff_jfreefrag != NULL) + add_to_journal(&freefrag->ff_jfreefrag->fr_list); return (aip); } @@ -2008,22 +4853,28 @@ softdep_setup_allocindir_page(ip, lbn, bp, ptrno, newblkno, oldblkno, nbp) ufs2_daddr_t oldblkno; /* previous block number, 0 if none */ struct buf *nbp; /* buffer holding allocated page */ { + struct inodedep *inodedep; struct allocindir *aip; struct pagedep *pagedep; + struct mount *mp; + if (lbn != nbp->b_lblkno) + panic("softdep_setup_allocindir_page: lbn %jd != lblkno %jd", + lbn, nbp->b_lblkno);
ASSERT_VOP_LOCKED(ITOV(ip), "softdep_setup_allocindir_page"); - aip = newallocindir(ip, ptrno, newblkno, oldblkno); - ACQUIRE_LOCK(&lk); + mp = UFSTOVFS(ip->i_ump); + aip = newallocindir(ip, ptrno, newblkno, oldblkno, lbn); + (void) inodedep_lookup(mp, ip->i_number, DEPALLOC, &inodedep); /* * If we are allocating a directory page, then we must * allocate an associated pagedep to track additions and * deletions. */ if ((ip->i_mode & IFMT) == IFDIR && - pagedep_lookup(ip, lbn, DEPALLOC, &pagedep) == 0) + pagedep_lookup(mp, ip->i_number, lbn, DEPALLOC, &pagedep) == 0) WORKLIST_INSERT(&nbp->b_dep, &pagedep->pd_list); - WORKLIST_INSERT(&nbp->b_dep, &aip->ai_list); - setup_allocindir_phase2(bp, ip, aip); + WORKLIST_INSERT(&nbp->b_dep, &aip->ai_block.nb_list); + setup_allocindir_phase2(bp, ip, inodedep, aip, lbn); FREE_LOCK(&lk); } @@ -2038,15 +4889,40 @@ softdep_setup_allocindir_meta(nbp, ip, bp, ptrno, newblkno) struct buf *bp; /* indirect block referencing allocated block */ int ptrno; /* offset of pointer in indirect block */ ufs2_daddr_t newblkno; /* disk block number being added */ +{ + struct inodedep *inodedep; + struct allocindir *aip; + ufs_lbn_t lbn; + + lbn = nbp->b_lblkno; + ASSERT_VOP_LOCKED(ITOV(ip), "softdep_setup_allocindir_meta"); + aip = newallocindir(ip, ptrno, newblkno, 0, lbn); + inodedep_lookup(UFSTOVFS(ip->i_ump), ip->i_number, DEPALLOC, &inodedep); + WORKLIST_INSERT(&nbp->b_dep, &aip->ai_block.nb_list); + setup_allocindir_phase2(bp, ip, inodedep, aip, lbn); + FREE_LOCK(&lk); +} + +static void +indirdep_complete(indirdep) + struct indirdep *indirdep; { struct allocindir *aip; - ASSERT_VOP_LOCKED(ITOV(ip), "softdep_setup_allocindir_meta"); - aip = newallocindir(ip, ptrno, newblkno, 0); - ACQUIRE_LOCK(&lk); - WORKLIST_INSERT(&nbp->b_dep, &aip->ai_list); - setup_allocindir_phase2(bp, ip, aip); - FREE_LOCK(&lk); + LIST_REMOVE(indirdep, ir_next); + indirdep->ir_state &= ~ONDEPLIST; + + while ((aip = LIST_FIRST(&indirdep->ir_completehd)) != NULL) { + 
LIST_REMOVE(aip, ai_next); + free_newblk(&aip->ai_block); + } + /* + * If this indirdep is not attached to a buf it was simply waiting + * on completion to clear completehd. free_indirdep() asserts + * that nothing is dangling. + */ + if ((indirdep->ir_state & ONWORKLIST) == 0) + free_indirdep(indirdep); } /* @@ -2054,23 +4930,28 @@ softdep_setup_allocindir_meta(nbp, ip, bp, ptrno, newblkno) * by one of the two routines above. */ static void -setup_allocindir_phase2(bp, ip, aip) +setup_allocindir_phase2(bp, ip, inodedep, aip, lbn) struct buf *bp; /* in-memory copy of the indirect block */ struct inode *ip; /* inode for file being extended */ + struct inodedep *inodedep; /* Inodedep for ip */ struct allocindir *aip; /* allocindir allocated by the above routines */ + ufs_lbn_t lbn; /* Logical block number for this block. */ { struct worklist *wk; + struct fs *fs; + struct newblk *newblk; struct indirdep *indirdep, *newindirdep; - struct bmsafemap *bmsafemap; struct allocindir *oldaip; struct freefrag *freefrag; - struct newblk *newblk; + struct mount *mp; ufs2_daddr_t blkno; + mp = UFSTOVFS(ip->i_ump); + fs = ip->i_fs; mtx_assert(&lk, MA_OWNED); if (bp->b_lblkno >= 0) panic("setup_allocindir_phase2: not indir blk"); - for (indirdep = NULL, newindirdep = NULL; ; ) { + for (freefrag = NULL, indirdep = NULL, newindirdep = NULL; ; ) { LIST_FOREACH(wk, &bp->b_dep, wk_list) { if (wk->wk_type != D_INDIRDEP) continue; @@ -2079,49 +4960,41 @@ setup_allocindir_phase2(bp, ip, aip) } if (indirdep == NULL && newindirdep) { indirdep = newindirdep; - WORKLIST_INSERT(&bp->b_dep, &indirdep->ir_list); newindirdep = NULL; + WORKLIST_INSERT(&bp->b_dep, &indirdep->ir_list); + if (newblk_lookup(mp, dbtofsb(fs, bp->b_blkno), 0, + &newblk)) { + indirdep->ir_state |= ONDEPLIST; + LIST_INSERT_HEAD(&newblk->nb_indirdeps, + indirdep, ir_next); + } else + indirdep->ir_state |= DEPCOMPLETE; } if (indirdep) { - if (newblk_lookup(ip->i_fs, aip->ai_newblkno, 0, - &newblk) == 0) - 
panic("setup_allocindir: lost block"); - if (newblk->nb_state == DEPCOMPLETE) { - aip->ai_state |= DEPCOMPLETE; - aip->ai_buf = NULL; - } else { - bmsafemap = newblk->nb_bmsafemap; - aip->ai_buf = bmsafemap->sm_buf; - LIST_REMOVE(newblk, nb_deps); - LIST_INSERT_HEAD(&bmsafemap->sm_allocindirhd, - aip, ai_deps); - } - LIST_REMOVE(newblk, nb_hash); - free(newblk, M_NEWBLK); aip->ai_indirdep = indirdep; /* * Check to see if there is an existing dependency * for this block. If there is, merge the old - * dependency into the new one. + * dependency into the new one. This happens + * as a result of reallocblk only. */ if (aip->ai_oldblkno == 0) oldaip = NULL; else - LIST_FOREACH(oldaip, &indirdep->ir_deplisthd, ai_next) + LIST_FOREACH(oldaip, &indirdep->ir_deplisthd, + ai_next) if (oldaip->ai_offset == aip->ai_offset) break; - freefrag = NULL; - if (oldaip != NULL) { - if (oldaip->ai_newblkno != aip->ai_oldblkno) - panic("setup_allocindir_phase2: blkno"); - aip->ai_oldblkno = oldaip->ai_oldblkno; - freefrag = aip->ai_freefrag; - aip->ai_freefrag = oldaip->ai_freefrag; - oldaip->ai_freefrag = NULL; - free_allocindir(oldaip, NULL); - } + if (oldaip != NULL) + freefrag = allocindir_merge(aip, oldaip); LIST_INSERT_HEAD(&indirdep->ir_deplisthd, aip, ai_next); + KASSERT(aip->ai_offset >= 0 && + aip->ai_offset < NINDIR(ip->i_ump->um_fs), + ("setup_allocindir_phase2: Bad offset %d", + aip->ai_offset)); + KASSERT(indirdep->ir_savebp != NULL, + ("setup_allocindir_phase2 NULL ir_savebp")); if (ip->i_ump->um_fstype == UFS1) ((ufs1_daddr_t *)indirdep->ir_savebp->b_data) [aip->ai_offset] = aip->ai_oldblkno; @@ -2148,13 +5021,16 @@ setup_allocindir_phase2(bp, ip, aip) } newindirdep = malloc(sizeof(struct indirdep), M_INDIRDEP, M_SOFTDEP_FLAGS); - workitem_alloc(&newindirdep->ir_list, D_INDIRDEP, - UFSTOVFS(ip->i_ump)); + workitem_alloc(&newindirdep->ir_list, D_INDIRDEP, mp); newindirdep->ir_state = ATTACHED; if (ip->i_ump->um_fstype == UFS1) newindirdep->ir_state |= UFS1FMT; + 
newindirdep->ir_saveddata = NULL; LIST_INIT(&newindirdep->ir_deplisthd); LIST_INIT(&newindirdep->ir_donehd); + LIST_INIT(&newindirdep->ir_writehd); + LIST_INIT(&newindirdep->ir_completehd); + LIST_INIT(&newindirdep->ir_jwork); if (bp->b_blkno == bp->b_lblkno) { ufs_bmaparray(bp->b_vp, bp->b_lblkno, &blkno, bp, NULL, NULL); @@ -2168,6 +5044,51 @@ setup_allocindir_phase2(bp, ip, aip) } } +/* + * Merge two allocindirs which refer to the same block. Move newblock + * dependencies and setup the freefrags appropriately. + */ +static struct freefrag * +allocindir_merge(aip, oldaip) + struct allocindir *aip; + struct allocindir *oldaip; +{ + struct newdirblk *newdirblk; + struct freefrag *freefrag; + struct worklist *wk; + + if (oldaip->ai_newblkno != aip->ai_oldblkno) + panic("allocindir_merge: blkno"); + aip->ai_oldblkno = oldaip->ai_oldblkno; + freefrag = aip->ai_freefrag; + aip->ai_freefrag = oldaip->ai_freefrag; + oldaip->ai_freefrag = NULL; + KASSERT(freefrag != NULL, ("setup_allocindir_phase2: No freefrag")); + /* + * If we are tracking a new directory-block allocation, + * move it from the old allocindir to the new allocindir. + */ + if ((wk = LIST_FIRST(&oldaip->ai_newdirblk)) != NULL) { + newdirblk = WK_NEWDIRBLK(wk); + WORKLIST_REMOVE(&newdirblk->db_list); + if (!LIST_EMPTY(&oldaip->ai_newdirblk)) + panic("allocindir_merge: extra newdirblk"); + WORKLIST_INSERT(&aip->ai_newdirblk, &newdirblk->db_list); + } + /* + * We can skip journaling for this freefrag and just complete + * any pending journal work for the allocindir that is being + * removed after the freefrag completes. + */ + if (freefrag->ff_jfreefrag) + cancel_jfreefrag(freefrag->ff_jfreefrag); + LIST_REMOVE(oldaip, ai_next); + cancel_newblk(&oldaip->ai_block, &freefrag->ff_jwork); + free_newblk(&oldaip->ai_block); + + return (freefrag); +} + /* * Block de-allocation dependencies. 
* @@ -2203,9 +5124,12 @@ softdep_setup_freeblocks(ip, length, flags) off_t length; /* The new length for the file */ int flags; /* IO_EXT and/or IO_NORMAL */ { + struct ufs1_dinode *dp1; + struct ufs2_dinode *dp2; struct freeblks *freeblks; struct inodedep *inodedep; struct allocdirect *adp; + struct jfreeblk *jfreeblk; struct bufobj *bo; struct vnode *vp; struct buf *bp; @@ -2213,6 +5137,13 @@ softdep_setup_freeblocks(ip, length, flags) ufs2_daddr_t extblocks, datablocks; struct mount *mp; int i, delay, error; + ufs2_daddr_t blkno; + ufs_lbn_t tmpval; + ufs_lbn_t lbn; + long oldextsize; + long oldsize; + int frags; + int needj; fs = ip->i_fs; mp = UFSTOVFS(ip->i_ump); @@ -2221,32 +5152,53 @@ softdep_setup_freeblocks(ip, length, flags) freeblks = malloc(sizeof(struct freeblks), M_FREEBLKS, M_SOFTDEP_FLAGS|M_ZERO); workitem_alloc(&freeblks->fb_list, D_FREEBLKS, mp); + LIST_INIT(&freeblks->fb_jfreeblkhd); + LIST_INIT(&freeblks->fb_jwork); freeblks->fb_state = ATTACHED; freeblks->fb_uid = ip->i_uid; freeblks->fb_previousinum = ip->i_number; freeblks->fb_devvp = ip->i_devvp; + freeblks->fb_chkcnt = 0; ACQUIRE_LOCK(&lk); + /* + * If we're truncating a removed file that will never be written + * we don't need to journal the block frees. The canceled journals + * for the allocations will suffice. 
+ */ + inodedep_lookup(mp, ip->i_number, DEPALLOC, &inodedep); + if ((inodedep->id_state & (UNLINKED | DEPCOMPLETE)) == UNLINKED || + (fs->fs_flags & FS_SUJ) == 0) + needj = 0; + else + needj = 1; num_freeblkdep++; FREE_LOCK(&lk); extblocks = 0; if (fs->fs_magic == FS_UFS2_MAGIC) extblocks = btodb(fragroundup(fs, ip->i_din2->di_extsize)); datablocks = DIP(ip, i_blocks) - extblocks; - if ((flags & IO_NORMAL) == 0) { - freeblks->fb_oldsize = 0; - freeblks->fb_chkcnt = 0; - } else { - freeblks->fb_oldsize = ip->i_size; + if ((flags & IO_NORMAL) != 0) { + oldsize = ip->i_size; ip->i_size = 0; DIP_SET(ip, i_size, 0); freeblks->fb_chkcnt = datablocks; for (i = 0; i < NDADDR; i++) { - freeblks->fb_dblks[i] = DIP(ip, i_db[i]); + blkno = DIP(ip, i_db[i]); DIP_SET(ip, i_db[i], 0); + if (blkno == 0) + continue; + frags = sblksize(fs, oldsize, i); + frags = numfrags(fs, frags); + newfreework(freeblks, NULL, i, blkno, frags, needj); } - for (i = 0; i < NIADDR; i++) { - freeblks->fb_iblks[i] = DIP(ip, i_ib[i]); + for (i = 0, tmpval = NINDIR(fs), lbn = NDADDR; i < NIADDR; + i++, tmpval *= NINDIR(fs)) { + blkno = DIP(ip, i_ib[i]); DIP_SET(ip, i_ib[i], 0); + if (blkno) + newfreework(freeblks, NULL, -lbn - i, blkno, + fs->fs_frag, needj); + lbn += tmpval; } /* * If the file was removed, then the space being freed was @@ -2259,17 +5211,23 @@ softdep_setup_freeblocks(ip, length, flags) UFS_UNLOCK(ip->i_ump); } } - if ((flags & IO_EXT) == 0) { - freeblks->fb_oldextsize = 0; - } else { - freeblks->fb_oldextsize = ip->i_din2->di_extsize; + if ((flags & IO_EXT) != 0) { + oldextsize = ip->i_din2->di_extsize; ip->i_din2->di_extsize = 0; freeblks->fb_chkcnt += extblocks; for (i = 0; i < NXADDR; i++) { - freeblks->fb_eblks[i] = ip->i_din2->di_extb[i]; + blkno = ip->i_din2->di_extb[i]; ip->i_din2->di_extb[i] = 0; + if (blkno == 0) + continue; + frags = sblksize(fs, oldextsize, i); + frags = numfrags(fs, frags); + newfreework(freeblks, NULL, -1 - i, blkno, frags, + needj); } } + if 
(LIST_EMPTY(&freeblks->fb_jfreeblkhd)) + needj = 0; DIP_SET(ip, i_blocks, DIP(ip, i_blocks) - freeblks->fb_chkcnt); /* * Push the zero'ed inode to to its disk buffer so that we are free @@ -2282,12 +5240,17 @@ softdep_setup_freeblocks(ip, length, flags) brelse(bp); softdep_error("softdep_setup_freeblocks", error); } - if (ip->i_ump->um_fstype == UFS1) - *((struct ufs1_dinode *)bp->b_data + - ino_to_fsbo(fs, ip->i_number)) = *ip->i_din1; - else - *((struct ufs2_dinode *)bp->b_data + - ino_to_fsbo(fs, ip->i_number)) = *ip->i_din2; + if (ip->i_ump->um_fstype == UFS1) { + dp1 = ((struct ufs1_dinode *)bp->b_data + + ino_to_fsbo(fs, ip->i_number)); + ip->i_din1->di_freelink = dp1->di_freelink; + *dp1 = *ip->i_din1; + } else { + dp2 = ((struct ufs2_dinode *)bp->b_data + + ino_to_fsbo(fs, ip->i_number)); + ip->i_din2->di_freelink = dp2->di_freelink; + *dp2 = *ip->i_din2; + } /* * Find and eliminate any inode dependencies. */ @@ -2304,7 +5267,9 @@ softdep_setup_freeblocks(ip, length, flags) */ delay = (inodedep->id_state & DEPCOMPLETE); if (delay) - WORKLIST_INSERT(&inodedep->id_bufwait, &freeblks->fb_list); + WORKLIST_INSERT(&bp->b_dep, &freeblks->fb_list); + else if (needj) + freeblks->fb_state |= DEPCOMPLETE | COMPLETE; /* * Because the file length has been truncated to zero, any * pending block allocation dependency structures associated @@ -2318,14 +5283,19 @@ softdep_setup_freeblocks(ip, length, flags) merge_inode_lists(&inodedep->id_newinoupdt, &inodedep->id_inoupdt); while ((adp = TAILQ_FIRST(&inodedep->id_inoupdt)) != 0) - free_allocdirect(&inodedep->id_inoupdt, adp, delay); + cancel_allocdirect(&inodedep->id_inoupdt, adp, + freeblks, delay); } if (flags & IO_EXT) { merge_inode_lists(&inodedep->id_newextupdt, &inodedep->id_extupdt); while ((adp = TAILQ_FIRST(&inodedep->id_extupdt)) != 0) - free_allocdirect(&inodedep->id_extupdt, adp, delay); + cancel_allocdirect(&inodedep->id_extupdt, adp, + freeblks, delay); } + LIST_FOREACH(jfreeblk, &freeblks->fb_jfreeblkhd, 
jf_deps) + add_to_journal(&jfreeblk->jf_list); + FREE_LOCK(&lk); bdwrite(bp); /* @@ -2349,9 +5319,9 @@ restart: BO_UNLOCK(bo); ACQUIRE_LOCK(&lk); (void) inodedep_lookup(mp, ip->i_number, 0, &inodedep); - deallocate_dependencies(bp, inodedep); + if (deallocate_dependencies(bp, inodedep, freeblks)) + bp->b_flags |= B_INVAL | B_NOCACHE; FREE_LOCK(&lk); - bp->b_flags |= B_INVAL | B_NOCACHE; brelse(bp); BO_LOCK(bo); goto restart; @@ -2361,7 +5331,7 @@ restart: if (inodedep_lookup(mp, ip->i_number, 0, &inodedep) != 0) (void) free_inodedep(inodedep); - if(delay) { + if (delay) { freeblks->fb_state |= DEPCOMPLETE; /* * If the inode with zeroed block pointers is now on disk @@ -2371,16 +5341,16 @@ restart: * the request here than in the !delay case. */ if ((freeblks->fb_state & ALLCOMPLETE) == ALLCOMPLETE) - add_to_worklist(&freeblks->fb_list); + add_to_worklist(&freeblks->fb_list, 1); } FREE_LOCK(&lk); /* - * If the inode has never been written to disk (delay == 0), - * then we can process the freeblks now that we have deleted - * the dependencies. + * If the inode has never been written to disk (delay == 0) and + * we're not waiting on any journal writes, then we can process the + * freeblks now that we have deleted the dependencies. */ - if (!delay) + if (!delay && !needj) handle_workitem_freeblocks(freeblks, 0); } @@ -2389,19 +5359,23 @@ restart: * be reallocated to a new vnode. The buffer must be locked, thus, * no I/O completion operations can occur while we are manipulating * its associated dependencies. The mutex is held so that other I/O's - * associated with related dependencies do not occur. + * associated with related dependencies do not occur. Returns 1 if + * all dependencies were cleared, 0 otherwise. 
*/ -static void -deallocate_dependencies(bp, inodedep) +static int +deallocate_dependencies(bp, inodedep, freeblks) struct buf *bp; struct inodedep *inodedep; + struct freeblks *freeblks; { struct worklist *wk; struct indirdep *indirdep; + struct newdirblk *newdirblk; struct allocindir *aip; struct pagedep *pagedep; + struct jremref *jremref; + struct jmvref *jmvref; struct dirrem *dirrem; - struct diradd *dap; int i; mtx_assert(&lk, MA_OWNED); @@ -2410,47 +5384,24 @@ deallocate_dependencies(bp, inodedep) case D_INDIRDEP: indirdep = WK_INDIRDEP(wk); - /* - * None of the indirect pointers will ever be visible, - * so they can simply be tossed. GOINGAWAY ensures - * that allocated pointers will be saved in the buffer - * cache until they are freed. Note that they will - * only be able to be found by their physical address - * since the inode mapping the logical address will - * be gone. The save buffer used for the safe copy - * was allocated in setup_allocindir_phase2 using - * the physical address so it could be used for this - * purpose. Hence we swap the safe copy with the real - * copy, allowing the safe copy to be freed and holding - * on to the real copy for later use in indir_trunc. - */ - if (indirdep->ir_state & GOINGAWAY) - panic("deallocate_dependencies: already gone"); - indirdep->ir_state |= GOINGAWAY; - VFSTOUFS(bp->b_vp->v_mount)->um_numindirdeps += 1; - while ((aip = LIST_FIRST(&indirdep->ir_deplisthd)) != 0) - free_allocindir(aip, inodedep); if (bp->b_lblkno >= 0 || bp->b_blkno != indirdep->ir_savebp->b_lblkno) panic("deallocate_dependencies: not indir"); - bcopy(bp->b_data, indirdep->ir_savebp->b_data, - bp->b_bcount); - WORKLIST_REMOVE(wk); - WORKLIST_INSERT(&indirdep->ir_savebp->b_dep, wk); + cancel_indirdep(indirdep, bp, inodedep, freeblks); continue; case D_PAGEDEP: pagedep = WK_PAGEDEP(wk); /* - * None of the directory additions will ever be - * visible, so they can simply be tossed. 
+ * There should be no directory add dependencies present + * as the directory could not be truncated until all + * children were removed. */ + KASSERT(LIST_FIRST(&pagedep->pd_pendinghd) == NULL, + ("deallocate_dependencies: pendinghd != NULL")); for (i = 0; i < DAHASHSZ; i++) - while ((dap = - LIST_FIRST(&pagedep->pd_diraddhd[i]))) - free_diradd(dap); - while ((dap = LIST_FIRST(&pagedep->pd_pendinghd)) != 0) - free_diradd(dap); + KASSERT(LIST_FIRST(&pagedep->pd_diraddhd[i]) == NULL, + ("deallocate_dependencies: diraddhd != NULL")); /* * Copy any directory remove dependencies to the list * to be processed after the zero'ed inode is written. @@ -2458,28 +5409,40 @@ deallocate_dependencies(bp, inodedep) * can be dumped directly onto the work list. */ LIST_FOREACH(dirrem, &pagedep->pd_dirremhd, dm_next) { + /* + * If there are any dirrems we wait for + * the journal write to complete and + * then restart the buf scan as the lock + * has been dropped. + */ + while ((jremref = + LIST_FIRST(&dirrem->dm_jremrefhd)) + != NULL) { + stat_jwait_filepage++; + jwait(&jremref->jr_list); + return (0); + } LIST_REMOVE(dirrem, dm_next); dirrem->dm_dirinum = pagedep->pd_ino; if (inodedep == NULL || (inodedep->id_state & ALLCOMPLETE) == - ALLCOMPLETE) - add_to_worklist(&dirrem->dm_list); - else + ALLCOMPLETE) { + dirrem->dm_state |= COMPLETE; + add_to_worklist(&dirrem->dm_list, 0); + } else WORKLIST_INSERT(&inodedep->id_bufwait, &dirrem->dm_list); } if ((pagedep->pd_state & NEWBLOCK) != 0) { - LIST_FOREACH(wk, &inodedep->id_bufwait, wk_list) - if (wk->wk_type == D_NEWDIRBLK && - WK_NEWDIRBLK(wk)->db_pagedep == - pagedep) - break; - if (wk != NULL) { - WORKLIST_REMOVE(wk); - free_newdirblk(WK_NEWDIRBLK(wk)); - } else - panic("deallocate_dependencies: " - "lost pagedep"); + newdirblk = pagedep->pd_newdirblk; + WORKLIST_REMOVE(&newdirblk->db_list); + free_newdirblk(newdirblk); + } + while ((jmvref = LIST_FIRST(&pagedep->pd_jmvrefhd)) + != NULL) { + stat_jwait_filepage++; + 
jwait(&jmvref->jm_list); + return (0); } WORKLIST_REMOVE(&pagedep->pd_list); LIST_REMOVE(pagedep, pd_hash); @@ -2487,7 +5450,8 @@ deallocate_dependencies(bp, inodedep) continue; case D_ALLOCINDIR: - free_allocindir(WK_ALLOCINDIR(wk), inodedep); + aip = WK_ALLOCINDIR(wk); + cancel_allocindir(aip, inodedep, freeblks); continue; case D_ALLOCDIRECT: @@ -2502,46 +5466,155 @@ deallocate_dependencies(bp, inodedep) /* NOTREACHED */ } } + + return (1); } /* - * Free an allocdirect. Generate a new freefrag work request if appropriate. - * This routine must be called with splbio interrupts blocked. + * An allocdirect is being canceled due to a truncate. We must make sure + * the journal entry is released in concert with the blkfree that releases + * the storage. Completed journal entries must not be released until the + * space is no longer pointed to by the inode or in the bitmap. */ static void -free_allocdirect(adphead, adp, delay) +cancel_allocdirect(adphead, adp, freeblks, delay) struct allocdirectlst *adphead; struct allocdirect *adp; + struct freeblks *freeblks; int delay; { + struct freework *freework; + struct newblk *newblk; + struct worklist *wk; + ufs_lbn_t lbn; + + TAILQ_REMOVE(adphead, adp, ad_next); + newblk = (struct newblk *)adp; + /* + * If the journal hasn't been written the jnewblk must be passed + * to the call to ffs_freeblk that reclaims the space. We accomplish + * this by linking the journal dependency into the freework to be + * freed when freework_freeblock() is called. If the journal has + * been written we can simply reclaim the journal space when the + * freeblks work is complete. + */ + if (newblk->nb_jnewblk == NULL) { + cancel_newblk(newblk, &freeblks->fb_jwork); + goto found; + } + lbn = newblk->nb_jnewblk->jn_lbn; + /* + * Find the correct freework structure so it releases the canceled + * journal when the bitmap is cleared. This preserves rollback + * until the allocation is reverted. 
+ */ + LIST_FOREACH(wk, &freeblks->fb_freeworkhd, wk_list) { + freework = WK_FREEWORK(wk); + if (freework->fw_lbn != lbn) + continue; + cancel_newblk(newblk, &freework->fw_jwork); + goto found; + } + panic("cancel_allocdirect: Freework not found for lbn %jd\n", lbn); +found: + if (delay) + WORKLIST_INSERT(&adp->ad_inodedep->id_bufwait, + &newblk->nb_list); + else + free_newblk(newblk); + return; +} + + +static void +cancel_newblk(newblk, wkhd) + struct newblk *newblk; + struct workhead *wkhd; +{ + struct indirdep *indirdep; + struct allocindir *aip; + + while ((indirdep = LIST_FIRST(&newblk->nb_indirdeps)) != NULL) { + indirdep->ir_state &= ~ONDEPLIST; + LIST_REMOVE(indirdep, ir_next); + /* + * If an indirdep is not on the buf worklist we need to + * free it here as deallocate_dependencies() will never + * find it. These pointers were never visible on disk and + * can be discarded immediately. + */ + while ((aip = LIST_FIRST(&indirdep->ir_completehd)) != NULL) { + LIST_REMOVE(aip, ai_next); + cancel_newblk(&aip->ai_block, wkhd); + free_newblk(&aip->ai_block); + } + /* + * If this indirdep is not attached to a buf it was simply + * waiting on completion to clear completehd. free_indirdep() + * asserts that nothing is dangling. + */ + if ((indirdep->ir_state & ONWORKLIST) == 0) + free_indirdep(indirdep); + } + if (newblk->nb_state & ONDEPLIST) { + newblk->nb_state &= ~ONDEPLIST; + LIST_REMOVE(newblk, nb_deps); + } + if (newblk->nb_state & ONWORKLIST) + WORKLIST_REMOVE(&newblk->nb_list); + /* + * If the journal entry hasn't been written we hold onto the dep + * until it is safe to free along with the other journal work. + */ + if (newblk->nb_jnewblk != NULL) { + cancel_jnewblk(newblk->nb_jnewblk, wkhd); + newblk->nb_jnewblk = NULL; + } + if (!LIST_EMPTY(&newblk->nb_jwork)) + jwork_move(wkhd, &newblk->nb_jwork); +} + +/* + * Free a newblk. Generate a new freefrag work request if appropriate. 
+ * This must be called after the inode pointer and any direct block pointers + * are valid or fully removed via truncate or frag extension. + */ +static void +free_newblk(newblk) + struct newblk *newblk; +{ + struct indirdep *indirdep; struct newdirblk *newdirblk; + struct freefrag *freefrag; struct worklist *wk; mtx_assert(&lk, MA_OWNED); - if ((adp->ad_state & DEPCOMPLETE) == 0) - LIST_REMOVE(adp, ad_deps); - TAILQ_REMOVE(adphead, adp, ad_next); - if ((adp->ad_state & COMPLETE) == 0) - WORKLIST_REMOVE(&adp->ad_list); - if (adp->ad_freefrag != NULL) { - if (delay) - WORKLIST_INSERT(&adp->ad_inodedep->id_bufwait, - &adp->ad_freefrag->ff_list); - else - add_to_worklist(&adp->ad_freefrag->ff_list); + if (newblk->nb_state & ONDEPLIST) + LIST_REMOVE(newblk, nb_deps); + if (newblk->nb_state & ONWORKLIST) + WORKLIST_REMOVE(&newblk->nb_list); + LIST_REMOVE(newblk, nb_hash); + if ((freefrag = newblk->nb_freefrag) != NULL) { + freefrag->ff_state |= COMPLETE; + if ((freefrag->ff_state & ALLCOMPLETE) == ALLCOMPLETE) + add_to_worklist(&freefrag->ff_list, 0); } - if ((wk = LIST_FIRST(&adp->ad_newdirblk)) != NULL) { + if ((wk = LIST_FIRST(&newblk->nb_newdirblk)) != NULL) { newdirblk = WK_NEWDIRBLK(wk); WORKLIST_REMOVE(&newdirblk->db_list); - if (!LIST_EMPTY(&adp->ad_newdirblk)) - panic("free_allocdirect: extra newdirblk"); - if (delay) - WORKLIST_INSERT(&adp->ad_inodedep->id_bufwait, - &newdirblk->db_list); - else - free_newdirblk(newdirblk); + if (!LIST_EMPTY(&newblk->nb_newdirblk)) + panic("free_newblk: extra newdirblk"); + free_newdirblk(newdirblk); } - WORKITEM_FREE(adp, D_ALLOCDIRECT); + while ((indirdep = LIST_FIRST(&newblk->nb_indirdeps)) != NULL) { + indirdep->ir_state |= DEPCOMPLETE; + indirdep_complete(indirdep); + } + KASSERT(newblk->nb_jnewblk == NULL, + ("free_newblk; jnewblk %p still attached", newblk->nb_jnewblk)); + handle_jwork(&newblk->nb_jwork); + newblk->nb_list.wk_type = D_NEWBLK; + WORKITEM_FREE(newblk, D_NEWBLK); } /* @@ -2554,6 +5627,7 @@ 
free_newdirblk(newdirblk) { struct pagedep *pagedep; struct diradd *dap; + struct worklist *wk; int i; mtx_assert(&lk, MA_OWNED); @@ -2571,17 +5645,25 @@ free_newdirblk(newdirblk) pagedep->pd_state &= ~NEWBLOCK; if ((pagedep->pd_state & ONWORKLIST) == 0) while ((dap = LIST_FIRST(&pagedep->pd_pendinghd)) != NULL) - free_diradd(dap); + free_diradd(dap, NULL); /* * If no dependencies remain, the pagedep will be freed. */ for (i = 0; i < DAHASHSZ; i++) if (!LIST_EMPTY(&pagedep->pd_diraddhd[i])) break; - if (i == DAHASHSZ && (pagedep->pd_state & ONWORKLIST) == 0) { + if (i == DAHASHSZ && (pagedep->pd_state & ONWORKLIST) == 0 && + LIST_EMPTY(&pagedep->pd_jmvrefhd)) { + KASSERT(LIST_FIRST(&pagedep->pd_dirremhd) == NULL, + ("free_newdirblk: Freeing non-free pagedep %p", pagedep)); LIST_REMOVE(pagedep, pd_hash); WORKITEM_FREE(pagedep, D_PAGEDEP); } + /* Should only ever be one item in the list. */ + while ((wk = LIST_FIRST(&newdirblk->db_mkdir)) != NULL) { + WORKLIST_REMOVE(wk); + handle_written_mkdir(WK_MKDIR(wk), MKDIR_BODY); + } WORKITEM_FREE(newdirblk, D_NEWDIRBLK); } @@ -2608,6 +5690,7 @@ softdep_freefile(pvp, ino, mode) freefile->fx_mode = mode; freefile->fx_oldinum = ino; freefile->fx_devvp = ip->i_devvp; + LIST_INIT(&freefile->fx_jwork); if ((ip->i_flag & IN_SPACECOUNTED) == 0) { UFS_LOCK(ip->i_ump); ip->i_fs->fs_pendinginodes += 1; @@ -2618,11 +5701,34 @@ softdep_freefile(pvp, ino, mode) * If the inodedep does not exist, then the zero'ed inode has * been written to disk. If the allocated inode has never been * written to disk, then the on-disk inode is zero'ed. In either - * case we can free the file immediately. + * case we can free the file immediately. If the journal was + * canceled before being written the inode will never make it to + * disk and we must send the canceled journal entrys to + * ffs_freefile() to be cleared in conjunction with the bitmap. + * Any blocks waiting on the inode to write can be safely freed + * here as it will never been written. 
*/ ACQUIRE_LOCK(&lk); - if (inodedep_lookup(pvp->v_mount, ino, 0, &inodedep) == 0 || - check_inode_unwritten(inodedep)) { + inodedep_lookup(pvp->v_mount, ino, 0, &inodedep); + /* + * Remove this inode from the unlinked list and set + * GOINGAWAY as appropriate to indicate that this inode + * will never be written. + */ + if (inodedep && inodedep->id_state & UNLINKED) { + /* + * Save the journal work to be freed with the bitmap + * before we clear UNLINKED. Otherwise it can be lost + * if the inode block is written. + */ + handle_bufwait(inodedep, &freefile->fx_jwork); + clear_unlinked_inodedep(inodedep); + /* Re-acquire inodedep as we've dropped lk. */ + inodedep_lookup(pvp->v_mount, ino, 0, &inodedep); + if (inodedep && (inodedep->id_state & DEPCOMPLETE) == 0) + inodedep->id_state |= GOINGAWAY; + } + if (inodedep == NULL || check_inode_unwritten(inodedep)) { FREE_LOCK(&lk); handle_workitem_freefile(freefile); return; @@ -2654,7 +5760,8 @@ check_inode_unwritten(inodedep) { mtx_assert(&lk, MA_OWNED); - if ((inodedep->id_state & DEPCOMPLETE) != 0 || + + if ((inodedep->id_state & (DEPCOMPLETE | UNLINKED)) != 0 || !LIST_EMPTY(&inodedep->id_pendinghd) || !LIST_EMPTY(&inodedep->id_bufwait) || !LIST_EMPTY(&inodedep->id_inowait) || @@ -2662,9 +5769,9 @@ check_inode_unwritten(inodedep) !TAILQ_EMPTY(&inodedep->id_newinoupdt) || !TAILQ_EMPTY(&inodedep->id_extupdt) || !TAILQ_EMPTY(&inodedep->id_newextupdt) || + inodedep->id_mkdiradd != NULL || inodedep->id_nlinkdelta != 0) return (0); - /* * Another process might be in initiate_write_inodeblock_ufs[12] * trying to allocate memory without holding "Softdep Lock". 
@@ -2673,9 +5780,11 @@ check_inode_unwritten(inodedep) inodedep->id_savedino1 == NULL) return (0); + if (inodedep->id_state & ONDEPLIST) + LIST_REMOVE(inodedep, id_deps); + inodedep->id_state &= ~ONDEPLIST; inodedep->id_state |= ALLCOMPLETE; - LIST_REMOVE(inodedep, id_deps); - inodedep->id_buf = NULL; + inodedep->id_bmsafemap = NULL; if (inodedep->id_state & ONWORKLIST) WORKLIST_REMOVE(&inodedep->id_list); if (inodedep->id_savedino1 != NULL) { @@ -2696,23 +5805,149 @@ free_inodedep(inodedep) { mtx_assert(&lk, MA_OWNED); - if ((inodedep->id_state & ONWORKLIST) != 0 || + if ((inodedep->id_state & (ONWORKLIST | UNLINKED)) != 0 || (inodedep->id_state & ALLCOMPLETE) != ALLCOMPLETE || + !LIST_EMPTY(&inodedep->id_dirremhd) || !LIST_EMPTY(&inodedep->id_pendinghd) || !LIST_EMPTY(&inodedep->id_bufwait) || !LIST_EMPTY(&inodedep->id_inowait) || + !TAILQ_EMPTY(&inodedep->id_inoreflst) || !TAILQ_EMPTY(&inodedep->id_inoupdt) || !TAILQ_EMPTY(&inodedep->id_newinoupdt) || !TAILQ_EMPTY(&inodedep->id_extupdt) || !TAILQ_EMPTY(&inodedep->id_newextupdt) || - inodedep->id_nlinkdelta != 0 || inodedep->id_savedino1 != NULL) + inodedep->id_mkdiradd != NULL || + inodedep->id_nlinkdelta != 0 || + inodedep->id_savedino1 != NULL) return (0); + if (inodedep->id_state & ONDEPLIST) + LIST_REMOVE(inodedep, id_deps); LIST_REMOVE(inodedep, id_hash); WORKITEM_FREE(inodedep, D_INODEDEP); num_inodedep -= 1; return (1); } +/* + * Free the block referenced by a freework structure. The parent freeblks + * structure is released and completed when the final cg bitmap reaches + * the disk. This routine may be freeing a jnewblk which never made it to + * disk in which case we do not have to wait as the operation is undone + * in memory immediately. 
+ */ +static void +freework_freeblock(freework) + struct freework *freework; +{ + struct freeblks *freeblks; + struct ufsmount *ump; + struct workhead wkhd; + struct fs *fs; + int complete; + int pending; + int bsize; + int needj; + + freeblks = freework->fw_freeblks; + ump = VFSTOUFS(freeblks->fb_list.wk_mp); + fs = ump->um_fs; + needj = freeblks->fb_list.wk_mp->mnt_kern_flag & MNTK_SUJ; + complete = 0; + LIST_INIT(&wkhd); + /* + * If we are canceling an existing jnewblk pass it to the free + * routine, otherwise pass the freeblk which will ultimately + * release the freeblks. If we're not journaling, we can just + * free the freeblks immediately. + */ + if (!LIST_EMPTY(&freework->fw_jwork)) { + LIST_SWAP(&wkhd, &freework->fw_jwork, worklist, wk_list); + complete = 1; + } else if (needj) + WORKLIST_INSERT_UNLOCKED(&wkhd, &freework->fw_list); + bsize = lfragtosize(fs, freework->fw_frags); + pending = btodb(bsize); + ACQUIRE_LOCK(&lk); + freeblks->fb_chkcnt -= pending; + FREE_LOCK(&lk); + /* + * extattr blocks don't show up in pending blocks. XXX why? + */ + if (freework->fw_lbn >= 0 || freework->fw_lbn <= -NDADDR) { + UFS_LOCK(ump); + fs->fs_pendingblocks -= pending; + UFS_UNLOCK(ump); + } + ffs_blkfree(ump, fs, freeblks->fb_devvp, freework->fw_blkno, + bsize, freeblks->fb_previousinum, &wkhd); + if (complete == 0 && needj) + return; + /* + * The jnewblk will be discarded and the bits in the map never + * made it to disk. We can immediately free the freeblk. + */ + ACQUIRE_LOCK(&lk); + handle_written_freework(freework); + FREE_LOCK(&lk); +} + +/* + * Start, continue, or finish the process of freeing an indirect block tree. + * The free operation may be paused at any point with fw_off containing the + * offset to restart from. This enables us to implement some flow control + * for large truncates which may fan out and generate a huge number of + * dependencies. 
+ */ +static void +handle_workitem_indirblk(freework) + struct freework *freework; +{ + struct freeblks *freeblks; + struct ufsmount *ump; + struct fs *fs; + + + freeblks = freework->fw_freeblks; + ump = VFSTOUFS(freeblks->fb_list.wk_mp); + fs = ump->um_fs; + if (freework->fw_off == NINDIR(fs)) + freework_freeblock(freework); + else + indir_trunc(freework, fsbtodb(fs, freework->fw_blkno), + freework->fw_lbn); +} + +/* + * Called when a freework structure attached to a cg buf is written. The + * ref on either the parent or the freeblks structure is released and + * either may be added to the worklist if it is the final ref. + */ +static void +handle_written_freework(freework) + struct freework *freework; +{ + struct freeblks *freeblks; + struct freework *parent; + + freeblks = freework->fw_freeblks; + parent = freework->fw_parent; + if (parent) { + if (--parent->fw_ref != 0) + parent = NULL; + freeblks = NULL; + } else if (--freeblks->fb_ref != 0) + freeblks = NULL; + WORKITEM_FREE(freework, D_FREEWORK); + /* + * Don't delay these block frees or it takes an intolerable amount + * of time to process truncates and free their journal entries. + */ + if (freeblks) + add_to_worklist(&freeblks->fb_list, 1); + if (parent) + add_to_worklist(&parent->fw_list, 1); +} + /* * This workitem routine performs the block de-allocation. 
* The workitem is added to the pending list after the updated @@ -2725,100 +5960,80 @@ static void handle_workitem_freeblocks(freeblks, flags) struct freeblks *freeblks; int flags; +{ + struct freework *freework; + struct worklist *wk; + + KASSERT(LIST_EMPTY(&freeblks->fb_jfreeblkhd), + ("handle_workitem_freeblocks: Journal entries not written.")); + if (LIST_EMPTY(&freeblks->fb_freeworkhd)) { + handle_complete_freeblocks(freeblks); + return; + } + freeblks->fb_ref++; + while ((wk = LIST_FIRST(&freeblks->fb_freeworkhd)) != NULL) { + KASSERT(wk->wk_type == D_FREEWORK, + ("handle_workitem_freeblocks: Unknown type %s", + TYPENAME(wk->wk_type))); + WORKLIST_REMOVE_UNLOCKED(wk); + freework = WK_FREEWORK(wk); + if (freework->fw_lbn <= -NDADDR) + handle_workitem_indirblk(freework); + else + freework_freeblock(freework); + } + ACQUIRE_LOCK(&lk); + if (--freeblks->fb_ref != 0) + freeblks = NULL; + FREE_LOCK(&lk); + if (freeblks) + handle_complete_freeblocks(freeblks); +} + +/* + * Once all of the freework workitems are complete we can retire the + * freeblocks dependency and any journal work awaiting completion. This + * can not be called until all other dependencies are stable on disk. + */ +static void +handle_complete_freeblocks(freeblks) + struct freeblks *freeblks; { struct inode *ip; struct vnode *vp; struct fs *fs; struct ufsmount *ump; - int i, nblocks, level, bsize; - ufs2_daddr_t bn, blocksreleased = 0; - int error, allerror = 0; - ufs_lbn_t baselbns[NIADDR], tmpval; - int fs_pendingblocks; + int flags; ump = VFSTOUFS(freeblks->fb_list.wk_mp); fs = ump->um_fs; - fs_pendingblocks = 0; - tmpval = 1; - baselbns[0] = NDADDR; - for (i = 1; i < NIADDR; i++) { - tmpval *= NINDIR(fs); - baselbns[i] = baselbns[i - 1] + tmpval; - } - nblocks = btodb(fs->fs_bsize); - blocksreleased = 0; - /* - * Release all extended attribute blocks or frags. 
- */ - if (freeblks->fb_oldextsize > 0) { - for (i = (NXADDR - 1); i >= 0; i--) { - if ((bn = freeblks->fb_eblks[i]) == 0) - continue; - bsize = sblksize(fs, freeblks->fb_oldextsize, i); - ffs_blkfree(ump, fs, freeblks->fb_devvp, bn, bsize, - freeblks->fb_previousinum); - blocksreleased += btodb(bsize); - } - } - /* - * Release all data blocks or frags. - */ - if (freeblks->fb_oldsize > 0) { - /* - * Indirect blocks first. - */ - for (level = (NIADDR - 1); level >= 0; level--) { - if ((bn = freeblks->fb_iblks[level]) == 0) - continue; - if ((error = indir_trunc(freeblks, fsbtodb(fs, bn), - level, baselbns[level], &blocksreleased)) != 0) - allerror = error; - ffs_blkfree(ump, fs, freeblks->fb_devvp, bn, - fs->fs_bsize, freeblks->fb_previousinum); - fs_pendingblocks += nblocks; - blocksreleased += nblocks; - } - /* - * All direct blocks or frags. - */ - for (i = (NDADDR - 1); i >= 0; i--) { - if ((bn = freeblks->fb_dblks[i]) == 0) - continue; - bsize = sblksize(fs, freeblks->fb_oldsize, i); - ffs_blkfree(ump, fs, freeblks->fb_devvp, bn, bsize, - freeblks->fb_previousinum); - fs_pendingblocks += btodb(bsize); - blocksreleased += btodb(bsize); - } - } - UFS_LOCK(ump); - fs->fs_pendingblocks -= fs_pendingblocks; - UFS_UNLOCK(ump); + flags = LK_NOWAIT; + /* * If we still have not finished background cleanup, then check * to see if the block count needs to be adjusted. 
*/ - if (freeblks->fb_chkcnt != blocksreleased && - (fs->fs_flags & FS_UNCLEAN) != 0 && + if (freeblks->fb_chkcnt != 0 && (fs->fs_flags & FS_UNCLEAN) != 0 && ffs_vgetf(freeblks->fb_list.wk_mp, freeblks->fb_previousinum, - (flags & LK_NOWAIT) | LK_EXCLUSIVE, &vp, FFSV_FORCEINSMQ) - == 0) { + (flags & LK_NOWAIT) | LK_EXCLUSIVE, &vp, FFSV_FORCEINSMQ) == 0) { ip = VTOI(vp); - DIP_SET(ip, i_blocks, DIP(ip, i_blocks) + \ - freeblks->fb_chkcnt - blocksreleased); + DIP_SET(ip, i_blocks, DIP(ip, i_blocks) + freeblks->fb_chkcnt); ip->i_flag |= IN_CHANGE; vput(vp); } #ifdef INVARIANTS - if (freeblks->fb_chkcnt != blocksreleased && + if (freeblks->fb_chkcnt != 0 && ((fs->fs_flags & FS_UNCLEAN) == 0 || (flags & LK_NOWAIT) != 0)) printf("handle_workitem_freeblocks: block count\n"); - if (allerror) - softdep_error("handle_workitem_freeblks", allerror); #endif /* INVARIANTS */ ACQUIRE_LOCK(&lk); + /* + * All of the freeblock deps must be complete prior to this call + * so it's now safe to complete earlier outstanding journal entries. + */ + handle_jwork(&freeblks->fb_jwork); WORKITEM_FREE(freeblks, D_FREEBLKS); num_freeblkdep--; FREE_LOCK(&lk); @@ -2830,29 +6045,42 @@ handle_workitem_freeblocks(freeblks, flags) * and recursive calls to indirtrunc must be used to cleanse other indirect * blocks. 
*/ -static int -indir_trunc(freeblks, dbn, level, lbn, countp) - struct freeblks *freeblks; +static void +indir_trunc(freework, dbn, lbn) + struct freework *freework; ufs2_daddr_t dbn; - int level; ufs_lbn_t lbn; - ufs2_daddr_t *countp; { + struct freework *nfreework; + struct workhead wkhd; + struct jnewblk *jnewblk; + struct freeblks *freeblks; struct buf *bp; struct fs *fs; + struct worklist *wkn; struct worklist *wk; struct indirdep *indirdep; struct ufsmount *ump; ufs1_daddr_t *bap1 = 0; - ufs2_daddr_t nb, *bap2 = 0; + ufs2_daddr_t nb, nnb, *bap2 = 0; ufs_lbn_t lbnadd; int i, nblocks, ufs1fmt; - int error, allerror = 0; int fs_pendingblocks; + int freedeps; + int needj; + int level; + int cnt; + LIST_INIT(&wkhd); + level = lbn_level(lbn); + if (level == -1) + panic("indir_trunc: Invalid lbn %jd\n", lbn); + freeblks = freework->fw_freeblks; ump = VFSTOUFS(freeblks->fb_list.wk_mp); fs = ump->um_fs; fs_pendingblocks = 0; + freedeps = 0; + needj = UFSTOVFS(ump)->mnt_kern_flag & MNTK_SUJ; lbnadd = 1; for (i = level; i > 0; i--) lbnadd *= NINDIR(fs); @@ -2877,13 +6105,14 @@ indir_trunc(freeblks, dbn, level, lbn, countp) ACQUIRE_LOCK(&lk); if (bp != NULL && (wk = LIST_FIRST(&bp->b_dep)) != NULL) { if (wk->wk_type != D_INDIRDEP || - (indirdep = WK_INDIRDEP(wk))->ir_savebp != bp || - (indirdep->ir_state & GOINGAWAY) == 0) - panic("indir_trunc: lost indirdep"); - WORKLIST_REMOVE(wk); - WORKITEM_FREE(indirdep, D_INDIRDEP); + (wk->wk_state & GOINGAWAY) == 0) + panic("indir_trunc: lost indirdep %p", wk); + indirdep = WK_INDIRDEP(wk); + LIST_SWAP(&wkhd, &indirdep->ir_jwork, worklist, wk_list); + free_indirdep(indirdep); if (!LIST_EMPTY(&bp->b_dep)) - panic("indir_trunc: dangling dep"); + panic("indir_trunc: dangling dep %p", + LIST_FIRST(&bp->b_dep)); ump->um_numindirdeps -= 1; FREE_LOCK(&lk); } else { @@ -2892,11 +6121,10 @@ indir_trunc(freeblks, dbn, level, lbn, countp) brelse(bp); #endif FREE_LOCK(&lk); - error = bread(freeblks->fb_devvp, dbn, (int)fs->fs_bsize, - 
NOCRED, &bp); - if (error) { + if (bread(freeblks->fb_devvp, dbn, (int)fs->fs_bsize, + NOCRED, &bp) != 0) { brelse(bp); - return (error); + return; } } /* @@ -2909,57 +6137,264 @@ indir_trunc(freeblks, dbn, level, lbn, countp) ufs1fmt = 0; bap2 = (ufs2_daddr_t *)bp->b_data; } - nblocks = btodb(fs->fs_bsize); - for (i = NINDIR(fs) - 1; i >= 0; i--) { - if (ufs1fmt) - nb = bap1[i]; + /* + * Reclaim indirect blocks which never made it to disk. + */ + cnt = 0; + LIST_FOREACH_SAFE(wk, &wkhd, wk_list, wkn) { + struct workhead freewk; + if (wk->wk_type != D_JNEWBLK) + continue; + WORKLIST_REMOVE_UNLOCKED(wk); + LIST_INIT(&freewk); + WORKLIST_INSERT_UNLOCKED(&freewk, wk); + jnewblk = WK_JNEWBLK(wk); + if (jnewblk->jn_lbn > 0) + i = (jnewblk->jn_lbn - -lbn) / lbnadd; else + i = (jnewblk->jn_lbn - (lbn + 1)) / lbnadd; + KASSERT(i >= 0 && i < NINDIR(fs), + ("indir_trunc: Index out of range %d parent %jd lbn %jd", + i, lbn, jnewblk->jn_lbn)); + /* Clear the pointer so it isn't found below. */ + if (ufs1fmt) { + nb = bap1[i]; + bap1[i] = 0; + } else { nb = bap2[i]; + bap2[i] = 0; + } + KASSERT(nb == jnewblk->jn_blkno, + ("indir_trunc: Block mismatch %jd != %jd", + nb, jnewblk->jn_blkno)); + ffs_blkfree(ump, fs, freeblks->fb_devvp, jnewblk->jn_blkno, + fs->fs_bsize, freeblks->fb_previousinum, &freewk); + cnt++; + } + ACQUIRE_LOCK(&lk); + if (needj) + freework->fw_ref += NINDIR(fs) + 1; + /* Any remaining journal work can be completed with freeblks. */ + jwork_move(&freeblks->fb_jwork, &wkhd); + FREE_LOCK(&lk); + nblocks = btodb(fs->fs_bsize); + if (ufs1fmt) + nb = bap1[0]; + else + nb = bap2[0]; + nfreework = freework; + /* + * Reclaim on disk blocks. 
+ */ + for (i = freework->fw_off; i < NINDIR(fs); i++, nb = nnb) { + if (i != NINDIR(fs) - 1) { + if (ufs1fmt) + nnb = bap1[i+1]; + else + nnb = bap2[i+1]; + } else + nnb = 0; if (nb == 0) continue; + cnt++; if (level != 0) { - if ((error = indir_trunc(freeblks, fsbtodb(fs, nb), - level - 1, lbn + (i * lbnadd), countp)) != 0) - allerror = error; + ufs_lbn_t nlbn; + + nlbn = (lbn + 1) - (i * lbnadd); + if (needj != 0) { + nfreework = newfreework(freeblks, freework, + nlbn, nb, fs->fs_frag, 0); + freedeps++; + } + indir_trunc(nfreework, fsbtodb(fs, nb), nlbn); + } else { + struct freedep *freedep; + + /* + * Attempt to aggregate freedep dependencies for + * all blocks being released to the same CG. + */ + LIST_INIT(&wkhd); + if (needj != 0 && + (nnb == 0 || (dtog(fs, nb) != dtog(fs, nnb)))) { + freedep = newfreedep(freework); + WORKLIST_INSERT_UNLOCKED(&wkhd, + &freedep->fd_list); + freedeps++; + } + ffs_blkfree(ump, fs, freeblks->fb_devvp, nb, + fs->fs_bsize, freeblks->fb_previousinum, &wkhd); } - ffs_blkfree(ump, fs, freeblks->fb_devvp, nb, fs->fs_bsize, - freeblks->fb_previousinum); - fs_pendingblocks += nblocks; - *countp += nblocks; } - UFS_LOCK(ump); - fs->fs_pendingblocks -= fs_pendingblocks; - UFS_UNLOCK(ump); + if (level == 0) + fs_pendingblocks = (nblocks * cnt); + /* + * If we're not journaling we can free the indirect now. Otherwise + * setup the ref counts and offset so this indirect can be completed + * when its children are free. 
+ */ + if (needj == 0) { + fs_pendingblocks += nblocks; + dbn = dbtofsb(fs, dbn); + ffs_blkfree(ump, fs, freeblks->fb_devvp, dbn, fs->fs_bsize, + freeblks->fb_previousinum, NULL); + ACQUIRE_LOCK(&lk); + freeblks->fb_chkcnt -= fs_pendingblocks; + if (freework->fw_blkno == dbn) + handle_written_freework(freework); + FREE_LOCK(&lk); + freework = NULL; + } else { + ACQUIRE_LOCK(&lk); + freework->fw_off = i; + freework->fw_ref += freedeps; + freework->fw_ref -= NINDIR(fs) + 1; + if (freework->fw_ref != 0) + freework = NULL; + freeblks->fb_chkcnt -= fs_pendingblocks; + FREE_LOCK(&lk); + } + if (fs_pendingblocks) { + UFS_LOCK(ump); + fs->fs_pendingblocks -= fs_pendingblocks; + UFS_UNLOCK(ump); + } bp->b_flags |= B_INVAL | B_NOCACHE; brelse(bp); - return (allerror); + if (freework) + handle_workitem_indirblk(freework); + return; } /* - * Free an allocindir. - * This routine must be called with splbio interrupts blocked. + * Cancel an allocindir when it is removed via truncation. */ static void -free_allocindir(aip, inodedep) +cancel_allocindir(aip, inodedep, freeblks) struct allocindir *aip; struct inodedep *inodedep; + struct freeblks *freeblks; { - struct freefrag *freefrag; + struct newblk *newblk; - mtx_assert(&lk, MA_OWNED); - if ((aip->ai_state & DEPCOMPLETE) == 0) - LIST_REMOVE(aip, ai_deps); - if (aip->ai_state & ONWORKLIST) - WORKLIST_REMOVE(&aip->ai_list); + /* + * If the journal hasn't been written the jnewblk must be passed + * to the call to ffs_freeblk that reclaims the space. We accomplish + * this by linking the journal dependency into the indirdep to be + * freed when indir_trunc() is called. If the journal has already + * been written we can simply reclaim the journal space when the + * freeblks work is complete. 
+ */ LIST_REMOVE(aip, ai_next); - if ((freefrag = aip->ai_freefrag) != NULL) { - if (inodedep == NULL) - add_to_worklist(&freefrag->ff_list); - else - WORKLIST_INSERT(&inodedep->id_bufwait, - &freefrag->ff_list); + newblk = (struct newblk *)aip; + if (newblk->nb_jnewblk == NULL) + cancel_newblk(newblk, &freeblks->fb_jwork); + else + cancel_newblk(newblk, &aip->ai_indirdep->ir_jwork); + if (inodedep && inodedep->id_state & DEPCOMPLETE) + WORKLIST_INSERT(&inodedep->id_bufwait, &newblk->nb_list); + else + free_newblk(newblk); +} + +/* + * Create the mkdir dependencies for . and .. in a new directory. Link them + * in to a newdirblk so any subsequent additions are tracked properly. The + * caller is responsible for adding the mkdir1 dependency to the journal + * and updating id_mkdiradd. This function returns with lk held. + */ +static struct mkdir * +setup_newdir(dap, newinum, dinum, newdirbp, mkdirp) + struct diradd *dap; + ino_t newinum; + ino_t dinum; + struct buf *newdirbp; + struct mkdir **mkdirp; +{ + struct newblk *newblk; + struct pagedep *pagedep; + struct inodedep *inodedep; + struct newdirblk *newdirblk = 0; + struct mkdir *mkdir1, *mkdir2; + struct worklist *wk; + struct jaddref *jaddref; + struct mount *mp; + + mp = dap->da_list.wk_mp; + newdirblk = malloc(sizeof(struct newdirblk), M_NEWDIRBLK, + M_SOFTDEP_FLAGS); + workitem_alloc(&newdirblk->db_list, D_NEWDIRBLK, mp); + LIST_INIT(&newdirblk->db_mkdir); + mkdir1 = malloc(sizeof(struct mkdir), M_MKDIR, M_SOFTDEP_FLAGS); + workitem_alloc(&mkdir1->md_list, D_MKDIR, mp); + mkdir1->md_state = ATTACHED | MKDIR_BODY; + mkdir1->md_diradd = dap; + mkdir1->md_jaddref = NULL; + mkdir2 = malloc(sizeof(struct mkdir), M_MKDIR, M_SOFTDEP_FLAGS); + workitem_alloc(&mkdir2->md_list, D_MKDIR, mp); + mkdir2->md_state = ATTACHED | MKDIR_PARENT; + mkdir2->md_diradd = dap; + mkdir2->md_jaddref = NULL; + if ((mp->mnt_kern_flag & MNTK_SUJ) == 0) { + mkdir1->md_state |= DEPCOMPLETE; + mkdir2->md_state |= DEPCOMPLETE; } - 
WORKITEM_FREE(aip, D_ALLOCINDIR); + /* + * Dependency on "." and ".." being written to disk. + */ + mkdir1->md_buf = newdirbp; + ACQUIRE_LOCK(&lk); + LIST_INSERT_HEAD(&mkdirlisthd, mkdir1, md_mkdirs); + /* + * We must link the pagedep, allocdirect, and newdirblk for + * the initial file page so the pointer to the new directory + * is not written until the directory contents are live and + * any subsequent additions are not marked live until the + * block is reachable via the inode. + */ + if (pagedep_lookup(mp, newinum, 0, 0, &pagedep) == 0) + panic("setup_newdir: lost pagedep"); + LIST_FOREACH(wk, &newdirbp->b_dep, wk_list) + if (wk->wk_type == D_ALLOCDIRECT) + break; + if (wk == NULL) + panic("setup_newdir: lost allocdirect"); + newblk = WK_NEWBLK(wk); + pagedep->pd_state |= NEWBLOCK; + pagedep->pd_newdirblk = newdirblk; + newdirblk->db_pagedep = pagedep; + WORKLIST_INSERT(&newblk->nb_newdirblk, &newdirblk->db_list); + WORKLIST_INSERT(&newdirblk->db_mkdir, &mkdir1->md_list); + /* + * Look up the inodedep for the parent directory so that we + * can link mkdir2 into the pending dotdot jaddref or + * the inode write if there is none. If the inode is + * ALLCOMPLETE and no jaddref is present all dependencies have + * been satisfied and mkdir2 can be freed. 
+ */ + inodedep_lookup(mp, dinum, 0, &inodedep); + if (mp->mnt_kern_flag & MNTK_SUJ) { + if (inodedep == NULL) + panic("setup_newdir: Lost parent."); + jaddref = (struct jaddref *)TAILQ_LAST(&inodedep->id_inoreflst, + inoreflst); + KASSERT(jaddref != NULL && jaddref->ja_parent == newinum && + (jaddref->ja_state & MKDIR_PARENT), + ("setup_newdir: bad dotdot jaddref %p", jaddref)); + LIST_INSERT_HEAD(&mkdirlisthd, mkdir2, md_mkdirs); + mkdir2->md_jaddref = jaddref; + jaddref->ja_mkdir = mkdir2; + } else if (inodedep == NULL || + (inodedep->id_state & ALLCOMPLETE) == ALLCOMPLETE) { + dap->da_state &= ~MKDIR_PARENT; + WORKITEM_FREE(mkdir2, D_MKDIR); + } else { + LIST_INSERT_HEAD(&mkdirlisthd, mkdir2, md_mkdirs); + WORKLIST_INSERT(&inodedep->id_bufwait,&mkdir2->md_list); + } + *mkdirp = mkdir2; + + return (mkdir1); } /* @@ -2998,12 +6433,14 @@ softdep_setup_directory_add(bp, dp, diroffset, newinum, newdirbp, isnewblk) ufs_lbn_t lbn; /* block in directory containing new entry */ struct fs *fs; struct diradd *dap; - struct allocdirect *adp; + struct newblk *newblk; struct pagedep *pagedep; struct inodedep *inodedep; struct newdirblk *newdirblk = 0; struct mkdir *mkdir1, *mkdir2; + struct jaddref *jaddref; struct mount *mp; + int isindir; /* * Whiteouts have no dependencies. @@ -3013,6 +6450,8 @@ softdep_setup_directory_add(bp, dp, diroffset, newinum, newdirbp, isnewblk) bdwrite(newdirbp); return (0); } + jaddref = NULL; + mkdir1 = mkdir2 = NULL; mp = UFSTOVFS(dp->i_ump); fs = dp->i_fs; lbn = lblkno(fs, diroffset); @@ -3023,111 +6462,123 @@ softdep_setup_directory_add(bp, dp, diroffset, newinum, newdirbp, isnewblk) dap->da_offset = offset; dap->da_newinum = newinum; dap->da_state = ATTACHED; - if (isnewblk && lbn < NDADDR && fragoff(fs, diroffset) == 0) { + LIST_INIT(&dap->da_jwork); + isindir = bp->b_lblkno >= NDADDR; + if (isnewblk && + (isindir ? 
blkoff(fs, diroffset) : fragoff(fs, diroffset)) == 0) { newdirblk = malloc(sizeof(struct newdirblk), M_NEWDIRBLK, M_SOFTDEP_FLAGS); workitem_alloc(&newdirblk->db_list, D_NEWDIRBLK, mp); + LIST_INIT(&newdirblk->db_mkdir); } + /* + * If we're creating a new directory setup the dependencies and set + * the dap state to wait for them. Otherwise it's COMPLETE and + * we can move on. + */ if (newdirbp == NULL) { dap->da_state |= DEPCOMPLETE; ACQUIRE_LOCK(&lk); } else { dap->da_state |= MKDIR_BODY | MKDIR_PARENT; - mkdir1 = malloc(sizeof(struct mkdir), M_MKDIR, - M_SOFTDEP_FLAGS); - workitem_alloc(&mkdir1->md_list, D_MKDIR, mp); - mkdir1->md_state = MKDIR_BODY; - mkdir1->md_diradd = dap; - mkdir2 = malloc(sizeof(struct mkdir), M_MKDIR, - M_SOFTDEP_FLAGS); - workitem_alloc(&mkdir2->md_list, D_MKDIR, mp); - mkdir2->md_state = MKDIR_PARENT; - mkdir2->md_diradd = dap; - /* - * Dependency on "." and ".." being written to disk. - */ - mkdir1->md_buf = newdirbp; - ACQUIRE_LOCK(&lk); - LIST_INSERT_HEAD(&mkdirlisthd, mkdir1, md_mkdirs); - WORKLIST_INSERT(&newdirbp->b_dep, &mkdir1->md_list); - FREE_LOCK(&lk); - bdwrite(newdirbp); - /* - * Dependency on link count increase for parent directory - */ - ACQUIRE_LOCK(&lk); - if (inodedep_lookup(mp, dp->i_number, 0, &inodedep) == 0 - || (inodedep->id_state & ALLCOMPLETE) == ALLCOMPLETE) { - dap->da_state &= ~MKDIR_PARENT; - WORKITEM_FREE(mkdir2, D_MKDIR); - } else { - LIST_INSERT_HEAD(&mkdirlisthd, mkdir2, md_mkdirs); - WORKLIST_INSERT(&inodedep->id_bufwait,&mkdir2->md_list); - } + mkdir1 = setup_newdir(dap, newinum, dp->i_number, newdirbp, + &mkdir2); } /* * Link into parent directory pagedep to await its being written. 
*/ - if (pagedep_lookup(dp, lbn, DEPALLOC, &pagedep) == 0) + if (pagedep_lookup(mp, dp->i_number, lbn, DEPALLOC, &pagedep) == 0) WORKLIST_INSERT(&bp->b_dep, &pagedep->pd_list); +#ifdef DEBUG + if (diradd_lookup(pagedep, offset) != NULL) + panic("softdep_setup_directory_add: %p already at off %d\n", + diradd_lookup(pagedep, offset), offset); +#endif dap->da_pagedep = pagedep; LIST_INSERT_HEAD(&pagedep->pd_diraddhd[DIRADDHASH(offset)], dap, da_pdlist); + inodedep_lookup(mp, newinum, DEPALLOC, &inodedep); /* - * Link into its inodedep. Put it on the id_bufwait list if the inode - * is not yet written. If it is written, do the post-inode write - * processing to put it on the id_pendinghd list. + * If we're journaling, link the diradd into the jaddref so it + * may be completed after the journal entry is written. Otherwise, + * link the diradd into its inodedep. If the inode is not yet + * written place it on the bufwait list, otherwise do the post-inode + * write processing to put it on the id_pendinghd list. */ - (void) inodedep_lookup(mp, newinum, DEPALLOC, &inodedep); - if ((inodedep->id_state & ALLCOMPLETE) == ALLCOMPLETE) + if (mp->mnt_kern_flag & MNTK_SUJ) { + jaddref = (struct jaddref *)TAILQ_LAST(&inodedep->id_inoreflst, + inoreflst); + KASSERT(jaddref != NULL && jaddref->ja_parent == dp->i_number, + ("softdep_setup_directory_add: bad jaddref %p", jaddref)); + jaddref->ja_diroff = diroffset; + jaddref->ja_diradd = dap; + add_to_journal(&jaddref->ja_list); + } else if ((inodedep->id_state & ALLCOMPLETE) == ALLCOMPLETE) diradd_inode_written(dap, inodedep); else WORKLIST_INSERT(&inodedep->id_bufwait, &dap->da_list); - if (isnewblk) { + /* + * Add the journal entries for . and .. links now that the primary + * link is written. 
+ */ + if (mkdir1 != NULL && mp->mnt_kern_flag & MNTK_SUJ) { + jaddref = (struct jaddref *)TAILQ_PREV(&jaddref->ja_ref, + inoreflst, if_deps); + KASSERT(jaddref != NULL && + jaddref->ja_ino == jaddref->ja_parent && + (jaddref->ja_state & MKDIR_BODY), + ("softdep_setup_directory_add: bad dot jaddref %p", + jaddref)); + mkdir1->md_jaddref = jaddref; + jaddref->ja_mkdir = mkdir1; /* - * Directories growing into indirect blocks are rare - * enough and the frequency of new block allocation - * in those cases even more rare, that we choose not - * to bother tracking them. Rather we simply force the - * new directory entry to disk. + * It is important that the dotdot journal entry + * is added prior to the dot entry since dot writes + * both the dot and dotdot links. These both must + * be added after the primary link for the journal + * to remain consistent. */ - if (lbn >= NDADDR) { - FREE_LOCK(&lk); - /* - * We only have a new allocation when at the - * beginning of a new block, not when we are - * expanding into an existing block. - */ - if (blkoff(fs, diroffset) == 0) - return (1); - return (0); - } + add_to_journal(&mkdir2->md_jaddref->ja_list); + add_to_journal(&jaddref->ja_list); + } + /* + * If we are adding a new directory remember this diradd so that if + * we rename it we can keep the dot and dotdot dependencies. If + * we are adding a new name for an inode that has a mkdiradd we + * must be in rename and we have to move the dot and dotdot + * dependencies to this new name. The old name is being orphaned + * soon. + */ + if (mkdir1 != NULL) { + if (inodedep->id_mkdiradd != NULL) + panic("softdep_setup_directory_add: Existing mkdir"); + inodedep->id_mkdiradd = dap; + } else if (inodedep->id_mkdiradd) + merge_diradd(inodedep, dap); + if (newdirblk) { /* - * We only have a new allocation when at the beginning - * of a new fragment, not when we are expanding into an - * existing fragment. Also, there is nothing to do if we - * are already tracking this block. 
+ * There is nothing to do if we are already tracking + * this block. */ - if (fragoff(fs, diroffset) != 0) { - FREE_LOCK(&lk); - return (0); - } if ((pagedep->pd_state & NEWBLOCK) != 0) { WORKITEM_FREE(newdirblk, D_NEWDIRBLK); FREE_LOCK(&lk); return (0); } - /* - * Find our associated allocdirect and have it track us. - */ - if (inodedep_lookup(mp, dp->i_number, 0, &inodedep) == 0) - panic("softdep_setup_directory_add: lost inodedep"); - adp = TAILQ_LAST(&inodedep->id_newinoupdt, allocdirectlst); - if (adp == NULL || adp->ad_lbn != lbn) + if (newblk_lookup(mp, dbtofsb(fs, bp->b_blkno), 0, &newblk) + == 0) panic("softdep_setup_directory_add: lost entry"); + WORKLIST_INSERT(&newblk->nb_newdirblk, &newdirblk->db_list); pagedep->pd_state |= NEWBLOCK; + pagedep->pd_newdirblk = newdirblk; newdirblk->db_pagedep = pagedep; - WORKLIST_INSERT(&adp->ad_newdirblk, &newdirblk->db_list); + FREE_LOCK(&lk); + /* + * If we extended into an indirect signal direnter to sync. + */ + if (isindir) + return (1); + return (0); } FREE_LOCK(&lk); return (0); @@ -3141,7 +6592,8 @@ softdep_setup_directory_add(bp, dp, diroffset, newinum, newdirbp, isnewblk) * occur while the move is in progress. */ void -softdep_change_directoryentry_offset(dp, base, oldloc, newloc, entrysize) +softdep_change_directoryentry_offset(bp, dp, base, oldloc, newloc, entrysize) + struct buf *bp; /* Buffer holding directory block. 
*/ struct inode *dp; /* inode for directory */ caddr_t base; /* address of dp->i_offset */ caddr_t oldloc; /* address of old directory location */ @@ -3150,49 +6602,214 @@ softdep_change_directoryentry_offset(dp, base, oldloc, newloc, entrysize) { int offset, oldoffset, newoffset; struct pagedep *pagedep; + struct jmvref *jmvref; struct diradd *dap; + struct direct *de; + struct mount *mp; ufs_lbn_t lbn; + int flags; - ACQUIRE_LOCK(&lk); + mp = UFSTOVFS(dp->i_ump); + de = (struct direct *)oldloc; + jmvref = NULL; + flags = 0; + /* + * Moves are always journaled as it would be too complex to + * determine if any affected adds or removes are present in the + * journal. + */ + if (mp->mnt_kern_flag & MNTK_SUJ) { + flags = DEPALLOC; + jmvref = newjmvref(dp, de->d_ino, + dp->i_offset + (oldloc - base), + dp->i_offset + (newloc - base)); + } lbn = lblkno(dp->i_fs, dp->i_offset); offset = blkoff(dp->i_fs, dp->i_offset); - if (pagedep_lookup(dp, lbn, 0, &pagedep) == 0) - goto done; oldoffset = offset + (oldloc - base); newoffset = offset + (newloc - base); - - LIST_FOREACH(dap, &pagedep->pd_diraddhd[DIRADDHASH(oldoffset)], da_pdlist) { - if (dap->da_offset != oldoffset) - continue; - dap->da_offset = newoffset; - if (DIRADDHASH(newoffset) == DIRADDHASH(oldoffset)) - break; - LIST_REMOVE(dap, da_pdlist); - LIST_INSERT_HEAD(&pagedep->pd_diraddhd[DIRADDHASH(newoffset)], - dap, da_pdlist); - break; + ACQUIRE_LOCK(&lk); + if (pagedep_lookup(mp, dp->i_number, lbn, flags, &pagedep) == 0) { + if (pagedep) + WORKLIST_INSERT(&bp->b_dep, &pagedep->pd_list); + goto done; } - if (dap == NULL) { - - LIST_FOREACH(dap, &pagedep->pd_pendinghd, da_pdlist) { - if (dap->da_offset == oldoffset) { - dap->da_offset = newoffset; - break; - } + dap = diradd_lookup(pagedep, oldoffset); + if (dap) { + dap->da_offset = newoffset; + newoffset = DIRADDHASH(newoffset); + oldoffset = DIRADDHASH(oldoffset); + if ((dap->da_state & ALLCOMPLETE) != ALLCOMPLETE && + newoffset != oldoffset) { + 
LIST_REMOVE(dap, da_pdlist); + LIST_INSERT_HEAD(&pagedep->pd_diraddhd[newoffset], + dap, da_pdlist); } } done: + if (jmvref) { + jmvref->jm_pagedep = pagedep; + LIST_INSERT_HEAD(&pagedep->pd_jmvrefhd, jmvref, jm_deps); + add_to_journal(&jmvref->jm_list); + } bcopy(oldloc, newloc, entrysize); FREE_LOCK(&lk); } +/* + * Move the mkdir dependencies and journal work from one diradd to another + * when renaming a directory. The new name must depend on the mkdir deps + * completing as the old name did. Directories can only have one valid link + * at a time so one must be canonical. + */ +static void +merge_diradd(inodedep, newdap) + struct inodedep *inodedep; + struct diradd *newdap; +{ + struct diradd *olddap; + struct mkdir *mkdir, *nextmd; + short state; + + olddap = inodedep->id_mkdiradd; + inodedep->id_mkdiradd = newdap; + if ((olddap->da_state & (MKDIR_PARENT | MKDIR_BODY)) != 0) { + newdap->da_state &= ~DEPCOMPLETE; + for (mkdir = LIST_FIRST(&mkdirlisthd); mkdir; mkdir = nextmd) { + nextmd = LIST_NEXT(mkdir, md_mkdirs); + if (mkdir->md_diradd != olddap) + continue; + mkdir->md_diradd = newdap; + state = mkdir->md_state & (MKDIR_PARENT | MKDIR_BODY); + newdap->da_state |= state; + olddap->da_state &= ~state; + if ((olddap->da_state & + (MKDIR_PARENT | MKDIR_BODY)) == 0) + break; + } + if ((olddap->da_state & (MKDIR_PARENT | MKDIR_BODY)) != 0) + panic("merge_diradd: unfound ref"); + } + /* + * Any mkdir related journal items are not safe to be freed until + * the new name is stable. + */ + jwork_move(&newdap->da_jwork, &olddap->da_jwork); + olddap->da_state |= DEPCOMPLETE; + complete_diradd(olddap); +} + +/* + * Move the diradd to the pending list when all diradd dependencies are + * complete. 
+ */ +static void +complete_diradd(dap) + struct diradd *dap; +{ + struct pagedep *pagedep; + + if ((dap->da_state & ALLCOMPLETE) == ALLCOMPLETE) { + if (dap->da_state & DIRCHG) + pagedep = dap->da_previous->dm_pagedep; + else + pagedep = dap->da_pagedep; + LIST_REMOVE(dap, da_pdlist); + LIST_INSERT_HEAD(&pagedep->pd_pendinghd, dap, da_pdlist); + } +} + +/* + * Cancel a diradd when a dirrem overlaps with it. We must cancel the journal + * add entries and conditionally journal the remove. + */ +static void +cancel_diradd(dap, dirrem, jremref, dotremref, dotdotremref) + struct diradd *dap; + struct dirrem *dirrem; + struct jremref *jremref; + struct jremref *dotremref; + struct jremref *dotdotremref; +{ + struct inodedep *inodedep; + struct jaddref *jaddref; + struct inoref *inoref; + struct mkdir *mkdir; + + /* + * If no remove references were allocated we're on a non-journaled + * filesystem and can skip the cancel step. + */ + if (jremref == NULL) { + free_diradd(dap, NULL); + return; + } + /* + * Cancel the primary name and free it if it does not require + * journaling. + */ + if (inodedep_lookup(dap->da_list.wk_mp, dap->da_newinum, + 0, &inodedep) != 0) { + /* Abort the addref that references this diradd. */ + TAILQ_FOREACH(inoref, &inodedep->id_inoreflst, if_deps) { + if (inoref->if_list.wk_type != D_JADDREF) + continue; + jaddref = (struct jaddref *)inoref; + if (jaddref->ja_diradd != dap) + continue; + if (cancel_jaddref(jaddref, inodedep, + &dirrem->dm_jwork) == 0) { + free_jremref(jremref); + jremref = NULL; + } + break; + } + } + /* + * Cancel subordinate names and free them if they do not require + * journaling.
+ */ + if ((dap->da_state & (MKDIR_PARENT | MKDIR_BODY)) != 0) { + LIST_FOREACH(mkdir, &mkdirlisthd, md_mkdirs) { + if (mkdir->md_diradd != dap) + continue; + if ((jaddref = mkdir->md_jaddref) == NULL) + continue; + mkdir->md_jaddref = NULL; + if (mkdir->md_state & MKDIR_PARENT) { + if (cancel_jaddref(jaddref, NULL, + &dirrem->dm_jwork) == 0) { + free_jremref(dotdotremref); + dotdotremref = NULL; + } + } else { + if (cancel_jaddref(jaddref, inodedep, + &dirrem->dm_jwork) == 0) { + free_jremref(dotremref); + dotremref = NULL; + } + } + } + } + + if (jremref) + journal_jremref(dirrem, jremref, inodedep); + if (dotremref) + journal_jremref(dirrem, dotremref, inodedep); + if (dotdotremref) + journal_jremref(dirrem, dotdotremref, NULL); + jwork_move(&dirrem->dm_jwork, &dap->da_jwork); + free_diradd(dap, &dirrem->dm_jwork); +} + /* * Free a diradd dependency structure. This routine must be called * with splbio interrupts blocked. */ static void -free_diradd(dap) +free_diradd(dap, wkhd) struct diradd *dap; + struct workhead *wkhd; { struct dirrem *dirrem; struct pagedep *pagedep; @@ -3200,32 +6817,48 @@ free_diradd(dap) struct mkdir *mkdir, *nextmd; mtx_assert(&lk, MA_OWNED); - WORKLIST_REMOVE(&dap->da_list); LIST_REMOVE(dap, da_pdlist); + if (dap->da_state & ONWORKLIST) + WORKLIST_REMOVE(&dap->da_list); if ((dap->da_state & DIRCHG) == 0) { pagedep = dap->da_pagedep; } else { dirrem = dap->da_previous; pagedep = dirrem->dm_pagedep; dirrem->dm_dirinum = pagedep->pd_ino; - add_to_worklist(&dirrem->dm_list); + dirrem->dm_state |= COMPLETE; + if (LIST_EMPTY(&dirrem->dm_jremrefhd)) + add_to_worklist(&dirrem->dm_list, 0); } if (inodedep_lookup(pagedep->pd_list.wk_mp, dap->da_newinum, 0, &inodedep) != 0) - (void) free_inodedep(inodedep); + if (inodedep->id_mkdiradd == dap) + inodedep->id_mkdiradd = NULL; if ((dap->da_state & (MKDIR_PARENT | MKDIR_BODY)) != 0) { for (mkdir = LIST_FIRST(&mkdirlisthd); mkdir; mkdir = nextmd) { nextmd = LIST_NEXT(mkdir, md_mkdirs); if 
(mkdir->md_diradd != dap) continue; - dap->da_state &= ~mkdir->md_state; - WORKLIST_REMOVE(&mkdir->md_list); + dap->da_state &= + ~(mkdir->md_state & (MKDIR_PARENT | MKDIR_BODY)); LIST_REMOVE(mkdir, md_mkdirs); + if (mkdir->md_state & ONWORKLIST) + WORKLIST_REMOVE(&mkdir->md_list); + if (mkdir->md_jaddref != NULL) + panic("free_diradd: Unexpected jaddref"); WORKITEM_FREE(mkdir, D_MKDIR); + if ((dap->da_state & (MKDIR_PARENT | MKDIR_BODY)) == 0) + break; } if ((dap->da_state & (MKDIR_PARENT | MKDIR_BODY)) != 0) panic("free_diradd: unfound ref"); } + if (inodedep) + free_inodedep(inodedep); + /* + * Free any journal segments waiting for the directory write. + */ + handle_jwork(&dap->da_jwork); WORKITEM_FREE(dap, D_DIRADD); } @@ -3254,11 +6887,24 @@ softdep_setup_remove(bp, dp, ip, isrmdir) int isrmdir; /* indicates if doing RMDIR */ { struct dirrem *dirrem, *prevdirrem; + struct inodedep *inodedep; + int direct; /* - * Allocate a new dirrem if appropriate and ACQUIRE_LOCK. + * Allocate a new dirrem if appropriate and ACQUIRE_LOCK. We want + * newdirrem() to setup the full directory remove which requires + * isrmdir > 1. */ - dirrem = newdirrem(bp, dp, ip, isrmdir, &prevdirrem); + dirrem = newdirrem(bp, dp, ip, isrmdir?2:0, &prevdirrem); + /* + * Add the dirrem to the inodedep's pending remove list for quick + * discovery later. 
+ */ + if (inodedep_lookup(UFSTOVFS(ip->i_ump), ip->i_number, 0, + &inodedep) == 0) + panic("softdep_setup_remove: Lost inodedep."); + dirrem->dm_state |= ONDEPLIST; + LIST_INSERT_HEAD(&inodedep->id_dirremhd, dirrem, dm_inonext); /* * If the COMPLETE flag is clear, then there were no active @@ -3280,11 +6926,148 @@ softdep_setup_remove(bp, dp, ip, isrmdir) LIST_INSERT_HEAD(&dirrem->dm_pagedep->pd_dirremhd, prevdirrem, dm_next); dirrem->dm_dirinum = dirrem->dm_pagedep->pd_ino; + direct = LIST_EMPTY(&dirrem->dm_jremrefhd); FREE_LOCK(&lk); - handle_workitem_remove(dirrem, NULL); + if (direct) + handle_workitem_remove(dirrem, NULL); } } +/* + * Check for an entry matching 'offset' on both the pd_diraddhd list and the + * pd_pendinghd list of a pagedep. + */ +static struct diradd * +diradd_lookup(pagedep, offset) + struct pagedep *pagedep; + int offset; +{ + struct diradd *dap; + + LIST_FOREACH(dap, &pagedep->pd_diraddhd[DIRADDHASH(offset)], da_pdlist) + if (dap->da_offset == offset) + return (dap); + LIST_FOREACH(dap, &pagedep->pd_pendinghd, da_pdlist) + if (dap->da_offset == offset) + return (dap); + return (NULL); +} + +/* + * Search for a .. diradd dependency in a directory that is being removed. + * If the directory was renamed to a new parent we have a diradd rather + * than a mkdir for the .. entry. We need to cancel it now before + * it is found in truncate(). + */ +static struct jremref * +cancel_diradd_dotdot(ip, dirrem, jremref) + struct inode *ip; + struct dirrem *dirrem; + struct jremref *jremref; +{ + struct pagedep *pagedep; + struct diradd *dap; + struct worklist *wk; + + if (pagedep_lookup(UFSTOVFS(ip->i_ump), ip->i_number, 0, 0, + &pagedep) == 0) + return (jremref); + dap = diradd_lookup(pagedep, DOTDOT_OFFSET); + if (dap == NULL) + return (jremref); + cancel_diradd(dap, dirrem, jremref, NULL, NULL); + /* + * Mark any journal work as belonging to the parent so it is freed + * with the .. reference.
+ */ + LIST_FOREACH(wk, &dirrem->dm_jwork, wk_list) + wk->wk_state |= MKDIR_PARENT; + return (NULL); +} + +/* + * Cancel the MKDIR_PARENT mkdir component of a diradd when we're going to + * replace it with a dirrem/diradd pair as a result of re-parenting a + * directory. This ensures that we don't simultaneously have a mkdir and + * a diradd for the same .. entry. + */ +static struct jremref * +cancel_mkdir_dotdot(ip, dirrem, jremref) + struct inode *ip; + struct dirrem *dirrem; + struct jremref *jremref; +{ + struct inodedep *inodedep; + struct jaddref *jaddref; + struct mkdir *mkdir; + struct diradd *dap; + + if (inodedep_lookup(UFSTOVFS(ip->i_ump), ip->i_number, 0, + &inodedep) == 0) + panic("cancel_mkdir_dotdot: Lost inodedep"); + dap = inodedep->id_mkdiradd; + if (dap == NULL || (dap->da_state & MKDIR_PARENT) == 0) + return (jremref); + for (mkdir = LIST_FIRST(&mkdirlisthd); mkdir; + mkdir = LIST_NEXT(mkdir, md_mkdirs)) + if (mkdir->md_diradd == dap && mkdir->md_state & MKDIR_PARENT) + break; + if (mkdir == NULL) + panic("cancel_mkdir_dotdot: Unable to find mkdir\n"); + if ((jaddref = mkdir->md_jaddref) != NULL) { + mkdir->md_jaddref = NULL; + jaddref->ja_state &= ~MKDIR_PARENT; + if (inodedep_lookup(UFSTOVFS(ip->i_ump), jaddref->ja_ino, 0, + &inodedep) == 0) + panic("cancel_mkdir_dotdot: Lost parent inodedep"); + if (cancel_jaddref(jaddref, inodedep, &dirrem->dm_jwork)) { + journal_jremref(dirrem, jremref, inodedep); + jremref = NULL; + } + } + if (mkdir->md_state & ONWORKLIST) + WORKLIST_REMOVE(&mkdir->md_list); + mkdir->md_state |= ALLCOMPLETE; + complete_mkdir(mkdir); + return (jremref); +} + +static void +journal_jremref(dirrem, jremref, inodedep) + struct dirrem *dirrem; + struct jremref *jremref; + struct inodedep *inodedep; +{ + + if (inodedep == NULL) + if (inodedep_lookup(jremref->jr_list.wk_mp, + jremref->jr_ref.if_ino, 0, &inodedep) == 0) + panic("journal_jremref: Lost inodedep"); + LIST_INSERT_HEAD(&dirrem->dm_jremrefhd, jremref, jr_deps); + 
TAILQ_INSERT_TAIL(&inodedep->id_inoreflst, &jremref->jr_ref, if_deps); + add_to_journal(&jremref->jr_list); +} + +static void +dirrem_journal(dirrem, jremref, dotremref, dotdotremref) + struct dirrem *dirrem; + struct jremref *jremref; + struct jremref *dotremref; + struct jremref *dotdotremref; +{ + struct inodedep *inodedep; + + + if (inodedep_lookup(jremref->jr_list.wk_mp, jremref->jr_ref.if_ino, 0, + &inodedep) == 0) + panic("dirrem_journal: Lost inodedep"); + journal_jremref(dirrem, jremref, inodedep); + if (dotremref) + journal_jremref(dirrem, dotremref, inodedep); + if (dotdotremref) + journal_jremref(dirrem, dotdotremref, NULL); +} + /* * Allocate a new dirrem if appropriate and return it along with * its associated pagedep. Called without a lock, returns with lock. @@ -3303,12 +7086,17 @@ newdirrem(bp, dp, ip, isrmdir, prevdirremp) struct diradd *dap; struct dirrem *dirrem; struct pagedep *pagedep; + struct jremref *jremref; + struct jremref *dotremref; + struct jremref *dotdotremref; + struct vnode *dvp; /* * Whiteouts have no deletion dependencies. */ if (ip == NULL) panic("newdirrem: whiteout"); + dvp = ITOV(dp); /* * If we are over our limit, try to improve the situation. * Limiting the number of dirrem structures will also limit @@ -3321,34 +7109,75 @@ newdirrem(bp, dp, ip, isrmdir, prevdirremp) FREE_LOCK(&lk); dirrem = malloc(sizeof(struct dirrem), M_DIRREM, M_SOFTDEP_FLAGS|M_ZERO); - workitem_alloc(&dirrem->dm_list, D_DIRREM, ITOV(dp)->v_mount); + workitem_alloc(&dirrem->dm_list, D_DIRREM, dvp->v_mount); + LIST_INIT(&dirrem->dm_jremrefhd); + LIST_INIT(&dirrem->dm_jwork); dirrem->dm_state = isrmdir ? RMDIR : 0; dirrem->dm_oldinum = ip->i_number; *prevdirremp = NULL; - + /* + * Allocate remove reference structures to track journal write + * dependencies. We will always have one for the link and + * when doing directories we will always have one more for dot. + * When renaming a directory we skip the dotdot link change so + * this is not needed. 
+ */ + jremref = dotremref = dotdotremref = NULL; + if (DOINGSUJ(dvp)) { + if (isrmdir) { + jremref = newjremref(dirrem, dp, ip, dp->i_offset, + ip->i_effnlink + 2); + dotremref = newjremref(dirrem, ip, ip, DOT_OFFSET, + ip->i_effnlink + 1); + } else + jremref = newjremref(dirrem, dp, ip, dp->i_offset, + ip->i_effnlink + 1); + if (isrmdir > 1) { + dotdotremref = newjremref(dirrem, ip, dp, DOTDOT_OFFSET, + dp->i_effnlink + 1); + dotdotremref->jr_state |= MKDIR_PARENT; + } + } ACQUIRE_LOCK(&lk); lbn = lblkno(dp->i_fs, dp->i_offset); offset = blkoff(dp->i_fs, dp->i_offset); - if (pagedep_lookup(dp, lbn, DEPALLOC, &pagedep) == 0) + if (pagedep_lookup(UFSTOVFS(dp->i_ump), dp->i_number, lbn, DEPALLOC, + &pagedep) == 0) WORKLIST_INSERT(&bp->b_dep, &pagedep->pd_list); dirrem->dm_pagedep = pagedep; + /* + * If we're renaming a .. link to a new directory, cancel any + * existing MKDIR_PARENT mkdir. If it has already been canceled + * the jremref is preserved for any potential diradd in this + * location. This can not coincide with a rmdir. + */ + if (dp->i_offset == DOTDOT_OFFSET) { + if (isrmdir) + panic("newdirrem: .. directory change during remove?"); + jremref = cancel_mkdir_dotdot(dp, dirrem, jremref); + } + /* + * If we're removing a directory search for the .. dependency now and + * cancel it. Any pending journal work will be added to the dirrem + * to be completed when the workitem remove completes. + */ + if (isrmdir > 1) + dotdotremref = cancel_diradd_dotdot(ip, dirrem, dotdotremref); /* * Check for a diradd dependency for the same directory entry. * If present, then both dependencies become obsolete and can - * be de-allocated. Check for an entry on both the pd_dirraddhd - * list and the pd_pendinghd list. + * be de-allocated. 
*/ - - LIST_FOREACH(dap, &pagedep->pd_diraddhd[DIRADDHASH(offset)], da_pdlist) - if (dap->da_offset == offset) - break; + dap = diradd_lookup(pagedep, offset); if (dap == NULL) { - - LIST_FOREACH(dap, &pagedep->pd_pendinghd, da_pdlist) - if (dap->da_offset == offset) - break; - if (dap == NULL) - return (dirrem); + /* + * Link the jremref structures into the dirrem so they are + * written prior to the pagedep. + */ + if (jremref) + dirrem_journal(dirrem, jremref, dotremref, + dotdotremref); + return (dirrem); } /* * Must be ATTACHED at this point. @@ -3373,7 +7202,17 @@ newdirrem(bp, dp, ip, isrmdir, prevdirremp) * Mark it COMPLETE so we can delete its inode immediately. */ dirrem->dm_state |= COMPLETE; - free_diradd(dap); + cancel_diradd(dap, dirrem, jremref, dotremref, dotdotremref); +#ifdef SUJ_DEBUG + if (isrmdir == 0) { + struct worklist *wk; + + LIST_FOREACH(wk, &dirrem->dm_jwork, wk_list) + if (wk->wk_state & (MKDIR_BODY | MKDIR_PARENT)) + panic("bad wk %p (0x%X)\n", wk, wk->wk_state); + } +#endif + return (dirrem); } @@ -3407,6 +7246,7 @@ softdep_setup_directory_change(bp, dp, ip, newinum, isrmdir) struct dirrem *dirrem, *prevdirrem; struct pagedep *pagedep; struct inodedep *inodedep; + struct jaddref *jaddref; struct mount *mp; offset = blkoff(dp->i_fs, dp->i_offset); @@ -3422,6 +7262,7 @@ softdep_setup_directory_change(bp, dp, ip, newinum, isrmdir) dap->da_state = DIRCHG | ATTACHED | DEPCOMPLETE; dap->da_offset = offset; dap->da_newinum = newinum; + LIST_INIT(&dap->da_jwork); } /* @@ -3454,11 +7295,21 @@ softdep_setup_directory_change(bp, dp, ip, newinum, isrmdir) dm_next); } else { dirrem->dm_dirinum = pagedep->pd_ino; - add_to_worklist(&dirrem->dm_list); + if (LIST_EMPTY(&dirrem->dm_jremrefhd)) + add_to_worklist(&dirrem->dm_list, 0); } FREE_LOCK(&lk); return; } + /* + * Add the dirrem to the inodedep's pending remove list for quick + * discovery later. A valid nlinkdelta ensures that this lookup + * will not fail. 
+ */ + if (inodedep_lookup(mp, ip->i_number, 0, &inodedep) == 0) + panic("softdep_setup_directory_change: Lost inodedep."); + dirrem->dm_state |= ONDEPLIST; + LIST_INSERT_HEAD(&inodedep->id_dirremhd, dirrem, dm_inonext); /* * If the COMPLETE flag is clear, then there were no active @@ -3483,15 +7334,29 @@ softdep_setup_directory_change(bp, dp, ip, newinum, isrmdir) dap->da_pagedep = pagedep; } dirrem->dm_dirinum = pagedep->pd_ino; - add_to_worklist(&dirrem->dm_list); + if (LIST_EMPTY(&dirrem->dm_jremrefhd)) + add_to_worklist(&dirrem->dm_list, 0); } /* - * Link into its inodedep. Put it on the id_bufwait list if the inode + * Lookup the jaddref for this journal entry. We must finish + * initializing it and make the diradd write dependent on it. + * If we're not journaling, put it on the id_bufwait list if the inode * is not yet written. If it is written, do the post-inode write * processing to put it on the id_pendinghd list. */ - if (inodedep_lookup(mp, newinum, DEPALLOC, &inodedep) == 0 || - (inodedep->id_state & ALLCOMPLETE) == ALLCOMPLETE) { + inodedep_lookup(mp, newinum, DEPALLOC, &inodedep); + if (mp->mnt_kern_flag & MNTK_SUJ) { + jaddref = (struct jaddref *)TAILQ_LAST(&inodedep->id_inoreflst, + inoreflst); + KASSERT(jaddref != NULL && jaddref->ja_parent == dp->i_number, + ("softdep_setup_directory_change: bad jaddref %p", + jaddref)); + jaddref->ja_diroff = dp->i_offset; + jaddref->ja_diradd = dap; + LIST_INSERT_HEAD(&pagedep->pd_diraddhd[DIRADDHASH(offset)], + dap, da_pdlist); + add_to_journal(&jaddref->ja_list); + } else if ((inodedep->id_state & ALLCOMPLETE) == ALLCOMPLETE) { dap->da_state |= COMPLETE; LIST_INSERT_HEAD(&pagedep->pd_pendinghd, dap, da_pdlist); WORKLIST_INSERT(&inodedep->id_pendinghd, &dap->da_list); @@ -3500,6 +7365,13 @@ softdep_setup_directory_change(bp, dp, ip, newinum, isrmdir) dap, da_pdlist); WORKLIST_INSERT(&inodedep->id_bufwait, &dap->da_list); } + /* + * If we're making a new name for a directory that has not been + * committed then we
need to move the dot and dotdot references to + * this new name. + */ + if (inodedep->id_mkdiradd && dp->i_offset != DOTDOT_OFFSET) + merge_diradd(inodedep, dap); FREE_LOCK(&lk); } @@ -3516,8 +7388,7 @@ softdep_change_linkcnt(ip) struct inodedep *inodedep; ACQUIRE_LOCK(&lk); - (void) inodedep_lookup(UFSTOVFS(ip->i_ump), ip->i_number, - DEPALLOC, &inodedep); + inodedep_lookup(UFSTOVFS(ip->i_ump), ip->i_number, DEPALLOC, &inodedep); if (ip->i_nlink < ip->i_effnlink) panic("softdep_change_linkcnt: bad delta"); inodedep->id_nlinkdelta = ip->i_nlink - ip->i_effnlink; @@ -3573,6 +7444,305 @@ softdep_releasefile(ip) ip->i_flag |= IN_SPACECOUNTED; } +/* + * Attach a sbdep dependency to the superblock buf so that we can keep + * track of the head of the linked list of referenced but unlinked inodes. + */ +void +softdep_setup_sbupdate(ump, fs, bp) + struct ufsmount *ump; + struct fs *fs; + struct buf *bp; +{ + struct sbdep *sbdep; + struct worklist *wk; + + if ((fs->fs_flags & FS_SUJ) == 0) + return; + LIST_FOREACH(wk, &bp->b_dep, wk_list) + if (wk->wk_type == D_SBDEP) + break; + if (wk != NULL) + return; + sbdep = malloc(sizeof(struct sbdep), M_SBDEP, M_SOFTDEP_FLAGS); + workitem_alloc(&sbdep->sb_list, D_SBDEP, UFSTOVFS(ump)); + sbdep->sb_fs = fs; + sbdep->sb_ump = ump; + ACQUIRE_LOCK(&lk); + WORKLIST_INSERT(&bp->b_dep, &sbdep->sb_list); + FREE_LOCK(&lk); +} + +/* + * Return the first unlinked inodedep which is ready to be the head of the + * list. The inodedep and all those after it must have valid next pointers. 
+ */ +static struct inodedep * +first_unlinked_inodedep(ump) + struct ufsmount *ump; +{ + struct inodedep *inodedep; + struct inodedep *idp; + + for (inodedep = TAILQ_LAST(&ump->softdep_unlinked, inodedeplst); + inodedep; inodedep = idp) { + if ((inodedep->id_state & UNLINKNEXT) == 0) + return (NULL); + idp = TAILQ_PREV(inodedep, inodedeplst, id_unlinked); + if (idp == NULL || (idp->id_state & UNLINKNEXT) == 0) + break; + if ((inodedep->id_state & UNLINKPREV) == 0) + panic("first_unlinked_inodedep: prev != next"); + } + if (inodedep == NULL) + return (NULL); + + return (inodedep); +} + +/* + * Set the sujfree unlinked head pointer prior to writing a superblock. + */ +static void +initiate_write_sbdep(sbdep) + struct sbdep *sbdep; +{ + struct inodedep *inodedep; + struct fs *bpfs; + struct fs *fs; + + bpfs = sbdep->sb_fs; + fs = sbdep->sb_ump->um_fs; + inodedep = first_unlinked_inodedep(sbdep->sb_ump); + if (inodedep) { + fs->fs_sujfree = inodedep->id_ino; + inodedep->id_state |= UNLINKPREV; + } else + fs->fs_sujfree = 0; + bpfs->fs_sujfree = fs->fs_sujfree; +} + +/* + * After a superblock is written determine whether it must be written again + * due to a changing unlinked list head. + */ +static int +handle_written_sbdep(sbdep, bp) + struct sbdep *sbdep; + struct buf *bp; +{ + struct inodedep *inodedep; + struct mount *mp; + struct fs *fs; + + fs = sbdep->sb_fs; + mp = UFSTOVFS(sbdep->sb_ump); + inodedep = first_unlinked_inodedep(sbdep->sb_ump); + if ((inodedep && fs->fs_sujfree != inodedep->id_ino) || + (inodedep == NULL && fs->fs_sujfree != 0)) { + bdirty(bp); + return (1); + } + WORKITEM_FREE(sbdep, D_SBDEP); + if (fs->fs_sujfree == 0) + return (0); + if (inodedep_lookup(mp, fs->fs_sujfree, 0, &inodedep) == 0) + panic("handle_written_sbdep: lost inodedep"); + /* + * Now that we have a record of this inode in stable store allow it + * to be written to free up pending work.
Inodes may see a lot of + * write activity after they are unlinked which we must not hold up. + */ + for (; inodedep != NULL; inodedep = TAILQ_NEXT(inodedep, id_unlinked)) { + if ((inodedep->id_state & UNLINKLINKS) != UNLINKLINKS) + panic("handle_written_sbdep: Bad inodedep %p (0x%X)", + inodedep, inodedep->id_state); + if (inodedep->id_state & UNLINKONLIST) + break; + inodedep->id_state |= DEPCOMPLETE | UNLINKONLIST; + } + + return (0); +} + +/* + * Mark an inodedep as unlinked and insert it into the in-memory unlinked + * list. + */ +static void +unlinked_inodedep(mp, inodedep) + struct mount *mp; + struct inodedep *inodedep; +{ + struct ufsmount *ump; + + if ((mp->mnt_kern_flag & MNTK_SUJ) == 0) + return; + ump = VFSTOUFS(mp); + ump->um_fs->fs_fmod = 1; + inodedep->id_state |= UNLINKED; + TAILQ_INSERT_HEAD(&ump->softdep_unlinked, inodedep, id_unlinked); +} + +/* + * Remove an inodedep from the unlinked inodedep list. This may require + * disk writes if the inode has made it that far. + */ +static void +clear_unlinked_inodedep(inodedep) + struct inodedep *inodedep; +{ + struct ufsmount *ump; + struct inodedep *idp; + struct inodedep *idn; + struct fs *fs; + struct buf *bp; + ino_t ino; + ino_t nino; + ino_t pino; + int error; + + ump = VFSTOUFS(inodedep->id_list.wk_mp); + fs = ump->um_fs; + ino = inodedep->id_ino; + error = 0; + for (;;) { + /* + * If nothing has yet been written simply remove us from + * the in memory list and return. This is the most common + * case where handle_workitem_remove() loses the final + * reference. + */ + if ((inodedep->id_state & UNLINKLINKS) == 0) + break; + /* + * If we have a NEXT pointer and no PREV pointer we can simply + * clear NEXT's PREV and remove ourselves from the list. Be + * careful not to clear PREV if the superblock points at + * next as well.
+ */ + idn = TAILQ_NEXT(inodedep, id_unlinked); + if ((inodedep->id_state & UNLINKLINKS) == UNLINKNEXT) { + if (idn && fs->fs_sujfree != idn->id_ino) + idn->id_state &= ~UNLINKPREV; + break; + } + /* + * Here we have an inodedep which is actually linked into + * the list. We must remove it by forcing a write to the + * link before us, whether it be the superblock or an inode. + * Unfortunately the list may change while we're waiting + * on the buf lock for either resource so we must loop until + * we lock the right one. If both the superblock and an + * inode point to this inode we must clear the inode first + * followed by the superblock. + */ + idp = TAILQ_PREV(inodedep, inodedeplst, id_unlinked); + pino = 0; + if (idp && (idp->id_state & UNLINKNEXT)) + pino = idp->id_ino; + FREE_LOCK(&lk); + if (pino == 0) + bp = getblk(ump->um_devvp, btodb(fs->fs_sblockloc), + (int)fs->fs_sbsize, 0, 0, 0); + else + error = bread(ump->um_devvp, + fsbtodb(fs, ino_to_fsba(fs, pino)), + (int)fs->fs_bsize, NOCRED, &bp); + ACQUIRE_LOCK(&lk); + if (error) + break; + /* If the list has changed restart the loop. */ + idp = TAILQ_PREV(inodedep, inodedeplst, id_unlinked); + nino = 0; + if (idp && (idp->id_state & UNLINKNEXT)) + nino = idp->id_ino; + if (nino != pino || + (inodedep->id_state & UNLINKPREV) != UNLINKPREV) { + FREE_LOCK(&lk); + brelse(bp); + ACQUIRE_LOCK(&lk); + continue; + } + /* + * Remove us from the in memory list. After this we cannot + * access the inodedep. + */ + idn = TAILQ_NEXT(inodedep, id_unlinked); + inodedep->id_state &= ~(UNLINKED | UNLINKLINKS); + TAILQ_REMOVE(&ump->softdep_unlinked, inodedep, id_unlinked); + /* + * Determine the next inode number. + */ + nino = 0; + if (idn) { + /* + * If next isn't on the list we can just clear prev's + * state and schedule it to be fixed later. No need + * to synchronously write if we're not in the real + * list.
+ */ + if ((idn->id_state & UNLINKPREV) == 0 && pino != 0) { + idp->id_state &= ~UNLINKNEXT; + if ((idp->id_state & ONWORKLIST) == 0) + WORKLIST_INSERT(&bp->b_dep, + &idp->id_list); + FREE_LOCK(&lk); + bawrite(bp); + ACQUIRE_LOCK(&lk); + return; + } + nino = idn->id_ino; + } + FREE_LOCK(&lk); + /* + * The predecessor's next pointer is manually updated here + * so that the NEXT flag is never cleared for an element + * that is in the list. + */ + if (pino == 0) { + bcopy((caddr_t)fs, bp->b_data, (u_int)fs->fs_sbsize); + ffs_oldfscompat_write((struct fs *)bp->b_data, ump); + softdep_setup_sbupdate(ump, (struct fs *)bp->b_data, + bp); + } else if (fs->fs_magic == FS_UFS1_MAGIC) + ((struct ufs1_dinode *)bp->b_data + + ino_to_fsbo(fs, pino))->di_freelink = nino; + else + ((struct ufs2_dinode *)bp->b_data + + ino_to_fsbo(fs, pino))->di_freelink = nino; + /* + * If the bwrite fails we have no recourse to recover. The + * filesystem is corrupted already. + */ + bwrite(bp); + ACQUIRE_LOCK(&lk); + /* + * If the superblock pointer still needs to be cleared force + * a write here. + */ + if (fs->fs_sujfree == ino) { + FREE_LOCK(&lk); + bp = getblk(ump->um_devvp, btodb(fs->fs_sblockloc), + (int)fs->fs_sbsize, 0, 0, 0); + bcopy((caddr_t)fs, bp->b_data, (u_int)fs->fs_sbsize); + ffs_oldfscompat_write((struct fs *)bp->b_data, ump); + softdep_setup_sbupdate(ump, (struct fs *)bp->b_data, + bp); + bwrite(bp); + ACQUIRE_LOCK(&lk); + } + if (fs->fs_sujfree != ino) + return; + panic("clear_unlinked_inodedep: Failed to clear free head"); + } + if (inodedep->id_ino == fs->fs_sujfree) + panic("clear_unlinked_inodedep: Freeing head of free list"); + inodedep->id_state &= ~(UNLINKED | UNLINKLINKS); + TAILQ_REMOVE(&ump->softdep_unlinked, inodedep, id_unlinked); + return; +} + /* * This workitem decrements the inode's link count. * If the link count reaches zero, the file is removed. 
@@ -3584,22 +7754,54 @@ handle_workitem_remove(dirrem, xp) { struct thread *td = curthread; struct inodedep *inodedep; + struct workhead dotdotwk; + struct worklist *wk; + struct ufsmount *ump; + struct mount *mp; struct vnode *vp; struct inode *ip; ino_t oldinum; int error; + if (dirrem->dm_state & ONWORKLIST) + panic("handle_workitem_remove: dirrem %p still on worklist", + dirrem); + oldinum = dirrem->dm_oldinum; + mp = dirrem->dm_list.wk_mp; + ump = VFSTOUFS(mp); if ((vp = xp) == NULL && - (error = ffs_vgetf(dirrem->dm_list.wk_mp, - dirrem->dm_oldinum, LK_EXCLUSIVE, &vp, FFSV_FORCEINSMQ)) != 0) { + (error = ffs_vgetf(mp, oldinum, LK_EXCLUSIVE, &vp, + FFSV_FORCEINSMQ)) != 0) { softdep_error("handle_workitem_remove: vget", error); return; } ip = VTOI(vp); ACQUIRE_LOCK(&lk); - if ((inodedep_lookup(dirrem->dm_list.wk_mp, - dirrem->dm_oldinum, 0, &inodedep)) == 0) + if ((inodedep_lookup(mp, oldinum, 0, &inodedep)) == 0) panic("handle_workitem_remove: lost inodedep"); + if (dirrem->dm_state & ONDEPLIST) + LIST_REMOVE(dirrem, dm_inonext); + KASSERT(LIST_EMPTY(&dirrem->dm_jremrefhd), + ("handle_workitem_remove: Journal entries not written.")); + + /* + * Move all dependencies waiting on the remove to complete + * from the dirrem to the inode inowait list to be completed + * after the inode has been updated and written to disk. Any + * marked MKDIR_PARENT are saved to be completed when the .. ref + * is removed. + */ + LIST_INIT(&dotdotwk); + while ((wk = LIST_FIRST(&dirrem->dm_jwork)) != NULL) { + WORKLIST_REMOVE(wk); + if (wk->wk_state & MKDIR_PARENT) { + wk->wk_state &= ~MKDIR_PARENT; + WORKLIST_INSERT(&dotdotwk, wk); + continue; + } + WORKLIST_INSERT(&inodedep->id_inowait, wk); + } + LIST_SWAP(&dirrem->dm_jwork, &dotdotwk, worklist, wk_list); /* * Normal file deletion. 
*/ @@ -3609,12 +7811,16 @@ handle_workitem_remove(dirrem, xp) ip->i_flag |= IN_CHANGE; if (ip->i_nlink < ip->i_effnlink) panic("handle_workitem_remove: bad file delta"); + if (ip->i_nlink == 0) + unlinked_inodedep(mp, inodedep); inodedep->id_nlinkdelta = ip->i_nlink - ip->i_effnlink; num_dirrem -= 1; + KASSERT(LIST_EMPTY(&dirrem->dm_jwork), + ("handle_workitem_remove: worklist not empty. %s", + TYPENAME(LIST_FIRST(&dirrem->dm_jwork)->wk_type))); WORKITEM_FREE(dirrem, D_DIRREM); FREE_LOCK(&lk); - vput(vp); - return; + goto out; } /* * Directory deletion. Decrement reference count for both the @@ -3628,6 +7834,8 @@ handle_workitem_remove(dirrem, xp) ip->i_flag |= IN_CHANGE; if (ip->i_nlink < ip->i_effnlink) panic("handle_workitem_remove: bad dir delta"); + if (ip->i_nlink == 0) + unlinked_inodedep(mp, inodedep); inodedep->id_nlinkdelta = ip->i_nlink - ip->i_effnlink; FREE_LOCK(&lk); if ((error = ffs_truncate(vp, (off_t)0, 0, td->td_ucred, td)) != 0) @@ -3639,36 +7847,47 @@ handle_workitem_remove(dirrem, xp) * directory should not change. Thus we skip the followup dirrem. */ if (dirrem->dm_state & DIRCHG) { + KASSERT(LIST_EMPTY(&dirrem->dm_jwork), + ("handle_workitem_remove: DIRCHG and worklist not empty.")); num_dirrem -= 1; WORKITEM_FREE(dirrem, D_DIRREM); FREE_LOCK(&lk); - vput(vp); - return; + goto out; } - /* - * If the inodedep does not exist, then the zero'ed inode has - * been written to disk. If the allocated inode has never been - * written to disk, then the on-disk inode is zero'ed. In either - * case we can remove the file immediately. - */ - dirrem->dm_state = 0; - oldinum = dirrem->dm_oldinum; + dirrem->dm_state = ONDEPLIST; dirrem->dm_oldinum = dirrem->dm_dirinum; + /* + * Place the dirrem on the parent's dirremhd list.
+ */ + if (inodedep_lookup(mp, dirrem->dm_oldinum, 0, &inodedep) == 0) + panic("handle_workitem_remove: lost dir inodedep"); + LIST_INSERT_HEAD(&inodedep->id_dirremhd, dirrem, dm_inonext); + /* + * If the allocated inode has never been written to disk, then + * the on-disk inode is zero'ed and we can remove the file + * immediately. When journaling if the inode has been marked + * unlinked and not DEPCOMPLETE we know it can never be written. + */ + inodedep_lookup(mp, oldinum, 0, &inodedep); + if (inodedep == NULL || + (inodedep->id_state & (DEPCOMPLETE | UNLINKED)) == UNLINKED || + check_inode_unwritten(inodedep)) { if (xp != NULL) - add_to_worklist(&dirrem->dm_list); + add_to_worklist(&dirrem->dm_list, 0); FREE_LOCK(&lk); - vput(vp); - if (xp == NULL) + if (xp == NULL) { + vput(vp); handle_workitem_remove(dirrem, NULL); + } return; } WORKLIST_INSERT(&inodedep->id_inowait, &dirrem->dm_list); FREE_LOCK(&lk); ip->i_flag |= IN_CHANGE; +out: ffs_update(vp, 0); - vput(vp); + if (xp == NULL) + vput(vp); } /* @@ -3689,6 +7908,7 @@ static void handle_workitem_freefile(freefile) struct freefile *freefile; { + struct workhead wkhd; struct fs *fs; struct inodedep *idp; struct ufsmount *ump; @@ -3701,13 +7921,15 @@ handle_workitem_freefile(freefile) error = inodedep_lookup(UFSTOVFS(ump), freefile->fx_oldinum, 0, &idp); FREE_LOCK(&lk); if (error) - panic("handle_workitem_freefile: inodedep survived"); + panic("handle_workitem_freefile: inodedep %p survived", idp); #endif UFS_LOCK(ump); fs->fs_pendinginodes -= 1; UFS_UNLOCK(ump); + LIST_INIT(&wkhd); + LIST_SWAP(&freefile->fx_jwork, &wkhd, worklist, wk_list); if ((error = ffs_freefile(ump, fs, freefile->fx_devvp, - freefile->fx_oldinum, freefile->fx_mode)) != 0) + freefile->fx_oldinum, freefile->fx_mode, &wkhd)) != 0) softdep_error("handle_workitem_freefile", error); ACQUIRE_LOCK(&lk); WORKITEM_FREE(freefile, D_FREEFILE); @@ -3757,8 +7979,10 @@ softdep_disk_io_initiation(bp) { struct worklist *wk; struct worklist marker; - 
struct indirdep *indirdep; struct inodedep *inodedep; + struct freeblks *freeblks; + struct jfreeblk *jfreeblk; + struct newblk *newblk; /* * We only care about write operations. There should never @@ -3767,6 +7991,10 @@ softdep_disk_io_initiation(bp) if (bp->b_iocmd != BIO_WRITE) panic("softdep_disk_io_initiation: not write"); + if (bp->b_vflags & BV_BKGRDINPROG) + panic("softdep_disk_io_initiation: Writing buffer with " + "background write in progress: %p", bp); + marker.wk_type = D_LAST + 1; /* Not a normal workitem */ PHOLD(curproc); /* Don't swap out kernel stack */ @@ -3792,46 +8020,58 @@ softdep_disk_io_initiation(bp) continue; case D_INDIRDEP: - indirdep = WK_INDIRDEP(wk); - if (indirdep->ir_state & GOINGAWAY) - panic("disk_io_initiation: indirdep gone"); - /* - * If there are no remaining dependencies, this - * will be writing the real pointers, so the - * dependency can be freed. - */ - if (LIST_EMPTY(&indirdep->ir_deplisthd)) { - struct buf *bp; + initiate_write_indirdep(WK_INDIRDEP(wk), bp); + continue; - bp = indirdep->ir_savebp; - bp->b_flags |= B_INVAL | B_NOCACHE; - /* inline expand WORKLIST_REMOVE(wk); */ - wk->wk_state &= ~ONWORKLIST; - LIST_REMOVE(wk, wk_list); - WORKITEM_FREE(indirdep, D_INDIRDEP); - FREE_LOCK(&lk); - brelse(bp); - ACQUIRE_LOCK(&lk); - continue; - } + case D_BMSAFEMAP: + initiate_write_bmsafemap(WK_BMSAFEMAP(wk), bp); + continue; + + case D_JSEG: + WK_JSEG(wk)->js_buf = NULL; + continue; + + case D_FREEBLKS: + freeblks = WK_FREEBLKS(wk); + jfreeblk = LIST_FIRST(&freeblks->fb_jfreeblkhd); /* - * Replace up-to-date version with safe version. + * We have to wait for the jfreeblks to be journaled + * before we can write an inodeblock with updated + * pointers. Be careful to arrange the marker so + * we revisit the jfreeblk if it's not removed by + * the first jwait(). 
*/ - FREE_LOCK(&lk); - indirdep->ir_saveddata = malloc(bp->b_bcount, - M_INDIRDEP, M_SOFTDEP_FLAGS); - ACQUIRE_LOCK(&lk); - indirdep->ir_state &= ~ATTACHED; - indirdep->ir_state |= UNDONE; - bcopy(bp->b_data, indirdep->ir_saveddata, bp->b_bcount); - bcopy(indirdep->ir_savebp->b_data, bp->b_data, - bp->b_bcount); + if (jfreeblk != NULL) { + LIST_REMOVE(&marker, wk_list); + LIST_INSERT_BEFORE(wk, &marker, wk_list); + jwait(&jfreeblk->jf_list); + } + continue; + case D_ALLOCDIRECT: + case D_ALLOCINDIR: + /* + * We have to wait for the jnewblk to be journaled + * before we can write to a block otherwise the + * contents may be confused with an earlier file + * at recovery time. Handle the marker as described + * above. + */ + newblk = WK_NEWBLK(wk); + if (newblk->nb_jnewblk != NULL) { + LIST_REMOVE(&marker, wk_list); + LIST_INSERT_BEFORE(wk, &marker, wk_list); + jwait(&newblk->nb_jnewblk->jn_list); + } + continue; + + case D_SBDEP: + initiate_write_sbdep(WK_SBDEP(wk)); continue; case D_MKDIR: - case D_BMSAFEMAP: - case D_ALLOCDIRECT: - case D_ALLOCINDIR: + case D_FREEWORK: + case D_FREEDEP: + case D_JSEGDEP: continue; default: @@ -3855,6 +8095,9 @@ initiate_write_filepage(pagedep, bp) struct pagedep *pagedep; struct buf *bp; { + struct jremref *jremref; + struct jmvref *jmvref; + struct dirrem *dirrem; struct diradd *dap; struct direct *ep; int i; @@ -3869,6 +8112,22 @@ initiate_write_filepage(pagedep, bp) return; } pagedep->pd_state |= IOSTARTED; + /* + * Wait for all journal remove dependencies to hit the disk. + * We can not allow any potentially conflicting directory adds + * to be visible before removes and rollback is too difficult. + * lk may be dropped and re-acquired, however we hold the buf + * locked so the dependency can not go away. 
+ */ + LIST_FOREACH(dirrem, &pagedep->pd_dirremhd, dm_next) + while ((jremref = LIST_FIRST(&dirrem->dm_jremrefhd)) != NULL) { + stat_jwait_filepage++; + jwait(&jremref->jr_list); + } + while ((jmvref = LIST_FIRST(&pagedep->pd_jmvrefhd)) != NULL) { + stat_jwait_filepage++; + jwait(&jmvref->jm_list); + } for (i = 0; i < DAHASHSZ; i++) { LIST_FOREACH(dap, &pagedep->pd_diraddhd[i], da_pdlist) { ep = (struct direct *) @@ -3905,6 +8164,7 @@ initiate_write_inodeblock_ufs1(inodedep, bp) struct allocdirect *adp, *lastadp; struct ufs1_dinode *dp; struct ufs1_dinode *sip; + struct inoref *inoref; struct fs *fs; ufs_lbn_t i; #ifdef INVARIANTS @@ -3918,6 +8178,17 @@ initiate_write_inodeblock_ufs1(inodedep, bp) fs = inodedep->id_fs; dp = (struct ufs1_dinode *)bp->b_data + ino_to_fsbo(fs, inodedep->id_ino); + + /* + * If we're on the unlinked list but have not yet written our + * next pointer initialize it here. + */ + if ((inodedep->id_state & (UNLINKED | UNLINKNEXT)) == UNLINKED) { + struct inodedep *inon; + + inon = TAILQ_NEXT(inodedep, id_unlinked); + dp->di_freelink = inon ? inon->id_ino : 0; + } /* * If the bitmap is not yet written, then the allocated * inode cannot be written to disk. @@ -3933,6 +8204,7 @@ initiate_write_inodeblock_ufs1(inodedep, bp) *inodedep->id_savedino1 = *dp; bzero((caddr_t)dp, sizeof(struct ufs1_dinode)); dp->di_gen = inodedep->id_savedino1->di_gen; + dp->di_freelink = inodedep->id_savedino1->di_freelink; return; } /* @@ -3940,32 +8212,40 @@ initiate_write_inodeblock_ufs1(inodedep, bp) */ inodedep->id_savedsize = dp->di_size; inodedep->id_savedextsize = 0; - if (TAILQ_EMPTY(&inodedep->id_inoupdt)) + inodedep->id_savednlink = dp->di_nlink; + if (TAILQ_EMPTY(&inodedep->id_inoupdt) && + TAILQ_EMPTY(&inodedep->id_inoreflst)) return; + /* + * Revert the link count to that of the first unwritten journal entry. + */ + inoref = TAILQ_FIRST(&inodedep->id_inoreflst); + if (inoref) + dp->di_nlink = inoref->if_nlink; /* * Set the dependencies to busy. 
*/ for (deplist = 0, adp = TAILQ_FIRST(&inodedep->id_inoupdt); adp; adp = TAILQ_NEXT(adp, ad_next)) { #ifdef INVARIANTS - if (deplist != 0 && prevlbn >= adp->ad_lbn) + if (deplist != 0 && prevlbn >= adp->ad_offset) panic("softdep_write_inodeblock: lbn order"); - prevlbn = adp->ad_lbn; - if (adp->ad_lbn < NDADDR && - dp->di_db[adp->ad_lbn] != adp->ad_newblkno) + prevlbn = adp->ad_offset; + if (adp->ad_offset < NDADDR && + dp->di_db[adp->ad_offset] != adp->ad_newblkno) panic("%s: direct pointer #%jd mismatch %d != %jd", "softdep_write_inodeblock", - (intmax_t)adp->ad_lbn, - dp->di_db[adp->ad_lbn], + (intmax_t)adp->ad_offset, + dp->di_db[adp->ad_offset], (intmax_t)adp->ad_newblkno); - if (adp->ad_lbn >= NDADDR && - dp->di_ib[adp->ad_lbn - NDADDR] != adp->ad_newblkno) + if (adp->ad_offset >= NDADDR && + dp->di_ib[adp->ad_offset - NDADDR] != adp->ad_newblkno) panic("%s: indirect pointer #%jd mismatch %d != %jd", "softdep_write_inodeblock", - (intmax_t)adp->ad_lbn - NDADDR, - dp->di_ib[adp->ad_lbn - NDADDR], + (intmax_t)adp->ad_offset - NDADDR, + dp->di_ib[adp->ad_offset - NDADDR], (intmax_t)adp->ad_newblkno); - deplist |= 1 << adp->ad_lbn; + deplist |= 1 << adp->ad_offset; if ((adp->ad_state & ATTACHED) == 0) panic("softdep_write_inodeblock: Unknown state 0x%x", adp->ad_state); @@ -3981,14 +8261,14 @@ initiate_write_inodeblock_ufs1(inodedep, bp) */ for (lastadp = NULL, adp = TAILQ_FIRST(&inodedep->id_inoupdt); adp; lastadp = adp, adp = TAILQ_NEXT(adp, ad_next)) { - if (adp->ad_lbn >= NDADDR) + if (adp->ad_offset >= NDADDR) break; - dp->di_db[adp->ad_lbn] = adp->ad_oldblkno; + dp->di_db[adp->ad_offset] = adp->ad_oldblkno; /* keep going until hitting a rollback to a frag */ if (adp->ad_oldsize == 0 || adp->ad_oldsize == fs->fs_bsize) continue; - dp->di_size = fs->fs_bsize * adp->ad_lbn + adp->ad_oldsize; - for (i = adp->ad_lbn + 1; i < NDADDR; i++) { + dp->di_size = fs->fs_bsize * adp->ad_offset + adp->ad_oldsize; + for (i = adp->ad_offset + 1; i < NDADDR; i++) { #ifdef 
INVARIANTS if (dp->di_db[i] != 0 && (deplist & (1 << i)) == 0) panic("softdep_write_inodeblock: lost dep1"); @@ -4012,8 +8292,8 @@ initiate_write_inodeblock_ufs1(inodedep, bp) * we already checked for fragments in the loop above. */ if (lastadp != NULL && - dp->di_size <= (lastadp->ad_lbn + 1) * fs->fs_bsize) { - for (i = lastadp->ad_lbn; i >= 0; i--) + dp->di_size <= (lastadp->ad_offset + 1) * fs->fs_bsize) { + for (i = lastadp->ad_offset; i >= 0; i--) if (dp->di_db[i] != 0) break; dp->di_size = (i + 1) * fs->fs_bsize; @@ -4030,7 +8310,7 @@ initiate_write_inodeblock_ufs1(inodedep, bp) * postpone fsck, we are stuck with this argument. */ for (; adp; adp = TAILQ_NEXT(adp, ad_next)) - dp->di_ib[adp->ad_lbn - NDADDR] = 0; + dp->di_ib[adp->ad_offset - NDADDR] = 0; } /* @@ -4051,6 +8331,7 @@ initiate_write_inodeblock_ufs2(inodedep, bp) struct allocdirect *adp, *lastadp; struct ufs2_dinode *dp; struct ufs2_dinode *sip; + struct inoref *inoref; struct fs *fs; ufs_lbn_t i; #ifdef INVARIANTS @@ -4064,6 +8345,29 @@ initiate_write_inodeblock_ufs2(inodedep, bp) fs = inodedep->id_fs; dp = (struct ufs2_dinode *)bp->b_data + ino_to_fsbo(fs, inodedep->id_ino); + + /* + * If we're on the unlinked list but have not yet written our + * next pointer initialize it here. + */ + if ((inodedep->id_state & (UNLINKED | UNLINKNEXT)) == UNLINKED) { + struct inodedep *inon; + + inon = TAILQ_NEXT(inodedep, id_unlinked); + dp->di_freelink = inon ? inon->id_ino : 0; + } + if ((inodedep->id_state & (UNLINKED | UNLINKNEXT)) == + (UNLINKED | UNLINKNEXT)) { + struct inodedep *inon; + ino_t freelink; + + inon = TAILQ_NEXT(inodedep, id_unlinked); + freelink = inon ? inon->id_ino : 0; + if (freelink != dp->di_freelink) + panic("ino %p(0x%X) %d, %d != %d", + inodedep, inodedep->id_state, inodedep->id_ino, + freelink, dp->di_freelink); + } /* * If the bitmap is not yet written, then the allocated * inode cannot be written to disk. 
@@ -4079,6 +8383,7 @@ initiate_write_inodeblock_ufs2(inodedep, bp) *inodedep->id_savedino2 = *dp; bzero((caddr_t)dp, sizeof(struct ufs2_dinode)); dp->di_gen = inodedep->id_savedino2->di_gen; + dp->di_freelink = inodedep->id_savedino2->di_freelink; return; } /* @@ -4086,25 +8391,34 @@ initiate_write_inodeblock_ufs2(inodedep, bp) */ inodedep->id_savedsize = dp->di_size; inodedep->id_savedextsize = dp->di_extsize; + inodedep->id_savednlink = dp->di_nlink; if (TAILQ_EMPTY(&inodedep->id_inoupdt) && - TAILQ_EMPTY(&inodedep->id_extupdt)) + TAILQ_EMPTY(&inodedep->id_extupdt) && + TAILQ_EMPTY(&inodedep->id_inoreflst)) return; + /* + * Revert the link count to that of the first unwritten journal entry. + */ + inoref = TAILQ_FIRST(&inodedep->id_inoreflst); + if (inoref) + dp->di_nlink = inoref->if_nlink; + /* * Set the ext data dependencies to busy. */ for (deplist = 0, adp = TAILQ_FIRST(&inodedep->id_extupdt); adp; adp = TAILQ_NEXT(adp, ad_next)) { #ifdef INVARIANTS - if (deplist != 0 && prevlbn >= adp->ad_lbn) + if (deplist != 0 && prevlbn >= adp->ad_offset) panic("softdep_write_inodeblock: lbn order"); - prevlbn = adp->ad_lbn; - if (dp->di_extb[adp->ad_lbn] != adp->ad_newblkno) + prevlbn = adp->ad_offset; + if (dp->di_extb[adp->ad_offset] != adp->ad_newblkno) panic("%s: direct pointer #%jd mismatch %jd != %jd", "softdep_write_inodeblock", - (intmax_t)adp->ad_lbn, - (intmax_t)dp->di_extb[adp->ad_lbn], + (intmax_t)adp->ad_offset, + (intmax_t)dp->di_extb[adp->ad_offset], (intmax_t)adp->ad_newblkno); - deplist |= 1 << adp->ad_lbn; + deplist |= 1 << adp->ad_offset; if ((adp->ad_state & ATTACHED) == 0) panic("softdep_write_inodeblock: Unknown state 0x%x", adp->ad_state); @@ -4120,12 +8434,12 @@ initiate_write_inodeblock_ufs2(inodedep, bp) */ for (lastadp = NULL, adp = TAILQ_FIRST(&inodedep->id_extupdt); adp; lastadp = adp, adp = TAILQ_NEXT(adp, ad_next)) { - dp->di_extb[adp->ad_lbn] = adp->ad_oldblkno; + dp->di_extb[adp->ad_offset] = adp->ad_oldblkno; /* keep going until hitting 
a rollback to a frag */ if (adp->ad_oldsize == 0 || adp->ad_oldsize == fs->fs_bsize) continue; - dp->di_extsize = fs->fs_bsize * adp->ad_lbn + adp->ad_oldsize; - for (i = adp->ad_lbn + 1; i < NXADDR; i++) { + dp->di_extsize = fs->fs_bsize * adp->ad_offset + adp->ad_oldsize; + for (i = adp->ad_offset + 1; i < NXADDR; i++) { #ifdef INVARIANTS if (dp->di_extb[i] != 0 && (deplist & (1 << i)) == 0) panic("softdep_write_inodeblock: lost dep1"); @@ -4142,8 +8456,8 @@ initiate_write_inodeblock_ufs2(inodedep, bp) * we already checked for fragments in the loop above. */ if (lastadp != NULL && - dp->di_extsize <= (lastadp->ad_lbn + 1) * fs->fs_bsize) { - for (i = lastadp->ad_lbn; i >= 0; i--) + dp->di_extsize <= (lastadp->ad_offset + 1) * fs->fs_bsize) { + for (i = lastadp->ad_offset; i >= 0; i--) if (dp->di_extb[i] != 0) break; dp->di_extsize = (i + 1) * fs->fs_bsize; @@ -4154,24 +8468,24 @@ initiate_write_inodeblock_ufs2(inodedep, bp) for (deplist = 0, adp = TAILQ_FIRST(&inodedep->id_inoupdt); adp; adp = TAILQ_NEXT(adp, ad_next)) { #ifdef INVARIANTS - if (deplist != 0 && prevlbn >= adp->ad_lbn) + if (deplist != 0 && prevlbn >= adp->ad_offset) panic("softdep_write_inodeblock: lbn order"); - prevlbn = adp->ad_lbn; - if (adp->ad_lbn < NDADDR && - dp->di_db[adp->ad_lbn] != adp->ad_newblkno) + prevlbn = adp->ad_offset; + if (adp->ad_offset < NDADDR && + dp->di_db[adp->ad_offset] != adp->ad_newblkno) panic("%s: direct pointer #%jd mismatch %jd != %jd", "softdep_write_inodeblock", - (intmax_t)adp->ad_lbn, - (intmax_t)dp->di_db[adp->ad_lbn], + (intmax_t)adp->ad_offset, + (intmax_t)dp->di_db[adp->ad_offset], (intmax_t)adp->ad_newblkno); - if (adp->ad_lbn >= NDADDR && - dp->di_ib[adp->ad_lbn - NDADDR] != adp->ad_newblkno) + if (adp->ad_offset >= NDADDR && + dp->di_ib[adp->ad_offset - NDADDR] != adp->ad_newblkno) panic("%s indirect pointer #%jd mismatch %jd != %jd", "softdep_write_inodeblock:", - (intmax_t)adp->ad_lbn - NDADDR, - (intmax_t)dp->di_ib[adp->ad_lbn - NDADDR], + 
(intmax_t)adp->ad_offset - NDADDR, + (intmax_t)dp->di_ib[adp->ad_offset - NDADDR], (intmax_t)adp->ad_newblkno); - deplist |= 1 << adp->ad_lbn; + deplist |= 1 << adp->ad_offset; if ((adp->ad_state & ATTACHED) == 0) panic("softdep_write_inodeblock: Unknown state 0x%x", adp->ad_state); @@ -4187,14 +8501,14 @@ initiate_write_inodeblock_ufs2(inodedep, bp) */ for (lastadp = NULL, adp = TAILQ_FIRST(&inodedep->id_inoupdt); adp; lastadp = adp, adp = TAILQ_NEXT(adp, ad_next)) { - if (adp->ad_lbn >= NDADDR) + if (adp->ad_offset >= NDADDR) break; - dp->di_db[adp->ad_lbn] = adp->ad_oldblkno; + dp->di_db[adp->ad_offset] = adp->ad_oldblkno; /* keep going until hitting a rollback to a frag */ if (adp->ad_oldsize == 0 || adp->ad_oldsize == fs->fs_bsize) continue; - dp->di_size = fs->fs_bsize * adp->ad_lbn + adp->ad_oldsize; - for (i = adp->ad_lbn + 1; i < NDADDR; i++) { + dp->di_size = fs->fs_bsize * adp->ad_offset + adp->ad_oldsize; + for (i = adp->ad_offset + 1; i < NDADDR; i++) { #ifdef INVARIANTS if (dp->di_db[i] != 0 && (deplist & (1 << i)) == 0) panic("softdep_write_inodeblock: lost dep2"); @@ -4218,8 +8532,8 @@ initiate_write_inodeblock_ufs2(inodedep, bp) * we already checked for fragments in the loop above. */ if (lastadp != NULL && - dp->di_size <= (lastadp->ad_lbn + 1) * fs->fs_bsize) { - for (i = lastadp->ad_lbn; i >= 0; i--) + dp->di_size <= (lastadp->ad_offset + 1) * fs->fs_bsize) { + for (i = lastadp->ad_offset; i >= 0; i--) if (dp->di_db[i] != 0) break; dp->di_size = (i + 1) * fs->fs_bsize; @@ -4236,7 +8550,355 @@ initiate_write_inodeblock_ufs2(inodedep, bp) * postpone fsck, we are stuck with this argument. */ for (; adp; adp = TAILQ_NEXT(adp, ad_next)) - dp->di_ib[adp->ad_lbn - NDADDR] = 0; + dp->di_ib[adp->ad_offset - NDADDR] = 0; +} + +/* + * Cancel an indirdep as a result of truncation. Release all of the + * children allocindirs and place their journal work on the appropriate + * list. 
+ */ +static void +cancel_indirdep(indirdep, bp, inodedep, freeblks) + struct indirdep *indirdep; + struct buf *bp; + struct inodedep *inodedep; + struct freeblks *freeblks; +{ + struct allocindir *aip; + + /* + * None of the indirect pointers will ever be visible, + * so they can simply be tossed. GOINGAWAY ensures + * that allocated pointers will be saved in the buffer + * cache until they are freed. Note that they will + * only be able to be found by their physical address + * since the inode mapping the logical address will + * be gone. The save buffer used for the safe copy + * was allocated in setup_allocindir_phase2 using + * the physical address so it could be used for this + * purpose. Hence we swap the safe copy with the real + * copy, allowing the safe copy to be freed and holding + * on to the real copy for later use in indir_trunc. + */ + if (indirdep->ir_state & GOINGAWAY) + panic("cancel_indirdep: already gone"); + if (indirdep->ir_state & ONDEPLIST) { + indirdep->ir_state &= ~ONDEPLIST; + LIST_REMOVE(indirdep, ir_next); + } + indirdep->ir_state |= GOINGAWAY; + VFSTOUFS(indirdep->ir_list.wk_mp)->um_numindirdeps += 1; + while ((aip = LIST_FIRST(&indirdep->ir_deplisthd)) != 0) + cancel_allocindir(aip, inodedep, freeblks); + while ((aip = LIST_FIRST(&indirdep->ir_donehd)) != 0) + cancel_allocindir(aip, inodedep, freeblks); + while ((aip = LIST_FIRST(&indirdep->ir_writehd)) != 0) + cancel_allocindir(aip, inodedep, freeblks); + while ((aip = LIST_FIRST(&indirdep->ir_completehd)) != 0) + cancel_allocindir(aip, inodedep, freeblks); + bcopy(bp->b_data, indirdep->ir_savebp->b_data, bp->b_bcount); + WORKLIST_REMOVE(&indirdep->ir_list); + WORKLIST_INSERT(&indirdep->ir_savebp->b_dep, &indirdep->ir_list); + indirdep->ir_savebp = NULL; +} + +/* + * Free an indirdep once it no longer has new pointers to track. 
+ */ +static void +free_indirdep(indirdep) + struct indirdep *indirdep; +{ + + KASSERT(LIST_EMPTY(&indirdep->ir_jwork), + ("free_indirdep: Journal work not empty.")); + KASSERT(LIST_EMPTY(&indirdep->ir_completehd), + ("free_indirdep: Complete head not empty.")); + KASSERT(LIST_EMPTY(&indirdep->ir_writehd), + ("free_indirdep: write head not empty.")); + KASSERT(LIST_EMPTY(&indirdep->ir_donehd), + ("free_indirdep: done head not empty.")); + KASSERT(LIST_EMPTY(&indirdep->ir_deplisthd), + ("free_indirdep: deplist head not empty.")); + KASSERT(indirdep->ir_savebp == NULL, + ("free_indirdep: %p ir_savebp != NULL", indirdep)); + KASSERT((indirdep->ir_state & ONDEPLIST) == 0, + ("free_indirdep: %p still on deplist.", indirdep)); + if (indirdep->ir_state & ONWORKLIST) + WORKLIST_REMOVE(&indirdep->ir_list); + WORKITEM_FREE(indirdep, D_INDIRDEP); +} + +/* + * Called before a write to an indirdep. This routine is responsible for + * rolling back pointers to a safe state which includes only those + * allocindirs which have been completed. + */ +static void +initiate_write_indirdep(indirdep, bp) + struct indirdep *indirdep; + struct buf *bp; +{ + + if (indirdep->ir_state & GOINGAWAY) + panic("disk_io_initiation: indirdep gone"); + + /* + * If there are no remaining dependencies, this will be writing + * the real pointers. + */ + if (LIST_EMPTY(&indirdep->ir_deplisthd)) + return; + /* + * Replace up-to-date version with safe version. + */ + FREE_LOCK(&lk); + indirdep->ir_saveddata = malloc(bp->b_bcount, M_INDIRDEP, + M_SOFTDEP_FLAGS); + ACQUIRE_LOCK(&lk); + indirdep->ir_state &= ~ATTACHED; + indirdep->ir_state |= UNDONE; + bcopy(bp->b_data, indirdep->ir_saveddata, bp->b_bcount); + bcopy(indirdep->ir_savebp->b_data, bp->b_data, + bp->b_bcount); +} + +/* + * Called when an inode has been cleared in a cg bitmap. 
This finally + * eliminates any canceled jaddrefs + */ +void +softdep_setup_inofree(mp, bp, ino, wkhd) + struct mount *mp; + struct buf *bp; + ino_t ino; + struct workhead *wkhd; +{ + struct worklist *wk, *wkn; + struct inodedep *inodedep; + uint8_t *inosused; + struct cg *cgp; + struct fs *fs; + + ACQUIRE_LOCK(&lk); + fs = VFSTOUFS(mp)->um_fs; + cgp = (struct cg *)bp->b_data; + inosused = cg_inosused(cgp); + if (isset(inosused, ino % fs->fs_ipg)) + panic("softdep_setup_inofree: inode %d not freed.", ino); + if (inodedep_lookup(mp, ino, 0, &inodedep)) + panic("softdep_setup_inofree: ino %d has existing inodedep %p", + ino, inodedep); + if (wkhd) { + LIST_FOREACH_SAFE(wk, wkhd, wk_list, wkn) { + if (wk->wk_type != D_JADDREF) + continue; + WORKLIST_REMOVE(wk); + /* + * We can free immediately even if the jaddref + * isn't attached in a background write as now + * the bitmaps are reconciled. + */ + wk->wk_state |= COMPLETE | ATTACHED; + free_jaddref(WK_JADDREF(wk)); + } + jwork_move(&bp->b_dep, wkhd); + } + FREE_LOCK(&lk); +} + + +/* + * Called via ffs_blkfree() after a set of frags has been cleared from a cg + * map. Any dependencies waiting for the write to clear are added to the + * buf's list and any jnewblks that are being canceled are discarded + * immediately. + */ +void +softdep_setup_blkfree(mp, bp, blkno, frags, wkhd) + struct mount *mp; + struct buf *bp; + ufs2_daddr_t blkno; + int frags; + struct workhead *wkhd; +{ + struct jnewblk *jnewblk; + struct worklist *wk, *wkn; +#ifdef SUJ_DEBUG + struct bmsafemap *bmsafemap; + struct fs *fs; + uint8_t *blksfree; + struct cg *cgp; + ufs2_daddr_t jstart; + ufs2_daddr_t jend; + ufs2_daddr_t end; + long bno; + int i; +#endif + + ACQUIRE_LOCK(&lk); + /* + * Detach any jnewblks which have been canceled. They must linger + * until the bitmap is cleared again by ffs_blkfree() to prevent + * an unjournaled allocation from hitting the disk. 
+ */ + if (wkhd) { + LIST_FOREACH_SAFE(wk, wkhd, wk_list, wkn) { + if (wk->wk_type != D_JNEWBLK) + continue; + jnewblk = WK_JNEWBLK(wk); + KASSERT(jnewblk->jn_state & GOINGAWAY, + ("softdep_setup_blkfree: jnewblk not canceled.")); + WORKLIST_REMOVE(wk); +#ifdef SUJ_DEBUG + /* + * Assert that this block is free in the bitmap + * before we discard the jnewblk. + */ + fs = VFSTOUFS(mp)->um_fs; + cgp = (struct cg *)bp->b_data; + blksfree = cg_blksfree(cgp); + bno = dtogd(fs, jnewblk->jn_blkno); + for (i = jnewblk->jn_oldfrags; + i < jnewblk->jn_frags; i++) { + if (isset(blksfree, bno + i)) + continue; + panic("softdep_setup_blkfree: not free"); + } +#endif + /* + * Even if it's not attached we can free immediately + * as the new bitmap is correct. + */ + wk->wk_state |= COMPLETE | ATTACHED; + free_jnewblk(jnewblk); + } + /* + * The buf must be locked by the caller otherwise these could + * be added while it's being written and the write would + * complete them before they made it to disk. + */ + jwork_move(&bp->b_dep, wkhd); + } + +#ifdef SUJ_DEBUG + /* + * Assert that we are not freeing a block which has an outstanding + * allocation dependency. + */ + fs = VFSTOUFS(mp)->um_fs; + bmsafemap = bmsafemap_lookup(mp, bp, dtog(fs, blkno)); + end = blkno + frags; + LIST_FOREACH(jnewblk, &bmsafemap->sm_jnewblkhd, jn_deps) { + /* + * Don't match against blocks that will be freed when the + * background write is done. 
+ */ + if ((jnewblk->jn_state & (ATTACHED | COMPLETE | DEPCOMPLETE)) == + (COMPLETE | DEPCOMPLETE)) + continue; + jstart = jnewblk->jn_blkno + jnewblk->jn_oldfrags; + jend = jnewblk->jn_blkno + jnewblk->jn_frags; + if ((blkno >= jstart && blkno < jend) || + (end > jstart && end <= jend)) { + printf("state 0x%X %jd - %d %d dep %p\n", + jnewblk->jn_state, jnewblk->jn_blkno, + jnewblk->jn_oldfrags, jnewblk->jn_frags, + jnewblk->jn_newblk); + panic("softdep_setup_blkfree: " + "%jd-%jd(%d) overlaps with %jd-%jd", + blkno, end, frags, jstart, jend); + } + } +#endif + FREE_LOCK(&lk); +} + +static void +initiate_write_bmsafemap(bmsafemap, bp) + struct bmsafemap *bmsafemap; + struct buf *bp; /* The cg block. */ +{ + struct jaddref *jaddref; + struct jnewblk *jnewblk; + uint8_t *inosused; + uint8_t *blksfree; + struct cg *cgp; + struct fs *fs; + int cleared; + ino_t ino; + long bno; + int i; + + if (bmsafemap->sm_state & IOSTARTED) + panic("initiate_write_bmsafemap: Already started\n"); + bmsafemap->sm_state |= IOSTARTED; + /* + * Clear any inode allocations which are pending journal writes. + */ + if (LIST_FIRST(&bmsafemap->sm_jaddrefhd) != NULL) { + cgp = (struct cg *)bp->b_data; + fs = VFSTOUFS(bmsafemap->sm_list.wk_mp)->um_fs; + inosused = cg_inosused(cgp); + LIST_FOREACH(jaddref, &bmsafemap->sm_jaddrefhd, ja_bmdeps) { + ino = jaddref->ja_ino % fs->fs_ipg; + /* + * If this is a background copy the inode may not + * be marked used yet. + */ + if (isset(inosused, ino)) { + if ((jaddref->ja_mode & IFMT) == IFDIR) + cgp->cg_cs.cs_ndir--; + cgp->cg_cs.cs_nifree++; + clrbit(inosused, ino); + jaddref->ja_state &= ~ATTACHED; + jaddref->ja_state |= UNDONE; + stat_jaddref++; + } else if ((bp->b_xflags & BX_BKGRDMARKER) == 0) + panic("initiate_write_bmsafemap: inode %d " + "marked free", jaddref->ja_ino); + } + } + /* + * Clear any block allocations which are pending journal writes. 
+ */ + if (LIST_FIRST(&bmsafemap->sm_jnewblkhd) != NULL) { + cgp = (struct cg *)bp->b_data; + fs = VFSTOUFS(bmsafemap->sm_list.wk_mp)->um_fs; + blksfree = cg_blksfree(cgp); + LIST_FOREACH(jnewblk, &bmsafemap->sm_jnewblkhd, jn_deps) { + bno = dtogd(fs, jnewblk->jn_blkno); + cleared = 0; + for (i = jnewblk->jn_oldfrags; i < jnewblk->jn_frags; + i++) { + if (isclr(blksfree, bno + i)) { + cleared = 1; + setbit(blksfree, bno + i); + } + } + /* + * We may not clear the block if it's a background + * copy. In that case there is no reason to detach + * it. + */ + if (cleared) { + stat_jnewblk++; + jnewblk->jn_state &= ~ATTACHED; + jnewblk->jn_state |= UNDONE; + } else if ((bp->b_xflags & BX_BKGRDMARKER) == 0) + panic("initiate_write_bmsafemap: block %jd " + "marked free", jnewblk->jn_blkno); + } + } + /* + * Move allocation lists to the written lists so they can be + * cleared once the block write is complete. + */ + LIST_SWAP(&bmsafemap->sm_inodedephd, &bmsafemap->sm_inodedepwr, + inodedep, id_deps); + LIST_SWAP(&bmsafemap->sm_newblkhd, &bmsafemap->sm_newblkwr, + newblk, nb_deps); } /* @@ -4246,6 +8908,7 @@ initiate_write_inodeblock_ufs2(inodedep, bp) * a request completion). It should be called early in this * procedure, before the block is made available to other * processes or other routines are called. + * */ static void softdep_disk_write_complete(bp) @@ -4254,12 +8917,7 @@ softdep_disk_write_complete(bp) struct worklist *wk; struct worklist *owk; struct workhead reattach; - struct newblk *newblk; - struct allocindir *aip; - struct allocdirect *adp; - struct indirdep *indirdep; - struct inodedep *inodedep; - struct bmsafemap *bmsafemap; + struct buf *sbp; /* * If an error occurred while doing the write, then the data @@ -4271,8 +8929,9 @@ softdep_disk_write_complete(bp) /* * This lock must not be released anywhere in this code segment. 
*/ - ACQUIRE_LOCK(&lk); + sbp = NULL; owk = NULL; + ACQUIRE_LOCK(&lk); while ((wk = LIST_FIRST(&bp->b_dep)) != NULL) { WORKLIST_REMOVE(wk); if (wk == owk) @@ -4291,33 +8950,8 @@ softdep_disk_write_complete(bp) continue; case D_BMSAFEMAP: - bmsafemap = WK_BMSAFEMAP(wk); - while ((newblk = LIST_FIRST(&bmsafemap->sm_newblkhd))) { - newblk->nb_state |= DEPCOMPLETE; - newblk->nb_bmsafemap = NULL; - LIST_REMOVE(newblk, nb_deps); - } - while ((adp = - LIST_FIRST(&bmsafemap->sm_allocdirecthd))) { - adp->ad_state |= DEPCOMPLETE; - adp->ad_buf = NULL; - LIST_REMOVE(adp, ad_deps); - handle_allocdirect_partdone(adp); - } - while ((aip = - LIST_FIRST(&bmsafemap->sm_allocindirhd))) { - aip->ai_state |= DEPCOMPLETE; - aip->ai_buf = NULL; - LIST_REMOVE(aip, ai_deps); - handle_allocindir_partdone(aip); - } - while ((inodedep = - LIST_FIRST(&bmsafemap->sm_inodedephd)) != NULL) { - inodedep->id_state |= DEPCOMPLETE; - LIST_REMOVE(inodedep, id_deps); - inodedep->id_buf = NULL; - } - WORKITEM_FREE(bmsafemap, D_BMSAFEMAP); + if (handle_written_bmsafemap(WK_BMSAFEMAP(wk), bp)) + WORKLIST_INSERT(&reattach, wk); continue; case D_MKDIR: @@ -4325,35 +8959,45 @@ softdep_disk_write_complete(bp) continue; case D_ALLOCDIRECT: - adp = WK_ALLOCDIRECT(wk); - adp->ad_state |= COMPLETE; - handle_allocdirect_partdone(adp); + wk->wk_state |= COMPLETE; + handle_allocdirect_partdone(WK_ALLOCDIRECT(wk), NULL); continue; case D_ALLOCINDIR: - aip = WK_ALLOCINDIR(wk); - aip->ai_state |= COMPLETE; - handle_allocindir_partdone(aip); + wk->wk_state |= COMPLETE; + handle_allocindir_partdone(WK_ALLOCINDIR(wk)); continue; case D_INDIRDEP: - indirdep = WK_INDIRDEP(wk); - if (indirdep->ir_state & GOINGAWAY) - panic("disk_write_complete: indirdep gone"); - bcopy(indirdep->ir_saveddata, bp->b_data, bp->b_bcount); - free(indirdep->ir_saveddata, M_INDIRDEP); - indirdep->ir_saveddata = 0; - indirdep->ir_state &= ~UNDONE; - indirdep->ir_state |= ATTACHED; - while ((aip = LIST_FIRST(&indirdep->ir_donehd)) != 0) { - 
handle_allocindir_partdone(aip); - if (aip == LIST_FIRST(&indirdep->ir_donehd)) - panic("disk_write_complete: not gone"); - } - WORKLIST_INSERT(&reattach, wk); - if ((bp->b_flags & B_DELWRI) == 0) - stat_indir_blk_ptrs++; - bdirty(bp); + if (handle_written_indirdep(WK_INDIRDEP(wk), bp, &sbp)) + WORKLIST_INSERT(&reattach, wk); + continue; + + case D_FREEBLKS: + wk->wk_state |= COMPLETE; + if ((wk->wk_state & ALLCOMPLETE) == ALLCOMPLETE) + add_to_worklist(wk, 1); + continue; + + case D_FREEWORK: + handle_written_freework(WK_FREEWORK(wk)); + break; + + case D_FREEDEP: + free_freedep(WK_FREEDEP(wk)); + continue; + + case D_JSEGDEP: + free_jsegdep(WK_JSEGDEP(wk)); + continue; + + case D_JSEG: + handle_written_jseg(WK_JSEG(wk), bp); + continue; + + case D_SBDEP: + if (handle_written_sbdep(WK_SBDEP(wk), bp)) + WORKLIST_INSERT(&reattach, wk); continue; default: @@ -4370,6 +9014,8 @@ softdep_disk_write_complete(bp) WORKLIST_INSERT(&bp->b_dep, wk); } FREE_LOCK(&lk); + if (sbp) + brelse(sbp); } /* @@ -4378,18 +9024,17 @@ softdep_disk_write_complete(bp) * splbio interrupts blocked. */ static void -handle_allocdirect_partdone(adp) +handle_allocdirect_partdone(adp, wkhd) struct allocdirect *adp; /* the completed allocdirect */ + struct workhead *wkhd; /* Work to do when inode is written. */ { struct allocdirectlst *listhead; struct allocdirect *listadp; struct inodedep *inodedep; - long bsize, delay; + long bsize; if ((adp->ad_state & ALLCOMPLETE) != ALLCOMPLETE) return; - if (adp->ad_buf != NULL) - panic("handle_allocdirect_partdone: dangling dep"); /* * The on-disk inode cannot claim to be any larger than the last * fragment that has been written. Otherwise, the on-disk inode @@ -4439,25 +9084,27 @@ handle_allocdirect_partdone(adp) return; } /* - * If we have found the just finished dependency, then free + * If we have found the just finished dependency, then queue * it along with anything that follows it that is complete.
- * If the inode still has a bitmap dependency, then it has - * never been written to disk, hence the on-disk inode cannot - * reference the old fragment so we can free it without delay. + * Since the pointer has not yet been written in the inode + * as the dependency prevents it, place the allocdirect on the + * bufwait list where it will be freed once the pointer is + * valid. */ - delay = (inodedep->id_state & DEPCOMPLETE); + if (wkhd == NULL) + wkhd = &inodedep->id_bufwait; for (; adp; adp = listadp) { listadp = TAILQ_NEXT(adp, ad_next); if ((adp->ad_state & ALLCOMPLETE) != ALLCOMPLETE) return; - free_allocdirect(listhead, adp, delay); + TAILQ_REMOVE(listhead, adp, ad_next); + WORKLIST_INSERT(wkhd, &adp->ad_block.nb_list); } } /* - * Called from within softdep_disk_write_complete above. Note that - * this routine is always called from interrupt level with further - * splbio interrupts blocked. + * Called from within softdep_disk_write_complete above. This routine + * completes successfully written allocindirs. */ static void handle_allocindir_partdone(aip) @@ -4467,11 +9114,9 @@ handle_allocindir_partdone(aip) if ((aip->ai_state & ALLCOMPLETE) != ALLCOMPLETE) return; - if (aip->ai_buf != NULL) - panic("handle_allocindir_partdone: dangling dependency"); indirdep = aip->ai_indirdep; + LIST_REMOVE(aip, ai_next); if (indirdep->ir_state & UNDONE) { - LIST_REMOVE(aip, ai_next); LIST_INSERT_HEAD(&indirdep->ir_donehd, aip, ai_next); return; } @@ -4481,12 +9126,129 @@ handle_allocindir_partdone(aip) else ((ufs2_daddr_t *)indirdep->ir_savebp->b_data)[aip->ai_offset] = aip->ai_newblkno; - LIST_REMOVE(aip, ai_next); - if (aip->ai_freefrag != NULL) - add_to_worklist(&aip->ai_freefrag->ff_list); - WORKITEM_FREE(aip, D_ALLOCINDIR); + /* + * Await the pointer write before freeing the allocindir. + */ + LIST_INSERT_HEAD(&indirdep->ir_writehd, aip, ai_next); } +/* + * Release segments held on a jwork list. 
+ */ +static void +handle_jwork(wkhd) + struct workhead *wkhd; +{ + struct worklist *wk; + + while ((wk = LIST_FIRST(wkhd)) != NULL) { + WORKLIST_REMOVE(wk); + switch (wk->wk_type) { + case D_JSEGDEP: + free_jsegdep(WK_JSEGDEP(wk)); + continue; + default: + panic("handle_jwork: Unknown type %s\n", + TYPENAME(wk->wk_type)); + } + } +} + +/* + * Handle the bufwait list on an inode when it is safe to release items + * held there. This normally happens after an inode block is written but + * may be delayed and handled later if there are pending journal items that + * are not yet safe to be released. + */ +static struct freefile * +handle_bufwait(inodedep, refhd) + struct inodedep *inodedep; + struct workhead *refhd; +{ + struct jaddref *jaddref; + struct freefile *freefile; + struct worklist *wk; + + freefile = NULL; + while ((wk = LIST_FIRST(&inodedep->id_bufwait)) != NULL) { + WORKLIST_REMOVE(wk); + switch (wk->wk_type) { + case D_FREEFILE: + /* + * We defer adding freefile to the worklist + * until all other additions have been made to + * ensure that it will be done after all the + * old blocks have been freed. + */ + if (freefile != NULL) + panic("handle_bufwait: freefile"); + freefile = WK_FREEFILE(wk); + continue; + + case D_MKDIR: + handle_written_mkdir(WK_MKDIR(wk), MKDIR_PARENT); + continue; + + case D_DIRADD: + diradd_inode_written(WK_DIRADD(wk), inodedep); + continue; + + case D_FREEFRAG: + wk->wk_state |= COMPLETE; + if ((wk->wk_state & ALLCOMPLETE) == ALLCOMPLETE) + add_to_worklist(wk, 0); + continue; + + case D_DIRREM: + wk->wk_state |= COMPLETE; + add_to_worklist(wk, 0); + continue; + + case D_ALLOCDIRECT: + case D_ALLOCINDIR: + free_newblk(WK_NEWBLK(wk)); + continue; + + case D_JNEWBLK: + wk->wk_state |= COMPLETE; + free_jnewblk(WK_JNEWBLK(wk)); + continue; + + /* + * Save freed journal segments and add references on + * the supplied list which will delay their release + * until the cg bitmap is cleared on disk.
+ */ + case D_JSEGDEP: + if (refhd == NULL) + free_jsegdep(WK_JSEGDEP(wk)); + else + WORKLIST_INSERT(refhd, wk); + continue; + + case D_JADDREF: + jaddref = WK_JADDREF(wk); + TAILQ_REMOVE(&inodedep->id_inoreflst, &jaddref->ja_ref, + if_deps); + /* + * Transfer any jaddrefs to the list to be freed with + * the bitmap if we're handling a removed file. + */ + if (refhd == NULL) { + wk->wk_state |= COMPLETE; + free_jaddref(jaddref); + } else + WORKLIST_INSERT(refhd, wk); + continue; + + default: + panic("handle_bufwait: Unknown type %p(%s)", + wk, TYPENAME(wk->wk_type)); + /* NOTREACHED */ + } + } + return (freefile); +} /* * Called from within softdep_disk_write_complete above to restore * in-memory inode block contents to their most up-to-date state. Note @@ -4498,12 +9260,17 @@ handle_written_inodeblock(inodedep, bp) struct inodedep *inodedep; struct buf *bp; /* buffer containing the inode block */ { - struct worklist *wk, *filefree; + struct freefile *freefile; struct allocdirect *adp, *nextadp; struct ufs1_dinode *dp1 = NULL; struct ufs2_dinode *dp2 = NULL; + struct workhead wkhd; int hadchanges, fstype; + ino_t freelink; + LIST_INIT(&wkhd); + hadchanges = 0; + freefile = NULL; if ((inodedep->id_state & IOSTARTED) == 0) panic("handle_written_inodeblock: not started"); inodedep->id_state &= ~IOSTARTED; @@ -4511,11 +9278,32 @@ handle_written_inodeblock(inodedep, bp) fstype = UFS1; dp1 = (struct ufs1_dinode *)bp->b_data + ino_to_fsbo(inodedep->id_fs, inodedep->id_ino); + freelink = dp1->di_freelink; } else { fstype = UFS2; dp2 = (struct ufs2_dinode *)bp->b_data + ino_to_fsbo(inodedep->id_fs, inodedep->id_ino); + freelink = dp2->di_freelink; } + /* + * If we wrote a valid freelink pointer during the last write + * record it here. 
+ */ + if ((inodedep->id_state & (UNLINKED | UNLINKNEXT)) == UNLINKED) { + struct inodedep *inon; + + inon = TAILQ_NEXT(inodedep, id_unlinked); + if ((inon == NULL && freelink == 0) || + (inon && inon->id_ino == freelink)) { + if (inon) + inon->id_state |= UNLINKPREV; + inodedep->id_state |= UNLINKNEXT; + } else + hadchanges = 1; + } + /* Leave this inodeblock dirty until it's in the list. */ + if ((inodedep->id_state & (UNLINKED | UNLINKONLIST)) == UNLINKED) + hadchanges = 1; /* * If we had to rollback the inode allocation because of * bitmaps being incomplete, then simply restore it. @@ -4524,6 +9312,7 @@ handle_written_inodeblock(inodedep, bp) * corresponding updates written to disk. */ if (inodedep->id_savedino1 != NULL) { + hadchanges = 1; if (fstype == UFS1) *dp1 = *inodedep->id_savedino1; else @@ -4533,6 +9322,13 @@ handle_written_inodeblock(inodedep, bp) if ((bp->b_flags & B_DELWRI) == 0) stat_inode_bitmap++; bdirty(bp); + /* + * If the inode is clear here and GOINGAWAY it will never + * be written. Process the bufwait and clear any pending + * work which may include the freefile. + */ + if (inodedep->id_state & GOINGAWAY) + goto bufwait; return (1); } inodedep->id_state |= COMPLETE; @@ -4540,50 +9336,49 @@ handle_written_inodeblock(inodedep, bp) * Roll forward anything that had to be rolled back before * the inode could be updated. 
*/ - hadchanges = 0; for (adp = TAILQ_FIRST(&inodedep->id_inoupdt); adp; adp = nextadp) { nextadp = TAILQ_NEXT(adp, ad_next); if (adp->ad_state & ATTACHED) panic("handle_written_inodeblock: new entry"); if (fstype == UFS1) { - if (adp->ad_lbn < NDADDR) { - if (dp1->di_db[adp->ad_lbn]!=adp->ad_oldblkno) + if (adp->ad_offset < NDADDR) { + if (dp1->di_db[adp->ad_offset]!=adp->ad_oldblkno) panic("%s %s #%jd mismatch %d != %jd", "handle_written_inodeblock:", "direct pointer", - (intmax_t)adp->ad_lbn, - dp1->di_db[adp->ad_lbn], + (intmax_t)adp->ad_offset, + dp1->di_db[adp->ad_offset], (intmax_t)adp->ad_oldblkno); - dp1->di_db[adp->ad_lbn] = adp->ad_newblkno; + dp1->di_db[adp->ad_offset] = adp->ad_newblkno; } else { - if (dp1->di_ib[adp->ad_lbn - NDADDR] != 0) + if (dp1->di_ib[adp->ad_offset - NDADDR] != 0) panic("%s: %s #%jd allocated as %d", "handle_written_inodeblock", "indirect pointer", - (intmax_t)adp->ad_lbn - NDADDR, - dp1->di_ib[adp->ad_lbn - NDADDR]); - dp1->di_ib[adp->ad_lbn - NDADDR] = + (intmax_t)adp->ad_offset - NDADDR, + dp1->di_ib[adp->ad_offset - NDADDR]); + dp1->di_ib[adp->ad_offset - NDADDR] = adp->ad_newblkno; } } else { - if (adp->ad_lbn < NDADDR) { - if (dp2->di_db[adp->ad_lbn]!=adp->ad_oldblkno) + if (adp->ad_offset < NDADDR) { + if (dp2->di_db[adp->ad_offset]!=adp->ad_oldblkno) panic("%s: %s #%jd %s %jd != %jd", "handle_written_inodeblock", "direct pointer", - (intmax_t)adp->ad_lbn, "mismatch", - (intmax_t)dp2->di_db[adp->ad_lbn], + (intmax_t)adp->ad_offset, "mismatch", + (intmax_t)dp2->di_db[adp->ad_offset], (intmax_t)adp->ad_oldblkno); - dp2->di_db[adp->ad_lbn] = adp->ad_newblkno; + dp2->di_db[adp->ad_offset] = adp->ad_newblkno; } else { - if (dp2->di_ib[adp->ad_lbn - NDADDR] != 0) + if (dp2->di_ib[adp->ad_offset - NDADDR] != 0) panic("%s: %s #%jd allocated as %jd", "handle_written_inodeblock", "indirect pointer", - (intmax_t)adp->ad_lbn - NDADDR, + (intmax_t)adp->ad_offset - NDADDR, (intmax_t) - dp2->di_ib[adp->ad_lbn - NDADDR]); - 
dp2->di_ib[adp->ad_lbn - NDADDR] = + dp2->di_ib[adp->ad_offset - NDADDR]); + dp2->di_ib[adp->ad_offset - NDADDR] = adp->ad_newblkno; } } @@ -4595,13 +9390,13 @@ handle_written_inodeblock(inodedep, bp) nextadp = TAILQ_NEXT(adp, ad_next); if (adp->ad_state & ATTACHED) panic("handle_written_inodeblock: new entry"); - if (dp2->di_extb[adp->ad_lbn] != adp->ad_oldblkno) + if (dp2->di_extb[adp->ad_offset] != adp->ad_oldblkno) panic("%s: direct pointers #%jd %s %jd != %jd", "handle_written_inodeblock", - (intmax_t)adp->ad_lbn, "mismatch", - (intmax_t)dp2->di_extb[adp->ad_lbn], + (intmax_t)adp->ad_offset, "mismatch", + (intmax_t)dp2->di_extb[adp->ad_offset], (intmax_t)adp->ad_oldblkno); - dp2->di_extb[adp->ad_lbn] = adp->ad_newblkno; + dp2->di_extb[adp->ad_offset] = adp->ad_newblkno; adp->ad_state &= ~UNDONE; adp->ad_state |= ATTACHED; hadchanges = 1; @@ -4613,12 +9408,23 @@ handle_written_inodeblock(inodedep, bp) */ if (inodedep->id_savedsize == -1 || inodedep->id_savedextsize == -1) panic("handle_written_inodeblock: bad size"); + if (inodedep->id_savednlink > LINK_MAX) + panic("handle_written_inodeblock: Invalid link count " + "%d for inodedep %p", inodedep->id_savednlink, inodedep); if (fstype == UFS1) { + if (dp1->di_nlink != inodedep->id_savednlink) { + dp1->di_nlink = inodedep->id_savednlink; + hadchanges = 1; + } if (dp1->di_size != inodedep->id_savedsize) { dp1->di_size = inodedep->id_savedsize; hadchanges = 1; } } else { + if (dp2->di_nlink != inodedep->id_savednlink) { + dp2->di_nlink = inodedep->id_savednlink; + hadchanges = 1; + } if (dp2->di_size != inodedep->id_savedsize) { dp2->di_size = inodedep->id_savedsize; hadchanges = 1; @@ -4630,6 +9436,7 @@ handle_written_inodeblock(inodedep, bp) } inodedep->id_savedsize = -1; inodedep->id_savedextsize = -1; + inodedep->id_savednlink = -1; /* * If there were any rollbacks in the inode block, then it must be * marked dirty so that its will eventually get written back in @@ -4637,69 +9444,49 @@ 
handle_written_inodeblock(inodedep, bp) */ if (hadchanges) bdirty(bp); +bufwait: /* * Process any allocdirects that completed during the update. */ if ((adp = TAILQ_FIRST(&inodedep->id_inoupdt)) != NULL) - handle_allocdirect_partdone(adp); + handle_allocdirect_partdone(adp, &wkhd); if ((adp = TAILQ_FIRST(&inodedep->id_extupdt)) != NULL) - handle_allocdirect_partdone(adp); + handle_allocdirect_partdone(adp, &wkhd); /* * Process deallocations that were held pending until the * inode had been written to disk. Freeing of the inode * is delayed until after all blocks have been freed to * avoid creation of new <vfsid, inum, lbn> triples - * before the old ones have been deleted. + * before the old ones have been deleted. Completely + * unlinked inodes are not processed until the unlinked + * inode list is written or the last reference is removed. */ - filefree = NULL; - while ((wk = LIST_FIRST(&inodedep->id_bufwait)) != NULL) { - WORKLIST_REMOVE(wk); - switch (wk->wk_type) { - - case D_FREEFILE: - /* - * We defer adding filefree to the worklist until - * all other additions have been made to ensure - * that it will be done after all the old blocks - * have been freed.
- */ - if (filefree != NULL) - panic("handle_written_inodeblock: filefree"); - filefree = wk; - continue; - - case D_MKDIR: - handle_written_mkdir(WK_MKDIR(wk), MKDIR_PARENT); - continue; - - case D_DIRADD: - diradd_inode_written(WK_DIRADD(wk), inodedep); - continue; - - case D_FREEBLKS: - wk->wk_state |= COMPLETE; - if ((wk->wk_state & ALLCOMPLETE) != ALLCOMPLETE) - continue; - /* -- fall through -- */ - case D_FREEFRAG: - case D_DIRREM: - add_to_worklist(wk); - continue; - - case D_NEWDIRBLK: - free_newdirblk(WK_NEWDIRBLK(wk)); - continue; - - default: - panic("handle_written_inodeblock: Unknown type %s", - TYPENAME(wk->wk_type)); - /* NOTREACHED */ + if ((inodedep->id_state & (UNLINKED | UNLINKONLIST)) != UNLINKED) { + freefile = handle_bufwait(inodedep, NULL); + if (freefile && !LIST_EMPTY(&wkhd)) { + WORKLIST_INSERT(&wkhd, &freefile->fx_list); + freefile = NULL; } } - if (filefree != NULL) { + /* + * Move rolled forward dependency completions to the bufwait list + * now that those that were already written have been processed. + */ + if (!LIST_EMPTY(&wkhd) && hadchanges == 0) + panic("handle_written_inodeblock: bufwait but no changes"); + jwork_move(&inodedep->id_bufwait, &wkhd); + + if (freefile != NULL) { + /* + * If the inode is goingaway it was never written. Fake up + * the state here so free_inodedep() can succeed. + */ + if (inodedep->id_state & GOINGAWAY) + inodedep->id_state |= COMPLETE | DEPCOMPLETE; if (free_inodedep(inodedep) == 0) - panic("handle_written_inodeblock: live inodedep"); - add_to_worklist(filefree); + panic("handle_written_inodeblock: live inodedep %p", + inodedep); + add_to_worklist(&freefile->fx_list, 0); return (0); } @@ -4707,12 +9494,101 @@ handle_written_inodeblock(inodedep, bp) * If no outstanding dependencies, free it. 
*/ if (free_inodedep(inodedep) || - (TAILQ_FIRST(&inodedep->id_inoupdt) == 0 && - TAILQ_FIRST(&inodedep->id_extupdt) == 0)) + (TAILQ_FIRST(&inodedep->id_inoreflst) == 0 && + TAILQ_FIRST(&inodedep->id_inoupdt) == 0 && + TAILQ_FIRST(&inodedep->id_extupdt) == 0 && + LIST_FIRST(&inodedep->id_bufwait) == 0)) return (0); return (hadchanges); } +static int +handle_written_indirdep(indirdep, bp, bpp) + struct indirdep *indirdep; + struct buf *bp; + struct buf **bpp; +{ + struct allocindir *aip; + int chgs; + + if (indirdep->ir_state & GOINGAWAY) + panic("disk_write_complete: indirdep gone"); + chgs = 0; + /* + * If there were rollbacks revert them here. + */ + if (indirdep->ir_saveddata) { + bcopy(indirdep->ir_saveddata, bp->b_data, bp->b_bcount); + free(indirdep->ir_saveddata, M_INDIRDEP); + indirdep->ir_saveddata = 0; + chgs = 1; + } + indirdep->ir_state &= ~UNDONE; + indirdep->ir_state |= ATTACHED; + /* + * Move allocindirs with written pointers to the completehd if + * the the indirdep's pointer is not yet written. Otherwise + * free them here. + */ + while ((aip = LIST_FIRST(&indirdep->ir_writehd)) != 0) { + LIST_REMOVE(aip, ai_next); + if ((indirdep->ir_state & DEPCOMPLETE) == 0) { + LIST_INSERT_HEAD(&indirdep->ir_completehd, aip, + ai_next); + continue; + } + free_newblk(&aip->ai_block); + } + /* + * Move allocindirs that have finished dependency processing from + * the done list to the write list after updating the pointers. + */ + while ((aip = LIST_FIRST(&indirdep->ir_donehd)) != 0) { + handle_allocindir_partdone(aip); + if (aip == LIST_FIRST(&indirdep->ir_donehd)) + panic("disk_write_complete: not gone"); + chgs = 1; + } + /* + * If this indirdep has been detached from its newblk during + * I/O we need to keep this dep attached to the buffer so + * deallocate_dependencies can find it and properly resolve + * any outstanding dependencies. 
+ */ + if ((indirdep->ir_state & (ONDEPLIST | DEPCOMPLETE)) == 0) + chgs = 1; + if ((bp->b_flags & B_DELWRI) == 0) + stat_indir_blk_ptrs++; + /* + * If there were no changes we can discard the savedbp and detach + * ourselves from the buf. We are only carrying completed pointers + * in this case. + */ + if (chgs == 0) { + struct buf *sbp; + + sbp = indirdep->ir_savebp; + sbp->b_flags |= B_INVAL | B_NOCACHE; + indirdep->ir_savebp = NULL; + if (*bpp != NULL) + panic("handle_written_indirdep: bp already exists."); + *bpp = sbp; + } else + bdirty(bp); + /* + * If there are no fresh dependencies and none waiting on writes + * we can free the indirdep. + */ + if ((indirdep->ir_state & DEPCOMPLETE) && chgs == 0) { + if (indirdep->ir_state & ONDEPLIST) + LIST_REMOVE(indirdep, ir_next); + free_indirdep(indirdep); + return (0); + } + + return (chgs); +} + /* * Process a diradd entry after its dependent inode has been written. * This routine must be called with splbio interrupts blocked. @@ -4722,20 +9598,163 @@ diradd_inode_written(dap, inodedep) struct diradd *dap; struct inodedep *inodedep; { - struct pagedep *pagedep; dap->da_state |= COMPLETE; - if ((dap->da_state & ALLCOMPLETE) == ALLCOMPLETE) { - if (dap->da_state & DIRCHG) - pagedep = dap->da_previous->dm_pagedep; - else - pagedep = dap->da_pagedep; - LIST_REMOVE(dap, da_pdlist); - LIST_INSERT_HEAD(&pagedep->pd_pendinghd, dap, da_pdlist); - } + complete_diradd(dap); WORKLIST_INSERT(&inodedep->id_pendinghd, &dap->da_list); } +/* + * Returns true if the bmsafemap will have rollbacks when written. Must + * only be called with lk and the buf lock on the cg held. + */ +static int +bmsafemap_rollbacks(bmsafemap) + struct bmsafemap *bmsafemap; +{ + + return (!LIST_EMPTY(&bmsafemap->sm_jaddrefhd) | + !LIST_EMPTY(&bmsafemap->sm_jnewblkhd)); +} + +/* + * Complete a write to a bmsafemap structure. Roll forward any bitmap + * changes if it's not a background write. 
Set all written dependencies + * to DEPCOMPLETE and free the structure if possible. + */ +static int +handle_written_bmsafemap(bmsafemap, bp) + struct bmsafemap *bmsafemap; + struct buf *bp; +{ + struct newblk *newblk; + struct inodedep *inodedep; + struct jaddref *jaddref, *jatmp; + struct jnewblk *jnewblk, *jntmp; + uint8_t *inosused; + uint8_t *blksfree; + struct cg *cgp; + struct fs *fs; + ino_t ino; + long bno; + int chgs; + int i; + + if ((bmsafemap->sm_state & IOSTARTED) == 0) + panic("initiate_write_bmsafemap: Not started\n"); + chgs = 0; + bmsafemap->sm_state &= ~IOSTARTED; + /* + * Restore unwritten inode allocation pending jaddref writes. + */ + if (!LIST_EMPTY(&bmsafemap->sm_jaddrefhd)) { + cgp = (struct cg *)bp->b_data; + fs = VFSTOUFS(bmsafemap->sm_list.wk_mp)->um_fs; + inosused = cg_inosused(cgp); + LIST_FOREACH_SAFE(jaddref, &bmsafemap->sm_jaddrefhd, + ja_bmdeps, jatmp) { + if ((jaddref->ja_state & UNDONE) == 0) + continue; + ino = jaddref->ja_ino % fs->fs_ipg; + if (isset(inosused, ino)) + panic("handle_written_bmsafemap: " + "re-allocated inode"); + if ((bp->b_xflags & BX_BKGRDMARKER) == 0) { + if ((jaddref->ja_mode & IFMT) == IFDIR) + cgp->cg_cs.cs_ndir++; + cgp->cg_cs.cs_nifree--; + setbit(inosused, ino); + chgs = 1; + } + jaddref->ja_state &= ~UNDONE; + jaddref->ja_state |= ATTACHED; + free_jaddref(jaddref); + } + } + /* + * Restore any block allocations which are pending journal writes. 
+ */ + if (LIST_FIRST(&bmsafemap->sm_jnewblkhd) != NULL) { + cgp = (struct cg *)bp->b_data; + fs = VFSTOUFS(bmsafemap->sm_list.wk_mp)->um_fs; + blksfree = cg_blksfree(cgp); + LIST_FOREACH_SAFE(jnewblk, &bmsafemap->sm_jnewblkhd, jn_deps, + jntmp) { + if ((jnewblk->jn_state & UNDONE) == 0) + continue; + bno = dtogd(fs, jnewblk->jn_blkno); + for (i = jnewblk->jn_oldfrags; i < jnewblk->jn_frags; + i++) { + if (bp->b_xflags & BX_BKGRDMARKER) + break; + if ((jnewblk->jn_state & NEWBLOCK) == 0 && + isclr(blksfree, bno + i)) + panic("handle_written_bmsafemap: " + "re-allocated fragment"); + clrbit(blksfree, bno + i); + chgs = 1; + } + jnewblk->jn_state &= ~(UNDONE | NEWBLOCK); + jnewblk->jn_state |= ATTACHED; + free_jnewblk(jnewblk); + } + } + while ((newblk = LIST_FIRST(&bmsafemap->sm_newblkwr))) { + newblk->nb_state |= DEPCOMPLETE; + newblk->nb_state &= ~ONDEPLIST; + newblk->nb_bmsafemap = NULL; + LIST_REMOVE(newblk, nb_deps); + if (newblk->nb_list.wk_type == D_ALLOCDIRECT) + handle_allocdirect_partdone( + WK_ALLOCDIRECT(&newblk->nb_list), NULL); + else if (newblk->nb_list.wk_type == D_ALLOCINDIR) + handle_allocindir_partdone( + WK_ALLOCINDIR(&newblk->nb_list)); + else if (newblk->nb_list.wk_type != D_NEWBLK) + panic("handle_written_bmsafemap: Unexpected type: %s", + TYPENAME(newblk->nb_list.wk_type)); + } + while ((inodedep = LIST_FIRST(&bmsafemap->sm_inodedepwr)) != NULL) { + inodedep->id_state |= DEPCOMPLETE; + inodedep->id_state &= ~ONDEPLIST; + LIST_REMOVE(inodedep, id_deps); + inodedep->id_bmsafemap = NULL; + } + if (LIST_EMPTY(&bmsafemap->sm_jaddrefhd) && + LIST_EMPTY(&bmsafemap->sm_jnewblkhd) && + LIST_EMPTY(&bmsafemap->sm_newblkhd) && + LIST_EMPTY(&bmsafemap->sm_inodedephd)) { + if (chgs) + bdirty(bp); + LIST_REMOVE(bmsafemap, sm_hash); + WORKITEM_FREE(bmsafemap, D_BMSAFEMAP); + return (0); + } + bdirty(bp); + return (1); +} + +/* + * Try to free a mkdir dependency. 
+ */ +static void +complete_mkdir(mkdir) + struct mkdir *mkdir; +{ + struct diradd *dap; + + if ((mkdir->md_state & ALLCOMPLETE) != ALLCOMPLETE) + return; + LIST_REMOVE(mkdir, md_mkdirs); + dap = mkdir->md_diradd; + dap->da_state &= ~(mkdir->md_state & (MKDIR_PARENT | MKDIR_BODY)); + if ((dap->da_state & (MKDIR_PARENT | MKDIR_BODY)) == 0) { + dap->da_state |= DEPCOMPLETE; + complete_diradd(dap); + } + WORKITEM_FREE(mkdir, D_MKDIR); +} + /* * Handle the completion of a mkdir dependency. */ @@ -4744,25 +9763,32 @@ handle_written_mkdir(mkdir, type) struct mkdir *mkdir; int type; { - struct diradd *dap; - struct pagedep *pagedep; - if (mkdir->md_state != type) + if ((mkdir->md_state & (MKDIR_PARENT | MKDIR_BODY)) != type) panic("handle_written_mkdir: bad type"); - dap = mkdir->md_diradd; - dap->da_state &= ~type; - if ((dap->da_state & (MKDIR_PARENT | MKDIR_BODY)) == 0) - dap->da_state |= DEPCOMPLETE; - if ((dap->da_state & ALLCOMPLETE) == ALLCOMPLETE) { - if (dap->da_state & DIRCHG) - pagedep = dap->da_previous->dm_pagedep; - else - pagedep = dap->da_pagedep; - LIST_REMOVE(dap, da_pdlist); - LIST_INSERT_HEAD(&pagedep->pd_pendinghd, dap, da_pdlist); - } - LIST_REMOVE(mkdir, md_mkdirs); - WORKITEM_FREE(mkdir, D_MKDIR); + mkdir->md_state |= COMPLETE; + complete_mkdir(mkdir); +} + +static void +free_pagedep(pagedep) + struct pagedep *pagedep; +{ + int i; + + if (pagedep->pd_state & (NEWBLOCK | ONWORKLIST)) + return; + for (i = 0; i < DAHASHSZ; i++) + if (!LIST_EMPTY(&pagedep->pd_diraddhd[i])) + return; + if (!LIST_EMPTY(&pagedep->pd_jmvrefhd)) + return; + if (!LIST_EMPTY(&pagedep->pd_dirremhd)) + return; + if (!LIST_EMPTY(&pagedep->pd_pendinghd)) + return; + LIST_REMOVE(pagedep, pd_hash); + WORKITEM_FREE(pagedep, D_PAGEDEP); } /* @@ -4790,8 +9816,11 @@ handle_written_filepage(pagedep, bp) */ while ((dirrem = LIST_FIRST(&pagedep->pd_dirremhd)) != NULL) { LIST_REMOVE(dirrem, dm_next); + dirrem->dm_state |= COMPLETE; dirrem->dm_dirinum = pagedep->pd_ino; - 
add_to_worklist(&dirrem->dm_list); + KASSERT(LIST_EMPTY(&dirrem->dm_jremrefhd), + ("handle_written_filepage: Journal entries not written.")); + add_to_worklist(&dirrem->dm_list, 0); } /* * Free any directory additions that have been committed. @@ -4800,7 +9829,7 @@ handle_written_filepage(pagedep, bp) */ if ((pagedep->pd_state & NEWBLOCK) == 0) while ((dap = LIST_FIRST(&pagedep->pd_pendinghd)) != NULL) - free_diradd(dap); + free_diradd(dap, NULL); /* * Uncommitted directory entries must be restored. */ @@ -4845,7 +9874,8 @@ handle_written_filepage(pagedep, bp) * Otherwise it will remain to track any new entries on * the page in case they are fsync'ed. */ - if ((pagedep->pd_state & NEWBLOCK) == 0) { + if ((pagedep->pd_state & NEWBLOCK) == 0 && + LIST_EMPTY(&pagedep->pd_jmvrefhd)) { LIST_REMOVE(pagedep, pd_hash); WORKITEM_FREE(pagedep, D_PAGEDEP); } @@ -4880,8 +9910,8 @@ softdep_load_inodeblock(ip) */ ip->i_effnlink = ip->i_nlink; ACQUIRE_LOCK(&lk); - if (inodedep_lookup(UFSTOVFS(ip->i_ump), - ip->i_number, 0, &inodedep) == 0) { + if (inodedep_lookup(UFSTOVFS(ip->i_ump), ip->i_number, 0, + &inodedep) == 0) { FREE_LOCK(&lk); return; } @@ -4908,11 +9938,26 @@ softdep_update_inodeblock(ip, bp, waitfor) int waitfor; /* nonzero => update must be allowed */ { struct inodedep *inodedep; + struct inoref *inoref; struct worklist *wk; struct mount *mp; struct buf *ibp; + struct fs *fs; int error; + mp = UFSTOVFS(ip->i_ump); + fs = ip->i_fs; + /* + * Preserve the freelink that is on disk. clear_unlinked_inodedep() + * does not have access to the in-core ip so must write directly into + * the inode block buffer when setting freelink. 
+ */ + if (fs->fs_magic == FS_UFS1_MAGIC) + DIP_SET(ip, i_freelink, ((struct ufs1_dinode *)bp->b_data + + ino_to_fsbo(fs, ip->i_number))->di_freelink); + else + DIP_SET(ip, i_freelink, ((struct ufs2_dinode *)bp->b_data + + ino_to_fsbo(fs, ip->i_number))->di_freelink); /* * If the effective link count is not equal to the actual link * count, then we must track the difference in an inodedep while @@ -4920,8 +9965,8 @@ softdep_update_inodeblock(ip, bp, waitfor) * if there is no existing inodedep, then there are no dependencies * to track. */ - mp = UFSTOVFS(ip->i_ump); ACQUIRE_LOCK(&lk); +again: if (inodedep_lookup(mp, ip->i_number, 0, &inodedep) == 0) { FREE_LOCK(&lk); if (ip->i_effnlink != ip->i_nlink) @@ -4930,6 +9975,20 @@ softdep_update_inodeblock(ip, bp, waitfor) } if (inodedep->id_nlinkdelta != ip->i_nlink - ip->i_effnlink) panic("softdep_update_inodeblock: bad delta"); + /* + * If we're flushing all dependencies we must also move any waiting + * for journal writes onto the bufwait list prior to I/O. + */ + if (waitfor) { + TAILQ_FOREACH(inoref, &inodedep->id_inoreflst, if_deps) { + if ((inoref->if_state & (DEPCOMPLETE | GOINGAWAY)) + == DEPCOMPLETE) { + stat_jwait_inode++; + jwait(&inoref->if_list); + goto again; + } + } + } /* * Changes have been initiated. Anything depending on these * changes cannot occur until this inode has been written. 
@@ -4945,10 +10004,12 @@ softdep_update_inodeblock(ip, bp, waitfor) */ merge_inode_lists(&inodedep->id_newinoupdt, &inodedep->id_inoupdt); if (!TAILQ_EMPTY(&inodedep->id_inoupdt)) - handle_allocdirect_partdone(TAILQ_FIRST(&inodedep->id_inoupdt)); + handle_allocdirect_partdone(TAILQ_FIRST(&inodedep->id_inoupdt), + NULL); merge_inode_lists(&inodedep->id_newextupdt, &inodedep->id_extupdt); if (!TAILQ_EMPTY(&inodedep->id_extupdt)) - handle_allocdirect_partdone(TAILQ_FIRST(&inodedep->id_extupdt)); + handle_allocdirect_partdone(TAILQ_FIRST(&inodedep->id_extupdt), + NULL); /* * Now that the inode has been pushed into the buffer, the * operations dependent on the inode being written to disk @@ -4971,11 +10032,11 @@ softdep_update_inodeblock(ip, bp, waitfor) return; } retry: - if ((inodedep->id_state & DEPCOMPLETE) != 0) { + if ((inodedep->id_state & (DEPCOMPLETE | GOINGAWAY)) != 0) { FREE_LOCK(&lk); return; } - ibp = inodedep->id_buf; + ibp = inodedep->id_bmsafemap->sm_buf; ibp = getdirtybuf(ibp, &lk, MNT_WAIT); if (ibp == NULL) { /* @@ -5007,13 +10068,13 @@ merge_inode_lists(newlisthead, oldlisthead) newadp = TAILQ_FIRST(newlisthead); for (listadp = TAILQ_FIRST(oldlisthead); listadp && newadp;) { - if (listadp->ad_lbn < newadp->ad_lbn) { + if (listadp->ad_offset < newadp->ad_offset) { listadp = TAILQ_NEXT(listadp, ad_next); continue; } TAILQ_REMOVE(newlisthead, newadp, ad_next); TAILQ_INSERT_BEFORE(listadp, newadp, ad_next); - if (listadp->ad_lbn == newadp->ad_lbn) { + if (listadp->ad_offset == newadp->ad_offset) { allocdirect_merge(oldlisthead, newadp, listadp); listadp = newadp; @@ -5036,6 +10097,7 @@ softdep_fsync(vp) { struct inodedep *inodedep; struct pagedep *pagedep; + struct inoref *inoref; struct worklist *wk; struct diradd *dap; struct mount *mp; @@ -5052,17 +10114,25 @@ softdep_fsync(vp) fs = ip->i_fs; mp = vp->v_mount; ACQUIRE_LOCK(&lk); +restart: if (inodedep_lookup(mp, ip->i_number, 0, &inodedep) == 0) { FREE_LOCK(&lk); return (0); } + TAILQ_FOREACH(inoref, 
&inodedep->id_inoreflst, if_deps) { + if ((inoref->if_state & (DEPCOMPLETE | GOINGAWAY)) + == DEPCOMPLETE) { + stat_jwait_inode++; + jwait(&inoref->if_list); + goto restart; + } + } if (!LIST_EMPTY(&inodedep->id_inowait) || - !LIST_EMPTY(&inodedep->id_bufwait) || !TAILQ_EMPTY(&inodedep->id_extupdt) || !TAILQ_EMPTY(&inodedep->id_newextupdt) || !TAILQ_EMPTY(&inodedep->id_inoupdt) || !TAILQ_EMPTY(&inodedep->id_newinoupdt)) - panic("softdep_fsync: pending ops"); + panic("softdep_fsync: pending ops %p", inodedep); for (error = 0, flushparent = 0; ; ) { if ((wk = LIST_FIRST(&inodedep->id_pendinghd)) == NULL) break; @@ -5254,8 +10324,8 @@ int softdep_sync_metadata(struct vnode *vp) { struct pagedep *pagedep; - struct allocdirect *adp; struct allocindir *aip; + struct newblk *newblk; struct buf *bp, *nbp; struct worklist *wk; struct bufobj *bo; @@ -5319,27 +10389,16 @@ loop: switch (wk->wk_type) { case D_ALLOCDIRECT: - adp = WK_ALLOCDIRECT(wk); - if (adp->ad_state & DEPCOMPLETE) - continue; - nbp = adp->ad_buf; - nbp = getdirtybuf(nbp, &lk, waitfor); - if (nbp == NULL) - continue; - FREE_LOCK(&lk); - if (waitfor == MNT_NOWAIT) { - bawrite(nbp); - } else if ((error = bwrite(nbp)) != 0) { - break; - } - ACQUIRE_LOCK(&lk); - continue; - case D_ALLOCINDIR: - aip = WK_ALLOCINDIR(wk); - if (aip->ai_state & DEPCOMPLETE) + newblk = WK_NEWBLK(wk); + if (newblk->nb_jnewblk != NULL) { + stat_jwait_newblk++; + jwait(&newblk->nb_jnewblk->jn_list); + goto restart; + } + if (newblk->nb_state & DEPCOMPLETE) continue; - nbp = aip->ai_buf; + nbp = newblk->nb_bmsafemap->sm_buf; nbp = getdirtybuf(nbp, &lk, waitfor); if (nbp == NULL) continue; @@ -5355,10 +10414,17 @@ loop: case D_INDIRDEP: restart: - LIST_FOREACH(aip, &WK_INDIRDEP(wk)->ir_deplisthd, ai_next) { - if (aip->ai_state & DEPCOMPLETE) + LIST_FOREACH(aip, + &WK_INDIRDEP(wk)->ir_deplisthd, ai_next) { + newblk = (struct newblk *)aip; + if (newblk->nb_jnewblk != NULL) { + stat_jwait_newblk++; + jwait(&newblk->nb_jnewblk->jn_list); + 
goto restart; + } + if (newblk->nb_state & DEPCOMPLETE) continue; - nbp = aip->ai_buf; + nbp = newblk->nb_bmsafemap->sm_buf; nbp = getdirtybuf(nbp, &lk, MNT_WAIT); if (nbp == NULL) goto restart; @@ -5371,14 +10437,6 @@ loop: } continue; - case D_INODEDEP: - if ((error = flush_inodedep_deps(wk->wk_mp, - WK_INODEDEP(wk)->id_ino)) != 0) { - FREE_LOCK(&lk); - break; - } - continue; - case D_PAGEDEP: /* * We are trying to sync a directory that may @@ -5400,48 +10458,6 @@ loop: } continue; - case D_MKDIR: - /* - * This case should never happen if the vnode has - * been properly sync'ed. However, if this function - * is used at a place where the vnode has not yet - * been sync'ed, this dependency can show up. So, - * rather than panic, just flush it. - */ - nbp = WK_MKDIR(wk)->md_buf; - nbp = getdirtybuf(nbp, &lk, waitfor); - if (nbp == NULL) - continue; - FREE_LOCK(&lk); - if (waitfor == MNT_NOWAIT) { - bawrite(nbp); - } else if ((error = bwrite(nbp)) != 0) { - break; - } - ACQUIRE_LOCK(&lk); - continue; - - case D_BMSAFEMAP: - /* - * This case should never happen if the vnode has - * been properly sync'ed. However, if this function - * is used at a place where the vnode has not yet - * been sync'ed, this dependency can show up. So, - * rather than panic, just flush it. 
- */ - nbp = WK_BMSAFEMAP(wk)->sm_buf; - nbp = getdirtybuf(nbp, &lk, waitfor); - if (nbp == NULL) - continue; - FREE_LOCK(&lk); - if (waitfor == MNT_NOWAIT) { - bawrite(nbp); - } else if ((error = bwrite(nbp)) != 0) { - break; - } - ACQUIRE_LOCK(&lk); - continue; - default: panic("softdep_sync_metadata: Unknown type %s", TYPENAME(wk->wk_type)); @@ -5489,7 +10505,8 @@ loop: BO_LOCK(bo); drain_output(vp); BO_UNLOCK(bo); - return (0); + return ffs_update(vp, 1); + /* return (0); */ } /* @@ -5502,6 +10519,7 @@ flush_inodedep_deps(mp, ino) ino_t ino; { struct inodedep *inodedep; + struct inoref *inoref; int error, waitfor; /* @@ -5522,8 +10540,17 @@ flush_inodedep_deps(mp, ino) return (error); FREE_LOCK(&lk); ACQUIRE_LOCK(&lk); +restart: if (inodedep_lookup(mp, ino, 0, &inodedep) == 0) return (0); + TAILQ_FOREACH(inoref, &inodedep->id_inoreflst, if_deps) { + if ((inoref->if_state & (DEPCOMPLETE | GOINGAWAY)) + == DEPCOMPLETE) { + stat_jwait_inode++; + jwait(&inoref->if_list); + goto restart; + } + } if (flush_deplist(&inodedep->id_inoupdt, waitfor, &error) || flush_deplist(&inodedep->id_newinoupdt, waitfor, &error) || flush_deplist(&inodedep->id_extupdt, waitfor, &error) || @@ -5555,13 +10582,20 @@ flush_deplist(listhead, waitfor, errorp) int *errorp; { struct allocdirect *adp; + struct newblk *newblk; struct buf *bp; mtx_assert(&lk, MA_OWNED); TAILQ_FOREACH(adp, listhead, ad_next) { - if (adp->ad_state & DEPCOMPLETE) + newblk = (struct newblk *)adp; + if (newblk->nb_jnewblk != NULL) { + stat_jwait_newblk++; + jwait(&newblk->nb_jnewblk->jn_list); + return (1); + } + if (newblk->nb_state & DEPCOMPLETE) continue; - bp = adp->ad_buf; + bp = newblk->nb_bmsafemap->sm_buf; bp = getdirtybuf(bp, &lk, waitfor); if (bp == NULL) { if (waitfor == MNT_NOWAIT) @@ -5581,6 +10615,101 @@ flush_deplist(listhead, waitfor, errorp) return (0); } +/* + * Flush dependencies associated with an allocdirect block. 
+ */ +static int +flush_newblk_dep(vp, mp, lbn) + struct vnode *vp; + struct mount *mp; + ufs_lbn_t lbn; +{ + struct newblk *newblk; + struct bufobj *bo; + struct inode *ip; + struct buf *bp; + ufs2_daddr_t blkno; + int error; + + error = 0; + bo = &vp->v_bufobj; + ip = VTOI(vp); + blkno = DIP(ip, i_db[lbn]); + if (blkno == 0) + panic("flush_newblk_dep: Missing block"); + ACQUIRE_LOCK(&lk); + /* + * Loop until all dependencies related to this block are satisfied. + * We must be careful to restart after each sleep in case a write + * completes some part of this process for us. + */ + for (;;) { + if (newblk_lookup(mp, blkno, 0, &newblk) == 0) { + FREE_LOCK(&lk); + break; + } + if (newblk->nb_list.wk_type != D_ALLOCDIRECT) + panic("flush_newblk_deps: Bad newblk %p", newblk); + /* + * Flush the journal. + */ + if (newblk->nb_jnewblk != NULL) { + stat_jwait_newblk++; + jwait(&newblk->nb_jnewblk->jn_list); + continue; + } + /* + * Write the bitmap dependency. + */ + if ((newblk->nb_state & DEPCOMPLETE) == 0) { + bp = newblk->nb_bmsafemap->sm_buf; + bp = getdirtybuf(bp, &lk, MNT_WAIT); + if (bp == NULL) + continue; + FREE_LOCK(&lk); + error = bwrite(bp); + if (error) + break; + ACQUIRE_LOCK(&lk); + continue; + } + /* + * Write the buffer. + */ + FREE_LOCK(&lk); + BO_LOCK(bo); + bp = gbincore(bo, lbn); + if (bp != NULL) { + error = BUF_LOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL | + LK_INTERLOCK, BO_MTX(bo)); + if (error == ENOLCK) { + ACQUIRE_LOCK(&lk); + continue; /* Slept, retry */ + } + if (error != 0) + break; /* Failed */ + if (bp->b_flags & B_DELWRI) { + bremfree(bp); + error = bwrite(bp); + if (error) + break; + } else + BUF_UNLOCK(bp); + } else + BO_UNLOCK(bo); + /* + * We have to wait for the direct pointers to + * point at the newdirblk before the dependency + * will go away. + */ + error = ffs_update(vp, MNT_WAIT); + if (error) + break; + ACQUIRE_LOCK(&lk); + } + return (error); +} + /* * Eliminate a pagedep dependency by flushing out all its diradd dependencies. 
* Called with splbio blocked. @@ -5592,16 +10721,16 @@ flush_pagedep_deps(pvp, mp, diraddhdp) struct diraddhd *diraddhdp; { struct inodedep *inodedep; + struct inoref *inoref; struct ufsmount *ump; struct diradd *dap; struct vnode *vp; - struct bufobj *bo; int error = 0; struct buf *bp; ino_t inum; - struct worklist *wk; ump = VFSTOUFS(mp); +restart: while ((dap = LIST_FIRST(diraddhdp)) != NULL) { /* * Flush ourselves if this directory entry @@ -5609,7 +10738,7 @@ flush_pagedep_deps(pvp, mp, diraddhdp) */ if (dap->da_state & MKDIR_PARENT) { FREE_LOCK(&lk); - if ((error = ffs_update(pvp, 1)) != 0) + if ((error = ffs_update(pvp, MNT_WAIT)) != 0) break; ACQUIRE_LOCK(&lk); /* @@ -5623,84 +10752,52 @@ flush_pagedep_deps(pvp, mp, diraddhdp) /* * A newly allocated directory must have its "." and * ".." entries written out before its name can be - * committed in its parent. We do not want or need - * the full semantics of a synchronous ffs_syncvnode as - * that may end up here again, once for each directory - * level in the filesystem. Instead, we push the blocks - * and wait for them to clear. We have to fsync twice - * because the first call may choose to defer blocks - * that still have dependencies, but deferral will - * happen at most once. + * committed in its parent. */ inum = dap->da_newinum; + if (inodedep_lookup(UFSTOVFS(ump), inum, 0, &inodedep) == 0) + panic("flush_pagedep_deps: lost inode1"); + /* + * Wait for any pending journal adds to complete so we don't + * cause rollbacks while syncing. 
+ */ + TAILQ_FOREACH(inoref, &inodedep->id_inoreflst, if_deps) { + if ((inoref->if_state & (DEPCOMPLETE | GOINGAWAY)) + == DEPCOMPLETE) { + stat_jwait_inode++; + jwait(&inoref->if_list); + goto restart; + } + } if (dap->da_state & MKDIR_BODY) { FREE_LOCK(&lk); if ((error = ffs_vgetf(mp, inum, LK_EXCLUSIVE, &vp, FFSV_FORCEINSMQ))) break; - if ((error=ffs_syncvnode(vp, MNT_NOWAIT)) || - (error=ffs_syncvnode(vp, MNT_NOWAIT))) { - vput(vp); - break; - } - bo = &vp->v_bufobj; - BO_LOCK(bo); - drain_output(vp); + error = flush_newblk_dep(vp, mp, 0); /* - * If first block is still dirty with a D_MKDIR - * dependency then it needs to be written now. + * If we still have the dependency we might need to + * update the vnode to sync the new link count to + * disk. */ - for (;;) { - error = 0; - bp = gbincore(bo, 0); - if (bp == NULL) - break; /* First block not present */ - error = BUF_LOCK(bp, - LK_EXCLUSIVE | - LK_SLEEPFAIL | - LK_INTERLOCK, - BO_MTX(bo)); - BO_LOCK(bo); - if (error == ENOLCK) - continue; /* Slept, retry */ - if (error != 0) - break; /* Failed */ - if ((bp->b_flags & B_DELWRI) == 0) { - BUF_UNLOCK(bp); - break; /* Buffer not dirty */ - } - for (wk = LIST_FIRST(&bp->b_dep); - wk != NULL; - wk = LIST_NEXT(wk, wk_list)) - if (wk->wk_type == D_MKDIR) - break; - if (wk == NULL) - BUF_UNLOCK(bp); /* Dependency gone */ - else { - /* - * D_MKDIR dependency remains, - * must write buffer to stable - * storage. - */ - BO_UNLOCK(bo); - bremfree(bp); - error = bwrite(bp); - BO_LOCK(bo); - } - break; - } - BO_UNLOCK(bo); + if (error == 0 && dap == LIST_FIRST(diraddhdp)) + error = ffs_update(vp, MNT_WAIT); vput(vp); if (error != 0) - break; /* Flushing of first block failed */ + break; ACQUIRE_LOCK(&lk); /* * If that cleared dependencies, go on to next. 
*/ if (dap != LIST_FIRST(diraddhdp)) continue; - if (dap->da_state & MKDIR_BODY) - panic("flush_pagedep_deps: MKDIR_BODY"); + if (dap->da_state & MKDIR_BODY) { + inodedep_lookup(UFSTOVFS(ump), inum, 0, + &inodedep); + panic("flush_pagedep_deps: MKDIR_BODY " + "inodedep %p dap %p vp %p", + inodedep, dap, vp); + } } /* * Flush the inode on which the directory entry depends. @@ -5719,8 +10816,8 @@ retry: * If the inode still has bitmap dependencies, * push them to disk. */ - if ((inodedep->id_state & DEPCOMPLETE) == 0) { - bp = inodedep->id_buf; + if ((inodedep->id_state & (DEPCOMPLETE | GOINGAWAY)) == 0) { + bp = inodedep->id_bmsafemap->sm_buf; bp = getdirtybuf(bp, &lk, MNT_WAIT); if (bp == NULL) goto retry; @@ -5733,24 +10830,29 @@ retry: } /* * If the inode is still sitting in a buffer waiting - * to be written, push it to disk. + * to be written or waiting for the link count to be + * adjusted update it here to flush it to disk. */ - FREE_LOCK(&lk); - if ((error = bread(ump->um_devvp, - fsbtodb(ump->um_fs, ino_to_fsba(ump->um_fs, inum)), - (int)ump->um_fs->fs_bsize, NOCRED, &bp)) != 0) { - brelse(bp); - break; + if (dap == LIST_FIRST(diraddhdp)) { + FREE_LOCK(&lk); + if ((error = ffs_vgetf(mp, inum, LK_EXCLUSIVE, &vp, + FFSV_FORCEINSMQ))) + break; + error = ffs_update(vp, MNT_WAIT); + vput(vp); + if (error) + break; + ACQUIRE_LOCK(&lk); } - if ((error = bwrite(bp)) != 0) - break; - ACQUIRE_LOCK(&lk); /* * If we have failed to get rid of all the dependencies * then something is seriously wrong. 
*/ - if (dap == LIST_FIRST(diraddhdp)) - panic("flush_pagedep_deps: flush failed"); + if (dap == LIST_FIRST(diraddhdp)) { + inodedep_lookup(UFSTOVFS(ump), inum, 0, &inodedep); + panic("flush_pagedep_deps: failed to flush " + "inodedep %p ino %d dap %p", inodedep, inum, dap); + } } if (error) ACQUIRE_LOCK(&lk); @@ -5828,6 +10930,7 @@ softdep_request_cleanup(fs, vp) return (0); UFS_UNLOCK(ump); ACQUIRE_LOCK(&lk); + process_removes(vp); if (ump->softdep_on_worklist > 0 && process_worklist_item(UFSTOVFS(ump), LK_NOWAIT) != -1) { stat_worklist_push += 1; @@ -6100,10 +11203,15 @@ softdep_count_dependencies(bp, wantcount) int wantcount; { struct worklist *wk; + struct bmsafemap *bmsafemap; struct inodedep *inodedep; struct indirdep *indirdep; + struct freeblks *freeblks; struct allocindir *aip; struct pagedep *pagedep; + struct dirrem *dirrem; + struct newblk *newblk; + struct mkdir *mkdir; struct diradd *dap; int i, retval; @@ -6132,6 +11240,12 @@ softdep_count_dependencies(bp, wantcount) if (!wantcount) goto out; } + if (TAILQ_FIRST(&inodedep->id_inoreflst)) { + /* Add reference dependency. */ + retval += 1; + if (!wantcount) + goto out; + } continue; case D_INDIRDEP: @@ -6147,6 +11261,14 @@ softdep_count_dependencies(bp, wantcount) case D_PAGEDEP: pagedep = WK_PAGEDEP(wk); + LIST_FOREACH(dirrem, &pagedep->pd_dirremhd, dm_next) { + if (LIST_FIRST(&dirrem->dm_jremrefhd)) { + /* Journal remove ref dependency. */ + retval += 1; + if (!wantcount) + goto out; + } + } for (i = 0; i < DAHASHSZ; i++) { LIST_FOREACH(dap, &pagedep->pd_diraddhd[i], da_pdlist) { @@ -6159,14 +11281,62 @@ softdep_count_dependencies(bp, wantcount) continue; case D_BMSAFEMAP: + bmsafemap = WK_BMSAFEMAP(wk); + if (LIST_FIRST(&bmsafemap->sm_jaddrefhd)) { + /* Add reference dependency. */ + retval += 1; + if (!wantcount) + goto out; + } + if (LIST_FIRST(&bmsafemap->sm_jnewblkhd)) { + /* Allocate block dependency. 
*/ + retval += 1; + if (!wantcount) + goto out; + } + continue; + + case D_FREEBLKS: + freeblks = WK_FREEBLKS(wk); + if (LIST_FIRST(&freeblks->fb_jfreeblkhd)) { + /* Freeblk journal dependency. */ + retval += 1; + if (!wantcount) + goto out; + } + continue; + case D_ALLOCDIRECT: case D_ALLOCINDIR: + newblk = WK_NEWBLK(wk); + if (newblk->nb_jnewblk) { + /* Journal allocate dependency. */ + retval += 1; + if (!wantcount) + goto out; + } + continue; + case D_MKDIR: + mkdir = WK_MKDIR(wk); + if (mkdir->md_jaddref) { + /* Journal reference dependency. */ + retval += 1; + if (!wantcount) + goto out; + } + continue; + + case D_FREEWORK: + case D_FREEDEP: + case D_JSEGDEP: + case D_JSEG: + case D_SBDEP: /* never a dependency on these blocks */ continue; default: - panic("softdep_check_for_rollback: Unexpected type %s", + panic("softdep_count_dependencies: Unexpected type %s", TYPENAME(wk->wk_type)); /* NOTREACHED */ } @@ -6382,6 +11552,45 @@ softdep_error(func, error) #ifdef DDB +static void +inodedep_print(struct inodedep *inodedep, int verbose) +{ + db_printf("%p fs %p st %x ino %jd inoblk %jd delta %d nlink %d" + " saveino %p\n", + inodedep, inodedep->id_fs, inodedep->id_state, + (intmax_t)inodedep->id_ino, + (intmax_t)fsbtodb(inodedep->id_fs, + ino_to_fsba(inodedep->id_fs, inodedep->id_ino)), + inodedep->id_nlinkdelta, inodedep->id_savednlink, + inodedep->id_savedino1); + + if (verbose == 0) + return; + + db_printf("\tpendinghd %p, bufwait %p, inowait %p, inoreflst %p, " + "mkdiradd %p\n", + LIST_FIRST(&inodedep->id_pendinghd), + LIST_FIRST(&inodedep->id_bufwait), + LIST_FIRST(&inodedep->id_inowait), + TAILQ_FIRST(&inodedep->id_inoreflst), + inodedep->id_mkdiradd); + db_printf("\tinoupdt %p, newinoupdt %p, extupdt %p, newextupdt %p\n", + TAILQ_FIRST(&inodedep->id_inoupdt), + TAILQ_FIRST(&inodedep->id_newinoupdt), + TAILQ_FIRST(&inodedep->id_extupdt), + TAILQ_FIRST(&inodedep->id_newextupdt)); +} + +DB_SHOW_COMMAND(inodedep, db_show_inodedep) +{ + + if (have_addr == 0) { 
+ db_printf("Address required\n"); + return; + } + inodedep_print((struct inodedep*)addr, 1); +} + DB_SHOW_COMMAND(inodedeps, db_show_inodedeps) { struct inodedep_hashhead *inodedephd; @@ -6395,15 +11604,62 @@ DB_SHOW_COMMAND(inodedeps, db_show_inodedeps) LIST_FOREACH(inodedep, inodedephd, id_hash) { if (fs != NULL && fs != inodedep->id_fs) continue; - db_printf("%p fs %p st %x ino %jd inoblk %jd\n", - inodedep, inodedep->id_fs, inodedep->id_state, - (intmax_t)inodedep->id_ino, - (intmax_t)fsbtodb(inodedep->id_fs, - ino_to_fsba(inodedep->id_fs, inodedep->id_ino))); + inodedep_print(inodedep, 0); } } } +DB_SHOW_COMMAND(worklist, db_show_worklist) +{ + struct worklist *wk; + + if (have_addr == 0) { + db_printf("Address required\n"); + return; + } + wk = (struct worklist *)addr; + printf("worklist: %p type %s state 0x%X\n", + wk, TYPENAME(wk->wk_type), wk->wk_state); +} + +DB_SHOW_COMMAND(workhead, db_show_workhead) +{ + struct workhead *wkhd; + struct worklist *wk; + int i; + + if (have_addr == 0) { + db_printf("Address required\n"); + return; + } + wkhd = (struct workhead *)addr; + wk = LIST_FIRST(wkhd); + for (i = 0; i < 100 && wk != NULL; i++, wk = LIST_NEXT(wk, wk_list)) + db_printf("worklist: %p type %s state 0x%X", + wk, TYPENAME(wk->wk_type), wk->wk_state); + if (i == 100) + db_printf("workhead overflow"); + printf("\n"); +} + + +DB_SHOW_COMMAND(mkdirs, db_show_mkdirs) +{ + struct jaddref *jaddref; + struct diradd *diradd; + struct mkdir *mkdir; + + LIST_FOREACH(mkdir, &mkdirlisthd, md_mkdirs) { + diradd = mkdir->md_diradd; + db_printf("mkdir: %p state 0x%X dap %p state 0x%X", + mkdir, mkdir->md_state, diradd, diradd->da_state); + if ((jaddref = mkdir->md_jaddref) != NULL) + db_printf(" jaddref %p jaddref state 0x%X", + jaddref, jaddref->ja_state); + db_printf("\n"); + } +} + #endif /* DDB */ #endif /* SOFTUPDATES */ diff --git a/sys/ufs/ffs/ffs_subr.c b/sys/ufs/ffs/ffs_subr.c index e34bc1372a2..e2460a36be2 100644 --- a/sys/ufs/ffs/ffs_subr.c +++ 
b/sys/ufs/ffs/ffs_subr.c @@ -37,7 +37,6 @@ __FBSDID("$FreeBSD$"); #ifndef _KERNEL #include #include -#include "fsck.h" #else #include #include @@ -223,7 +222,38 @@ ffs_isblock(fs, cp, h) mask = 0x01 << (h & 0x7); return ((cp[h >> 3] & mask) == mask); default: +#ifdef _KERNEL panic("ffs_isblock"); +#endif + break; + } + return (0); +} + +/* + * check if a block is free + */ +int +ffs_isfreeblock(fs, cp, h) + struct fs *fs; + u_char *cp; + ufs1_daddr_t h; +{ + + switch ((int)fs->fs_frag) { + case 8: + return (cp[h] == 0); + case 4: + return ((cp[h >> 1] & (0x0f << ((h & 0x1) << 2))) == 0); + case 2: + return ((cp[h >> 2] & (0x03 << ((h & 0x3) << 1))) == 0); + case 1: + return ((cp[h >> 3] & (0x01 << (h & 0x7))) == 0); + default: +#ifdef _KERNEL + panic("ffs_isfreeblock"); +#endif + break; } return (0); } @@ -252,7 +282,10 @@ ffs_clrblock(fs, cp, h) cp[h >> 3] &= ~(0x01 << (h & 0x7)); return; default: +#ifdef _KERNEL panic("ffs_clrblock"); +#endif + break; } } @@ -281,6 +314,101 @@ ffs_setblock(fs, cp, h) cp[h >> 3] |= (0x01 << (h & 0x7)); return; default: +#ifdef _KERNEL panic("ffs_setblock"); +#endif + break; } } + +/* + * Update the cluster map because of an allocation or free. + * + * Cnt == 1 means free; cnt == -1 means allocating. + */ +void +ffs_clusteracct(fs, cgp, blkno, cnt) + struct fs *fs; + struct cg *cgp; + ufs1_daddr_t blkno; + int cnt; +{ + int32_t *sump; + int32_t *lp; + u_char *freemapp, *mapp; + int i, start, end, forw, back, map, bit; + + if (fs->fs_contigsumsize <= 0) + return; + freemapp = cg_clustersfree(cgp); + sump = cg_clustersum(cgp); + /* + * Allocate or clear the actual block. + */ + if (cnt > 0) + setbit(freemapp, blkno); + else + clrbit(freemapp, blkno); + /* + * Find the size of the cluster going forward. 
+ */ + start = blkno + 1; + end = start + fs->fs_contigsumsize; + if (end >= cgp->cg_nclusterblks) + end = cgp->cg_nclusterblks; + mapp = &freemapp[start / NBBY]; + map = *mapp++; + bit = 1 << (start % NBBY); + for (i = start; i < end; i++) { + if ((map & bit) == 0) + break; + if ((i & (NBBY - 1)) != (NBBY - 1)) { + bit <<= 1; + } else { + map = *mapp++; + bit = 1; + } + } + forw = i - start; + /* + * Find the size of the cluster going backward. + */ + start = blkno - 1; + end = start - fs->fs_contigsumsize; + if (end < 0) + end = -1; + mapp = &freemapp[start / NBBY]; + map = *mapp--; + bit = 1 << (start % NBBY); + for (i = start; i > end; i--) { + if ((map & bit) == 0) + break; + if ((i & (NBBY - 1)) != 0) { + bit >>= 1; + } else { + map = *mapp--; + bit = 1 << (NBBY - 1); + } + } + back = start - i; + /* + * Account for old cluster and the possibly new forward and + * back clusters. + */ + i = back + forw + 1; + if (i > fs->fs_contigsumsize) + i = fs->fs_contigsumsize; + sump[i] += cnt; + if (back > 0) + sump[back] -= cnt; + if (forw > 0) + sump[forw] -= cnt; + /* + * Update cluster summary information. 
+ */ + lp = &sump[fs->fs_contigsumsize]; + for (i = fs->fs_contigsumsize; i > 0; i--) + if (*lp-- > 0) + break; + fs->fs_maxcluster[cgp->cg_cgx] = i; +} diff --git a/sys/ufs/ffs/ffs_vfsops.c b/sys/ufs/ffs/ffs_vfsops.c index 656c03666c5..2963514508e 100644 --- a/sys/ufs/ffs/ffs_vfsops.c +++ b/sys/ufs/ffs/ffs_vfsops.c @@ -79,7 +79,6 @@ static int ffs_reload(struct mount *, struct thread *); static int ffs_mountfs(struct vnode *, struct mount *, struct thread *); static void ffs_oldfscompat_read(struct fs *, struct ufsmount *, ufs2_daddr_t); -static void ffs_oldfscompat_write(struct fs *, struct ufsmount *); static void ffs_ifree(struct ufsmount *ump, struct inode *ip); static vfs_init_t ffs_init; static vfs_uninit_t ffs_uninit; @@ -127,8 +126,8 @@ static struct buf_ops ffs_ops = { static const char *ffs_opts[] = { "acls", "async", "noatime", "noclusterr", "noclusterw", "noexec", "export", "force", "from", "multilabel", - "snapshot", "nosuid", "suiddir", "nosymfollow", "sync", - "union", "nfsv4acls", NULL }; + "nfsv4acls", "snapshot", "nosuid", "suiddir", "nosymfollow", "sync", + "union", NULL }; static int ffs_mount(struct mount *mp) @@ -299,7 +298,8 @@ ffs_mount(struct mount *mp) if (fs->fs_clean == 0) { fs->fs_flags |= FS_UNCLEAN; if ((mp->mnt_flag & MNT_FORCE) || - ((fs->fs_flags & FS_NEEDSFSCK) == 0 && + ((fs->fs_flags & + (FS_SUJ | FS_NEEDSFSCK)) == 0 && (fs->fs_flags & FS_DOSOFTDEP))) { printf("WARNING: %s was not %s\n", fs->fs_fsmnt, "properly dismounted"); @@ -307,6 +307,9 @@ ffs_mount(struct mount *mp) printf( "WARNING: R/W mount of %s denied. 
Filesystem is not clean - run fsck\n", fs->fs_fsmnt); + if (fs->fs_flags & FS_SUJ) + printf( +"WARNING: Forced mount will invalidated journal contents\n"); return (EPERM); } } @@ -330,17 +333,18 @@ ffs_mount(struct mount *mp) MNT_ILOCK(mp); mp->mnt_flag &= ~MNT_RDONLY; MNT_IUNLOCK(mp); - fs->fs_clean = 0; - if ((error = ffs_sbupdate(ump, MNT_WAIT, 0)) != 0) { - vn_finished_write(mp); - return (error); - } + fs->fs_mtime = time_second; /* check to see if we need to start softdep */ if ((fs->fs_flags & FS_DOSOFTDEP) && (error = softdep_mount(devvp, mp, fs, td->td_ucred))){ vn_finished_write(mp); return (error); } + fs->fs_clean = 0; + if ((error = ffs_sbupdate(ump, MNT_WAIT, 0)) != 0) { + vn_finished_write(mp); + return (error); + } if (fs->fs_snapinum[0] != 0) ffs_snapshot_mount(mp); vn_finished_write(mp); @@ -705,7 +709,7 @@ ffs_mountfs(devvp, mp, td) if (fs->fs_clean == 0) { fs->fs_flags |= FS_UNCLEAN; if (ronly || (mp->mnt_flag & MNT_FORCE) || - ((fs->fs_flags & FS_NEEDSFSCK) == 0 && + ((fs->fs_flags & (FS_SUJ | FS_NEEDSFSCK)) == 0 && (fs->fs_flags & FS_DOSOFTDEP))) { printf( "WARNING: %s was not properly dismounted\n", @@ -714,6 +718,9 @@ ffs_mountfs(devvp, mp, td) printf( "WARNING: R/W mount of %s denied. Filesystem is not clean - run fsck\n", fs->fs_fsmnt); + if (fs->fs_flags & FS_SUJ) + printf( +"WARNING: Forced mount will invalidated journal contents\n"); error = EPERM; goto out; } @@ -896,6 +903,7 @@ ffs_mountfs(devvp, mp, td) */ bzero(fs->fs_fsmnt, MAXMNTLEN); strlcpy(fs->fs_fsmnt, mp->mnt_stat.f_mntonname, MAXMNTLEN); + mp->mnt_stat.f_iosize = fs->fs_bsize; if( mp->mnt_flag & MNT_ROOTFS) { /* @@ -907,6 +915,7 @@ ffs_mountfs(devvp, mp, td) } if (ronly == 0) { + fs->fs_mtime = time_second; if ((fs->fs_flags & FS_DOSOFTDEP) && (error = softdep_mount(devvp, mp, fs, cred)) != 0) { free(fs->fs_csp, M_UFSMNT); @@ -937,7 +946,6 @@ ffs_mountfs(devvp, mp, td) * This would all happen while the filesystem was busy/not * available, so would effectively be "atomic". 
*/ - mp->mnt_stat.f_iosize = fs->fs_bsize; (void) ufs_extattr_autostart(mp, td); #endif /* !UFS_EXTATTR_AUTOSTART */ #endif /* !UFS_EXTATTR */ @@ -1037,7 +1045,7 @@ ffs_oldfscompat_read(fs, ump, sblockloc) * XXX - Parts get retired eventually. * Unfortunately new bits get added. */ -static void +void ffs_oldfscompat_write(fs, ump) struct fs *fs; struct ufsmount *ump; @@ -1132,6 +1140,7 @@ ffs_unmount(mp, mntflags) fs->fs_pendinginodes = 0; } UFS_UNLOCK(ump); + softdep_unmount(mp); if (fs->fs_ronly == 0) { fs->fs_clean = fs->fs_flags & (FS_UNCLEAN|FS_NEEDSFSCK) ? 0 : 1; error = ffs_sbupdate(ump, MNT_WAIT, 0); @@ -1573,16 +1582,6 @@ ffs_vgetf(mp, ino, flags, vpp, ffs_flags) DIP_SET(ip, i_gen, ip->i_gen); } } - /* - * Ensure that uid and gid are correct. This is a temporary - * fix until fsck has been changed to do the update. - */ - if (fs->fs_magic == FS_UFS1_MAGIC && /* XXX */ - fs->fs_old_inodefmt < FS_44INODEFMT) { /* XXX */ - ip->i_uid = ip->i_din1->di_ouid; /* XXX */ - ip->i_gid = ip->i_din1->di_ogid; /* XXX */ - } /* XXX */ - #ifdef MAC if ((mp->mnt_flag & MNT_MULTILABEL) && ip->i_mode) { /* @@ -1726,6 +1725,8 @@ ffs_sbupdate(mp, waitfor, suspended) } fs->fs_fmod = 0; fs->fs_time = time_second; + if (fs->fs_flags & FS_DOSOFTDEP) + softdep_setup_sbupdate(mp, (struct fs *)bp->b_data, bp); bcopy((caddr_t)fs, bp->b_data, (u_int)fs->fs_sbsize); ffs_oldfscompat_write((struct fs *)bp->b_data, mp); if (suspended) @@ -1867,9 +1868,6 @@ ffs_bufwrite(struct buf *bp) } BO_UNLOCK(bp->b_bufobj); - /* Mark the buffer clean */ - bundirty(bp); - /* * If this buffer is marked for background writing and we * do not have to wait for it, make a copy and write the @@ -1910,9 +1908,16 @@ ffs_bufwrite(struct buf *bp) newbp->b_flags &= ~B_INVAL; #ifdef SOFTUPDATES - /* move over the dependencies */ - if (!LIST_EMPTY(&bp->b_dep)) - softdep_move_dependencies(bp, newbp); + /* + * Move over the dependencies. 
If there are rollbacks, + * leave the parent buffer dirtied as it will need to + * be written again. + */ + if (LIST_EMPTY(&bp->b_dep) || + softdep_move_dependencies(bp, newbp) == 0) + bundirty(bp); +#else + bundirty(bp); #endif /* @@ -1925,7 +1930,10 @@ ffs_bufwrite(struct buf *bp) */ bqrelse(bp); bp = newbp; - } + } else + /* Mark the buffer clean */ + bundirty(bp); + /* Let the normal bufwrite do the rest for us */ normal_write: @@ -1939,6 +1947,7 @@ ffs_geom_strategy(struct bufobj *bo, struct buf *bp) struct vnode *vp; int error; struct buf *tbp; + int nocopy; vp = bo->__bo_vnode; if (bp->b_iocmd == BIO_WRITE) { @@ -1946,8 +1955,9 @@ ffs_geom_strategy(struct bufobj *bo, struct buf *bp) bp->b_vp != NULL && bp->b_vp->v_mount != NULL && (bp->b_vp->v_mount->mnt_kern_flag & MNTK_SUSPENDED) != 0) panic("ffs_geom_strategy: bad I/O"); - bp->b_flags &= ~B_VALIDSUSPWRT; - if ((vp->v_vflag & VV_COPYONWRITE) && + nocopy = bp->b_flags & B_NOCOPY; + bp->b_flags &= ~(B_VALIDSUSPWRT | B_NOCOPY); + if ((vp->v_vflag & VV_COPYONWRITE) && nocopy == 0 && vp->v_rdev->si_snapdata != NULL) { if ((bp->b_flags & B_CLUSTER) != 0) { runningbufwakeup(bp); diff --git a/sys/ufs/ffs/ffs_vnops.c b/sys/ufs/ffs/ffs_vnops.c index 464a7613e16..96d0c7a3e8d 100644 --- a/sys/ufs/ffs/ffs_vnops.c +++ b/sys/ufs/ffs/ffs_vnops.c @@ -75,9 +75,6 @@ __FBSDID("$FreeBSD$"); #include #include #include -#include -#include -#include #include #include #include @@ -225,6 +222,7 @@ ffs_syncvnode(struct vnode *vp, int waitfor) wait = (waitfor == MNT_WAIT); lbn = lblkno(ip->i_fs, (ip->i_size + ip->i_fs->fs_bsize - 1)); bo = &vp->v_bufobj; + ip->i_flag &= ~IN_NEEDSYNC; /* * Flush all dirty buffers associated with a vnode. @@ -651,7 +649,6 @@ ffs_write(ap) struct inode *ip; struct fs *fs; struct buf *bp; - struct thread *td; ufs_lbn_t lbn; off_t osize; int seqcount; @@ -703,17 +700,8 @@ ffs_write(ap) * Maybe this should be above the vnode op call, but so long as * file servers have no limits, I don't think it matters. 
*/ - td = uio->uio_td; - if (vp->v_type == VREG && td != NULL) { - PROC_LOCK(td->td_proc); - if (uio->uio_offset + uio->uio_resid > - lim_cur(td->td_proc, RLIMIT_FSIZE)) { - psignal(td->td_proc, SIGXFSZ); - PROC_UNLOCK(td->td_proc); - return (EFBIG); - } - PROC_UNLOCK(td->td_proc); - } + if (vn_rlimit_fsize(vp, uio, uio->uio_td)) + return (EFBIG); resid = uio->uio_resid; osize = ip->i_size; @@ -859,13 +847,13 @@ ffs_getpages(ap) if (mreq->valid) { if (mreq->valid != VM_PAGE_BITS_ALL) vm_page_zero_invalid(mreq, TRUE); - vm_page_lock_queues(); for (i = 0; i < pcount; i++) { if (i != ap->a_reqpage) { + vm_page_lock(ap->a_m[i]); vm_page_free(ap->a_m[i]); + vm_page_unlock(ap->a_m[i]); } } - vm_page_unlock_queues(); VM_OBJECT_UNLOCK(mreq->object); return VM_PAGER_OK; } diff --git a/sys/ufs/ffs/fs.h b/sys/ufs/ffs/fs.h index 5452e2be6de..e863b961c62 100644 --- a/sys/ufs/ffs/fs.h +++ b/sys/ufs/ffs/fs.h @@ -340,7 +340,9 @@ struct fs { u_int32_t fs_avgfilesize; /* expected average file size */ u_int32_t fs_avgfpdir; /* expected # of files per directory */ int32_t fs_save_cgsize; /* save real cg size to use fs_bsize */ - int32_t fs_sparecon32[26]; /* reserved for future constants */ + ufs_time_t fs_mtime; /* Last mount or fsck time. 
*/ + int32_t fs_sujfree; /* SUJ free list */ + int32_t fs_sparecon32[23]; /* reserved for future constants */ int32_t fs_flags; /* see FS_ flags below */ int32_t fs_contigsumsize; /* size of cluster summary array */ int32_t fs_maxsymlinklen; /* max length of an internal symlink */ @@ -408,12 +410,13 @@ CTASSERT(sizeof(struct fs) == 1376); #define FS_UNCLEAN 0x0001 /* filesystem not clean at mount */ #define FS_DOSOFTDEP 0x0002 /* filesystem using soft dependencies */ #define FS_NEEDSFSCK 0x0004 /* filesystem needs sync fsck before mount */ -#define FS_INDEXDIRS 0x0008 /* kernel supports indexed directories */ +#define FS_SUJ 0x0008 /* Filesystem using softupdate journal */ #define FS_ACLS 0x0010 /* file system has POSIX.1e ACLs enabled */ #define FS_MULTILABEL 0x0020 /* file system is MAC multi-label */ #define FS_GJOURNAL 0x0040 /* gjournaled file system */ #define FS_FLAGS_UPDATED 0x0080 /* flags have been moved to new location */ #define FS_NFS4ACLS 0x0100 /* file system has NFSv4 ACLs enabled */ +#define FS_INDEXDIRS 0x0200 /* kernel supports indexed directories */ /* * Macros to access bits in the fs_active array. @@ -603,7 +606,31 @@ struct cg { ? (fs)->fs_bsize \ : (fragroundup(fs, blkoff(fs, (size))))) - +/* + * Indirect lbns are aligned on NDADDR addresses where single indirects + * are the negated address of the lowest lbn reachable, double indirects + * are this lbn - 1 and triple indirects are this lbn - 2. This yields + * an unusual bit order to determine level. + */ +static inline int +lbn_level(ufs_lbn_t lbn) +{ + if (lbn >= 0) + return 0; + switch (lbn & 0x3) { + case 0: + return (0); + case 1: + break; + case 2: + return (2); + case 3: + return (1); + default: + break; + } + return (-1); +} /* * Number of inodes in a secondary storage block/fragment. */ @@ -615,6 +642,108 @@ struct cg { */ #define NINDIR(fs) ((fs)->fs_nindir) +/* + * Softdep journal record format. + */ + +#define JOP_ADDREF 1 /* Add a reference to an inode. 
*/ +#define JOP_REMREF 2 /* Remove a reference from an inode. */ +#define JOP_NEWBLK 3 /* Allocate a block. */ +#define JOP_FREEBLK 4 /* Free a block or a tree of blocks. */ +#define JOP_MVREF 5 /* Move a reference from one off to another. */ +#define JOP_TRUNC 6 /* Partial truncation record. */ + +#define JREC_SIZE 32 /* Record and segment header size. */ + +#define SUJ_MIN (4 * 1024 * 1024) /* Minimum journal size */ +#define SUJ_MAX (32 * 1024 * 1024) /* Maximum journal size */ +#define SUJ_FILE ".sujournal" /* Journal file name */ + +/* + * Size of the segment record header. There is at most one for each disk + * block in the journal. The segment header is followed by an array of + * records. fsck depends on the first element in each record being 'op' + * and the second being 'ino'. Segments may span multiple disk blocks but + * the header is present on each. + */ +struct jsegrec { + uint64_t jsr_seq; /* Our sequence number */ + uint64_t jsr_oldest; /* Oldest valid sequence number */ + uint16_t jsr_cnt; /* Count of valid records */ + uint16_t jsr_blocks; /* Count of DEV_BSIZE blocks. */ + uint32_t jsr_crc; /* 32bit crc of the valid space */ + ufs_time_t jsr_time; /* timestamp for mount instance */ +}; + +/* + * Reference record. Records a single link count modification. + */ +struct jrefrec { + uint32_t jr_op; + ino_t jr_ino; + ino_t jr_parent; + uint16_t jr_nlink; + uint16_t jr_mode; + off_t jr_diroff; + uint64_t jr_unused; +}; + +/* + * Move record. Records a reference moving within a directory block. The + * nlink is unchanged but we must search both locations. + */ +struct jmvrec { + uint32_t jm_op; + ino_t jm_ino; + ino_t jm_parent; + uint16_t jm_unused; + off_t jm_oldoff; + off_t jm_newoff; +}; + +/* + * Block record. A set of frags or tree of blocks starting at an indirect are + * freed or a set of frags are allocated.
+ */ +struct jblkrec { + uint32_t jb_op; + uint32_t jb_ino; + ufs2_daddr_t jb_blkno; + ufs_lbn_t jb_lbn; + uint16_t jb_frags; + uint16_t jb_oldfrags; + uint32_t jb_unused; +}; + +/* + * Truncation record. Records a partial truncation so that it may be + * completed later. + */ +struct jtrncrec { + uint32_t jt_op; + uint32_t jt_ino; + off_t jt_size; + uint32_t jt_extsize; + uint32_t jt_pad[3]; +}; + +union jrec { + struct jsegrec rec_jsegrec; + struct jrefrec rec_jrefrec; + struct jmvrec rec_jmvrec; + struct jblkrec rec_jblkrec; + struct jtrncrec rec_jtrncrec; +}; + +#ifdef CTASSERT +CTASSERT(sizeof(struct jsegrec) == JREC_SIZE); +CTASSERT(sizeof(struct jrefrec) == JREC_SIZE); +CTASSERT(sizeof(struct jmvrec) == JREC_SIZE); +CTASSERT(sizeof(struct jblkrec) == JREC_SIZE); +CTASSERT(sizeof(struct jtrncrec) == JREC_SIZE); +CTASSERT(sizeof(union jrec) == JREC_SIZE); +#endif + extern int inside[], around[]; extern u_char *fragtbl[]; diff --git a/sys/ufs/ffs/softdep.h b/sys/ufs/ffs/softdep.h index b00183bcfd2..5d8a8691b17 100644 --- a/sys/ufs/ffs/softdep.h +++ b/sys/ufs/ffs/softdep.h @@ -94,22 +94,29 @@ * The ONWORKLIST flag shows whether the structure is currently linked * onto a worklist. 
*/ -#define ATTACHED 0x0001 -#define UNDONE 0x0002 -#define COMPLETE 0x0004 -#define DEPCOMPLETE 0x0008 -#define MKDIR_PARENT 0x0010 /* diradd & mkdir only */ -#define MKDIR_BODY 0x0020 /* diradd & mkdir only */ -#define RMDIR 0x0040 /* dirrem only */ -#define DIRCHG 0x0080 /* diradd & dirrem only */ -#define GOINGAWAY 0x0100 /* indirdep only */ -#define IOSTARTED 0x0200 /* inodedep & pagedep only */ -#define SPACECOUNTED 0x0400 /* inodedep only */ -#define NEWBLOCK 0x0800 /* pagedep only */ -#define INPROGRESS 0x1000 /* dirrem, freeblks, freefrag, freefile only */ -#define UFS1FMT 0x2000 /* indirdep only */ -#define EXTDATA 0x4000 /* allocdirect only */ -#define ONWORKLIST 0x8000 +#define ATTACHED 0x000001 +#define UNDONE 0x000002 +#define COMPLETE 0x000004 +#define DEPCOMPLETE 0x000008 +#define MKDIR_PARENT 0x000010 /* diradd, mkdir, jaddref, jsegdep only */ +#define MKDIR_BODY 0x000020 /* diradd, mkdir, jaddref only */ +#define RMDIR 0x000040 /* dirrem only */ +#define DIRCHG 0x000080 /* diradd, dirrem only */ +#define GOINGAWAY 0x000100 /* indirdep, jremref only */ +#define IOSTARTED 0x000200 /* inodedep, pagedep, bmsafemap only */ +#define SPACECOUNTED 0x000400 /* inodedep only */ +#define NEWBLOCK 0x000800 /* pagedep, jaddref only */ +#define INPROGRESS 0x001000 /* dirrem, freeblks, freefrag, freefile only */ +#define UFS1FMT 0x002000 /* indirdep only */ +#define EXTDATA 0x004000 /* allocdirect only */ +#define ONWORKLIST 0x008000 +#define IOWAITING 0x010000 /* Thread is waiting for IO to complete. */ +#define ONDEPLIST 0x020000 /* Structure is on a dependency list. */ +#define UNLINKED 0x040000 /* inodedep has been unlinked. 
*/ +#define UNLINKNEXT 0x080000 /* inodedep has valid di_freelink */ +#define UNLINKPREV 0x100000 /* inodedep is pointed at in the unlink list */ +#define UNLINKONLIST 0x200000 /* inodedep is in the unlinked list on disk */ +#define UNLINKLINKS (UNLINKNEXT | UNLINKPREV) #define ALLCOMPLETE (ATTACHED | COMPLETE | DEPCOMPLETE) @@ -135,25 +142,38 @@ * and the macros below changed to use it. */ struct worklist { - struct mount *wk_mp; /* Mount we live in */ LIST_ENTRY(worklist) wk_list; /* list of work requests */ - unsigned short wk_type; /* type of request */ - unsigned short wk_state; /* state flags */ + struct mount *wk_mp; /* Mount we live in */ + unsigned int wk_type:8, /* type of request */ + wk_state:24; /* state flags */ }; #define WK_DATA(wk) ((void *)(wk)) #define WK_PAGEDEP(wk) ((struct pagedep *)(wk)) #define WK_INODEDEP(wk) ((struct inodedep *)(wk)) #define WK_BMSAFEMAP(wk) ((struct bmsafemap *)(wk)) +#define WK_NEWBLK(wk) ((struct newblk *)(wk)) #define WK_ALLOCDIRECT(wk) ((struct allocdirect *)(wk)) #define WK_INDIRDEP(wk) ((struct indirdep *)(wk)) #define WK_ALLOCINDIR(wk) ((struct allocindir *)(wk)) #define WK_FREEFRAG(wk) ((struct freefrag *)(wk)) #define WK_FREEBLKS(wk) ((struct freeblks *)(wk)) +#define WK_FREEWORK(wk) ((struct freework *)(wk)) #define WK_FREEFILE(wk) ((struct freefile *)(wk)) #define WK_DIRADD(wk) ((struct diradd *)(wk)) #define WK_MKDIR(wk) ((struct mkdir *)(wk)) #define WK_DIRREM(wk) ((struct dirrem *)(wk)) #define WK_NEWDIRBLK(wk) ((struct newdirblk *)(wk)) +#define WK_JADDREF(wk) ((struct jaddref *)(wk)) +#define WK_JREMREF(wk) ((struct jremref *)(wk)) +#define WK_JMVREF(wk) ((struct jmvref *)(wk)) +#define WK_JSEGDEP(wk) ((struct jsegdep *)(wk)) +#define WK_JSEG(wk) ((struct jseg *)(wk)) +#define WK_JNEWBLK(wk) ((struct jnewblk *)(wk)) +#define WK_JFREEBLK(wk) ((struct jfreeblk *)(wk)) +#define WK_FREEDEP(wk) ((struct freedep *)(wk)) +#define WK_JFREEFRAG(wk) ((struct jfreefrag *)(wk)) +#define WK_SBDEP(wk) ((struct sbdep 
*)wk) +#define WK_JTRUNC(wk) ((struct jtrunc *)(wk)) /* * Various types of lists @@ -165,6 +185,15 @@ LIST_HEAD(inodedephd, inodedep); LIST_HEAD(allocindirhd, allocindir); LIST_HEAD(allocdirecthd, allocdirect); TAILQ_HEAD(allocdirectlst, allocdirect); +LIST_HEAD(indirdephd, indirdep); +LIST_HEAD(jaddrefhd, jaddref); +LIST_HEAD(jremrefhd, jremref); +LIST_HEAD(jmvrefhd, jmvref); +LIST_HEAD(jnewblkhd, jnewblk); +LIST_HEAD(jfreeblkhd, jfreeblk); +LIST_HEAD(freeworkhd, freework); +TAILQ_HEAD(jseglst, jseg); +TAILQ_HEAD(inoreflst, inoref); /* * The "pagedep" structure tracks the various dependencies related to @@ -192,9 +221,11 @@ struct pagedep { LIST_ENTRY(pagedep) pd_hash; /* hashed lookup */ ino_t pd_ino; /* associated file */ ufs_lbn_t pd_lbn; /* block within file */ + struct newdirblk *pd_newdirblk; /* associated newdirblk if NEWBLOCK */ struct dirremhd pd_dirremhd; /* dirrem's waiting for page */ struct diraddhd pd_diraddhd[DAHASHSZ]; /* diradd dir entry updates */ struct diraddhd pd_pendinghd; /* directory entries awaiting write */ + struct jmvrefhd pd_jmvrefhd; /* Dependent journal writes. */ }; /* @@ -248,13 +279,18 @@ struct inodedep { struct worklist id_list; /* buffer holding inode block */ # define id_state id_list.wk_state /* inode dependency state */ LIST_ENTRY(inodedep) id_hash; /* hashed lookup */ + TAILQ_ENTRY(inodedep) id_unlinked; /* Unlinked but ref'd inodes */ struct fs *id_fs; /* associated filesystem */ ino_t id_ino; /* dependent inode */ nlink_t id_nlinkdelta; /* saved effective link count */ + nlink_t id_savednlink; /* Link saved during rollback */ LIST_ENTRY(inodedep) id_deps; /* bmsafemap's list of inodedep's */ - struct buf *id_buf; /* related bmsafemap (if pending) */ + struct bmsafemap *id_bmsafemap; /* related bmsafemap (if pending) */ + struct diradd *id_mkdiradd; /* diradd for a mkdir. */ + struct inoreflst id_inoreflst; /* Inode reference adjustments. 
*/ long id_savedextsize; /* ext size saved during rollback */ off_t id_savedsize; /* file size saved during rollback */ + struct dirremhd id_dirremhd; /* Removals pending. */ struct workhead id_pendinghd; /* entries awaiting directory write */ struct workhead id_bufwait; /* operations after inode written */ struct workhead id_inowait; /* operations waiting inode update */ @@ -270,23 +306,6 @@ struct inodedep { #define id_savedino1 id_un.idu_savedino1 #define id_savedino2 id_un.idu_savedino2 -/* - * A "newblk" structure is attached to a bmsafemap structure when a block - * or fragment is allocated from a cylinder group. Its state is set to - * DEPCOMPLETE when its cylinder group map is written. It is consumed by - * an associated allocdirect or allocindir allocation which will attach - * themselves to the bmsafemap structure if the newblk's DEPCOMPLETE flag - * is not set (i.e., its cylinder group map has not been written). - */ -struct newblk { - LIST_ENTRY(newblk) nb_hash; /* hashed lookup */ - struct fs *nb_fs; /* associated filesystem */ - int nb_state; /* state of bitmap dependency */ - ufs2_daddr_t nb_newblkno; /* allocated block number */ - LIST_ENTRY(newblk) nb_deps; /* bmsafemap's list of newblk's */ - struct bmsafemap *nb_bmsafemap; /* associated bmsafemap */ -}; - /* * A "bmsafemap" structure maintains a list of dependency structures * that depend on the update of a particular cylinder group map. @@ -299,11 +318,41 @@ struct newblk { */ struct bmsafemap { struct worklist sm_list; /* cylgrp buffer */ +# define sm_state sm_list.wk_state + int sm_cg; + LIST_ENTRY(bmsafemap) sm_hash; /* Hash links. 
*/ struct buf *sm_buf; /* associated buffer */ struct allocdirecthd sm_allocdirecthd; /* allocdirect deps */ + struct allocdirecthd sm_allocdirectwr; /* writing allocdirect deps */ struct allocindirhd sm_allocindirhd; /* allocindir deps */ + struct allocindirhd sm_allocindirwr; /* writing allocindir deps */ struct inodedephd sm_inodedephd; /* inodedep deps */ + struct inodedephd sm_inodedepwr; /* writing inodedep deps */ struct newblkhd sm_newblkhd; /* newblk deps */ + struct newblkhd sm_newblkwr; /* writing newblk deps */ + struct jaddrefhd sm_jaddrefhd; /* Pending inode allocations. */ + struct jnewblkhd sm_jnewblkhd; /* Pending block allocations. */ +}; + +/* + * A "newblk" structure is attached to a bmsafemap structure when a block + * or fragment is allocated from a cylinder group. Its state is set to + * DEPCOMPLETE when its cylinder group map is written. It is converted to + * an allocdirect or allocindir allocation once the allocator calls the + * appropriate setup function. + */ +struct newblk { + struct worklist nb_list; +# define nb_state nb_list.wk_state + LIST_ENTRY(newblk) nb_hash; /* hashed lookup */ + LIST_ENTRY(newblk) nb_deps; /* bmsafemap's list of newblks */ + struct jnewblk *nb_jnewblk; /* New block journal entry. */ + struct bmsafemap *nb_bmsafemap;/* cylgrp dep (if pending) */ + struct freefrag *nb_freefrag; /* fragment to be freed (if any) */ + struct indirdephd nb_indirdeps; /* Children indirect blocks. */ + struct workhead nb_newdirblk; /* dir block to notify when written */ + struct workhead nb_jwork; /* Journal work pending. */ + ufs2_daddr_t nb_newblkno; /* new value of block pointer */ }; /* @@ -334,20 +383,18 @@ struct bmsafemap { * and inodedep->id_pendinghd lists. 
*/ struct allocdirect { - struct worklist ad_list; /* buffer holding block */ -# define ad_state ad_list.wk_state /* block pointer state */ + struct newblk ad_block; /* Common block logic */ +# define ad_state ad_block.nb_list.wk_state /* block pointer state */ TAILQ_ENTRY(allocdirect) ad_next; /* inodedep's list of allocdirect's */ - ufs_lbn_t ad_lbn; /* block within file */ - ufs2_daddr_t ad_newblkno; /* new value of block pointer */ - ufs2_daddr_t ad_oldblkno; /* old value of block pointer */ - long ad_newsize; /* size of new block */ - long ad_oldsize; /* size of old block */ - LIST_ENTRY(allocdirect) ad_deps; /* bmsafemap's list of allocdirect's */ - struct buf *ad_buf; /* cylgrp buffer (if pending) */ struct inodedep *ad_inodedep; /* associated inodedep */ - struct freefrag *ad_freefrag; /* fragment to be freed (if any) */ - struct workhead ad_newdirblk; /* dir block to notify when written */ + ufs2_daddr_t ad_oldblkno; /* old value of block pointer */ + int ad_offset; /* Pointer offset in parent. */ + long ad_newsize; /* size of new block */ + long ad_oldsize; /* size of old block */ }; +#define ad_newblkno ad_block.nb_newblkno +#define ad_freefrag ad_block.nb_freefrag +#define ad_newdirblk ad_block.nb_newdirblk /* * A single "indirdep" structure manages all allocation dependencies for @@ -369,10 +416,14 @@ struct allocdirect { struct indirdep { struct worklist ir_list; /* buffer holding indirect block */ # define ir_state ir_list.wk_state /* indirect block pointer state */ - caddr_t ir_saveddata; /* buffer cache contents */ + LIST_ENTRY(indirdep) ir_next; /* alloc{direct,indir} list */ + caddr_t ir_saveddata; /* buffer cache contents */ struct buf *ir_savebp; /* buffer holding safe copy */ + struct allocindirhd ir_completehd; /* waiting for indirdep complete */ + struct allocindirhd ir_writehd; /* Waiting for the pointer write. 
*/ struct allocindirhd ir_donehd; /* done waiting to update safecopy */ struct allocindirhd ir_deplisthd; /* allocindir deps for this block */ + struct workhead ir_jwork; /* Journal work pending. */ }; /* @@ -389,16 +440,25 @@ struct indirdep { * can then be freed as it is no longer applicable. */ struct allocindir { - struct worklist ai_list; /* buffer holding indirect block */ -# define ai_state ai_list.wk_state /* indirect block pointer state */ + struct newblk ai_block; /* Common block area */ +# define ai_state ai_block.nb_list.wk_state /* indirect pointer state */ LIST_ENTRY(allocindir) ai_next; /* indirdep's list of allocindir's */ - int ai_offset; /* pointer offset in indirect block */ - ufs2_daddr_t ai_newblkno; /* new block pointer value */ - ufs2_daddr_t ai_oldblkno; /* old block pointer value */ - struct freefrag *ai_freefrag; /* block to be freed when complete */ struct indirdep *ai_indirdep; /* address of associated indirdep */ - LIST_ENTRY(allocindir) ai_deps; /* bmsafemap's list of allocindir's */ - struct buf *ai_buf; /* cylgrp buffer (if pending) */ + ufs2_daddr_t ai_oldblkno; /* old value of block pointer */ + int ai_offset; /* Pointer offset in parent. */ +}; +#define ai_newblkno ai_block.nb_newblkno +#define ai_freefrag ai_block.nb_freefrag +#define ai_newdirblk ai_block.nb_newdirblk + +/* + * The allblk union is used to size the newblk structure on allocation so + * that it may be any one of three types. + */ +union allblk { + struct allocindir ab_allocindir; + struct allocdirect ab_allocdirect; + struct newblk ab_newblk; }; /* @@ -406,14 +466,13 @@ struct allocindir { * allocated fragment is replaced with a larger fragment, rather than extended. * The "freefrag" structure is constructed and attached when the replacement * block is first allocated. It is processed after the inode claiming the - * bigger block that replaces it has been written to disk. Note that the - * ff_state field is is used to store the uid, so may lose data. 
However, - * the uid is used only in printing an error message, so is not critical. - * Keeping it in a short keeps the data structure down to 32 bytes. + * bigger block that replaces it has been written to disk. */ struct freefrag { struct worklist ff_list; /* id_inowait or delayed worklist */ -# define ff_state ff_list.wk_state /* owning user; should be uid_t */ +# define ff_state ff_list.wk_state + struct jfreefrag *ff_jfreefrag; /* Associated journal entry. */ + struct workhead ff_jwork; /* Journal work pending. */ ufs2_daddr_t ff_blkno; /* fragment physical block number */ long ff_fragsize; /* size of fragment being deleted */ ino_t ff_inum; /* owning inode number */ @@ -423,20 +482,57 @@ struct freefrag { * A "freeblks" structure is attached to an "inodedep" when the * corresponding file's length is reduced to zero. It records all * the information needed to free the blocks of a file after its - * zero'ed inode has been written to disk. + * zero'ed inode has been written to disk. The actual work is done + * by child freework structures which are responsible for individual + * inode pointers while freeblks is responsible for retiring the + * entire operation when it is complete and holding common members. 
*/ struct freeblks { struct worklist fb_list; /* id_inowait or delayed worklist */ # define fb_state fb_list.wk_state /* inode and dirty block state */ + struct jfreeblkhd fb_jfreeblkhd; /* Journal entries pending */ + struct workhead fb_freeworkhd; /* Work items pending */ + struct workhead fb_jwork; /* Journal work pending */ ino_t fb_previousinum; /* inode of previous owner of blocks */ uid_t fb_uid; /* uid of previous owner of blocks */ struct vnode *fb_devvp; /* filesystem device vnode */ - long fb_oldextsize; /* previous ext data size */ - off_t fb_oldsize; /* previous file size */ ufs2_daddr_t fb_chkcnt; /* used to check cnt of blks released */ - ufs2_daddr_t fb_dblks[NDADDR]; /* direct blk ptrs to deallocate */ - ufs2_daddr_t fb_iblks[NIADDR]; /* indirect blk ptrs to deallocate */ - ufs2_daddr_t fb_eblks[NXADDR]; /* indirect blk ptrs to deallocate */ + int fb_ref; /* Children outstanding. */ +}; + +/* + * A "freework" structure handles the release of a tree of blocks or a single + * block. Each indirect block in a tree is allocated its own freework + * structure so that the indirect block may be freed only when all of its + * children are freed. In this way we enforce the rule that an allocated + * block must have a valid path to a root that is journaled. Each child + * block acquires a reference and when the ref hits zero the parent ref + * is decremented. If there is no parent the freeblks ref is decremented. + */ +struct freework { + struct worklist fw_list; +# define fw_state fw_list.wk_state + LIST_ENTRY(freework) fw_next; /* Queue for freeblks. */ + struct freeblks *fw_freeblks; /* Root of operation. */ + struct freework *fw_parent; /* Parent indirect. */ + ufs2_daddr_t fw_blkno; /* Our block #. */ + ufs_lbn_t fw_lbn; /* Original lbn before free. */ + int fw_frags; /* Number of frags. */ + int fw_ref; /* Number of children out. */ + int fw_off; /* Current working position. */ + struct workhead fw_jwork; /* Journal work pending.
*/ +}; + +/* + * A "freedep" structure is allocated to track the completion of a bitmap + * write for a freework. One freedep may cover many freed blocks so long + * as they reside in the same cylinder group. When the cg is written + * the freedep decrements the ref on the freework which may permit it + * to be freed as well. + */ +struct freedep { + struct worklist fd_list; + struct freework *fd_freework; /* Parent freework. */ }; /* @@ -450,6 +546,7 @@ struct freefile { mode_t fx_mode; /* mode of inode */ ino_t fx_oldinum; /* inum of the unlinked file */ struct vnode *fx_devvp; /* filesystem device vnode */ + struct workhead fx_jwork; /* journal work pending. */ }; /* @@ -482,12 +579,11 @@ struct freefile { * than zero. * * The overlaying of da_pagedep and da_previous is done to keep the - * structure down to 32 bytes in size on a 32-bit machine. If a - * da_previous entry is present, the pointer to its pagedep is available - * in the associated dirrem entry. If the DIRCHG flag is set, the - * da_previous entry is valid; if not set the da_pagedep entry is valid. - * The DIRCHG flag never changes; it is set when the structure is created - * if appropriate and is never cleared. + * structure down. If a da_previous entry is present, the pointer to its + * pagedep is available in the associated dirrem entry. If the DIRCHG flag + * is set, the da_previous entry is valid; if not set the da_pagedep entry + * is valid. The DIRCHG flag never changes; it is set when the structure + * is created if appropriate and is never cleared. */ struct diradd { struct worklist da_list; /* id_inowait or id_pendinghd list */ @@ -499,6 +595,7 @@ struct diradd { struct dirrem *dau_previous; /* entry being replaced in dir change */ struct pagedep *dau_pagedep; /* pagedep dependency for addition */ } da_un; + struct workhead da_jwork; /* Journal work awaiting completion. 
*/ }; #define da_previous da_un.dau_previous #define da_pagedep da_un.dau_pagedep @@ -525,12 +622,13 @@ struct diradd { * mkdir structures that reference it. The deletion would be faster if the * diradd structure were simply augmented to have two pointers that referenced * the associated mkdir's. However, this would increase the size of the diradd - * structure from 32 to 64-bits to speed a very infrequent operation. + * structure to speed a very infrequent operation. */ struct mkdir { struct worklist md_list; /* id_inowait or buffer holding dir */ # define md_state md_list.wk_state /* type: MKDIR_PARENT or MKDIR_BODY */ struct diradd *md_diradd; /* associated diradd */ + struct jaddref *md_jaddref; /* dependent jaddref. */ struct buf *md_buf; /* MKDIR_BODY: buffer holding dir */ LIST_ENTRY(mkdir) md_mkdirs; /* list of all mkdirs */ }; @@ -542,20 +640,19 @@ LIST_HEAD(mkdirlist, mkdir) mkdirlisthd; * list of the pagedep for the directory page that contains the entry. * It is processed after the directory page with the deleted entry has * been written to disk. - * - * The overlaying of dm_pagedep and dm_dirinum is done to keep the - * structure down to 32 bytes in size on a 32-bit machine. It works - * because they are never used concurrently. */ struct dirrem { struct worklist dm_list; /* delayed worklist */ # define dm_state dm_list.wk_state /* state of the old directory entry */ LIST_ENTRY(dirrem) dm_next; /* pagedep's list of dirrem's */ + LIST_ENTRY(dirrem) dm_inonext; /* inodedep's list of dirrem's */ + struct jremrefhd dm_jremrefhd; /* Pending remove reference deps. */ ino_t dm_oldinum; /* inum of the removed dir entry */ union { struct pagedep *dmu_pagedep; /* pagedep dependency for remove */ ino_t dmu_dirinum; /* parent inode number (for rmdir) */ } dm_un; + struct workhead dm_jwork; /* Journal work awaiting completion. 
*/ }; #define dm_pagedep dm_un.dmu_pagedep #define dm_dirinum dm_un.dmu_dirinum @@ -577,9 +674,200 @@ struct dirrem { * blocks using a similar scheme with the allocindir structures. Rather * than adding this level of complexity, we simply write those newly * allocated indirect blocks synchronously as such allocations are rare. + * In the case of a new directory the . and .. links are tracked with + * a mkdir rather than a pagedep. In this case we track the mkdir + * so it can be released when it is written. A workhead is used + * to simplify canceling a mkdir that is removed by a subsequent dirrem. */ struct newdirblk { struct worklist db_list; /* id_inowait or pg_newdirblk */ # define db_state db_list.wk_state /* unused */ struct pagedep *db_pagedep; /* associated pagedep */ + struct workhead db_mkdir; +}; + +/* + * The inoref structure holds the elements common to jaddref and jremref + * so they may easily be queued in-order on the inodedep. + */ +struct inoref { + struct worklist if_list; +# define if_state if_list.wk_state + TAILQ_ENTRY(inoref) if_deps; /* Links for inodedep. */ + struct jsegdep *if_jsegdep; + off_t if_diroff; /* Directory offset. */ + ino_t if_ino; /* Inode number. */ + ino_t if_parent; /* Parent inode number. */ + nlink_t if_nlink; /* nlink before addition. */ + uint16_t if_mode; /* File mode, needed for IFMT. */ +}; + +/* + * A "jaddref" structure tracks a new reference (link count) on an inode + * and prevents the link count increase and bitmap allocation until a + * journal entry can be written. Once the journal entry is written, + * the inode is put on the pendinghd of the bmsafemap and a diradd or + * mkdir entry is placed on the bufwait list of the inode. The DEPCOMPLETE + * flag is used to indicate that all of the required information for writing + * the journal entry is present. MKDIR_BODY and MKDIR_PARENT are used to + * differentiate . and .. links from regular file names. NEWBLOCK indicates + * a bitmap is still pending. 
If a new reference is canceled by a delete + * prior to writing the journal the jaddref write is canceled and the + * structure persists to prevent any disk-visible changes until it is + * ultimately released when the file is freed or the link is dropped again. + */ +struct jaddref { + struct inoref ja_ref; +# define ja_list ja_ref.if_list /* Journal pending or jseg entries. */ +# define ja_state ja_ref.if_list.wk_state + LIST_ENTRY(jaddref) ja_bmdeps; /* Links for bmsafemap. */ + union { + struct diradd *jau_diradd; /* Pending diradd. */ + struct mkdir *jau_mkdir; /* MKDIR_{PARENT,BODY} */ + } ja_un; +}; +#define ja_diradd ja_un.jau_diradd +#define ja_mkdir ja_un.jau_mkdir +#define ja_diroff ja_ref.if_diroff +#define ja_ino ja_ref.if_ino +#define ja_parent ja_ref.if_parent +#define ja_mode ja_ref.if_mode + +/* + * A "jremref" structure tracks a removed reference (unlink) on an + * inode and prevents the directory remove from proceeding until the + * journal entry is written. Once the journal has been written the remove + * may proceed as normal. + */ +struct jremref { + struct inoref jr_ref; +# define jr_list jr_ref.if_list /* Journal pending or jseg entries. */ +# define jr_state jr_ref.if_list.wk_state + LIST_ENTRY(jremref) jr_deps; /* Links for pagedep. */ + struct dirrem *jr_dirrem; /* Back pointer to dirrem. */ +}; + +struct jmvref { + struct worklist jm_list; + LIST_ENTRY(jmvref) jm_deps; + struct pagedep *jm_pagedep; + ino_t jm_parent; + ino_t jm_ino; + off_t jm_oldoff; + off_t jm_newoff; +}; + +/* + * A "jnewblk" structure tracks a newly allocated block or fragment and + * prevents the direct or indirect block pointer as well as the cg bitmap + * from being written until it is logged. After it is logged the jsegdep + * is attached to the allocdirect or allocindir until the operation is + * completed or reverted. If the operation is reverted prior to the journal + * write the jnewblk structure is maintained to prevent the bitmaps from + * reaching the disk.
Ultimately the jnewblk structure will be passed + * to the free routine as the in memory cg is modified back to the free + * state at which time it can be released. + */ +struct jnewblk { + struct worklist jn_list; +# define jn_state jn_list.wk_state + struct jsegdep *jn_jsegdep; + LIST_ENTRY(jnewblk) jn_deps; /* All jnewblks on bmsafemap */ + struct newblk *jn_newblk; + ino_t jn_ino; + ufs_lbn_t jn_lbn; + ufs2_daddr_t jn_blkno; + int jn_oldfrags; + int jn_frags; +}; + +/* + * A "jfreeblk" structure tracks the journal write for freeing a block + * or tree of blocks. The block pointer must not be cleared in the inode + * or indirect prior to the jfreeblk being written. + */ +struct jfreeblk { + struct worklist jf_list; +# define jf_state jf_list.wk_state + struct jsegdep *jf_jsegdep; + struct freeblks *jf_freeblks; + LIST_ENTRY(jfreeblk) jf_deps; + ino_t jf_ino; + ufs_lbn_t jf_lbn; + ufs2_daddr_t jf_blkno; + int jf_frags; +}; + +/* + * A "jfreefrag" tracks the freeing of a single block when a fragment is + * extended or an indirect page is replaced. It is not part of a larger + * freeblks operation. + */ +struct jfreefrag { + struct worklist fr_list; +# define fr_state fr_list.wk_state + struct jsegdep *fr_jsegdep; + struct freefrag *fr_freefrag; + ino_t fr_ino; + ufs_lbn_t fr_lbn; + ufs2_daddr_t fr_blkno; + int fr_frags; +}; + +/* + * A "jtrunc" journals the intent to truncate an inode to a non-zero + * value. This is done synchronously prior to the synchronous partial + * truncation process. The jsegdep is not released until the truncation + * is complete and the truncated inode is fsync'd. + */ +struct jtrunc { + struct worklist jt_list; + struct jsegdep *jt_jsegdep; + ino_t jt_ino; + off_t jt_size; + int jt_extsize; +}; + +/* + * A "jsegdep" structure tracks a single reference to a written journal + * segment so the journal space can be reclaimed when all dependencies + * have been written. 
+ */ +struct jsegdep { + struct worklist jd_list; +# define jd_state jd_list.wk_state + struct jseg *jd_seg; +}; + +/* + * A "jseg" structure contains all of the journal records written in a + * single disk write. jaddref and jremref structures are linked into + * js_entries so they may be completed when the write completes. The + * js_deps array contains as many entries as there are ref counts to + * reduce the number of allocations required per journal write to one. + */ +struct jseg { + struct worklist js_list; /* b_deps link for journal */ +# define js_state js_list.wk_state + struct workhead js_entries; /* Entries awaiting write */ + TAILQ_ENTRY(jseg) js_next; + struct jblocks *js_jblocks; /* Back pointer to block/seg list */ + struct buf *js_buf; /* Buffer while unwritten */ + uint64_t js_seq; + int js_size; /* Allocated size in bytes */ + int js_cnt; /* Total items allocated */ + int js_refs; /* Count of items pending completion */ +}; + +/* + * A 'sbdep' structure tracks the head of the free inode list and + * superblock writes. This makes sure the superblock is always pointing at + * the first possible unlinked inode for the suj recovery process. If a + * block write completes and we discover a new head is available the buf + * is dirtied and the dep is kept. + */ +struct sbdep { + struct worklist sb_list; /* b_dep linkage */ + struct fs *sb_fs; /* Filesystem pointer within buf. */ + struct ufsmount *sb_ump; }; diff --git a/sys/ufs/ufs/dinode.h b/sys/ufs/ufs/dinode.h index 7f9e7c56496..c75257c8e62 100644 --- a/sys/ufs/ufs/dinode.h +++ b/sys/ufs/ufs/dinode.h @@ -146,7 +146,8 @@ struct ufs2_dinode { ufs2_daddr_t di_db[NDADDR]; /* 112: Direct disk blocks. */ ufs2_daddr_t di_ib[NIADDR]; /* 208: Indirect disk blocks. */ u_int64_t di_modrev; /* 232: i_modrev for NFSv4 */ - int64_t di_spare[2]; /* 240: Reserved; currently unused */ + ino_t di_freelink; /* 240: SUJ: Next unlinked inode.
*/ + uint32_t di_spare[3]; /* 244: Reserved; currently unused */ }; /* @@ -167,9 +168,7 @@ struct ufs2_dinode { struct ufs1_dinode { u_int16_t di_mode; /* 0: IFMT, permissions; see below. */ int16_t di_nlink; /* 2: File link count. */ - union { - u_int16_t oldids[2]; /* 4: Ffs: old user and group ids. */ - } di_u; + ino_t di_freelink; /* 4: SUJ: Next unlinked inode. */ u_int64_t di_size; /* 8: File byte count. */ int32_t di_atime; /* 16: Last access time. */ int32_t di_atimensec; /* 20: Last access time. */ @@ -186,7 +185,5 @@ struct ufs1_dinode { u_int32_t di_gid; /* 116: File group. */ u_int64_t di_modrev; /* 120: i_modrev for NFSv4 */ }; -#define di_ogid di_u.oldids[1] -#define di_ouid di_u.oldids[0] #endif /* _UFS_UFS_DINODE_H_ */ diff --git a/sys/ufs/ufs/inode.h b/sys/ufs/ufs/inode.h index 565580e6046..295b12975e2 100644 --- a/sys/ufs/ufs/inode.h +++ b/sys/ufs/ufs/inode.h @@ -120,7 +120,7 @@ struct inode { #define IN_CHANGE 0x0002 /* Inode change time update request. */ #define IN_UPDATE 0x0004 /* Modification time update request. */ #define IN_MODIFIED 0x0008 /* Inode has been modified. */ -#define IN_RENAME 0x0010 /* Inode is being renamed. */ +#define IN_NEEDSYNC 0x0010 /* Inode requires fsync. */ #define IN_LAZYMOD 0x0040 /* Modified, but don't write yet. */ #define IN_SPACECOUNTED 0x0080 /* Blocks to be freed in free count. */ #define IN_LAZYACCESS 0x0100 /* Process IN_ACCESS after the @@ -175,6 +175,7 @@ struct indir { /* Determine if soft dependencies are being done */ #define DOINGSOFTDEP(vp) ((vp)->v_mount->mnt_flag & MNT_SOFTDEP) #define DOINGASYNC(vp) ((vp)->v_mount->mnt_kern_flag & MNTK_ASYNC) +#define DOINGSUJ(vp) ((vp)->v_mount->mnt_kern_flag & MNTK_SUJ) /* This overlays the fid structure (see mount.h). 
*/ struct ufid { diff --git a/sys/ufs/ufs/quota.h b/sys/ufs/ufs/quota.h index 28f4e92d8fc..ca0dcced7d2 100644 --- a/sys/ufs/ufs/quota.h +++ b/sys/ufs/ufs/quota.h @@ -81,10 +81,14 @@ #define Q_QUOTAON 0x0100 /* enable quotas */ #define Q_QUOTAOFF 0x0200 /* disable quotas */ -#define Q_GETQUOTA 0x0300 /* get limits and usage */ -#define Q_SETQUOTA 0x0400 /* set limits and usage */ -#define Q_SETUSE 0x0500 /* set usage */ +#define Q_GETQUOTA32 0x0300 /* get limits and usage (32-bit version) */ +#define Q_SETQUOTA32 0x0400 /* set limits and usage (32-bit version) */ +#define Q_SETUSE32 0x0500 /* set usage (32-bit version) */ #define Q_SYNC 0x0600 /* sync disk copy of a filesystems quotas */ +#define Q_GETQUOTA 0x0700 /* get limits and usage (64-bit version) */ +#define Q_SETQUOTA 0x0800 /* set limits and usage (64-bit version) */ +#define Q_SETUSE 0x0900 /* set usage (64-bit version) */ +#define Q_GETQUOTASIZE 0x0A00 /* get bit-size of quota file fields */ /* * The following structure defines the format of the disk quota file @@ -93,7 +97,7 @@ * the vnode for each quota file (a pointer is retained in the ufsmount * structure). 
*/ -struct dqblk { +struct dqblk32 { u_int32_t dqb_bhardlimit; /* absolute limit on disk blks alloc */ u_int32_t dqb_bsoftlimit; /* preferred limit on disk blks */ u_int32_t dqb_curblocks; /* current block count */ @@ -104,6 +108,30 @@ struct dqblk { int32_t dqb_itime; /* time limit for excessive files */ }; +struct dqblk64 { + u_int64_t dqb_bhardlimit; /* absolute limit on disk blks alloc */ + u_int64_t dqb_bsoftlimit; /* preferred limit on disk blks */ + u_int64_t dqb_curblocks; /* current block count */ + u_int64_t dqb_ihardlimit; /* maximum # allocated inodes + 1 */ + u_int64_t dqb_isoftlimit; /* preferred inode limit */ + u_int64_t dqb_curinodes; /* current # allocated inodes */ + int64_t dqb_btime; /* time limit for excessive disk use */ + int64_t dqb_itime; /* time limit for excessive files */ +}; + +#define dqblk dqblk64 + +#define Q_DQHDR64_MAGIC "QUOTA64" +#define Q_DQHDR64_VERSION 0x20081104 + +struct dqhdr64 { + char dqh_magic[8]; /* Q_DQHDR64_MAGIC */ + uint32_t dqh_version; /* Q_DQHDR64_VERSION */ + uint32_t dqh_hdrlen; /* header length */ + uint32_t dqh_reclen; /* record length */ + char dqh_unused[44]; /* reserved for future extension */ +}; + #ifdef _KERNEL #include @@ -125,7 +153,7 @@ struct dquot { u_int32_t dq_id; /* identifier this applies to */ struct ufsmount *dq_ump; /* (h) filesystem that this is taken from */ - struct dqblk dq_dqb; /* actual usage & quotas */ + struct dqblk64 dq_dqb; /* actual usage & quotas */ }; /* * Flag values. 
@@ -199,12 +227,16 @@ void dqinit(void); void dqrele(struct vnode *, struct dquot *); void dquninit(void); int getinoquota(struct inode *); -int getquota(struct thread *, struct mount *, u_long, int, void *); int qsync(struct mount *mp); int quotaoff(struct thread *td, struct mount *, int); int quotaon(struct thread *td, struct mount *, int, void *); +int getquota32(struct thread *, struct mount *, u_long, int, void *); +int setquota32(struct thread *, struct mount *, u_long, int, void *); +int setuse32(struct thread *, struct mount *, u_long, int, void *); +int getquota(struct thread *, struct mount *, u_long, int, void *); int setquota(struct thread *, struct mount *, u_long, int, void *); int setuse(struct thread *, struct mount *, u_long, int, void *); +int getquotasize(struct thread *, struct mount *, u_long, int, void *); vfs_quotactl_t ufs_quotactl; #else /* !_KERNEL */ diff --git a/sys/ufs/ufs/ufs_dirhash.c b/sys/ufs/ufs/ufs_dirhash.c index c85fdc8980f..d7c1d0ddb82 100644 --- a/sys/ufs/ufs/ufs_dirhash.c +++ b/sys/ufs/ufs/ufs_dirhash.c @@ -68,8 +68,6 @@ __FBSDID("$FreeBSD$"); static MALLOC_DEFINE(M_DIRHASH, "ufs_dirhash", "UFS directory hash tables"); -static SYSCTL_NODE(_vfs, OID_AUTO, ufs, CTLFLAG_RD, 0, "UFS filesystem"); - static int ufs_mindirhashsize = DIRBLKSIZ * 5; SYSCTL_INT(_vfs_ufs, OID_AUTO, dirhash_minsize, CTLFLAG_RW, &ufs_mindirhashsize, diff --git a/sys/ufs/ufs/ufs_extern.h b/sys/ufs/ufs/ufs_extern.h index b2e4a975730..6658b663fb1 100644 --- a/sys/ufs/ufs/ufs_extern.h +++ b/sys/ufs/ufs/ufs_extern.h @@ -57,7 +57,7 @@ int ufs_bmap(struct vop_bmap_args *); int ufs_bmaparray(struct vnode *, ufs2_daddr_t, ufs2_daddr_t *, struct buf *, int *, int *); int ufs_fhtovp(struct mount *, struct ufid *, struct vnode **); -int ufs_checkpath(ino_t, struct inode *, struct ucred *); +int ufs_checkpath(ino_t, ino_t, struct inode *, struct ucred *, ino_t *); void ufs_dirbad(struct inode *, doff_t, char *); int ufs_dirbadentry(struct vnode *, struct direct *, 
int); int ufs_dirempty(struct inode *, ino_t, struct ucred *); @@ -66,9 +66,11 @@ int ufs_extwrite(struct vop_write_args *); void ufs_makedirentry(struct inode *, struct componentname *, struct direct *); int ufs_direnter(struct vnode *, struct vnode *, struct direct *, - struct componentname *, struct buf *); + struct componentname *, struct buf *, int); int ufs_dirremove(struct vnode *, struct inode *, int, int); int ufs_dirrewrite(struct inode *, struct inode *, ino_t, int, int); +int ufs_lookup_ino(struct vnode *, struct vnode **, struct componentname *, + ino_t *); int ufs_getlbns(struct vnode *, ufs2_daddr_t, struct indir *, int *); int ufs_inactive(struct vop_inactive_args *); int ufs_init(struct vfsconf *); @@ -81,19 +83,33 @@ vfs_root_t ufs_root; int ufs_uninit(struct vfsconf *); int ufs_vinit(struct mount *, struct vop_vector *, struct vnode **); +#include +SYSCTL_DECL(_vfs_ufs); + /* * Soft update function prototypes. */ int softdep_setup_directory_add(struct buf *, struct inode *, off_t, ino_t, struct buf *, int); -void softdep_change_directoryentry_offset(struct inode *, caddr_t, - caddr_t, caddr_t, int); +void softdep_change_directoryentry_offset(struct buf *, struct inode *, + caddr_t, caddr_t, caddr_t, int); void softdep_setup_remove(struct buf *,struct inode *, struct inode *, int); void softdep_setup_directory_change(struct buf *, struct inode *, struct inode *, ino_t, int); void softdep_change_linkcnt(struct inode *); void softdep_releasefile(struct inode *); int softdep_slowdown(struct vnode *); +void softdep_setup_create(struct inode *, struct inode *); +void softdep_setup_dotdot_link(struct inode *, struct inode *); +void softdep_setup_link(struct inode *, struct inode *); +void softdep_setup_mkdir(struct inode *, struct inode *); +void softdep_setup_rmdir(struct inode *, struct inode *); +void softdep_setup_unlink(struct inode *, struct inode *); +void softdep_revert_create(struct inode *, struct inode *); +void 
softdep_revert_dotdot_link(struct inode *, struct inode *); +void softdep_revert_link(struct inode *, struct inode *); +void softdep_revert_mkdir(struct inode *, struct inode *); +void softdep_revert_rmdir(struct inode *, struct inode *); /* * Flags to low-level allocation routines. The low 16-bits are reserved diff --git a/sys/ufs/ufs/ufs_lookup.c b/sys/ufs/ufs/ufs_lookup.c index b0247e77d0d..0030c5264bd 100644 --- a/sys/ufs/ufs/ufs_lookup.c +++ b/sys/ufs/ufs/ufs_lookup.c @@ -77,9 +77,6 @@ SYSCTL_INT(_debug, OID_AUTO, dircheck, CTLFLAG_RW, &dirchk, 0, ""); /* true if old FS format...*/ #define OFSFMT(vp) ((vp)->v_mount->mnt_maxsymlinklen <= 0) -static int ufs_lookup_(struct vnode *, struct vnode **, struct componentname *, - ino_t *); - static int ufs_delete_denied(struct vnode *vdp, struct vnode *tdp, struct ucred *cred, struct thread *td) @@ -189,11 +186,11 @@ ufs_lookup(ap) } */ *ap; { - return (ufs_lookup_(ap->a_dvp, ap->a_vpp, ap->a_cnp, NULL)); + return (ufs_lookup_ino(ap->a_dvp, ap->a_vpp, ap->a_cnp, NULL)); } -static int -ufs_lookup_(struct vnode *vdp, struct vnode **vpp, struct componentname *cnp, +int +ufs_lookup_ino(struct vnode *vdp, struct vnode **vpp, struct componentname *cnp, ino_t *dd_ino) { struct inode *dp; /* inode for directory being searched */ @@ -524,6 +521,8 @@ notfound: return (ENOENT); found: + if (dd_ino != NULL) + *dd_ino = ino; if (numdirpasses == 2) nchstats.ncs_pass2++; /* @@ -546,11 +545,6 @@ found: if ((flags & ISLASTCN) && nameiop == LOOKUP) dp->i_diroff = i_offset &~ (DIRBLKSIZ - 1); - if (dd_ino != NULL) { - *dd_ino = ino; - return (0); - } - /* * If deleting, and at end of pathname, return * parameters which can be used to remove file. 
@@ -558,17 +552,6 @@ found: if (nameiop == DELETE && (flags & ISLASTCN)) { if (flags & LOCKPARENT) ASSERT_VOP_ELOCKED(vdp, __FUNCTION__); - if ((error = VFS_VGET(vdp->v_mount, ino, - LK_EXCLUSIVE, &tdp)) != 0) - return (error); - - error = ufs_delete_denied(vdp, tdp, cred, cnp->cn_thread); - if (error) { - vput(tdp); - return (error); - } - - /* * Return pointer to current entry in dp->i_offset, * and distance past previous entry (if there @@ -585,6 +568,16 @@ found: dp->i_count = 0; else dp->i_count = dp->i_offset - prevoff; + if (dd_ino != NULL) + return (0); + if ((error = VFS_VGET(vdp->v_mount, ino, + LK_EXCLUSIVE, &tdp)) != 0) + return (error); + error = ufs_delete_denied(vdp, tdp, cred, cnp->cn_thread); + if (error) { + vput(tdp); + return (error); + } if (dp->i_number == ino) { VREF(vdp); *vpp = vdp; @@ -616,6 +609,8 @@ found: dp->i_offset = i_offset; if (dp->i_number == ino) return (EISDIR); + if (dd_ino != NULL) + return (0); if ((error = VFS_VGET(vdp->v_mount, ino, LK_EXCLUSIVE, &tdp)) != 0) return (error); @@ -650,6 +645,8 @@ found: cnp->cn_flags |= SAVENAME; return (0); } + if (dd_ino != NULL) + return (0); /* * Step through the translation in the name. We do not `vput' the @@ -681,7 +678,7 @@ found: * to the inode we looked up before vdp lock was * dropped. */ - error = ufs_lookup_(pdp, NULL, cnp, &ino1); + error = ufs_lookup_ino(pdp, NULL, cnp, &ino1); if (error) { vput(tdp); return (error); @@ -704,6 +701,14 @@ found: vn_lock(vdp, LK_UPGRADE | LK_RETRY); else /* if (ltype == LK_SHARED) */ vn_lock(vdp, LK_DOWNGRADE | LK_RETRY); + /* + * Relock for the "." case may have left us with + * a reclaimed vnode. + */ + if (vdp->v_iflag & VI_DOOMED) { + vrele(vdp); + return (ENOENT); + } } *vpp = vdp; } else { @@ -825,12 +830,13 @@ ufs_makedirentry(ip, cnp, newdirp) * soft dependency code).
*/ int -ufs_direnter(dvp, tvp, dirp, cnp, newdirbp) +ufs_direnter(dvp, tvp, dirp, cnp, newdirbp, isrename) struct vnode *dvp; struct vnode *tvp; struct direct *dirp; struct componentname *cnp; struct buf *newdirbp; + int isrename; { struct ucred *cr; struct thread *td; @@ -903,22 +909,28 @@ ufs_direnter(dvp, tvp, dirp, cnp, newdirbp) blkoff += DIRBLKSIZ; } if (softdep_setup_directory_add(bp, dp, dp->i_offset, - dirp->d_ino, newdirbp, 1) == 0) { - bdwrite(bp); + dirp->d_ino, newdirbp, 1)) + dp->i_flag |= IN_NEEDSYNC; + if (newdirbp) + bdwrite(newdirbp); + bdwrite(bp); + if ((dp->i_flag & IN_NEEDSYNC) == 0) return (UFS_UPDATE(dvp, 0)); - } - /* We have just allocated a directory block in an - * indirect block. Rather than tracking when it gets - * claimed by the inode, we simply do a VOP_FSYNC - * now to ensure that it is there (in case the user - * does a future fsync). Note that we have to unlock - * the inode for the entry that we just entered, as - * the VOP_FSYNC may need to lock other inodes which - * can lead to deadlock if we also hold a lock on - * the newly entered node. + /* + * We have just allocated a directory block in an + * indirect block. We must prevent holes in the + * directory created if directory entries are + * written out of order. To accomplish this we + * fsync when we extend a directory into indirects. + * During rename it's not safe to drop the tvp lock + * so sync must be delayed until it is. + * + * This synchronous step could be removed if fsck and + * the kernel were taught to fill in sparse + * directories rather than panic. 
*/ - if ((error = bwrite(bp))) - return (error); + if (isrename) + return (0); if (tvp != NULL) VOP_UNLOCK(tvp, 0); error = VOP_FSYNC(dvp, MNT_WAIT, td); @@ -1007,7 +1019,7 @@ ufs_direnter(dvp, tvp, dirp, cnp, newdirbp) dp->i_offset + ((char *)ep - dirbuf)); #endif if (DOINGSOFTDEP(dvp)) - softdep_change_directoryentry_offset(dp, dirbuf, + softdep_change_directoryentry_offset(bp, dp, dirbuf, (caddr_t)nep, (caddr_t)ep, dsize); else bcopy((caddr_t)nep, (caddr_t)ep, dsize); @@ -1059,6 +1071,8 @@ ufs_direnter(dvp, tvp, dirp, cnp, newdirbp) (void) softdep_setup_directory_add(bp, dp, dp->i_offset + (caddr_t)ep - dirbuf, dirp->d_ino, newdirbp, 0); + if (newdirbp != NULL) + bdwrite(newdirbp); bdwrite(bp); } else { if (DOINGASYNC(dvp)) { @@ -1076,7 +1090,8 @@ ufs_direnter(dvp, tvp, dirp, cnp, newdirbp) * lock other inodes which can lead to deadlock if we also hold a * lock on the newly entered node. */ - if (error == 0 && dp->i_endoff && dp->i_endoff < dp->i_size) { + if (isrename == 0 && error == 0 && + dp->i_endoff && dp->i_endoff < dp->i_size) { if (tvp != NULL) VOP_UNLOCK(tvp, 0); #ifdef UFS_DIRHASH @@ -1117,6 +1132,19 @@ ufs_dirremove(dvp, ip, flags, isrmdir) dp = VTOI(dvp); + /* + * Adjust the link count early so softdep can block if necessary. + */ + if (ip) { + ip->i_effnlink--; + if (DOINGSOFTDEP(dvp)) { + softdep_setup_unlink(dp, ip); + } else { + ip->i_nlink--; + DIP_SET(ip, i_nlink, ip->i_nlink); + ip->i_flag |= IN_CHANGE; + } + } if (flags & DOWHITEOUT) { /* * Whiteout entry: set d_ino to WINO. @@ -1146,6 +1174,9 @@ ufs_dirremove(dvp, ip, flags, isrmdir) if (dp->i_dirhash != NULL) ufsdirhash_remove(dp, rep, dp->i_offset); #endif + if (ip && rep->d_ino != ip->i_number) + panic("ufs_dirremove: ip %d does not match dirent ino %d\n", + ip->i_number, rep->d_ino); if (dp->i_count == 0) { /* * First entry in block: set d_ino to zero. 
@@ -1164,31 +1195,20 @@ ufs_dirremove(dvp, ip, flags, isrmdir) dp->i_offset & ~(DIRBLKSIZ - 1)); #endif out: + error = 0; if (DOINGSOFTDEP(dvp)) { - if (ip) { - ip->i_effnlink--; - softdep_change_linkcnt(ip); + if (ip) softdep_setup_remove(bp, dp, ip, isrmdir); - } - if (softdep_slowdown(dvp)) { + if (softdep_slowdown(dvp)) error = bwrite(bp); - } else { + else bdwrite(bp); - error = 0; - } } else { - if (ip) { - ip->i_effnlink--; - ip->i_nlink--; - DIP_SET(ip, i_nlink, ip->i_nlink); - ip->i_flag |= IN_CHANGE; - } if (flags & DOWHITEOUT) error = bwrite(bp); - else if (DOINGASYNC(dvp) && dp->i_count != 0) { + else if (DOINGASYNC(dvp) && dp->i_count != 0) bdwrite(bp); - error = 0; - } else + else error = bwrite(bp); } dp->i_flag |= IN_CHANGE | IN_UPDATE; @@ -1221,6 +1241,19 @@ ufs_dirrewrite(dp, oip, newinum, newtype, isrmdir) struct vnode *vdp = ITOV(dp); int error; + /* + * Drop the link before we lock the buf so softdep can block if + * necessary. + */ + oip->i_effnlink--; + if (DOINGSOFTDEP(vdp)) { + softdep_setup_unlink(dp, oip); + } else { + oip->i_nlink--; + DIP_SET(oip, i_nlink, oip->i_nlink); + oip->i_flag |= IN_CHANGE; + } + error = UFS_BLKATOFF(vdp, (off_t)dp->i_offset, (char **)&ep, &bp); if (error) return (error); @@ -1232,15 +1265,10 @@ ufs_dirrewrite(dp, oip, newinum, newtype, isrmdir) ep->d_ino = newinum; if (!OFSFMT(vdp)) ep->d_type = newtype; - oip->i_effnlink--; if (DOINGSOFTDEP(vdp)) { - softdep_change_linkcnt(oip); softdep_setup_directory_change(bp, dp, oip, newinum, isrmdir); bdwrite(bp); } else { - oip->i_nlink--; - DIP_SET(oip, i_nlink, oip->i_nlink); - oip->i_flag |= IN_CHANGE; if (DOINGASYNC(vdp)) { bdwrite(bp); error = 0; @@ -1355,25 +1383,25 @@ ufs_dir_dd_ino(struct vnode *vp, struct ucred *cred, ino_t *dd_ino) /* * Check if source directory is in the path of the target directory. - * Target is supplied locked, source is unlocked. - * The target is always vput before returning. 
*/ int -ufs_checkpath(ino_t source_ino, struct inode *target, struct ucred *cred) +ufs_checkpath(ino_t source_ino, ino_t parent_ino, struct inode *target, struct ucred *cred, ino_t *wait_ino) { - struct vnode *vp, *vp1; + struct mount *mp; + struct vnode *tvp, *vp, *vp1; int error; ino_t dd_ino; - vp = ITOV(target); - if (target->i_number == source_ino) { - error = EEXIST; - goto out; - } - error = 0; + vp = tvp = ITOV(target); + mp = vp->v_mount; + *wait_ino = 0; + if (target->i_number == source_ino) + return (EEXIST); + if (target->i_number == parent_ino) + return (0); if (target->i_number == ROOTINO) - goto out; - + return (0); + error = 0; for (;;) { error = ufs_dir_dd_ino(vp, cred, &dd_ino); if (error != 0) @@ -1384,9 +1412,13 @@ ufs_checkpath(ino_t source_ino, struct inode *target, struct ucred *cred) } if (dd_ino == ROOTINO) break; - error = vn_vget_ino(vp, dd_ino, LK_EXCLUSIVE, &vp1); - if (error != 0) + if (dd_ino == parent_ino) break; + error = VFS_VGET(mp, dd_ino, LK_SHARED | LK_NOWAIT, &vp1); + if (error != 0) { + *wait_ino = dd_ino; + break; + } /* Recheck that ".." still points to vp1 after relock of vp */ error = ufs_dir_dd_ino(vp, cred, &dd_ino); if (error != 0) { @@ -1398,14 +1430,14 @@ ufs_checkpath(ino_t source_ino, struct inode *target, struct ucred *cred) vput(vp1); continue; } - vput(vp); + if (vp != tvp) + vput(vp); vp = vp1; } -out: if (error == ENOTDIR) - printf("checkpath: .. not a directory\n"); - if (vp != NULL) + panic("checkpath: .. 
not a directory\n"); + if (vp != tvp) vput(vp); return (error); } diff --git a/sys/ufs/ufs/ufs_quota.c b/sys/ufs/ufs/ufs_quota.c index 2db0444238d..c5161899aa8 100644 --- a/sys/ufs/ufs/ufs_quota.c +++ b/sys/ufs/ufs/ufs_quota.c @@ -39,6 +39,7 @@ __FBSDID("$FreeBSD$"); #include #include +#include #include #include #include @@ -59,6 +60,8 @@ __FBSDID("$FreeBSD$"); #include #include +CTASSERT(sizeof(struct dqblk64) == sizeof(struct dqhdr64)); + static int unprivileged_get_quota = 0; SYSCTL_INT(_security_bsd, OID_AUTO, unprivileged_get_quota, CTLFLAG_RW, &unprivileged_get_quota, 0, @@ -73,6 +76,7 @@ static char *quotatypes[] = INITQFNAMES; static int chkdqchg(struct inode *, ufs2_daddr_t, struct ucred *, int, int *); static int chkiqchg(struct inode *, int, struct ucred *, int, int *); +static int dqopen(struct vnode *, struct ufsmount *, int); static int dqget(struct vnode *, u_long, struct ufsmount *, int, struct dquot **); static int dqsync(struct vnode *, struct dquot *); @@ -80,6 +84,14 @@ static void dqflush(struct vnode *); static int quotaoff1(struct thread *td, struct mount *mp, int type); static int quotaoff_inchange(struct thread *td, struct mount *mp, int type); +/* conversion functions - from_to() */ +static void dqb32_dq(const struct dqblk32 *, struct dquot *); +static void dqb64_dq(const struct dqblk64 *, struct dquot *); +static void dq_dqb32(const struct dquot *, struct dqblk32 *); +static void dq_dqb64(const struct dquot *, struct dqblk64 *); +static void dqb32_dqb64(const struct dqblk32 *, struct dqblk64 *); +static void dqb64_dqb32(const struct dqblk64 *, struct dqblk32 *); + #ifdef DIAGNOSTIC static void dqref(struct dquot *); static void chkdquot(struct inode *); @@ -90,7 +102,7 @@ static void chkdquot(struct inode *); * * This routine completely defines the semantics of quotas. 
* If other criterion want to be used to establish quotas, the - * MAXQUOTAS value in quotas.h should be increased, and the + * MAXQUOTAS value in quota.h should be increased, and the * additional dquots set up here. */ int @@ -496,6 +508,9 @@ quotaon(struct thread *td, struct mount *mp, int type, void *fname) if (error) return (error); + if (mp->mnt_flag & MNT_RDONLY) + return (EROFS); + ump = VFSTOUFS(mp); dq = NODQUOT; @@ -522,10 +537,18 @@ quotaon(struct thread *td, struct mount *mp, int type, void *fname) return (EALREADY); } ump->um_qflags[type] |= QTF_OPENING|QTF_CLOSING; + UFS_UNLOCK(ump); + if ((error = dqopen(vp, ump, type)) != 0) { + UFS_LOCK(ump); + ump->um_qflags[type] &= ~(QTF_OPENING|QTF_CLOSING); + UFS_UNLOCK(ump); + (void) vn_close(vp, FREAD|FWRITE, td->td_ucred, td); + VFS_UNLOCK_GIANT(vfslocked); + return (error); + } MNT_ILOCK(mp); mp->mnt_flag |= MNT_QUOTA; MNT_IUNLOCK(mp); - UFS_UNLOCK(ump); vpp = &ump->um_quotas[type]; if (*vpp != vp) @@ -734,8 +757,9 @@ quotaoff(struct thread *td, struct mount *mp, int type) /* * Q_GETQUOTA - return current values in a dqblk structure. */ -int -getquota(struct thread *td, struct mount *mp, u_long id, int type, void *addr) +static int +_getquota(struct thread *td, struct mount *mp, u_long id, int type, + struct dqblk64 *dqb) { struct dquot *dq; int error; @@ -766,7 +790,7 @@ getquota(struct thread *td, struct mount *mp, u_long id, int type, void *addr) error = dqget(NULLVP, id, VFSTOUFS(mp), type, &dq); if (error) return (error); - error = copyout(&dq->dq_dqb, addr, sizeof (struct dqblk)); + *dqb = dq->dq_dqb; dqrele(NULLVP, dq); return (error); } @@ -774,23 +798,21 @@ getquota(struct thread *td, struct mount *mp, u_long id, int type, void *addr) /* * Q_SETQUOTA - assign an entire dqblk structure. 
*/ -int -setquota(struct thread *td, struct mount *mp, u_long id, int type, void *addr) +static int +_setquota(struct thread *td, struct mount *mp, u_long id, int type, + struct dqblk64 *dqb) { struct dquot *dq; struct dquot *ndq; struct ufsmount *ump; - struct dqblk newlim; + struct dqblk64 newlim; int error; error = priv_check(td, PRIV_VFS_SETQUOTA); if (error) return (error); - ump = VFSTOUFS(mp); - error = copyin(addr, &newlim, sizeof (struct dqblk)); - if (error) - return (error); + newlim = *dqb; ndq = NODQUOT; ump = VFSTOUFS(mp); @@ -839,23 +861,21 @@ setquota(struct thread *td, struct mount *mp, u_long id, int type, void *addr) /* * Q_SETUSE - set current inode and block usage. */ -int -setuse(struct thread *td, struct mount *mp, u_long id, int type, void *addr) +static int +_setuse(struct thread *td, struct mount *mp, u_long id, int type, + struct dqblk64 *dqb) { struct dquot *dq; struct ufsmount *ump; struct dquot *ndq; - struct dqblk usage; + struct dqblk64 usage; int error; error = priv_check(td, PRIV_UFS_SETUSE); if (error) return (error); - ump = VFSTOUFS(mp); - error = copyin(addr, &usage, sizeof (struct dqblk)); - if (error) - return (error); + usage = *dqb; ump = VFSTOUFS(mp); ndq = NODQUOT; @@ -888,6 +908,114 @@ setuse(struct thread *td, struct mount *mp, u_long id, int type, void *addr) return (0); } +int +getquota32(struct thread *td, struct mount *mp, u_long id, int type, void *addr) +{ + struct dqblk32 dqb32; + struct dqblk64 dqb64; + int error; + + error = _getquota(td, mp, id, type, &dqb64); + if (error) + return (error); + dqb64_dqb32(&dqb64, &dqb32); + error = copyout(&dqb32, addr, sizeof(dqb32)); + return (error); +} + +int +setquota32(struct thread *td, struct mount *mp, u_long id, int type, void *addr) +{ + struct dqblk32 dqb32; + struct dqblk64 dqb64; + int error; + + error = copyin(addr, &dqb32, sizeof(dqb32)); + if (error) + return (error); + dqb32_dqb64(&dqb32, &dqb64); + error = _setquota(td, mp, id, type, &dqb64); + return 
(error); +} + +int +setuse32(struct thread *td, struct mount *mp, u_long id, int type, void *addr) +{ + struct dqblk32 dqb32; + struct dqblk64 dqb64; + int error; + + error = copyin(addr, &dqb32, sizeof(dqb32)); + if (error) + return (error); + dqb32_dqb64(&dqb32, &dqb64); + error = _setuse(td, mp, id, type, &dqb64); + return (error); +} + +int +getquota(struct thread *td, struct mount *mp, u_long id, int type, void *addr) +{ + struct dqblk64 dqb64; + int error; + + error = _getquota(td, mp, id, type, &dqb64); + if (error) + return (error); + error = copyout(&dqb64, addr, sizeof(dqb64)); + return (error); +} + +int +setquota(struct thread *td, struct mount *mp, u_long id, int type, void *addr) +{ + struct dqblk64 dqb64; + int error; + + error = copyin(addr, &dqb64, sizeof(dqb64)); + if (error) + return (error); + error = _setquota(td, mp, id, type, &dqb64); + return (error); +} + +int +setuse(struct thread *td, struct mount *mp, u_long id, int type, void *addr) +{ + struct dqblk64 dqb64; + int error; + + error = copyin(addr, &dqb64, sizeof(dqb64)); + if (error) + return (error); + error = _setuse(td, mp, id, type, &dqb64); + return (error); +} + +/* + * Q_GETQUOTASIZE - get bit-size of quota file fields + */ +int +getquotasize(struct thread *td, struct mount *mp, u_long id, int type, + void *sizep) +{ + struct ufsmount *ump = VFSTOUFS(mp); + int bitsize; + + UFS_LOCK(ump); + if (ump->um_quotas[type] == NULLVP || + (ump->um_qflags[type] & QTF_CLOSING)) { + UFS_UNLOCK(ump); + return (EINVAL); + } + if ((ump->um_qflags[type] & QTF_64BIT) != 0) + bitsize = 64; + else + bitsize = 32; + UFS_UNLOCK(ump); + return (copyout(&bitsize, sizep, sizeof(int))); +} + /* * Q_SYNC - sync quota files to disk. */ @@ -1024,6 +1152,60 @@ dqhashfind(struct dqhash *dqh, u_long id, struct vnode *dqvp) return (NODQUOT); } +/* + * Determine the quota file type. + * + * A 32-bit quota file is simply an array of struct dqblk32. 
+ * + * A 64-bit quota file is a struct dqhdr64 followed by an array of struct + * dqblk64. The header contains various magic bits which allow us to be + * reasonably confident that it is indeed a 64-bit quota file and not just + * a 32-bit quota file that just happens to "look right". + * + */ +static int +dqopen(struct vnode *vp, struct ufsmount *ump, int type) +{ + struct dqhdr64 dqh; + struct iovec aiov; + struct uio auio; + int error, vfslocked; + + auio.uio_iov = &aiov; + auio.uio_iovcnt = 1; + aiov.iov_base = &dqh; + aiov.iov_len = sizeof(dqh); + auio.uio_resid = sizeof(dqh); + auio.uio_offset = 0; + auio.uio_segflg = UIO_SYSSPACE; + auio.uio_rw = UIO_READ; + auio.uio_td = (struct thread *)0; + vfslocked = VFS_LOCK_GIANT(vp->v_mount); + error = VOP_READ(vp, &auio, 0, ump->um_cred[type]); + VFS_UNLOCK_GIANT(vfslocked); + + if (error != 0) + return (error); + if (auio.uio_resid > 0) { + /* assume 32 bits */ + return (0); + } + + UFS_LOCK(ump); + if (strcmp(dqh.dqh_magic, Q_DQHDR64_MAGIC) == 0 && + be32toh(dqh.dqh_version) == Q_DQHDR64_VERSION && + be32toh(dqh.dqh_hdrlen) == (uint32_t)sizeof(struct dqhdr64) && + be32toh(dqh.dqh_reclen) == (uint32_t)sizeof(struct dqblk64)) { + /* XXX: what if the magic matches, but the sizes are wrong? */ + ump->um_qflags[type] |= QTF_64BIT; + } else { + ump->um_qflags[type] &= ~QTF_64BIT; + } + UFS_UNLOCK(ump); + + return (0); +} + /* * Obtain a dquot structure for the specified identifier and quota file * reading the information from the file if necessary.
@@ -1032,6 +1214,8 @@ static int dqget(struct vnode *vp, u_long id, struct ufsmount *ump, int type, struct dquot **dqp) { + uint8_t buf[sizeof(struct dqblk64)]; + off_t base, recsize; struct dquot *dq, *dq1; struct dqhash *dqh; struct vnode *dqvp; @@ -1121,8 +1305,7 @@ hfound: DQI_LOCK(dq); if (numdquot < desireddquot) { numdquot++; DQH_UNLOCK(); - dq1 = (struct dquot *)malloc(sizeof *dq, M_DQUOT, - M_WAITOK | M_ZERO); + dq1 = malloc(sizeof *dq1, M_DQUOT, M_WAITOK | M_ZERO); mtx_init(&dq1->dq_lock, "dqlock", NULL, MTX_DEF); DQH_LOCK(); /* @@ -1169,20 +1352,37 @@ hfound: DQI_LOCK(dq); DQREF(dq); DQH_UNLOCK(); + /* + * Read the requested quota record from the quota file, performing + * any necessary conversions. + */ + if (ump->um_qflags[type] & QTF_64BIT) { + recsize = sizeof(struct dqblk64); + base = sizeof(struct dqhdr64); + } else { + recsize = sizeof(struct dqblk32); + base = 0; + } auio.uio_iov = &aiov; auio.uio_iovcnt = 1; - aiov.iov_base = &dq->dq_dqb; - aiov.iov_len = sizeof (struct dqblk); - auio.uio_resid = sizeof (struct dqblk); - auio.uio_offset = (off_t)id * sizeof (struct dqblk); + aiov.iov_base = buf; + aiov.iov_len = recsize; + auio.uio_resid = recsize; + auio.uio_offset = base + id * recsize; auio.uio_segflg = UIO_SYSSPACE; auio.uio_rw = UIO_READ; auio.uio_td = (struct thread *)0; vfslocked = VFS_LOCK_GIANT(dqvp->v_mount); error = VOP_READ(dqvp, &auio, 0, ump->um_cred[type]); - if (auio.uio_resid == sizeof(struct dqblk) && error == 0) - bzero(&dq->dq_dqb, sizeof(struct dqblk)); + if (auio.uio_resid == recsize && error == 0) { + bzero(&dq->dq_dqb, sizeof(dq->dq_dqb)); + } else { + if (ump->um_qflags[type] & QTF_64BIT) + dqb64_dq((struct dqblk64 *)buf, dq); + else + dqb32_dq((struct dqblk32 *)buf, dq); + } if (dqvplocked) vput(dqvp); else @@ -1293,6 +1493,8 @@ sync: static int dqsync(struct vnode *vp, struct dquot *dq) { + uint8_t buf[sizeof(struct dqblk64)]; + off_t base, recsize; struct vnode *dqvp; struct iovec aiov; struct uio auio; @@ -1339,12 
+1541,26 @@ dqsync(struct vnode *vp, struct dquot *dq) dq->dq_flags |= DQ_LOCK; DQI_UNLOCK(dq); + /* + * Write the quota record to the quota file, performing any + * necessary conversions. See dqget() for additional details. + */ + if (ump->um_qflags[dq->dq_type] & QTF_64BIT) { + dq_dqb64(dq, (struct dqblk64 *)buf); + recsize = sizeof(struct dqblk64); + base = sizeof(struct dqhdr64); + } else { + dq_dqb32(dq, (struct dqblk32 *)buf); + recsize = sizeof(struct dqblk32); + base = 0; + } + auio.uio_iov = &aiov; auio.uio_iovcnt = 1; - aiov.iov_base = &dq->dq_dqb; - aiov.iov_len = sizeof (struct dqblk); - auio.uio_resid = sizeof (struct dqblk); - auio.uio_offset = (off_t)dq->dq_id * sizeof (struct dqblk); + aiov.iov_base = buf; + aiov.iov_len = recsize; + auio.uio_resid = recsize; + auio.uio_offset = base + dq->dq_id * recsize; auio.uio_segflg = UIO_SYSSPACE; auio.uio_rw = UIO_WRITE; auio.uio_td = (struct thread *)0; @@ -1357,7 +1573,8 @@ dqsync(struct vnode *vp, struct dquot *dq) DQI_LOCK(dq); DQI_WAKEUP(dq); dq->dq_flags &= ~DQ_MOD; -out: DQI_UNLOCK(dq); +out: + DQI_UNLOCK(dq); vfslocked = VFS_LOCK_GIANT(dqvp->v_mount); if (vp != dqvp) vput(dqvp); @@ -1396,3 +1613,116 @@ dqflush(struct vnode *vp) } DQH_UNLOCK(); } + +/* + * 32-bit / 64-bit conversion functions. + * + * 32-bit quota records are stored in native byte order. Attention must + * be paid to overflow issues. + * + * 64-bit quota records are stored in network byte order. + */ + +#define CLIP32(u64) (u64 > UINT32_MAX ? UINT32_MAX : (uint32_t)u64) + +/* + * Convert 32-bit host-order structure to dquot. 
+ */ +static void +dqb32_dq(const struct dqblk32 *dqb32, struct dquot *dq) +{ + + dq->dq_bhardlimit = dqb32->dqb_bhardlimit; + dq->dq_bsoftlimit = dqb32->dqb_bsoftlimit; + dq->dq_curblocks = dqb32->dqb_curblocks; + dq->dq_ihardlimit = dqb32->dqb_ihardlimit; + dq->dq_isoftlimit = dqb32->dqb_isoftlimit; + dq->dq_curinodes = dqb32->dqb_curinodes; + dq->dq_btime = dqb32->dqb_btime; + dq->dq_itime = dqb32->dqb_itime; +} + +/* + * Convert 64-bit network-order structure to dquot. + */ +static void +dqb64_dq(const struct dqblk64 *dqb64, struct dquot *dq) +{ + + dq->dq_bhardlimit = be64toh(dqb64->dqb_bhardlimit); + dq->dq_bsoftlimit = be64toh(dqb64->dqb_bsoftlimit); + dq->dq_curblocks = be64toh(dqb64->dqb_curblocks); + dq->dq_ihardlimit = be64toh(dqb64->dqb_ihardlimit); + dq->dq_isoftlimit = be64toh(dqb64->dqb_isoftlimit); + dq->dq_curinodes = be64toh(dqb64->dqb_curinodes); + dq->dq_btime = be64toh(dqb64->dqb_btime); + dq->dq_itime = be64toh(dqb64->dqb_itime); +} + +/* + * Convert dquot to 32-bit host-order structure. + */ +static void +dq_dqb32(const struct dquot *dq, struct dqblk32 *dqb32) +{ + + dqb32->dqb_bhardlimit = CLIP32(dq->dq_bhardlimit); + dqb32->dqb_bsoftlimit = CLIP32(dq->dq_bsoftlimit); + dqb32->dqb_curblocks = CLIP32(dq->dq_curblocks); + dqb32->dqb_ihardlimit = CLIP32(dq->dq_ihardlimit); + dqb32->dqb_isoftlimit = CLIP32(dq->dq_isoftlimit); + dqb32->dqb_curinodes = CLIP32(dq->dq_curinodes); + dqb32->dqb_btime = CLIP32(dq->dq_btime); + dqb32->dqb_itime = CLIP32(dq->dq_itime); +} + +/* + * Convert dquot to 64-bit network-order structure. 
+ */ +static void +dq_dqb64(const struct dquot *dq, struct dqblk64 *dqb64) +{ + + dqb64->dqb_bhardlimit = htobe64(dq->dq_bhardlimit); + dqb64->dqb_bsoftlimit = htobe64(dq->dq_bsoftlimit); + dqb64->dqb_curblocks = htobe64(dq->dq_curblocks); + dqb64->dqb_ihardlimit = htobe64(dq->dq_ihardlimit); + dqb64->dqb_isoftlimit = htobe64(dq->dq_isoftlimit); + dqb64->dqb_curinodes = htobe64(dq->dq_curinodes); + dqb64->dqb_btime = htobe64(dq->dq_btime); + dqb64->dqb_itime = htobe64(dq->dq_itime); +} + +/* + * Convert 64-bit host-order structure to 32-bit host-order structure. + */ +static void +dqb64_dqb32(const struct dqblk64 *dqb64, struct dqblk32 *dqb32) +{ + + dqb32->dqb_bhardlimit = CLIP32(dqb64->dqb_bhardlimit); + dqb32->dqb_bsoftlimit = CLIP32(dqb64->dqb_bsoftlimit); + dqb32->dqb_curblocks = CLIP32(dqb64->dqb_curblocks); + dqb32->dqb_ihardlimit = CLIP32(dqb64->dqb_ihardlimit); + dqb32->dqb_isoftlimit = CLIP32(dqb64->dqb_isoftlimit); + dqb32->dqb_curinodes = CLIP32(dqb64->dqb_curinodes); + dqb32->dqb_btime = CLIP32(dqb64->dqb_btime); + dqb32->dqb_itime = CLIP32(dqb64->dqb_itime); +} + +/* + * Convert 32-bit host-order structure to 64-bit host-order structure. 
+ */ +static void +dqb32_dqb64(const struct dqblk32 *dqb32, struct dqblk64 *dqb64) +{ + + dqb64->dqb_bhardlimit = dqb32->dqb_bhardlimit; + dqb64->dqb_bsoftlimit = dqb32->dqb_bsoftlimit; + dqb64->dqb_curblocks = dqb32->dqb_curblocks; + dqb64->dqb_ihardlimit = dqb32->dqb_ihardlimit; + dqb64->dqb_isoftlimit = dqb32->dqb_isoftlimit; + dqb64->dqb_curinodes = dqb32->dqb_curinodes; + dqb64->dqb_btime = dqb32->dqb_btime; + dqb64->dqb_itime = dqb32->dqb_itime; +} diff --git a/sys/ufs/ufs/ufs_vfsops.c b/sys/ufs/ufs/ufs_vfsops.c index f6b6b1e2ef8..0eeb14fc54e 100644 --- a/sys/ufs/ufs/ufs_vfsops.c +++ b/sys/ufs/ufs/ufs_vfsops.c @@ -127,6 +127,18 @@ ufs_quotactl(mp, cmds, id, arg) error = quotaoff(td, mp, type); break; + case Q_SETQUOTA32: + error = setquota32(td, mp, id, type, arg); + break; + + case Q_SETUSE32: + error = setuse32(td, mp, id, type, arg); + break; + + case Q_GETQUOTA32: + error = getquota32(td, mp, id, type, arg); + break; + case Q_SETQUOTA: error = setquota(td, mp, id, type, arg); break; @@ -139,6 +151,10 @@ ufs_quotactl(mp, cmds, id, arg) error = getquota(td, mp, id, type, arg); break; + case Q_GETQUOTASIZE: + error = getquotasize(td, mp, id, type, arg); + break; + case Q_SYNC: error = qsync(mp); break; diff --git a/sys/ufs/ufs/ufs_vnops.c b/sys/ufs/ufs/ufs_vnops.c index 9d4d93dbc8f..f8d45cfceb8 100644 --- a/sys/ufs/ufs/ufs_vnops.c +++ b/sys/ufs/ufs/ufs_vnops.c @@ -114,6 +114,8 @@ static vop_close_t ufsfifo_close; static vop_kqfilter_t ufsfifo_kqfilter; static vop_pathconf_t ufsfifo_pathconf; +SYSCTL_NODE(_vfs, OID_AUTO, ufs, CTLFLAG_RD, 0, "UFS filesystem"); + /* * A virgin directory (no blushing please). 
*/ @@ -974,6 +976,9 @@ ufs_link(ap) error = EXDEV; goto out; } + if (VTOI(tdvp)->i_effnlink < 2) + panic("ufs_link: Bad link count %d on parent", + VTOI(tdvp)->i_effnlink); ip = VTOI(vp); if ((nlink_t)ip->i_nlink >= LINK_MAX) { error = EMLINK; @@ -988,11 +993,11 @@ ufs_link(ap) DIP_SET(ip, i_nlink, ip->i_nlink); ip->i_flag |= IN_CHANGE; if (DOINGSOFTDEP(vp)) - softdep_change_linkcnt(ip); + softdep_setup_link(VTOI(tdvp), ip); error = UFS_UPDATE(vp, !(DOINGSOFTDEP(vp) | DOINGASYNC(vp))); if (!error) { ufs_makedirentry(ip, cnp, &newdir); - error = ufs_direnter(tdvp, vp, &newdir, cnp, NULL); + error = ufs_direnter(tdvp, vp, &newdir, cnp, NULL, 0); } if (error) { @@ -1001,7 +1006,7 @@ ufs_link(ap) DIP_SET(ip, i_nlink, ip->i_nlink); ip->i_flag |= IN_CHANGE; if (DOINGSOFTDEP(vp)) - softdep_change_linkcnt(ip); + softdep_revert_link(VTOI(tdvp), ip); } out: return (error); @@ -1043,7 +1048,7 @@ ufs_whiteout(ap) newdir.d_namlen = cnp->cn_namelen; bcopy(cnp->cn_nameptr, newdir.d_name, (unsigned)cnp->cn_namelen + 1); newdir.d_type = DT_WHT; - error = ufs_direnter(dvp, NULL, &newdir, cnp, NULL); + error = ufs_direnter(dvp, NULL, &newdir, cnp, NULL, 0); break; case DELETE: @@ -1062,6 +1067,11 @@ ufs_whiteout(ap) return (error); } +static volatile int rename_restarts; +SYSCTL_INT(_vfs_ufs, OID_AUTO, rename_restarts, CTLFLAG_RD, + __DEVOLATILE(int *, &rename_restarts), 0, + "Times rename had to restart due to lock contention"); + /* * Rename system call. 
* rename("foo", "bar"); @@ -1101,111 +1111,183 @@ ufs_rename(ap) struct vnode *tdvp = ap->a_tdvp; struct vnode *fvp = ap->a_fvp; struct vnode *fdvp = ap->a_fdvp; + struct vnode *nvp; struct componentname *tcnp = ap->a_tcnp; struct componentname *fcnp = ap->a_fcnp; struct thread *td = fcnp->cn_thread; - struct inode *ip, *xp, *dp; + struct inode *fip, *tip, *tdp, *fdp; struct direct newdir; - int doingdirectory = 0, oldparent = 0, newparent = 0; + off_t endoff; + int doingdirectory, newparent; int error = 0, ioflag; - ino_t fvp_ino; + struct mount *mp; + ino_t ino; #ifdef INVARIANTS if ((tcnp->cn_flags & HASBUF) == 0 || (fcnp->cn_flags & HASBUF) == 0) panic("ufs_rename: no name"); #endif + endoff = 0; + mp = tdvp->v_mount; + VOP_UNLOCK(tdvp, 0); + if (tvp && tvp != tdvp) + VOP_UNLOCK(tvp, 0); /* * Check for cross-device rename. */ if ((fvp->v_mount != tdvp->v_mount) || (tvp && (fvp->v_mount != tvp->v_mount))) { error = EXDEV; -abortit: - if (tdvp == tvp) - vrele(tdvp); - else - vput(tdvp); - if (tvp) - vput(tvp); - vrele(fdvp); - vrele(fvp); - return (error); + mp = NULL; + goto releout; } - + error = vfs_busy(mp, 0); + if (error) { + mp = NULL; + goto releout; + } +relock: + /* + * We need to acquire 2 to 4 locks depending on whether tvp is NULL + * and fdvp and tdvp are the same directory. Subsequently we need + * to double-check all paths and in the directory rename case we + * need to verify that we are not creating a directory loop. To + * handle this we acquire all but fdvp using non-blocking + * acquisitions. If we fail to acquire any lock in the path we will + * drop all held locks, acquire the new lock in a blocking fashion, + * and then release it and restart the rename. This acquire/release + * step ensures that we do not spin on a lock waiting for release. 
+ */ + error = vn_lock(fdvp, LK_EXCLUSIVE); + if (error) + goto releout; + if (vn_lock(tdvp, LK_EXCLUSIVE | LK_NOWAIT) != 0) { + VOP_UNLOCK(fdvp, 0); + error = vn_lock(tdvp, LK_EXCLUSIVE); + if (error) + goto releout; + VOP_UNLOCK(tdvp, 0); + atomic_add_int(&rename_restarts, 1); + goto relock; + } + /* + * Re-resolve fvp to be certain it still exists and fetch the + * correct vnode. + */ + error = ufs_lookup_ino(fdvp, NULL, fcnp, &ino); + if (error) { + VOP_UNLOCK(fdvp, 0); + VOP_UNLOCK(tdvp, 0); + goto releout; + } + error = VFS_VGET(mp, ino, LK_EXCLUSIVE | LK_NOWAIT, &nvp); + if (error) { + VOP_UNLOCK(fdvp, 0); + VOP_UNLOCK(tdvp, 0); + if (error != EBUSY) + goto releout; + error = VFS_VGET(mp, ino, LK_EXCLUSIVE, &nvp); + if (error != 0) + goto releout; + VOP_UNLOCK(nvp, 0); + vrele(fvp); + fvp = nvp; + atomic_add_int(&rename_restarts, 1); + goto relock; + } + vrele(fvp); + fvp = nvp; + /* + * Re-resolve tvp and acquire the vnode lock if present. + */ + error = ufs_lookup_ino(tdvp, NULL, tcnp, &ino); + if (error != 0 && error != EJUSTRETURN) { + VOP_UNLOCK(fdvp, 0); + VOP_UNLOCK(tdvp, 0); + VOP_UNLOCK(fvp, 0); + goto releout; + } + /* + * If tvp disappeared we just carry on. + */ + if (error == EJUSTRETURN && tvp != NULL) { + vrele(tvp); + tvp = NULL; + } + /* + * Get the tvp ino if the lookup succeeded. We may have to restart + * if the non-blocking acquire fails. 
+ */ + if (error == 0) { + nvp = NULL; + error = VFS_VGET(mp, ino, LK_EXCLUSIVE | LK_NOWAIT, &nvp); + if (tvp) + vrele(tvp); + tvp = nvp; + if (error) { + VOP_UNLOCK(fdvp, 0); + VOP_UNLOCK(tdvp, 0); + VOP_UNLOCK(fvp, 0); + if (error != EBUSY) + goto releout; + error = VFS_VGET(mp, ino, LK_EXCLUSIVE, &nvp); + if (error != 0) + goto releout; + VOP_UNLOCK(nvp, 0); + atomic_add_int(&rename_restarts, 1); + goto relock; + } + } + fdp = VTOI(fdvp); + fip = VTOI(fvp); + tdp = VTOI(tdvp); + tip = NULL; + if (tvp) + tip = VTOI(tvp); if (tvp && ((VTOI(tvp)->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) || (VTOI(tdvp)->i_flags & APPEND))) { error = EPERM; - goto abortit; + goto unlockout; } - /* * Renaming a file to itself has no effect. The upper layers should - * not call us in that case. Temporarily just warn if they do. + * not call us in that case. However, things could change after + * we drop the locks above. */ if (fvp == tvp) { - printf("ufs_rename: fvp == tvp (can't happen)\n"); error = 0; - goto abortit; + goto unlockout; } - - if ((error = vn_lock(fvp, LK_EXCLUSIVE)) != 0) - goto abortit; - dp = VTOI(fdvp); - ip = VTOI(fvp); - if (ip->i_nlink >= LINK_MAX) { - VOP_UNLOCK(fvp, 0); + doingdirectory = 0; + newparent = 0; + ino = fip->i_number; + if (fip->i_nlink >= LINK_MAX) { error = EMLINK; - goto abortit; + goto unlockout; } - if ((ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) - || (dp->i_flags & APPEND)) { - VOP_UNLOCK(fvp, 0); + if ((fip->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) + || (fdp->i_flags & APPEND)) { error = EPERM; - goto abortit; + goto unlockout; } - if ((ip->i_mode & IFMT) == IFDIR) { + if ((fip->i_mode & IFMT) == IFDIR) { /* * Avoid ".", "..", and aliases of "." for obvious reasons. 
*/ if ((fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.') || - dp == ip || (fcnp->cn_flags | tcnp->cn_flags) & ISDOTDOT || - (ip->i_flag & IN_RENAME)) { - VOP_UNLOCK(fvp, 0); + fdp == fip || + (fcnp->cn_flags | tcnp->cn_flags) & ISDOTDOT) { error = EINVAL; - goto abortit; + goto unlockout; } - ip->i_flag |= IN_RENAME; - oldparent = dp->i_number; + if (fdp->i_number != tdp->i_number) + newparent = tdp->i_number; doingdirectory = 1; } - vrele(fdvp); - - /* - * When the target exists, both the directory - * and target vnodes are returned locked. - */ - dp = VTOI(tdvp); - xp = NULL; - if (tvp) - xp = VTOI(tvp); - - /* - * 1) Bump link count while we're moving stuff - * around. If we crash somewhere before - * completing our work, the link count - * may be wrong, but correctable. - */ - ip->i_effnlink++; - ip->i_nlink++; - DIP_SET(ip, i_nlink, ip->i_nlink); - ip->i_flag |= IN_CHANGE; - if (DOINGSOFTDEP(fvp)) - softdep_change_linkcnt(ip); - if ((error = UFS_UPDATE(fvp, !(DOINGSOFTDEP(fvp) | - DOINGASYNC(fvp)))) != 0) { - VOP_UNLOCK(fvp, 0); - goto bad; + if (fvp->v_mountedhere != NULL || (tvp && tvp->v_mountedhere != NULL)) { + error = EXDEV; + goto unlockout; } /* @@ -1214,35 +1296,55 @@ abortit: * directory hierarchy above the target, as this would * orphan everything below the source directory. Also * the user must have write permission in the source so - * as to be able to change "..". We must repeat the call - * to namei, as the parent directory is unlocked by the - * call to checkpath(). + * as to be able to change "..". 
*/ - error = VOP_ACCESS(fvp, VWRITE, tcnp->cn_cred, tcnp->cn_thread); - fvp_ino = ip->i_number; - VOP_UNLOCK(fvp, 0); - if (oldparent != dp->i_number) - newparent = dp->i_number; if (doingdirectory && newparent) { - if (error) /* write access check above */ - goto bad; - if (xp != NULL) - vput(tvp); - error = ufs_checkpath(fvp_ino, dp, tcnp->cn_cred); + error = VOP_ACCESS(fvp, VWRITE, tcnp->cn_cred, tcnp->cn_thread); if (error) - goto out; + goto unlockout; + error = ufs_checkpath(ino, fdp->i_number, tdp, tcnp->cn_cred, + &ino); + /* + * We encountered a lock that we have to wait for. Unlock + * everything else and VGET before restarting. + */ + if (ino) { + VOP_UNLOCK(fdvp, 0); + VOP_UNLOCK(fvp, 0); + VOP_UNLOCK(tdvp, 0); + if (tvp) + VOP_UNLOCK(tvp, 0); + error = VFS_VGET(mp, ino, LK_SHARED, &nvp); + if (error == 0) + vput(nvp); + atomic_add_int(&rename_restarts, 1); + goto relock; + } + if (error) + goto unlockout; if ((tcnp->cn_flags & SAVESTART) == 0) panic("ufs_rename: lost to startdir"); - VREF(tdvp); - error = relookup(tdvp, &tvp, tcnp); - if (error) - goto out; - vrele(tdvp); - dp = VTOI(tdvp); - xp = NULL; - if (tvp) - xp = VTOI(tvp); } + if (fip->i_effnlink == 0 || fdp->i_effnlink == 0 || + tdp->i_effnlink == 0) + panic("Bad effnlink fip %p, fdp %p, tdp %p", fip, fdp, tdp); + + /* + * 1) Bump link count while we're moving stuff + * around. If we crash somewhere before + * completing our work, the link count + * may be wrong, but correctable. + */ + fip->i_effnlink++; + fip->i_nlink++; + DIP_SET(fip, i_nlink, fip->i_nlink); + fip->i_flag |= IN_CHANGE; + if (DOINGSOFTDEP(fvp)) + softdep_setup_link(tdp, fip); + error = UFS_UPDATE(fvp, !(DOINGSOFTDEP(fvp) | DOINGASYNC(fvp))); + if (error) + goto bad; + /* * 2) If target doesn't exist, link the target * to the source and unlink the source. @@ -1250,52 +1352,37 @@ abortit: * entry to reference the source inode and * expunge the original entry's existence. 
*/ - if (xp == NULL) { - if (dp->i_dev != ip->i_dev) + if (tip == NULL) { + if (tdp->i_dev != fip->i_dev) panic("ufs_rename: EXDEV"); - /* - * Account for ".." in new directory. - * When source and destination have the same - * parent we don't fool with the link count. - */ if (doingdirectory && newparent) { - if ((nlink_t)dp->i_nlink >= LINK_MAX) { + /* + * Account for ".." in new directory. + * When source and destination have the same + * parent we don't adjust the link count. The + * actual link modification is completed when + * .. is rewritten below. + */ + if ((nlink_t)tdp->i_nlink >= LINK_MAX) { error = EMLINK; goto bad; } - dp->i_effnlink++; - dp->i_nlink++; - DIP_SET(dp, i_nlink, dp->i_nlink); - dp->i_flag |= IN_CHANGE; - if (DOINGSOFTDEP(tdvp)) - softdep_change_linkcnt(dp); - error = UFS_UPDATE(tdvp, !(DOINGSOFTDEP(tdvp) | - DOINGASYNC(tdvp))); - if (error) - goto bad; } - ufs_makedirentry(ip, tcnp, &newdir); - error = ufs_direnter(tdvp, NULL, &newdir, tcnp, NULL); - if (error) { - if (doingdirectory && newparent) { - dp->i_effnlink--; - dp->i_nlink--; - DIP_SET(dp, i_nlink, dp->i_nlink); - dp->i_flag |= IN_CHANGE; - if (DOINGSOFTDEP(tdvp)) - softdep_change_linkcnt(dp); - (void)UFS_UPDATE(tdvp, 1); - } + ufs_makedirentry(fip, tcnp, &newdir); + error = ufs_direnter(tdvp, NULL, &newdir, tcnp, NULL, 1); + if (error) goto bad; - } - vput(tdvp); + /* Setup tdvp for directory compaction if needed. */ + if (tdp->i_count && tdp->i_endoff && + tdp->i_endoff < tdp->i_size) + endoff = tdp->i_endoff; } else { - if (xp->i_dev != dp->i_dev || xp->i_dev != ip->i_dev) + if (tip->i_dev != tdp->i_dev || tip->i_dev != fip->i_dev) panic("ufs_rename: EXDEV"); /* * Short circuit rename(foo, foo). */ - if (xp->i_number == ip->i_number) + if (tip->i_number == fip->i_number) panic("ufs_rename: same file"); /* * If the parent directory is "sticky", then the caller @@ -1303,7 +1390,7 @@ abortit: * destination of the rename. This implements append-only * directories. 
*/ - if ((dp->i_mode & S_ISTXT) && + if ((tdp->i_mode & S_ISTXT) && VOP_ACCESS(tdvp, VADMIN, tcnp->cn_cred, td) && VOP_ACCESS(tvp, VADMIN, tcnp->cn_cred, td)) { error = EPERM; @@ -1314,9 +1401,9 @@ abortit: * to it. Also, ensure source and target are compatible * (both directories, or both not directories). */ - if ((xp->i_mode&IFMT) == IFDIR) { - if ((xp->i_effnlink > 2) || - !ufs_dirempty(xp, dp->i_number, tcnp->cn_cred)) { + if ((tip->i_mode & IFMT) == IFDIR) { + if ((tip->i_effnlink > 2) || + !ufs_dirempty(tip, tdp->i_number, tcnp->cn_cred)) { error = ENOTEMPTY; goto bad; } @@ -1329,20 +1416,30 @@ abortit: error = EISDIR; goto bad; } - error = ufs_dirrewrite(dp, xp, ip->i_number, - IFTODT(ip->i_mode), - (doingdirectory && newparent) ? newparent : doingdirectory); - if (error) - goto bad; if (doingdirectory) { if (!newparent) { - dp->i_effnlink--; + tdp->i_effnlink--; if (DOINGSOFTDEP(tdvp)) - softdep_change_linkcnt(dp); + softdep_change_linkcnt(tdp); } - xp->i_effnlink--; + tip->i_effnlink--; if (DOINGSOFTDEP(tvp)) - softdep_change_linkcnt(xp); + softdep_change_linkcnt(tip); + } + error = ufs_dirrewrite(tdp, tip, fip->i_number, + IFTODT(fip->i_mode), + (doingdirectory && newparent) ? newparent : doingdirectory); + if (error) { + if (doingdirectory) { + if (!newparent) { + tdp->i_effnlink++; + if (DOINGSOFTDEP(tdvp)) + softdep_change_linkcnt(tdp); + } + tip->i_effnlink++; + if (DOINGSOFTDEP(tvp)) + softdep_change_linkcnt(tip); + } } if (doingdirectory && !DOINGSOFTDEP(tvp)) { /* @@ -1357,115 +1454,107 @@ abortit: * them now. 
*/ if (!newparent) { - dp->i_nlink--; - DIP_SET(dp, i_nlink, dp->i_nlink); - dp->i_flag |= IN_CHANGE; + tdp->i_nlink--; + DIP_SET(tdp, i_nlink, tdp->i_nlink); + tdp->i_flag |= IN_CHANGE; } - xp->i_nlink--; - DIP_SET(xp, i_nlink, xp->i_nlink); - xp->i_flag |= IN_CHANGE; + tip->i_nlink--; + DIP_SET(tip, i_nlink, tip->i_nlink); + tip->i_flag |= IN_CHANGE; ioflag = IO_NORMAL; if (!DOINGASYNC(tvp)) ioflag |= IO_SYNC; + /* Don't go to bad here as the new link exists. */ if ((error = UFS_TRUNCATE(tvp, (off_t)0, ioflag, tcnp->cn_cred, tcnp->cn_thread)) != 0) - goto bad; + goto unlockout; } - vput(tdvp); - vput(tvp); - xp = NULL; } /* - * 3) Unlink the source. + * 3) Unlink the source. We have to resolve the path again to + * fixup the directory offset and count for ufs_dirremove. */ - fcnp->cn_flags &= ~MODMASK; - fcnp->cn_flags |= LOCKPARENT | LOCKLEAF; - if ((fcnp->cn_flags & SAVESTART) == 0) - panic("ufs_rename: lost from startdir"); - VREF(fdvp); - error = relookup(fdvp, &fvp, fcnp); - if (error == 0) - vrele(fdvp); - if (fvp != NULL) { - xp = VTOI(fvp); - dp = VTOI(fdvp); - } else { - /* - * From name has disappeared. IN_RENAME is not sufficient - * to protect against directory races due to timing windows, - * so we have to remove the panic. XXX the only real way - * to solve this issue is at a much higher level. By the - * time we hit ufs_rename() it's too late. - */ -#if 0 - if (doingdirectory) - panic("ufs_rename: lost dir entry"); -#endif - vrele(ap->a_fvp); - return (0); + if (fdvp == tdvp) { + error = ufs_lookup_ino(fdvp, NULL, fcnp, &ino); + if (error) + panic("ufs_rename: from entry went away!"); + if (ino != fip->i_number) + panic("ufs_rename: ino mismatch %d != %d\n", ino, + fip->i_number); } /* - * Ensure that the directory entry still exists and has not - * changed while the new name has been entered. If the source is - * a file then the entry may have been unlinked or renamed. In - * either case there is no further work to be done. 
If the source - * is a directory then it cannot have been rmdir'ed; the IN_RENAME - * flag ensures that it cannot be moved by another rename or removed - * by a rmdir. + * If the source is a directory with a + * new parent, the link count of the old + * parent directory must be decremented + * and ".." set to point to the new parent. */ - if (xp != ip) { + if (doingdirectory && newparent) { /* - * From name resolves to a different inode. IN_RENAME is - * not sufficient protection against timing window races - * so we can't panic here. XXX the only real way - * to solve this issue is at a much higher level. By the - * time we hit ufs_rename() it's too late. + * If tip exists we simply use its link, otherwise we must + * add a new one. */ -#if 0 - if (doingdirectory) - panic("ufs_rename: lost dir entry"); -#endif - } else { - /* - * If the source is a directory with a - * new parent, the link count of the old - * parent directory must be decremented - * and ".." set to point to the new parent. - */ - if (doingdirectory && newparent) { - xp->i_offset = mastertemplate.dot_reclen; - ufs_dirrewrite(xp, dp, newparent, DT_DIR, 0); - cache_purge(fdvp); + if (tip == NULL) { + tdp->i_effnlink++; + tdp->i_nlink++; + DIP_SET(tdp, i_nlink, tdp->i_nlink); + tdp->i_flag |= IN_CHANGE; + if (DOINGSOFTDEP(tdvp)) + softdep_setup_dotdot_link(tdp, fip); + error = UFS_UPDATE(tdvp, !(DOINGSOFTDEP(tdvp) | + DOINGASYNC(tdvp))); + /* Don't go to bad here as the new link exists. */ + if (error) + goto unlockout; } - error = ufs_dirremove(fdvp, xp, fcnp->cn_flags, 0); - xp->i_flag &= ~IN_RENAME; + fip->i_offset = mastertemplate.dot_reclen; + ufs_dirrewrite(fip, fdp, newparent, DT_DIR, 0); + cache_purge(fdvp); } - if (dp) - vput(fdvp); - if (xp) - vput(fvp); - vrele(ap->a_fvp); + error = ufs_dirremove(fdvp, fip, fcnp->cn_flags, 0); + +unlockout: + vput(fdvp); + vput(fvp); + if (tvp) + vput(tvp); + /* + * If compaction or fsync was requested do it now that other locks + * are no longer needed. 
+ */ + if (error == 0 && endoff != 0) { +#ifdef UFS_DIRHASH + if (tdp->i_dirhash != NULL) + ufsdirhash_dirtrunc(tdp, endoff); +#endif + UFS_TRUNCATE(tdvp, endoff, IO_NORMAL | IO_SYNC, tcnp->cn_cred, + td); + } + if (error == 0 && tdp->i_flag & IN_NEEDSYNC) + error = VOP_FSYNC(tdvp, MNT_WAIT, td); + vput(tdvp); + if (mp) + vfs_unbusy(mp); return (error); bad: - if (xp) - vput(ITOV(xp)); - vput(ITOV(dp)); -out: - if (doingdirectory) - ip->i_flag &= ~IN_RENAME; - if (vn_lock(fvp, LK_EXCLUSIVE) == 0) { - ip->i_effnlink--; - ip->i_nlink--; - DIP_SET(ip, i_nlink, ip->i_nlink); - ip->i_flag |= IN_CHANGE; - ip->i_flag &= ~IN_RENAME; - if (DOINGSOFTDEP(fvp)) - softdep_change_linkcnt(ip); - vput(fvp); - } else - vrele(fvp); + fip->i_effnlink--; + fip->i_nlink--; + DIP_SET(fip, i_nlink, fip->i_nlink); + fip->i_flag |= IN_CHANGE; + if (DOINGSOFTDEP(fvp)) + softdep_revert_link(tdp, fip); + goto unlockout; + +releout: + vrele(fdvp); + vrele(fvp); + vrele(tdvp); + if (tvp) + vrele(tvp); + if (mp) + vfs_unbusy(mp); + return (error); } @@ -1767,8 +1856,7 @@ ufs_mkdir(ap) ip->i_effnlink = 2; ip->i_nlink = 2; DIP_SET(ip, i_nlink, 2); - if (DOINGSOFTDEP(tvp)) - softdep_change_linkcnt(ip); + if (cnp->cn_flags & ISWHITEOUT) { ip->i_flags |= UF_OPAQUE; DIP_SET(ip, i_flags, ip->i_flags); @@ -1784,8 +1872,8 @@ ufs_mkdir(ap) DIP_SET(dp, i_nlink, dp->i_nlink); dp->i_flag |= IN_CHANGE; if (DOINGSOFTDEP(dvp)) - softdep_change_linkcnt(dp); - error = UFS_UPDATE(tvp, !(DOINGSOFTDEP(dvp) | DOINGASYNC(dvp))); + softdep_setup_mkdir(dp, ip); + error = UFS_UPDATE(dvp, !(DOINGSOFTDEP(dvp) | DOINGASYNC(dvp))); if (error) goto bad; #ifdef MAC @@ -1863,7 +1951,7 @@ ufs_mkdir(ap) else if (!DOINGSOFTDEP(dvp) && ((error = bwrite(bp)))) goto bad; ufs_makedirentry(ip, cnp, &newdir); - error = ufs_direnter(dvp, tvp, &newdir, cnp, bp); + error = ufs_direnter(dvp, tvp, &newdir, cnp, bp, 0); bad: if (error == 0) { @@ -1873,8 +1961,6 @@ bad: dp->i_nlink--; DIP_SET(dp, i_nlink, dp->i_nlink); dp->i_flag |= IN_CHANGE; 
- if (DOINGSOFTDEP(dvp)) - softdep_change_linkcnt(dp); /* * No need to do an explicit VOP_TRUNCATE here, vrele will * do this for us because we set the link count to 0. @@ -1884,7 +1970,8 @@ bad: DIP_SET(ip, i_nlink, 0); ip->i_flag |= IN_CHANGE; if (DOINGSOFTDEP(tvp)) - softdep_change_linkcnt(ip); + softdep_revert_mkdir(dp, ip); + vput(tvp); } out: @@ -1920,10 +2007,13 @@ ufs_rmdir(ap) * tries to remove a locally mounted on directory). */ error = 0; - if ((ip->i_flag & IN_RENAME) || ip->i_effnlink < 2) { + if (ip->i_effnlink < 2) { error = EINVAL; goto out; } + if (dp->i_effnlink < 3) + panic("ufs_dirrem: Bad link count %d on parent", + dp->i_effnlink); if (!ufs_dirempty(ip, dp->i_number, cnp->cn_cred)) { error = ENOTEMPTY; goto out; @@ -1947,18 +2037,14 @@ ufs_rmdir(ap) */ dp->i_effnlink--; ip->i_effnlink--; - if (DOINGSOFTDEP(vp)) { - softdep_change_linkcnt(dp); - softdep_change_linkcnt(ip); - } + if (DOINGSOFTDEP(vp)) + softdep_setup_rmdir(dp, ip); error = ufs_dirremove(dvp, ip, cnp->cn_flags, 1); if (error) { dp->i_effnlink++; ip->i_effnlink++; - if (DOINGSOFTDEP(vp)) { - softdep_change_linkcnt(dp); - softdep_change_linkcnt(ip); - } + if (DOINGSOFTDEP(vp)) + softdep_revert_rmdir(dp, ip); goto out; } cache_purge(dvp); @@ -2464,6 +2550,9 @@ ufs_makeinode(mode, dvp, vpp, cnp) if ((mode & IFMT) == 0) mode |= IFREG; + if (VTOI(dvp)->i_effnlink < 2) + panic("ufs_makeinode: Bad link count %d on parent", + VTOI(dvp)->i_effnlink); error = UFS_VALLOC(dvp, mode, cnp->cn_cred, &tvp); if (error) return (error); @@ -2539,7 +2628,7 @@ ufs_makeinode(mode, dvp, vpp, cnp) ip->i_nlink = 1; DIP_SET(ip, i_nlink, 1); if (DOINGSOFTDEP(tvp)) - softdep_change_linkcnt(ip); + softdep_setup_create(VTOI(dvp), ip); if ((ip->i_mode & ISGID) && !groupmember(ip->i_gid, cnp->cn_cred) && priv_check_cred(cnp->cn_cred, PRIV_VFS_SETGID, 0)) { ip->i_mode &= ~ISGID; @@ -2579,7 +2668,7 @@ ufs_makeinode(mode, dvp, vpp, cnp) } #endif /* !UFS_ACL */ ufs_makedirentry(ip, cnp, &newdir); - error = 
ufs_direnter(dvp, tvp, &newdir, cnp, NULL); + error = ufs_direnter(dvp, tvp, &newdir, cnp, NULL, 0); if (error) goto bad; *vpp = tvp; @@ -2595,7 +2684,7 @@ bad: DIP_SET(ip, i_nlink, 0); ip->i_flag |= IN_CHANGE; if (DOINGSOFTDEP(tvp)) - softdep_change_linkcnt(ip); + softdep_revert_create(VTOI(dvp), ip); vput(tvp); return (error); } diff --git a/sys/ufs/ufs/ufsmount.h b/sys/ufs/ufs/ufsmount.h index 126867baa2e..d5669179dac 100644 --- a/sys/ufs/ufs/ufsmount.h +++ b/sys/ufs/ufs/ufsmount.h @@ -57,6 +57,10 @@ struct ucred; struct uio; struct vnode; struct ufs_extattr_per_mount; +struct jblocks; +struct inodedep; + +TAILQ_HEAD(inodedeplst, inodedep); /* This structure describes the UFS specific mount structure data. */ struct ufsmount { @@ -75,6 +79,11 @@ struct ufsmount { long um_numindirdeps; /* outstanding indirdeps */ struct workhead softdep_workitem_pending; /* softdep work queue */ struct worklist *softdep_worklist_tail; /* Tail pointer for above */ + struct workhead softdep_journal_pending; /* journal work queue */ + struct worklist *softdep_journal_tail; /* Tail pointer for above */ + struct jblocks *softdep_jblocks; /* Journal block information */ + struct inodedeplst softdep_unlinked; /* Unlinked inodes */ + int softdep_on_journal; /* Items on the journal list */ int softdep_on_worklist; /* Items on the worklist */ int softdep_on_worklist_inprogress; /* Busy items on worklist */ int softdep_deps; /* Total dependency count */ @@ -120,6 +129,7 @@ struct ufsmount { */ #define QTF_OPENING 0x01 /* Q_QUOTAON in progress */ #define QTF_CLOSING 0x02 /* Q_QUOTAOFF in progress */ +#define QTF_64BIT 0x04 /* 64-bit quota file */ /* Convert mount ptr to ufsmount ptr. 
*/ #define VFSTOUFS(mp) ((struct ufsmount *)((mp)->mnt_data)) diff --git a/sys/vm/device_pager.c b/sys/vm/device_pager.c index 9002e775bb9..7d8d3611b37 100644 --- a/sys/vm/device_pager.c +++ b/sys/vm/device_pager.c @@ -251,12 +251,14 @@ dev_pager_getpages(object, m, count, reqpage) VM_OBJECT_LOCK(object); dev_pager_updatefake(page, paddr, memattr); if (count > 1) { - vm_page_lock_queues(); + for (i = 0; i < count; i++) { - if (i != reqpage) + if (i != reqpage) { + vm_page_lock(m[i]); vm_page_free(m[i]); + vm_page_unlock(m[i]); + } } - vm_page_unlock_queues(); } } else { /* @@ -266,10 +268,11 @@ dev_pager_getpages(object, m, count, reqpage) page = dev_pager_getfake(paddr, memattr); VM_OBJECT_LOCK(object); TAILQ_INSERT_TAIL(&object->un_pager.devp.devp_pglist, page, pageq); - vm_page_lock_queues(); - for (i = 0; i < count; i++) + for (i = 0; i < count; i++) { + vm_page_lock(m[i]); vm_page_free(m[i]); - vm_page_unlock_queues(); + vm_page_unlock(m[i]); + } vm_page_insert(page, object, offset); m[reqpage] = page; } diff --git a/sys/vm/phys_pager.c b/sys/vm/phys_pager.c index 42cdab3ebec..97674e35cf7 100644 --- a/sys/vm/phys_pager.c +++ b/sys/vm/phys_pager.c @@ -152,10 +152,10 @@ phys_pager_getpages(vm_object_t object, vm_page_t *m, int count, int reqpage) KASSERT(m[i]->dirty == 0, ("phys_pager_getpages: dirty page %p", m[i])); /* The requested page must remain busy, the others not. 
*/ - if (reqpage != i) { - m[i]->oflags &= ~VPO_BUSY; - m[i]->busy = 0; - } + if (i == reqpage) + vm_page_flash(m[i]); + else + vm_page_wakeup(m[i]); } return (VM_PAGER_OK); } diff --git a/sys/vm/pmap.h b/sys/vm/pmap.h index 02fda073aea..e4d8e81ec1b 100644 --- a/sys/vm/pmap.h +++ b/sys/vm/pmap.h @@ -98,6 +98,9 @@ extern vm_offset_t kernel_vm_end; void pmap_align_superpage(vm_object_t, vm_ooffset_t, vm_offset_t *, vm_size_t); +#if defined(__mips__) +void pmap_align_tlb(vm_offset_t *); +#endif void pmap_change_wiring(pmap_t, vm_offset_t, boolean_t); void pmap_clear_modify(vm_page_t m); void pmap_clear_reference(vm_page_t m); @@ -116,6 +119,7 @@ void pmap_growkernel(vm_offset_t); void pmap_init(void); boolean_t pmap_is_modified(vm_page_t m); boolean_t pmap_is_prefaultable(pmap_t pmap, vm_offset_t va); +boolean_t pmap_is_referenced(vm_page_t m); boolean_t pmap_ts_referenced(vm_page_t m); vm_offset_t pmap_map(vm_offset_t *, vm_paddr_t, vm_paddr_t, int); void pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, diff --git a/sys/vm/sg_pager.c b/sys/vm/sg_pager.c index a17fe82c2ef..a7b373647a5 100644 --- a/sys/vm/sg_pager.c +++ b/sys/vm/sg_pager.c @@ -198,10 +198,11 @@ sg_pager_getpages(vm_object_t object, vm_page_t *m, int count, int reqpage) TAILQ_INSERT_TAIL(&object->un_pager.sgp.sgp_pglist, page, pageq); /* Free the original pages and insert this fake page into the object. 
*/ - vm_page_lock_queues(); - for (i = 0; i < count; i++) + for (i = 0; i < count; i++) { + vm_page_lock(m[i]); vm_page_free(m[i]); - vm_page_unlock_queues(); + vm_page_unlock(m[i]); + } vm_page_insert(page, object, offset); m[reqpage] = page; page->valid = VM_PAGE_BITS_ALL; diff --git a/sys/vm/swap_pager.c b/sys/vm/swap_pager.c index ef64b31ae09..2f97ee8e122 100644 --- a/sys/vm/swap_pager.c +++ b/sys/vm/swap_pager.c @@ -378,6 +378,14 @@ static void swp_pager_meta_free(vm_object_t, vm_pindex_t, daddr_t); static void swp_pager_meta_free_all(vm_object_t); static daddr_t swp_pager_meta_ctl(vm_object_t, vm_pindex_t, int); +static void +swp_pager_free_nrpage(vm_page_t m) +{ + + if (m->wire_count == 0) + vm_page_free(m); +} + /* * SWP_SIZECHECK() - update swap_pager_full indication * @@ -1101,8 +1109,7 @@ swap_pager_getpages(vm_object_t object, vm_page_t *m, int count, int reqpage) * happen. Note that blk, iblk & jblk can be SWAPBLK_NONE, but the * loops are set up such that the case(s) are handled implicitly. * - * The swp_*() calls must be made at splvm(). vm_page_free() does - * not need to be, but it will go a little faster if it is. + * The swp_*() calls must be made with the object locked. 
*/ blk = swp_pager_meta_ctl(mreq->object, mreq->pindex, 0); @@ -1130,12 +1137,17 @@ swap_pager_getpages(vm_object_t object, vm_page_t *m, int count, int reqpage) if (0 < i || j < count) { int k; - vm_page_lock_queues(); - for (k = 0; k < i; ++k) - vm_page_free(m[k]); - for (k = j; k < count; ++k) - vm_page_free(m[k]); - vm_page_unlock_queues(); + + for (k = 0; k < i; ++k) { + vm_page_lock(m[k]); + swp_pager_free_nrpage(m[k]); + vm_page_unlock(m[k]); + } + for (k = j; k < count; ++k) { + vm_page_lock(m[k]); + swp_pager_free_nrpage(m[k]); + vm_page_unlock(m[k]); + } } /* @@ -1212,9 +1224,6 @@ swap_pager_getpages(vm_object_t object, vm_page_t *m, int count, int reqpage) VM_OBJECT_LOCK(object); while ((mreq->oflags & VPO_SWAPINPROG) != 0) { mreq->oflags |= VPO_WANTED; - vm_page_lock_queues(); - vm_page_flag_set(mreq, PG_REFERENCED); - vm_page_unlock_queues(); PCPU_INC(cnt.v_intrans); if (msleep(mreq, VM_OBJECT_MTX(object), PSWP, "swread", hz*20)) { printf( @@ -1493,7 +1502,7 @@ swp_pager_async_iodone(struct buf *bp) object = bp->b_pages[0]->object; VM_OBJECT_LOCK(object); } - vm_page_lock_queues(); + /* * cleanup pages. If an error occurs writing to swap, we are in * very serious trouble. If it happens to be a disk error, though, @@ -1505,6 +1514,8 @@ swp_pager_async_iodone(struct buf *bp) for (i = 0; i < bp->b_npages; ++i) { vm_page_t m = bp->b_pages[i]; + vm_page_lock(m); + vm_page_lock_queues(); m->oflags &= ~VPO_SWAPINPROG; if (bp->b_ioflags & BIO_ERROR) { @@ -1533,7 +1544,7 @@ swp_pager_async_iodone(struct buf *bp) */ m->valid = 0; if (i != bp->b_pager.pg_reqpage) - vm_page_free(m); + swp_pager_free_nrpage(m); else vm_page_flash(m); /* @@ -1601,8 +1612,9 @@ swp_pager_async_iodone(struct buf *bp) if (vm_page_count_severe()) vm_page_try_to_cache(m); } + vm_page_unlock_queues(); + vm_page_unlock(m); } - vm_page_unlock_queues(); /* * adjust pip. 
NOTE: the original parent may still have its own @@ -1698,10 +1710,12 @@ swp_pager_force_pagein(vm_object_t object, vm_pindex_t pindex) m = vm_page_grab(object, pindex, VM_ALLOC_NORMAL|VM_ALLOC_RETRY); if (m->valid == VM_PAGE_BITS_ALL) { vm_object_pip_subtract(object, 1); + vm_page_lock(m); vm_page_lock_queues(); vm_page_activate(m); vm_page_dirty(m); vm_page_unlock_queues(); + vm_page_unlock(m); vm_page_wakeup(m); vm_pager_page_unswapped(m); return; @@ -1710,10 +1724,12 @@ swp_pager_force_pagein(vm_object_t object, vm_pindex_t pindex) if (swap_pager_getpages(object, &m, 1, 0) != VM_PAGER_OK) panic("swap_pager_force_pagein: read from swap failed");/*XXX*/ vm_object_pip_subtract(object, 1); + vm_page_lock(m); vm_page_lock_queues(); vm_page_dirty(m); vm_page_dontneed(m); vm_page_unlock_queues(); + vm_page_unlock(m); vm_page_wakeup(m); vm_pager_page_unswapped(m); } diff --git a/sys/vm/uma_core.c b/sys/vm/uma_core.c index a2d5633bd51..e1b9a08109d 100644 --- a/sys/vm/uma_core.c +++ b/sys/vm/uma_core.c @@ -1022,10 +1022,8 @@ obj_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait) while (pages != startpages) { pages--; p = TAILQ_LAST(&object->memq, pglist); - vm_page_lock_queues(); vm_page_unwire(p, 0); vm_page_free(p); - vm_page_unlock_queues(); } retkva = 0; goto done; @@ -2891,13 +2889,11 @@ uma_zone_set_obj(uma_zone_t zone, struct vm_object *obj, int count) if (kva == 0) return (0); - if (obj == NULL) { - obj = vm_object_allocate(OBJT_DEFAULT, - pages); - } else { + if (obj == NULL) + obj = vm_object_allocate(OBJT_PHYS, pages); + else { VM_OBJECT_LOCK_INIT(obj, "uma object"); - _vm_object_allocate(OBJT_DEFAULT, - pages, obj); + _vm_object_allocate(OBJT_PHYS, pages, obj); } ZONE_LOCK(zone); keg->uk_kva = kva; diff --git a/sys/vm/vm_contig.c b/sys/vm/vm_contig.c index 78d7e280836..1286c170d1c 100644 --- a/sys/vm/vm_contig.c +++ b/sys/vm/vm_contig.c @@ -99,9 +99,11 @@ vm_contig_launder_page(vm_page_t m, vm_page_t *next) int vfslocked; 
mtx_assert(&vm_page_queue_mtx, MA_OWNED); + vm_page_lock_assert(m, MA_OWNED); object = m->object; if (!VM_OBJECT_TRYLOCK(object) && !vm_pageout_fallback_object_lock(m, next)) { + vm_page_unlock(m); VM_OBJECT_UNLOCK(object); return (EAGAIN); } @@ -113,7 +115,8 @@ vm_contig_launder_page(vm_page_t m, vm_page_t *next) vm_page_test_dirty(m); if (m->dirty == 0 && m->hold_count == 0) pmap_remove_all(m); - if (m->dirty) { + if (m->dirty != 0) { + vm_page_unlock(m); if ((object->flags & OBJ_DEAD) != 0) { VM_OBJECT_UNLOCK(object); return (EAGAIN); @@ -137,13 +140,18 @@ vm_contig_launder_page(vm_page_t m, vm_page_t *next) return (0); } else if (object->type == OBJT_SWAP || object->type == OBJT_DEFAULT) { + vm_page_unlock_queues(); m_tmp = m; vm_pageout_flush(&m_tmp, 1, VM_PAGER_PUT_SYNC); VM_OBJECT_UNLOCK(object); + vm_page_lock_queues(); return (0); } - } else if (m->hold_count == 0) - vm_page_cache(m); + } else { + if (m->hold_count == 0) + vm_page_cache(m); + vm_page_unlock(m); + } VM_OBJECT_UNLOCK(object); return (0); } @@ -160,9 +168,14 @@ vm_contig_launder(int queue) if ((m->flags & PG_MARKER) != 0) continue; + if (!vm_pageout_page_lock(m, &next)) { + vm_page_unlock(m); + return (FALSE); + } KASSERT(VM_PAGE_INQUEUE2(m, queue), ("vm_contig_launder: page %p's queue is not %d", m, queue)); error = vm_contig_launder_page(m, &next); + vm_page_lock_assert(m, MA_NOTOWNED); if (error == 0) return (TRUE); if (error == EBUSY) @@ -257,9 +270,7 @@ retry: i -= PAGE_SIZE; m = vm_page_lookup(object, OFF_TO_IDX(offset + i)); - vm_page_lock_queues(); vm_page_free(m); - vm_page_unlock_queues(); } VM_OBJECT_UNLOCK(object); vm_map_delete(map, addr, addr + size); diff --git a/sys/vm/vm_extern.h b/sys/vm/vm_extern.h index 83c468e0801..ca6d49c18f2 100644 --- a/sys/vm/vm_extern.h +++ b/sys/vm/vm_extern.h @@ -47,6 +47,7 @@ vm_offset_t kmem_alloc_contig(vm_map_t map, vm_size_t size, int flags, vm_paddr_t low, vm_paddr_t high, unsigned long alignment, unsigned long boundary, vm_memattr_t 
memattr); vm_offset_t kmem_alloc_nofault(vm_map_t, vm_size_t); +vm_offset_t kmem_alloc_nofault_space(vm_map_t, vm_size_t, int); vm_offset_t kmem_alloc_wait(vm_map_t, vm_size_t); void kmem_free(vm_map_t, vm_offset_t, vm_size_t); void kmem_free_wakeup(vm_map_t, vm_offset_t, vm_size_t); diff --git a/sys/vm/vm_fault.c b/sys/vm/vm_fault.c index f4098567887..05c3228643e 100644 --- a/sys/vm/vm_fault.c +++ b/sys/vm/vm_fault.c @@ -137,9 +137,9 @@ release_page(struct faultstate *fs) { vm_page_wakeup(fs->m); - vm_page_lock_queues(); + vm_page_lock(fs->m); vm_page_deactivate(fs->m); - vm_page_unlock_queues(); + vm_page_unlock(fs->m); fs->m = NULL; } @@ -161,9 +161,9 @@ unlock_and_deallocate(struct faultstate *fs) VM_OBJECT_UNLOCK(fs->object); if (fs->object != fs->first_object) { VM_OBJECT_LOCK(fs->first_object); - vm_page_lock_queues(); + vm_page_lock(fs->first_m); vm_page_free(fs->first_m); - vm_page_unlock_queues(); + vm_page_unlock(fs->first_m); vm_object_pip_wakeup(fs->first_object); VM_OBJECT_UNLOCK(fs->first_object); fs->first_m = NULL; @@ -211,7 +211,7 @@ vm_fault(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type, { vm_prot_t prot; int is_first_object_locked, result; - boolean_t are_queues_locked, growstack, wired; + boolean_t growstack, wired; int map_generation; vm_object_t next_object; vm_page_t marray[VM_FAULT_READ]; @@ -305,12 +305,12 @@ RetryFault:; * removes the page from the backing object, * which is not what we want. */ + vm_page_lock(fs.m); vm_page_lock_queues(); if ((fs.m->cow) && (fault_type & VM_PROT_WRITE) && (fs.object == fs.first_object)) { vm_page_cowfault(fs.m); - vm_page_unlock_queues(); unlock_and_deallocate(&fs); goto RetryFault; } @@ -332,13 +332,20 @@ RetryFault:; * to pmap it. */ if ((fs.m->oflags & VPO_BUSY) || fs.m->busy) { + /* + * Reference the page before unlocking and + * sleeping so that the page daemon is less + * likely to reclaim it. 
+ */ + vm_page_flag_set(fs.m, PG_REFERENCED); vm_page_unlock_queues(); + vm_page_unlock(fs.m); VM_OBJECT_UNLOCK(fs.object); if (fs.object != fs.first_object) { VM_OBJECT_LOCK(fs.first_object); - vm_page_lock_queues(); + vm_page_lock(fs.first_m); vm_page_free(fs.first_m); - vm_page_unlock_queues(); + vm_page_unlock(fs.first_m); vm_object_pip_wakeup(fs.first_object); VM_OBJECT_UNLOCK(fs.first_object); fs.first_m = NULL; @@ -358,6 +365,7 @@ RetryFault:; } vm_pageq_remove(fs.m); vm_page_unlock_queues(); + vm_page_unlock(fs.m); /* * Mark page busy for other processes, and the @@ -462,7 +470,6 @@ readrest: else firstpindex = fs.first_pindex - 2 * VM_FAULT_READ; - are_queues_locked = FALSE; /* * note: partially valid pages cannot be * included in the lookahead - NFS piecemeal @@ -479,22 +486,23 @@ readrest: if (mt->busy || (mt->oflags & VPO_BUSY)) continue; - if (!are_queues_locked) { - are_queues_locked = TRUE; - vm_page_lock_queues(); - } + vm_page_lock(mt); + vm_page_lock_queues(); if (mt->hold_count || - mt->wire_count) + mt->wire_count) { + vm_page_unlock_queues(); + vm_page_unlock(mt); continue; + } pmap_remove_all(mt); if (mt->dirty) { vm_page_deactivate(mt); } else { vm_page_cache(mt); } - } - if (are_queues_locked) vm_page_unlock_queues(); + vm_page_unlock(mt); + } ahead += behind; behind = 0; } @@ -623,17 +631,17 @@ vnode_locked: */ if (((fs.map != kernel_map) && (rv == VM_PAGER_ERROR)) || (rv == VM_PAGER_BAD)) { - vm_page_lock_queues(); + vm_page_lock(fs.m); vm_page_free(fs.m); - vm_page_unlock_queues(); + vm_page_unlock(fs.m); fs.m = NULL; unlock_and_deallocate(&fs); return ((rv == VM_PAGER_ERROR) ? 
KERN_FAILURE : KERN_PROTECTION_FAILURE); } if (fs.object != fs.first_object) { - vm_page_lock_queues(); + vm_page_lock(fs.m); vm_page_free(fs.m); - vm_page_unlock_queues(); + vm_page_unlock(fs.m); fs.m = NULL; /* * XXX - we cannot just fall out at this @@ -746,18 +754,20 @@ vnode_locked: * We don't chase down the shadow chain */ fs.object == fs.first_object->backing_object) { - vm_page_lock_queues(); /* * get rid of the unnecessary page */ + vm_page_lock(fs.first_m); vm_page_free(fs.first_m); + vm_page_unlock(fs.first_m); /* * grab the page and put it into the * process'es object. The page is * automatically made dirty. */ + vm_page_lock(fs.m); vm_page_rename(fs.m, fs.first_object, fs.first_pindex); - vm_page_unlock_queues(); + vm_page_unlock(fs.m); vm_page_busy(fs.m); fs.first_m = fs.m; fs.m = NULL; @@ -770,10 +780,13 @@ vnode_locked: fs.first_m->valid = VM_PAGE_BITS_ALL; if (wired && (fault_flags & VM_FAULT_CHANGE_WIRING) == 0) { - vm_page_lock_queues(); + vm_page_lock(fs.first_m); vm_page_wire(fs.first_m); + vm_page_unlock(fs.first_m); + + vm_page_lock(fs.m); vm_page_unwire(fs.m, FALSE); - vm_page_unlock_queues(); + vm_page_unlock(fs.m); } /* * We no longer need the old page or object. 
@@ -923,8 +936,7 @@ vnode_locked: if ((fault_flags & VM_FAULT_CHANGE_WIRING) == 0 && wired == 0) vm_fault_prefault(fs.map->pmap, vaddr, fs.entry); VM_OBJECT_LOCK(fs.object); - vm_page_lock_queues(); - vm_page_flag_set(fs.m, PG_REFERENCED); + vm_page_lock(fs.m); /* * If the page is not wired down, then put it where the pageout daemon @@ -935,10 +947,9 @@ vnode_locked: vm_page_wire(fs.m); else vm_page_unwire(fs.m, 1); - } else { + } else vm_page_activate(fs.m); - } - vm_page_unlock_queues(); + vm_page_unlock(fs.m); vm_page_wakeup(fs.m); /* @@ -1015,9 +1026,11 @@ vm_fault_prefault(pmap_t pmap, vm_offset_t addra, vm_map_entry_t entry) } if (m->valid == VM_PAGE_BITS_ALL && (m->flags & PG_FICTITIOUS) == 0) { + vm_page_lock(m); vm_page_lock_queues(); pmap_enter_quick(pmap, addr, m, entry->protection); vm_page_unlock_queues(); + vm_page_unlock(m); } VM_OBJECT_UNLOCK(lobject); } @@ -1080,6 +1093,7 @@ vm_fault_unwire(vm_map_t map, vm_offset_t start, vm_offset_t end, { vm_paddr_t pa; vm_offset_t va; + vm_page_t m; pmap_t pmap; pmap = vm_map_pmap(map); @@ -1093,9 +1107,10 @@ vm_fault_unwire(vm_map_t map, vm_offset_t start, vm_offset_t end, if (pa != 0) { pmap_change_wiring(pmap, va, FALSE); if (!fictitious) { - vm_page_lock_queues(); - vm_page_unwire(PHYS_TO_VM_PAGE(pa), 1); - vm_page_unlock_queues(); + m = PHYS_TO_VM_PAGE(pa); + vm_page_lock(m); + vm_page_unwire(m, TRUE); + vm_page_unlock(m); } } } @@ -1238,13 +1253,20 @@ vm_fault_copy_entry(vm_map_t dst_map, vm_map_t src_map, * Mark it no longer busy, and put it on the active list. 
*/ VM_OBJECT_LOCK(dst_object); - vm_page_lock_queues(); + if (upgrade) { + vm_page_lock(src_m); vm_page_unwire(src_m, 0); + vm_page_unlock(src_m); + + vm_page_lock(dst_m); vm_page_wire(dst_m); - } else + vm_page_unlock(dst_m); + } else { + vm_page_lock(dst_m); vm_page_activate(dst_m); - vm_page_unlock_queues(); + vm_page_unlock(dst_m); + } vm_page_wakeup(dst_m); } VM_OBJECT_UNLOCK(dst_object); diff --git a/sys/vm/vm_glue.c b/sys/vm/vm_glue.c index c080ca04c6c..0b9d92a1df1 100644 --- a/sys/vm/vm_glue.c +++ b/sys/vm/vm_glue.c @@ -257,16 +257,16 @@ vm_imgact_hold_page(vm_object_t object, vm_ooffset_t offset) if (m == NULL) goto out; if (rv != VM_PAGER_OK) { - vm_page_lock_queues(); + vm_page_lock(m); vm_page_free(m); - vm_page_unlock_queues(); + vm_page_unlock(m); m = NULL; goto out; } } - vm_page_lock_queues(); + vm_page_lock(m); vm_page_hold(m); - vm_page_unlock_queues(); + vm_page_unlock(m); vm_page_wakeup(m); out: VM_OBJECT_UNLOCK(object); @@ -300,9 +300,9 @@ vm_imgact_unmap_page(struct sf_buf *sf) m = sf_buf_page(sf); sf_buf_free(sf); sched_unpin(); - vm_page_lock_queues(); + vm_page_lock(m); vm_page_unhold(m); - vm_page_unlock_queues(); + vm_page_unlock(m); } void @@ -373,8 +373,17 @@ vm_thread_new(struct thread *td, int pages) /* * Get a kernel virtual address for this thread's kstack. */ +#if defined(__mips__) + /* + * We need to align the kstack's mapped address to fit within + * a single TLB entry. 
+ */ + ks = kmem_alloc_nofault_space(kernel_map, + (pages + KSTACK_GUARD_PAGES) * PAGE_SIZE, VMFS_TLB_ALIGNED_SPACE); +#else ks = kmem_alloc_nofault(kernel_map, (pages + KSTACK_GUARD_PAGES) * PAGE_SIZE); +#endif if (ks == 0) { printf("vm_thread_new: kstack allocation failed\n"); vm_object_deallocate(ksobj); @@ -425,10 +434,10 @@ vm_thread_stack_dispose(vm_object_t ksobj, vm_offset_t ks, int pages) m = vm_page_lookup(ksobj, i); if (m == NULL) panic("vm_thread_dispose: kstack already missing?"); - vm_page_lock_queues(); + vm_page_lock(m); vm_page_unwire(m, 0); vm_page_free(m); - vm_page_unlock_queues(); + vm_page_unlock(m); } VM_OBJECT_UNLOCK(ksobj); vm_object_deallocate(ksobj); @@ -514,10 +523,10 @@ vm_thread_swapout(struct thread *td) m = vm_page_lookup(ksobj, i); if (m == NULL) panic("vm_thread_swapout: kstack already missing?"); - vm_page_lock_queues(); vm_page_dirty(m); + vm_page_lock(m); vm_page_unwire(m, 0); - vm_page_unlock_queues(); + vm_page_unlock(m); } VM_OBJECT_UNLOCK(ksobj); } @@ -529,23 +538,37 @@ static void vm_thread_swapin(struct thread *td) { vm_object_t ksobj; - vm_page_t m, ma[KSTACK_MAX_PAGES]; - int i, pages, rv; + vm_page_t ma[KSTACK_MAX_PAGES]; + int i, j, k, pages, rv; pages = td->td_kstack_pages; ksobj = td->td_kstack_obj; VM_OBJECT_LOCK(ksobj); - for (i = 0; i < pages; i++) { - m = vm_page_grab(ksobj, i, VM_ALLOC_NORMAL | VM_ALLOC_RETRY | + for (i = 0; i < pages; i++) + ma[i] = vm_page_grab(ksobj, i, VM_ALLOC_NORMAL | VM_ALLOC_RETRY | VM_ALLOC_WIRED); - if (m->valid != VM_PAGE_BITS_ALL) { - rv = vm_pager_get_pages(ksobj, &m, 1, 0); + for (i = 0; i < pages; i++) { + if (ma[i]->valid != VM_PAGE_BITS_ALL) { + KASSERT(ma[i]->oflags & VPO_BUSY, + ("lost busy 1")); + vm_object_pip_add(ksobj, 1); + for (j = i + 1; j < pages; j++) { + KASSERT(ma[j]->valid == VM_PAGE_BITS_ALL || + (ma[j]->oflags & VPO_BUSY), + ("lost busy 2")); + if (ma[j]->valid == VM_PAGE_BITS_ALL) + break; + } + rv = vm_pager_get_pages(ksobj, ma + i, j - i, 0); if (rv != 
VM_PAGER_OK) - panic("vm_thread_swapin: cannot get kstack for proc: %d", td->td_proc->p_pid); - m = vm_page_lookup(ksobj, i); - } - ma[i] = m; - vm_page_wakeup(m); + panic("vm_thread_swapin: cannot get kstack for proc: %d", + td->td_proc->p_pid); + vm_object_pip_wakeup(ksobj); + for (k = i; k < j; k++) + ma[k] = vm_page_lookup(ksobj, k); + vm_page_wakeup(ma[i]); + } else if (ma[i]->oflags & VPO_BUSY) + vm_page_wakeup(ma[i]); } VM_OBJECT_UNLOCK(ksobj); pmap_qenter(td->td_kstack, ma, pages); diff --git a/sys/vm/vm_kern.c b/sys/vm/vm_kern.c index 90065721da8..95a4e9d6302 100644 --- a/sys/vm/vm_kern.c +++ b/sys/vm/vm_kern.c @@ -118,6 +118,35 @@ kmem_alloc_nofault(map, size) return (addr); } +/* + * kmem_alloc_nofault_space: + * + * Allocate a virtual address range with no underlying object and + * no initial mapping to physical memory within the specified + * address space. Any mapping from this range to physical memory + * must be explicitly created prior to its use, typically with + * pmap_qenter(). Any attempt to create a mapping on demand + * through vm_fault() will result in a panic. + */ +vm_offset_t +kmem_alloc_nofault_space(map, size, find_space) + vm_map_t map; + vm_size_t size; + int find_space; +{ + vm_offset_t addr; + int result; + + size = round_page(size); + addr = vm_map_min(map); + result = vm_map_find(map, NULL, 0, &addr, size, find_space, + VM_PROT_ALL, VM_PROT_ALL, MAP_NOFAULT); + if (result != KERN_SUCCESS) { + return (0); + } + return (addr); +} + /* * Allocate wired-down memory in the kernel's address map * or a submap. 
@@ -351,10 +380,8 @@ retry: i -= PAGE_SIZE; m = vm_page_lookup(kmem_object, OFF_TO_IDX(offset + i)); - vm_page_lock_queues(); vm_page_unwire(m, 0); vm_page_free(m); - vm_page_unlock_queues(); } VM_OBJECT_UNLOCK(kmem_object); vm_map_delete(map, addr, addr + size); diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c index fe0c0f51378..2a57e33b6e5 100644 --- a/sys/vm/vm_map.c +++ b/sys/vm/vm_map.c @@ -1394,14 +1394,29 @@ vm_map_find(vm_map_t map, vm_object_t object, vm_ooffset_t offset, vm_map_unlock(map); return (KERN_NO_SPACE); } - if (find_space == VMFS_ALIGNED_SPACE) + switch (find_space) { + case VMFS_ALIGNED_SPACE: pmap_align_superpage(object, offset, addr, length); + break; +#ifdef VMFS_TLB_ALIGNED_SPACE + case VMFS_TLB_ALIGNED_SPACE: + pmap_align_tlb(addr); + break; +#endif + default: + break; + } + start = *addr; } result = vm_map_insert(map, object, offset, start, start + length, prot, max, cow); - } while (result == KERN_NO_SPACE && find_space == VMFS_ALIGNED_SPACE); + } while (result == KERN_NO_SPACE && (find_space == VMFS_ALIGNED_SPACE +#ifdef VMFS_TLB_ALIGNED_SPACE + || find_space == VMFS_TLB_ALIGNED_SPACE +#endif + )); vm_map_unlock(map); return (result); } diff --git a/sys/vm/vm_map.h b/sys/vm/vm_map.h index 5454ce6ed15..d5c5b511c43 100644 --- a/sys/vm/vm_map.h +++ b/sys/vm/vm_map.h @@ -326,6 +326,9 @@ long vmspace_wired_count(struct vmspace *vmspace); #define VMFS_NO_SPACE 0 /* don't find; use the given range */ #define VMFS_ANY_SPACE 1 /* find a range with any alignment */ #define VMFS_ALIGNED_SPACE 2 /* find a superpage-aligned range */ +#if defined(__mips__) +#define VMFS_TLB_ALIGNED_SPACE 3 /* find a TLB entry aligned range */ +#endif /* * vm_map_wire and vm_map_unwire option flags diff --git a/sys/vm/vm_mmap.c b/sys/vm/vm_mmap.c index a47cd6a0f88..f9b3db3c6ff 100644 --- a/sys/vm/vm_mmap.c +++ b/sys/vm/vm_mmap.c @@ -866,16 +866,16 @@ RestartScan: */ if (m != NULL && m->valid != 0) { mincoreinfo = MINCORE_INCORE; + vm_page_lock(m); 
vm_page_lock_queues(); if (m->dirty || pmap_is_modified(m)) mincoreinfo |= MINCORE_MODIFIED_OTHER; if ((m->flags & PG_REFERENCED) || - pmap_ts_referenced(m)) { - vm_page_flag_set(m, PG_REFERENCED); + pmap_is_referenced(m)) mincoreinfo |= MINCORE_REFERENCED_OTHER; - } vm_page_unlock_queues(); + vm_page_unlock(m); } VM_OBJECT_UNLOCK(current->object.vm_object); } diff --git a/sys/vm/vm_object.c b/sys/vm/vm_object.c index 399cb10a1f9..5428c98f544 100644 --- a/sys/vm/vm_object.c +++ b/sys/vm/vm_object.c @@ -717,19 +717,18 @@ vm_object_terminate(vm_object_t object) * removes them from paging queues. Don't free wired pages, just * remove them from the object. */ - vm_page_lock_queues(); while ((p = TAILQ_FIRST(&object->memq)) != NULL) { KASSERT(!p->busy && (p->oflags & VPO_BUSY) == 0, ("vm_object_terminate: freeing busy page %p " "p->busy = %d, p->oflags %x\n", p, p->busy, p->oflags)); + vm_page_lock(p); if (p->wire_count == 0) { vm_page_free(p); - cnt.v_pfree++; - } else { + PCPU_INC(cnt.v_pfree); + } else vm_page_remove(p); - } + vm_page_unlock(p); } - vm_page_unlock_queues(); #if VM_NRESERVLEVEL > 0 if (__predict_false(!LIST_EMPTY(&object->rvq))) @@ -772,6 +771,7 @@ vm_object_page_clean(vm_object_t object, vm_pindex_t start, vm_pindex_t end, int int pagerflags; int curgeneration; + mtx_assert(&vm_page_queue_mtx, MA_NOTOWNED); VM_OBJECT_LOCK_ASSERT(object, MA_OWNED); if ((object->flags & OBJ_MIGHTBEDIRTY) == 0) return; @@ -789,7 +789,6 @@ vm_object_page_clean(vm_object_t object, vm_pindex_t start, vm_pindex_t end, int tend = end; } - vm_page_lock_queues(); /* * If the caller is smart and only msync()s a range he knows is * dirty, we may be able to avoid an object scan. 
This results in @@ -818,13 +817,19 @@ vm_object_page_clean(vm_object_t object, vm_pindex_t start, vm_pindex_t end, int ++tscan; continue; } + vm_page_lock(p); + vm_page_lock_queues(); vm_page_test_dirty(p); if (p->dirty == 0) { + vm_page_unlock_queues(); + vm_page_unlock(p); if (--scanlimit == 0) break; ++tscan; continue; } + vm_page_unlock_queues(); + vm_page_unlock(p); /* * If we have been asked to skip nosync pages and * this is a nosync page, we can't continue. @@ -842,6 +847,7 @@ vm_object_page_clean(vm_object_t object, vm_pindex_t start, vm_pindex_t end, int * page (i.e. had to sleep). */ tscan += vm_object_page_collect_flush(object, p, curgeneration, pagerflags); + } /* @@ -851,7 +857,6 @@ vm_object_page_clean(vm_object_t object, vm_pindex_t start, vm_pindex_t end, int * return immediately. */ if (tscan >= tend && (tstart || tend < object->size)) { - vm_page_unlock_queues(); vm_object_clear_flag(object, OBJ_CLEANING); return; } @@ -871,8 +876,13 @@ vm_object_page_clean(vm_object_t object, vm_pindex_t start, vm_pindex_t end, int p->oflags |= VPO_CLEANCHK; if ((flags & OBJPC_NOSYNC) && (p->oflags & VPO_NOSYNC)) clearobjflags = 0; - else + else { + vm_page_lock(p); + vm_page_lock_queues(); pmap_remove_write(p); + vm_page_unlock_queues(); + vm_page_unlock(p); + } } if (clearobjflags && (tstart == 0) && (tend == object->size)) @@ -895,12 +905,17 @@ again: continue; } + vm_page_lock(p); + vm_page_lock_queues(); vm_page_test_dirty(p); if (p->dirty == 0) { + vm_page_unlock_queues(); + vm_page_unlock(p); p->oflags &= ~VPO_CLEANCHK; continue; } - + vm_page_unlock_queues(); + vm_page_unlock(p); /* * If we have been asked to skip nosync pages and this is a * nosync page, skip it. Note that the object flags were @@ -923,12 +938,10 @@ again: * Try to optimize the next page. If we can't we pick up * our (random) scan where we left off. 
*/ - if (msync_flush_flags & MSYNC_FLUSH_SOFTSEQ) { + if (msync_flush_flags & MSYNC_FLUSH_SOFTSEQ) if ((p = vm_page_lookup(object, pi + n)) != NULL) goto again; - } } - vm_page_unlock_queues(); #if 0 VOP_FSYNC(vp, (pagerflags & VM_PAGER_PUT_SYNC)?MNT_WAIT:0, curproc); #endif @@ -950,10 +963,11 @@ vm_object_page_collect_flush(vm_object_t object, vm_page_t p, int curgeneration, vm_page_t mab[vm_pageout_page_count]; vm_page_t ma[vm_pageout_page_count]; - mtx_assert(&vm_page_queue_mtx, MA_OWNED); + mtx_assert(&vm_page_queue_mtx, MA_NOTOWNED); + vm_page_lock_assert(p, MA_NOTOWNED); + VM_OBJECT_LOCK_ASSERT(object, MA_OWNED); pi = p->pindex; while (vm_page_sleep_if_busy(p, TRUE, "vpcwai")) { - vm_page_lock_queues(); if (object->generation != curgeneration) { return(0); } @@ -968,11 +982,17 @@ vm_object_page_collect_flush(vm_object_t object, vm_page_t p, int curgeneration, (tp->oflags & VPO_CLEANCHK) == 0) || (tp->busy != 0)) break; + vm_page_lock(tp); + vm_page_lock_queues(); vm_page_test_dirty(tp); if (tp->dirty == 0) { + vm_page_unlock(tp); + vm_page_unlock_queues(); tp->oflags &= ~VPO_CLEANCHK; break; } + vm_page_unlock(tp); + vm_page_unlock_queues(); maf[ i - 1 ] = tp; maxf++; continue; @@ -992,11 +1012,17 @@ vm_object_page_collect_flush(vm_object_t object, vm_page_t p, int curgeneration, (tp->oflags & VPO_CLEANCHK) == 0) || (tp->busy != 0)) break; + vm_page_lock(tp); + vm_page_lock_queues(); vm_page_test_dirty(tp); if (tp->dirty == 0) { + vm_page_unlock_queues(); + vm_page_unlock(tp); tp->oflags &= ~VPO_CLEANCHK; break; } + vm_page_unlock_queues(); + vm_page_unlock(tp); mab[ i - 1 ] = tp; maxb++; continue; @@ -1022,7 +1048,11 @@ vm_object_page_collect_flush(vm_object_t object, vm_page_t p, int curgeneration, vm_pageout_flush(ma, runlen, pagerflags); for (i = 0; i < runlen; i++) { if (ma[i]->dirty) { + vm_page_lock(ma[i]); + vm_page_lock_queues(); pmap_remove_write(ma[i]); + vm_page_unlock_queues(); + vm_page_unlock(ma[i]); ma[i]->oflags |= VPO_CLEANCHK; /* @@ -1162,7 
+1192,8 @@ shadowlookup: (tobject->flags & OBJ_ONEMAPPING) == 0) { goto unlock_tobject; } - } + } else if (tobject->type == OBJT_PHYS) + goto unlock_tobject; m = vm_page_lookup(tobject, tpindex); if (m == NULL && advise == MADV_WILLNEED) { /* @@ -1189,28 +1220,33 @@ shadowlookup: VM_OBJECT_UNLOCK(tobject); tobject = backing_object; goto shadowlookup; - } + } else if (m->valid != VM_PAGE_BITS_ALL) + goto unlock_tobject; /* - * If the page is busy or not in a normal active state, - * we skip it. If the page is not managed there are no - * page queues to mess with. Things can break if we mess - * with pages in any of the below states. + * If the page is not in a normal state, skip it. */ + vm_page_lock(m); vm_page_lock_queues(); - if (m->hold_count || - m->wire_count || - (m->flags & PG_UNMANAGED) || - m->valid != VM_PAGE_BITS_ALL) { + if (m->hold_count != 0 || m->wire_count != 0) { vm_page_unlock_queues(); + vm_page_unlock(m); goto unlock_tobject; } if ((m->oflags & VPO_BUSY) || m->busy) { - vm_page_flag_set(m, PG_REFERENCED); + if (advise == MADV_WILLNEED) + /* + * Reference the page before unlocking and + * sleeping so that the page daemon is less + * likely to reclaim it. + */ + vm_page_flag_set(m, PG_REFERENCED); vm_page_unlock_queues(); + vm_page_unlock(m); if (object != tobject) VM_OBJECT_UNLOCK(object); m->oflags |= VPO_WANTED; - msleep(m, VM_OBJECT_MTX(tobject), PDROP | PVM, "madvpo", 0); + msleep(m, VM_OBJECT_MTX(tobject), PDROP | PVM, "madvpo", + 0); VM_OBJECT_LOCK(object); goto relookup; } @@ -1240,6 +1276,7 @@ shadowlookup: vm_page_dontneed(m); } vm_page_unlock_queues(); + vm_page_unlock(m); if (advise == MADV_FREE && tobject->type == OBJT_SWAP) swap_pager_freespace(tobject, tpindex, 1); unlock_tobject: @@ -1402,7 +1439,6 @@ retry: m = TAILQ_NEXT(m, listq); } } - vm_page_lock_queues(); for (; m != NULL && (idx = m->pindex - offidxstart) < size; m = m_next) { m_next = TAILQ_NEXT(m, listq); @@ -1415,19 +1451,18 @@ retry: * not be changed by this operation. 
*/ if ((m->oflags & VPO_BUSY) || m->busy) { - vm_page_flag_set(m, PG_REFERENCED); - vm_page_unlock_queues(); VM_OBJECT_UNLOCK(new_object); m->oflags |= VPO_WANTED; msleep(m, VM_OBJECT_MTX(orig_object), PVM, "spltwt", 0); VM_OBJECT_LOCK(new_object); goto retry; } + vm_page_lock(m); vm_page_rename(m, new_object, idx); + vm_page_unlock(m); /* page automatically made dirty by rename and cache handled */ vm_page_busy(m); } - vm_page_unlock_queues(); if (orig_object->type == OBJT_SWAP) { /* * swap_pager_copy() can sleep, in which case the orig_object's @@ -1553,9 +1588,6 @@ vm_object_backing_scan(vm_object_t object, int op) } } else if (op & OBSC_COLLAPSE_WAIT) { if ((p->oflags & VPO_BUSY) || p->busy) { - vm_page_lock_queues(); - vm_page_flag_set(p, PG_REFERENCED); - vm_page_unlock_queues(); VM_OBJECT_UNLOCK(object); p->oflags |= VPO_WANTED; msleep(p, VM_OBJECT_MTX(backing_object), @@ -1598,14 +1630,14 @@ vm_object_backing_scan(vm_object_t object, int op) * Page is out of the parent object's range, we * can simply destroy it. */ - vm_page_lock_queues(); + vm_page_lock(p); KASSERT(!pmap_page_is_mapped(p), ("freeing mapped page %p", p)); if (p->wire_count == 0) vm_page_free(p); else vm_page_remove(p); - vm_page_unlock_queues(); + vm_page_unlock(p); p = next; continue; } @@ -1622,14 +1654,14 @@ vm_object_backing_scan(vm_object_t object, int op) * * Leave the parent's page alone */ - vm_page_lock_queues(); + vm_page_lock(p); KASSERT(!pmap_page_is_mapped(p), ("freeing mapped page %p", p)); if (p->wire_count == 0) vm_page_free(p); else vm_page_remove(p); - vm_page_unlock_queues(); + vm_page_unlock(p); p = next; continue; } @@ -1649,9 +1681,9 @@ vm_object_backing_scan(vm_object_t object, int op) * If the page was mapped to a process, it can remain * mapped through the rename. 
*/ - vm_page_lock_queues(); + vm_page_lock(p); vm_page_rename(p, object, new_pindex); - vm_page_unlock_queues(); + vm_page_unlock(p); /* page automatically made dirty by rename */ } p = next; @@ -1916,7 +1948,7 @@ again: p = TAILQ_NEXT(p, listq); } } - vm_page_lock_queues(); + /* * Assert: the variable p is either (1) the page with the * least pindex greater than or equal to the parameter pindex @@ -1935,6 +1967,8 @@ again: * cannot be freed. They can, however, be invalidated * if "clean_only" is FALSE. */ + vm_page_lock(p); + vm_page_lock_queues(); if ((wirings = p->wire_count) != 0 && (wirings = pmap_page_wired_mappings(p)) != p->wire_count) { /* Fictitious pages do not have managed mappings. */ @@ -1946,6 +1980,8 @@ again: p->valid = 0; vm_page_undirty(p); } + vm_page_unlock_queues(); + vm_page_unlock(p); continue; } if (vm_page_sleep_if_busy(p, TRUE, "vmopar")) @@ -1954,16 +1990,20 @@ again: ("vm_object_page_remove: page %p is fictitious", p)); if (clean_only && p->valid) { pmap_remove_write(p); - if (p->dirty) + if (p->dirty) { + vm_page_unlock_queues(); + vm_page_unlock(p); continue; + } } pmap_remove_all(p); /* Account for removal of managed, wired mappings. 
*/ if (wirings != 0) p->wire_count -= wirings; vm_page_free(p); + vm_page_unlock_queues(); + vm_page_unlock(p); } - vm_page_unlock_queues(); vm_object_pip_wakeup(object); skipmemq: if (__predict_false(object->cache != NULL)) @@ -1998,9 +2038,9 @@ vm_object_populate(vm_object_t object, vm_pindex_t start, vm_pindex_t end) if (m == NULL) break; if (rv != VM_PAGER_OK) { - vm_page_lock_queues(); + vm_page_lock(m); vm_page_free(m); - vm_page_unlock_queues(); + vm_page_unlock(m); break; } } diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c index 7c149c9bc88..455c9902945 100644 --- a/sys/vm/vm_page.c +++ b/sys/vm/vm_page.c @@ -115,6 +115,7 @@ __FBSDID("$FreeBSD$"); #include #include +#include #include #include #include @@ -129,6 +130,24 @@ __FBSDID("$FreeBSD$"); #include +#if defined(__amd64__) || defined (__i386__) +extern struct sysctl_oid_list sysctl__vm_pmap_children; +#else +SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD, 0, "VM/pmap parameters"); +#endif + +static uint64_t pmap_tryrelock_calls; +SYSCTL_QUAD(_vm_pmap, OID_AUTO, tryrelock_calls, CTLFLAG_RD, + &pmap_tryrelock_calls, 0, "Number of tryrelock calls"); + +static int pmap_tryrelock_restart; +SYSCTL_INT(_vm_pmap, OID_AUTO, tryrelock_restart, CTLFLAG_RD, + &pmap_tryrelock_restart, 0, "Number of tryrelock restarts"); + +static int pmap_tryrelock_race; +SYSCTL_INT(_vm_pmap, OID_AUTO, tryrelock_race, CTLFLAG_RD, + &pmap_tryrelock_race, 0, "Number of tryrelock pmap race cases"); + /* * Associated with page of user-allocatable memory is a * page structure. @@ -138,6 +157,8 @@ struct vpgqueues vm_page_queues[PQ_COUNT]; struct vpglocks vm_page_queue_lock; struct vpglocks vm_page_queue_free_lock; +struct vpglocks pa_lock[PA_LOCK_COUNT] __aligned(CACHE_LINE_SIZE); + vm_page_t vm_page_array = 0; int vm_page_array_size = 0; long first_page = 0; @@ -157,6 +178,43 @@ CTASSERT(sizeof(u_long) >= 8); #endif #endif +/* + * Try to acquire a physical address lock while a pmap is locked. 
If we + * fail to trylock we unlock and lock the pmap directly and cache the + * locked pa in *locked. The caller should then restart their loop in case + * the virtual to physical mapping has changed. + */ +int +vm_page_pa_tryrelock(pmap_t pmap, vm_paddr_t pa, vm_paddr_t *locked) +{ + vm_paddr_t lockpa; + uint32_t gen_count; + + gen_count = pmap->pm_gen_count; + atomic_add_long((volatile long *)&pmap_tryrelock_calls, 1); + lockpa = *locked; + *locked = pa; + if (lockpa) { + PA_LOCK_ASSERT(lockpa, MA_OWNED); + if (PA_LOCKPTR(pa) == PA_LOCKPTR(lockpa)) + return (0); + PA_UNLOCK(lockpa); + } + if (PA_TRYLOCK(pa)) + return (0); + PMAP_UNLOCK(pmap); + atomic_add_int((volatile int *)&pmap_tryrelock_restart, 1); + PA_LOCK(pa); + PMAP_LOCK(pmap); + + if (pmap->pm_gen_count != gen_count + 1) { + pmap->pm_retries++; + atomic_add_int((volatile int *)&pmap_tryrelock_race, 1); + return (EAGAIN); + } + return (0); +} + /* * vm_set_page_size: * @@ -271,6 +329,11 @@ vm_page_startup(vm_offset_t vaddr) mtx_init(&vm_page_queue_free_mtx, "vm page queue free mutex", NULL, MTX_DEF); + /* Setup page locks. */ + for (i = 0; i < PA_LOCK_COUNT; i++) + mtx_init(&pa_lock[i].data, "page lock", NULL, + MTX_DEF | MTX_RECURSE | MTX_DUPOK); + /* * Initialize the queue headers for the hold queue, the active queue, * and the inactive queue. @@ -489,7 +552,7 @@ void vm_page_hold(vm_page_t mem) { - mtx_assert(&vm_page_queue_mtx, MA_OWNED); + vm_page_lock_assert(mem, MA_OWNED); mem->hold_count++; } @@ -497,7 +560,7 @@ void vm_page_unhold(vm_page_t mem) { - mtx_assert(&vm_page_queue_mtx, MA_OWNED); + vm_page_lock_assert(mem, MA_OWNED); --mem->hold_count; KASSERT(mem->hold_count >= 0, ("vm_page_unhold: hold count < 0!!!")); if (mem->hold_count == 0 && VM_PAGE_INQUEUE2(mem, PQ_HOLD)) @@ -533,7 +596,7 @@ vm_page_free_zero(vm_page_t m) /* * vm_page_sleep: * - * Sleep and release the page queues lock. + * Sleep and release the page and page queues locks. 
* * The object containing the given page must be locked. */ @@ -542,10 +605,10 @@ vm_page_sleep(vm_page_t m, const char *msg) { VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); - if (!mtx_owned(&vm_page_queue_mtx)) - vm_page_lock_queues(); - vm_page_flag_set(m, PG_REFERENCED); - vm_page_unlock_queues(); + if (mtx_owned(&vm_page_queue_mtx)) + vm_page_unlock_queues(); + if (mtx_owned(vm_page_lockptr(m))) + vm_page_unlock(m); /* * It's possible that while we sleep, the page will get @@ -724,6 +787,8 @@ vm_page_remove(vm_page_t m) vm_object_t object; vm_page_t root; + if ((m->flags & PG_UNMANAGED) == 0) + vm_page_lock_assert(m, MA_OWNED); if ((object = m->object) == NULL) return; VM_OBJECT_LOCK_ASSERT(object, MA_OWNED); @@ -731,7 +796,6 @@ vm_page_remove(vm_page_t m) m->oflags &= ~VPO_BUSY; vm_page_flash(m); } - mtx_assert(&vm_page_queue_mtx, MA_OWNED); /* * Now remove from the object's list of backed pages. @@ -1310,21 +1374,25 @@ vm_page_enqueue(int queue, vm_page_t m) * Ensure that act_count is at least ACT_INIT but do not otherwise * mess with it. * - * The page queues must be locked. + * The page must be locked. * This routine may not block. 
*/ void vm_page_activate(vm_page_t m) { - mtx_assert(&vm_page_queue_mtx, MA_OWNED); + vm_page_lock_assert(m, MA_OWNED); if (VM_PAGE_GETKNOWNQUEUE2(m) != PQ_ACTIVE) { - vm_pageq_remove(m); if (m->wire_count == 0 && (m->flags & PG_UNMANAGED) == 0) { if (m->act_count < ACT_INIT) m->act_count = ACT_INIT; + vm_page_lock_queues(); + vm_pageq_remove(m); vm_page_enqueue(PQ_ACTIVE, m); - } + vm_page_unlock_queues(); + } else + KASSERT(m->queue == PQ_NONE, + ("vm_page_activate: wired page %p is queued", m)); } else { if (m->act_count < ACT_INIT) m->act_count = ACT_INIT; @@ -1380,10 +1448,11 @@ void vm_page_free_toq(vm_page_t m) { - if (VM_PAGE_GETQUEUE(m) != PQ_NONE) - mtx_assert(&vm_page_queue_mtx, MA_OWNED); - KASSERT(!pmap_page_is_mapped(m), - ("vm_page_free_toq: freeing mapped page %p", m)); + if ((m->flags & PG_UNMANAGED) == 0) { + vm_page_lock_assert(m, MA_OWNED); + KASSERT(!pmap_page_is_mapped(m), + ("vm_page_free_toq: freeing mapped page %p", m)); + } PCPU_INC(cnt.v_tfree); if (m->busy || VM_PAGE_IS_FREE(m)) { @@ -1403,7 +1472,11 @@ vm_page_free_toq(vm_page_t m) * callback routine until after we've put the page on the * appropriate free queue. */ - vm_pageq_remove(m); + if (VM_PAGE_GETQUEUE(m) != PQ_NONE) { + vm_page_lock_queues(); + vm_pageq_remove(m); + vm_page_unlock_queues(); + } vm_page_remove(m); /* @@ -1426,7 +1499,9 @@ vm_page_free_toq(vm_page_t m) } if (m->hold_count != 0) { m->flags &= ~PG_ZERO; + vm_page_lock_queues(); vm_page_enqueue(PQ_HOLD, m); + vm_page_unlock_queues(); } else { /* * Restore the default memory attribute to the page. @@ -1463,7 +1538,7 @@ vm_page_free_toq(vm_page_t m) * another map, removing it from paging queues * as necessary. * - * The page queues must be locked. + * The page must be locked. * This routine may not block. */ void @@ -1475,12 +1550,15 @@ vm_page_wire(vm_page_t m) * and only unqueue the page if it is on some queue (if it is unmanaged * it is already off the queues). 
*/ - mtx_assert(&vm_page_queue_mtx, MA_OWNED); + vm_page_lock_assert(m, MA_OWNED); if (m->flags & PG_FICTITIOUS) return; if (m->wire_count == 0) { - if ((m->flags & PG_UNMANAGED) == 0) + if ((m->flags & PG_UNMANAGED) == 0) { + vm_page_lock_queues(); vm_pageq_remove(m); + vm_page_unlock_queues(); + } atomic_add_int(&cnt.v_wire_count, 1); } m->wire_count++; @@ -1512,38 +1590,39 @@ vm_page_wire(vm_page_t m) * be placed in the cache - for example, just after dirtying a page. * dirty pages in the cache are not allowed. * - * The page queues must be locked. + * The page must be locked. * This routine may not block. */ void vm_page_unwire(vm_page_t m, int activate) { - mtx_assert(&vm_page_queue_mtx, MA_OWNED); + if ((m->flags & PG_UNMANAGED) == 0) + vm_page_lock_assert(m, MA_OWNED); if (m->flags & PG_FICTITIOUS) return; if (m->wire_count > 0) { m->wire_count--; if (m->wire_count == 0) { atomic_subtract_int(&cnt.v_wire_count, 1); - if (m->flags & PG_UNMANAGED) { - ; - } else if (activate) + if ((m->flags & PG_UNMANAGED) != 0) + return; + vm_page_lock_queues(); + if (activate) vm_page_enqueue(PQ_ACTIVE, m); else { vm_page_flag_clear(m, PG_WINATCFLS); vm_page_enqueue(PQ_INACTIVE, m); } + vm_page_unlock_queues(); } } else { panic("vm_page_unwire: invalid wire count: %d", m->wire_count); } } - /* - * Move the specified page to the inactive queue. If the page has - * any associated swap, the swap is deallocated. + * Move the specified page to the inactive queue. * * Normally athead is 0 resulting in LRU operation. athead is set * to 1 if we want this page to be 'as if it were placed in the cache', @@ -1555,7 +1634,7 @@ static inline void _vm_page_deactivate(vm_page_t m, int athead) { - mtx_assert(&vm_page_queue_mtx, MA_OWNED); + vm_page_lock_assert(m, MA_OWNED); /* * Ignore if already inactive. 
@@ -1563,6 +1642,7 @@ _vm_page_deactivate(vm_page_t m, int athead) if (VM_PAGE_INQUEUE2(m, PQ_INACTIVE)) return; if (m->wire_count == 0 && (m->flags & PG_UNMANAGED) == 0) { + vm_page_lock_queues(); vm_page_flag_clear(m, PG_WINATCFLS); vm_pageq_remove(m); if (athead) @@ -1571,13 +1651,20 @@ _vm_page_deactivate(vm_page_t m, int athead) TAILQ_INSERT_TAIL(&vm_page_queues[PQ_INACTIVE].pl, m, pageq); VM_PAGE_SETQUEUE2(m, PQ_INACTIVE); cnt.v_inactive_count++; + vm_page_unlock_queues(); } } +/* + * Move the specified page to the inactive queue. + * + * The page must be locked. + */ void vm_page_deactivate(vm_page_t m) { - _vm_page_deactivate(m, 0); + + _vm_page_deactivate(m, 0); } /* @@ -1590,6 +1677,7 @@ vm_page_try_to_cache(vm_page_t m) { mtx_assert(&vm_page_queue_mtx, MA_OWNED); + vm_page_lock_assert(m, MA_OWNED); VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); if (m->dirty || m->hold_count || m->busy || m->wire_count || (m->oflags & VPO_BUSY) || (m->flags & PG_UNMANAGED)) { @@ -1613,6 +1701,7 @@ vm_page_try_to_free(vm_page_t m) { mtx_assert(&vm_page_queue_mtx, MA_OWNED); + vm_page_lock_assert(m, MA_OWNED); if (m->object != NULL) VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); if (m->dirty || m->hold_count || m->busy || m->wire_count || @@ -1640,6 +1729,7 @@ vm_page_cache(vm_page_t m) vm_page_t root; mtx_assert(&vm_page_queue_mtx, MA_OWNED); + vm_page_lock_assert(m, MA_OWNED); object = m->object; VM_OBJECT_LOCK_ASSERT(object, MA_OWNED); if ((m->flags & PG_UNMANAGED) || (m->oflags & VPO_BUSY) || m->busy || @@ -1772,6 +1862,8 @@ vm_page_dontneed(vm_page_t m) int head; mtx_assert(&vm_page_queue_mtx, MA_OWNED); + vm_page_lock_assert(m, MA_OWNED); + VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); dnw = ++dnweight; /* @@ -1826,15 +1918,25 @@ vm_page_grab(vm_object_t object, vm_pindex_t pindex, int allocflags) VM_OBJECT_LOCK_ASSERT(object, MA_OWNED); retrylookup: if ((m = vm_page_lookup(object, pindex)) != NULL) { - if (vm_page_sleep_if_busy(m, TRUE, "pgrbwt")) { + if ((m->oflags & 
VPO_BUSY) != 0 || m->busy != 0) { + if ((allocflags & VM_ALLOC_RETRY) != 0) { + /* + * Reference the page before unlocking and + * sleeping so that the page daemon is less + * likely to reclaim it. + */ + vm_page_lock_queues(); + vm_page_flag_set(m, PG_REFERENCED); + } + vm_page_sleep(m, "pgrbwt"); if ((allocflags & VM_ALLOC_RETRY) == 0) return (NULL); goto retrylookup; } else { if ((allocflags & VM_ALLOC_WIRED) != 0) { - vm_page_lock_queues(); + vm_page_lock(m); vm_page_wire(m); - vm_page_unlock_queues(); + vm_page_unlock(m); } if ((allocflags & VM_ALLOC_NOBUSY) == 0) vm_page_busy(m); @@ -2133,6 +2235,7 @@ vm_page_cowfault(vm_page_t m) vm_object_t object; vm_pindex_t pindex; + vm_page_lock_assert(m, MA_OWNED); object = m->object; VM_OBJECT_LOCK_ASSERT(object, MA_OWNED); KASSERT(object->paging_in_progress != 0, @@ -2142,22 +2245,23 @@ vm_page_cowfault(vm_page_t m) retry_alloc: pmap_remove_all(m); + vm_page_unlock_queues(); vm_page_remove(m); mnew = vm_page_alloc(object, pindex, VM_ALLOC_NORMAL | VM_ALLOC_NOBUSY); if (mnew == NULL) { vm_page_insert(m, object, pindex); - vm_page_unlock_queues(); + vm_page_unlock(m); VM_OBJECT_UNLOCK(object); VM_WAIT; VM_OBJECT_LOCK(object); if (m == vm_page_lookup(object, pindex)) { + vm_page_lock(m); vm_page_lock_queues(); goto retry_alloc; } else { /* * Page disappeared during the wait. */ - vm_page_lock_queues(); return; } } @@ -2168,7 +2272,12 @@ vm_page_cowfault(vm_page_t m) * waiting to allocate a page. 
If so, put things back * the way they were */ + vm_page_unlock(m); + vm_page_lock(mnew); + vm_page_lock_queues(); vm_page_free(mnew); + vm_page_unlock_queues(); + vm_page_unlock(mnew); vm_page_insert(m, object, pindex); } else { /* clear COW & copy page */ if (!so_zerocp_fullpage) @@ -2177,6 +2286,7 @@ vm_page_cowfault(vm_page_t m) vm_page_dirty(mnew); mnew->wire_count = m->wire_count - m->cow; m->wire_count = m->cow; + vm_page_unlock(m); } } @@ -2184,7 +2294,7 @@ void vm_page_cowclear(vm_page_t m) { - mtx_assert(&vm_page_queue_mtx, MA_OWNED); + vm_page_lock_assert(m, MA_OWNED); if (m->cow) { m->cow--; /* @@ -2200,11 +2310,13 @@ int vm_page_cowsetup(vm_page_t m) { - mtx_assert(&vm_page_queue_mtx, MA_OWNED); + vm_page_lock_assert(m, MA_OWNED); if (m->cow == USHRT_MAX - 1) return (EBUSY); m->cow++; + vm_page_lock_queues(); pmap_remove_write(m); + vm_page_unlock_queues(); return (0); } diff --git a/sys/vm/vm_page.h b/sys/vm/vm_page.h index 662af98be83..b2b92e92a2c 100644 --- a/sys/vm/vm_page.h +++ b/sys/vm/vm_page.h @@ -90,32 +90,33 @@ * and sundry status bits. * * Fields in this structure are locked either by the lock on the - * object that the page belongs to (O) or by the lock on the page - * queues (P). + * object that the page belongs to (O), its corresponding page lock (P), + * or by the lock on the page queues (Q). 
+ * */ TAILQ_HEAD(pglist, vm_page); struct vm_page { - TAILQ_ENTRY(vm_page) pageq; /* queue info for FIFO queue or free list (P) */ + TAILQ_ENTRY(vm_page) pageq; /* queue info for FIFO queue or free list (Q) */ TAILQ_ENTRY(vm_page) listq; /* pages in same object (O) */ struct vm_page *left; /* splay tree link (O) */ struct vm_page *right; /* splay tree link (O) */ vm_object_t object; /* which object am I in (O,P)*/ - vm_pindex_t pindex; /* offset into object (O,P) */ + vm_pindex_t pindex; /* offset into object (O,Q) */ vm_paddr_t phys_addr; /* physical address of page */ struct md_page md; /* machine dependant stuff */ - uint8_t queue; /* page queue index */ + uint8_t queue; /* page queue index (P,Q) */ int8_t segind; u_short flags; /* see below */ uint8_t order; /* index of the buddy queue */ uint8_t pool; - u_short cow; /* page cow mapping count */ + u_short cow; /* page cow mapping count (Q) */ u_int wire_count; /* wired down maps refs (P) */ - short hold_count; /* page hold count */ + short hold_count; /* page hold count (P) */ u_short oflags; /* page flags (O) */ - u_char act_count; /* page usage count */ + u_char act_count; /* page usage count (P) */ u_char busy; /* page busy count (O) */ /* NOTE that these must support one bit per DEV_BSIZE in a page!!! 
*/ /* so, on normal X86 kernels, they must be at least 8 bits wide */ @@ -177,9 +178,35 @@ struct vpglocks { } __aligned(CACHE_LINE_SIZE); extern struct vpglocks vm_page_queue_free_lock; +extern struct vpglocks pa_lock[]; + +#if defined(__arm__) +#define PDRSHIFT PDR_SHIFT +#elif !defined(PDRSHIFT) +#define PDRSHIFT 21 +#endif + +#define pa_index(pa) ((pa) >> PDRSHIFT) +#define PA_LOCKPTR(pa) &pa_lock[pa_index((pa)) % PA_LOCK_COUNT].data +#define PA_LOCKOBJPTR(pa) ((struct lock_object *)PA_LOCKPTR((pa))) +#define PA_LOCK(pa) mtx_lock(PA_LOCKPTR(pa)) +#define PA_TRYLOCK(pa) mtx_trylock(PA_LOCKPTR(pa)) +#define PA_UNLOCK(pa) mtx_unlock(PA_LOCKPTR(pa)) +#define PA_UNLOCK_COND(pa) \ + do { \ + if (pa) \ + PA_UNLOCK(pa); \ + } while (0) + +#define PA_LOCK_ASSERT(pa, a) mtx_assert(PA_LOCKPTR(pa), (a)) + +#define vm_page_lockptr(m) (PA_LOCKPTR(VM_PAGE_TO_PHYS((m)))) +#define vm_page_lock(m) mtx_lock(vm_page_lockptr((m))) +#define vm_page_unlock(m) mtx_unlock(vm_page_lockptr((m))) +#define vm_page_trylock(m) mtx_trylock(vm_page_lockptr((m))) +#define vm_page_lock_assert(m, a) mtx_assert(vm_page_lockptr((m)), (a)) #define vm_page_queue_free_mtx vm_page_queue_free_lock.data - /* * These are the flags defined for vm_page. 
* @@ -324,6 +351,7 @@ void vm_page_dontneed(vm_page_t); void vm_page_deactivate (vm_page_t); void vm_page_insert (vm_page_t, vm_object_t, vm_pindex_t); vm_page_t vm_page_lookup (vm_object_t, vm_pindex_t); +int vm_page_pa_tryrelock(pmap_t, vm_paddr_t, vm_paddr_t *); void vm_page_remove (vm_page_t); void vm_page_rename (vm_page_t, vm_object_t, vm_pindex_t); void vm_page_requeue(vm_page_t m); diff --git a/sys/vm/vm_pageout.c b/sys/vm/vm_pageout.c index dc2b3b77d56..49b6ac9dba9 100644 --- a/sys/vm/vm_pageout.c +++ b/sys/vm/vm_pageout.c @@ -215,6 +215,17 @@ static void vm_req_vmdaemon(int req); #endif static void vm_pageout_page_stats(void); +static void +vm_pageout_init_marker(vm_page_t marker, u_short queue) +{ + + bzero(marker, sizeof(*marker)); + marker->flags = PG_FICTITIOUS | PG_MARKER; + marker->oflags = VPO_BUSY; + marker->queue = queue; + marker->wire_count = 1; +} + /* * vm_pageout_fallback_object_lock: * @@ -237,22 +248,16 @@ vm_pageout_fallback_object_lock(vm_page_t m, vm_page_t *next) u_short queue; vm_object_t object; - /* - * Initialize our marker - */ - bzero(&marker, sizeof(marker)); - marker.flags = PG_FICTITIOUS | PG_MARKER; - marker.oflags = VPO_BUSY; - marker.queue = m->queue; - marker.wire_count = 1; - queue = m->queue; + vm_pageout_init_marker(&marker, queue); object = m->object; TAILQ_INSERT_AFTER(&vm_page_queues[queue].pl, m, &marker, pageq); vm_page_unlock_queues(); + vm_page_unlock(m); VM_OBJECT_LOCK(object); + vm_page_lock(m); vm_page_lock_queues(); /* Page queue might have changed. */ @@ -265,6 +270,43 @@ vm_pageout_fallback_object_lock(vm_page_t m, vm_page_t *next) return (unchanged); } +/* + * Lock the page while holding the page queue lock. Use marker page + * to detect page queue changes and maintain notion of next page on + * page queue. Return TRUE if no changes were detected, FALSE + * otherwise. The page is locked on return. The page queue lock might + * be dropped and reacquired. 
+ * + * This function depends on normal struct vm_page being type stable. + */ +boolean_t +vm_pageout_page_lock(vm_page_t m, vm_page_t *next) +{ + struct vm_page marker; + boolean_t unchanged; + u_short queue; + + vm_page_lock_assert(m, MA_NOTOWNED); + mtx_assert(&vm_page_queue_mtx, MA_OWNED); + + if (vm_page_trylock(m)) + return (TRUE); + + queue = m->queue; + vm_pageout_init_marker(&marker, queue); + + TAILQ_INSERT_AFTER(&vm_page_queues[queue].pl, m, &marker, pageq); + vm_page_unlock_queues(); + vm_page_lock(m); + vm_page_lock_queues(); + + /* Page queue might have changed. */ + *next = TAILQ_NEXT(&marker, pageq); + unchanged = (m->queue == queue && &marker == TAILQ_NEXT(m, pageq)); + TAILQ_REMOVE(&vm_page_queues[queue].pl, &marker, pageq); + return (unchanged); +} + /* * vm_pageout_clean: * @@ -275,8 +317,7 @@ vm_pageout_fallback_object_lock(vm_page_t m, vm_page_t *next) * late and we cannot do anything that will mess with the page. */ static int -vm_pageout_clean(m) - vm_page_t m; +vm_pageout_clean(vm_page_t m) { vm_object_t object; vm_page_t mc[2*vm_pageout_page_count]; @@ -284,7 +325,8 @@ vm_pageout_clean(m) int ib, is, page_base; vm_pindex_t pindex = m->pindex; - mtx_assert(&vm_page_queue_mtx, MA_OWNED); + vm_page_lock_assert(m, MA_NOTOWNED); + vm_page_lock(m); VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); /* @@ -301,6 +343,7 @@ vm_pageout_clean(m) */ if ((m->hold_count != 0) || ((m->busy != 0) || (m->oflags & VPO_BUSY))) { + vm_page_unlock(m); return 0; } @@ -347,14 +390,19 @@ more: ib = 0; break; } + vm_page_lock(p); + vm_page_lock_queues(); vm_page_test_dirty(p); if (p->dirty == 0 || p->queue != PQ_INACTIVE || - p->wire_count != 0 || /* may be held by buf cache */ p->hold_count != 0) { /* may be undergoing I/O */ + vm_page_unlock(p); + vm_page_unlock_queues(); ib = 0; break; } + vm_page_unlock_queues(); + vm_page_unlock(p); mc[--page_base] = p; ++pageout_count; ++ib; @@ -375,13 +423,18 @@ more: if ((p->oflags & VPO_BUSY) || p->busy) { break; } + 
vm_page_lock(p); + vm_page_lock_queues(); vm_page_test_dirty(p); if (p->dirty == 0 || p->queue != PQ_INACTIVE || - p->wire_count != 0 || /* may be held by buf cache */ p->hold_count != 0) { /* may be undergoing I/O */ + vm_page_unlock_queues(); + vm_page_unlock(p); break; } + vm_page_unlock_queues(); + vm_page_unlock(p); mc[page_base + pageout_count] = p; ++pageout_count; ++is; @@ -395,6 +448,7 @@ more: if (ib && pageout_count < vm_pageout_page_count) goto more; + vm_page_unlock(m); /* * we allow reads during pageouts... */ @@ -418,8 +472,9 @@ vm_pageout_flush(vm_page_t *mc, int count, int flags) int numpagedout = 0; int i; - mtx_assert(&vm_page_queue_mtx, MA_OWNED); VM_OBJECT_LOCK_ASSERT(object, MA_OWNED); + mtx_assert(&vm_page_queue_mtx, MA_NOTOWNED); + /* * Initiate I/O. Bump the vm_page_t->busy counter and * mark the pages read-only. @@ -435,17 +490,21 @@ vm_pageout_flush(vm_page_t *mc, int count, int flags) ("vm_pageout_flush: partially invalid page %p index %d/%d", mc[i], i, count)); vm_page_io_start(mc[i]); + vm_page_lock(mc[i]); + vm_page_lock_queues(); pmap_remove_write(mc[i]); + vm_page_unlock(mc[i]); + vm_page_unlock_queues(); } - vm_page_unlock_queues(); vm_object_pip_add(object, count); vm_pager_put_pages(object, mc, count, flags, pageout_status); - vm_page_lock_queues(); for (i = 0; i < count; i++) { vm_page_t mt = mc[i]; + vm_page_lock(mt); + vm_page_lock_queues(); KASSERT(pageout_status[i] == VM_PAGER_PEND || (mt->flags & PG_WRITEABLE) == 0, ("vm_pageout_flush: page %p is not write protected", mt)); @@ -487,6 +546,8 @@ vm_pageout_flush(vm_page_t *mc, int count, int flags) if (vm_page_count_severe()) vm_page_try_to_cache(mt); } + vm_page_unlock_queues(); + vm_page_unlock(mt); } return numpagedout; } @@ -510,17 +571,17 @@ vm_pageout_object_deactivate_pages(pmap, first_object, desired) { vm_object_t backing_object, object; vm_page_t p, next; - int actcount, rcount, remove_mode; + int actcount, remove_mode; VM_OBJECT_LOCK_ASSERT(first_object, MA_OWNED); 
if (first_object->type == OBJT_DEVICE || - first_object->type == OBJT_SG || - first_object->type == OBJT_PHYS) + first_object->type == OBJT_SG) return; for (object = first_object;; object = backing_object) { if (pmap_resident_count(pmap) <= desired) goto unlock_return; - if (object->paging_in_progress) + VM_OBJECT_LOCK_ASSERT(object, MA_OWNED); + if (object->type == OBJT_PHYS || object->paging_in_progress) goto unlock_return; remove_mode = 0; @@ -529,22 +590,23 @@ vm_pageout_object_deactivate_pages(pmap, first_object, desired) /* * scan the objects entire memory queue */ - rcount = object->resident_page_count; p = TAILQ_FIRST(&object->memq); - vm_page_lock_queues(); - while (p && (rcount-- > 0)) { - if (pmap_resident_count(pmap) <= desired) { - vm_page_unlock_queues(); + while (p != NULL) { + if (pmap_resident_count(pmap) <= desired) goto unlock_return; - } next = TAILQ_NEXT(p, listq); + if ((p->oflags & VPO_BUSY) != 0 || p->busy != 0) { + p = next; + continue; + } + vm_page_lock(p); + vm_page_lock_queues(); cnt.v_pdpages++; if (p->wire_count != 0 || p->hold_count != 0 || - p->busy != 0 || - (p->oflags & VPO_BUSY) || - (p->flags & PG_UNMANAGED) || !pmap_page_exists_quick(pmap, p)) { + vm_page_unlock_queues(); + vm_page_unlock(p); p = next; continue; } @@ -578,9 +640,10 @@ vm_pageout_object_deactivate_pages(pmap, first_object, desired) } else if (p->queue == PQ_INACTIVE) { pmap_remove_all(p); } + vm_page_unlock_queues(); + vm_page_unlock(p); p = next; } - vm_page_unlock_queues(); if ((backing_object = object->backing_object) == NULL) goto unlock_return; VM_OBJECT_LOCK(backing_object); @@ -702,14 +765,7 @@ vm_pageout_scan(int pass) */ page_shortage = vm_paging_target() + addl_page_shortage_init; - /* - * Initialize our marker - */ - bzero(&marker, sizeof(marker)); - marker.flags = PG_FICTITIOUS | PG_MARKER; - marker.oflags = VPO_BUSY; - marker.queue = PQ_INACTIVE; - marker.wire_count = 1; + vm_pageout_init_marker(&marker, PQ_INACTIVE); /* * Start scanning the 
inactive queue for pages we can move to the @@ -747,7 +803,6 @@ rescan0: } next = TAILQ_NEXT(m, pageq); - object = m->object; /* * skip marker pages @@ -755,26 +810,39 @@ rescan0: if (m->flags & PG_MARKER) continue; + /* + * Lock the page. + */ + if (!vm_pageout_page_lock(m, &next)) { + vm_page_unlock(m); + addl_page_shortage++; + continue; + } + /* * A held page may be undergoing I/O, so skip it. */ - if (m->hold_count) { + if (m->hold_count || (object = m->object) == NULL) { + vm_page_unlock(m); vm_page_requeue(m); addl_page_shortage++; continue; } + /* * Don't mess with busy pages, keep in the front of the * queue, most likely are being paged out. */ if (!VM_OBJECT_TRYLOCK(object) && (!vm_pageout_fallback_object_lock(m, &next) || - m->hold_count != 0)) { + m->hold_count != 0)) { VM_OBJECT_UNLOCK(object); + vm_page_unlock(m); addl_page_shortage++; continue; } if (m->busy || (m->oflags & VPO_BUSY)) { + vm_page_unlock(m); VM_OBJECT_UNLOCK(object); addl_page_shortage++; continue; @@ -803,6 +871,7 @@ rescan0: vm_page_activate(m); VM_OBJECT_UNLOCK(object); m->act_count += (actcount + ACT_ADVANCE); + vm_page_unlock(m); continue; } @@ -818,6 +887,7 @@ rescan0: vm_page_activate(m); VM_OBJECT_UNLOCK(object); m->act_count += (actcount + ACT_ADVANCE + 1); + vm_page_unlock(m); continue; } @@ -903,6 +973,7 @@ rescan0: * Those objects are in a "rundown" state. */ if (!swap_pageouts_ok || (object->flags & OBJ_DEAD)) { + vm_page_unlock(m); VM_OBJECT_UNLOCK(object); vm_page_requeue(m); continue; @@ -942,6 +1013,8 @@ rescan0: * of time. 
*/ if (object->type == OBJT_VNODE) { + vm_page_unlock_queues(); + vm_page_unlock(m); vp = object->handle; if (vp->v_type == VREG && vn_start_write(vp, &mp, V_NOWAIT) != 0) { @@ -949,11 +1022,11 @@ rescan0: ++pageout_lock_miss; if (object->flags & OBJ_MIGHTBEDIRTY) vnodes_skipped++; + vm_page_lock_queues(); goto unlock_and_continue; } KASSERT(mp != NULL, ("vp %p with NULL v_mount", vp)); - vm_page_unlock_queues(); vm_object_reference_locked(object); VM_OBJECT_UNLOCK(object); vfslocked = VFS_LOCK_GIANT(vp->v_mount); @@ -968,6 +1041,7 @@ rescan0: goto unlock_and_continue; } VM_OBJECT_LOCK(object); + vm_page_lock(m); vm_page_lock_queues(); /* * The page might have been moved to another @@ -978,6 +1052,7 @@ rescan0: if (VM_PAGE_GETQUEUE(m) != PQ_INACTIVE || m->object != object || TAILQ_NEXT(m, pageq) != &marker) { + vm_page_unlock(m); if (object->flags & OBJ_MIGHTBEDIRTY) vnodes_skipped++; goto unlock_and_continue; @@ -990,6 +1065,7 @@ rescan0: * statistics are more correct if we don't. */ if (m->busy || (m->oflags & VPO_BUSY)) { + vm_page_unlock(m); goto unlock_and_continue; } @@ -998,12 +1074,14 @@ rescan0: * be undergoing I/O, so skip it */ if (m->hold_count) { + vm_page_unlock(m); vm_page_requeue(m); if (object->flags & OBJ_MIGHTBEDIRTY) vnodes_skipped++; goto unlock_and_continue; } } + vm_page_unlock(m); /* * If a page is dirty, then it is either being washed @@ -1015,11 +1093,14 @@ rescan0: * the (future) cleaned page. Otherwise we could wind * up laundering or cleaning too many pages. 
*/ + vm_page_unlock_queues(); if (vm_pageout_clean(m) != 0) { --page_shortage; --maxlaunder; } + vm_page_lock_queues(); unlock_and_continue: + vm_page_lock_assert(m, MA_NOTOWNED); VM_OBJECT_UNLOCK(object); if (mp != NULL) { vm_page_unlock_queues(); @@ -1033,8 +1114,10 @@ unlock_and_continue: next = TAILQ_NEXT(&marker, pageq); TAILQ_REMOVE(&vm_page_queues[PQ_INACTIVE].pl, &marker, pageq); + vm_page_lock_assert(m, MA_NOTOWNED); continue; } + vm_page_unlock(m); VM_OBJECT_UNLOCK(object); } @@ -1053,6 +1136,7 @@ unlock_and_continue: */ pcount = cnt.v_active_count; m = TAILQ_FIRST(&vm_page_queues[PQ_ACTIVE].pl); + mtx_assert(&vm_page_queue_mtx, MA_OWNED); while ((m != NULL) && (pcount-- > 0) && (page_shortage > 0)) { @@ -1065,9 +1149,16 @@ unlock_and_continue: m = next; continue; } + if (!vm_pageout_page_lock(m, &next) || + (object = m->object) == NULL) { + vm_page_unlock(m); + m = next; + continue; + } if (!VM_OBJECT_TRYLOCK(object) && !vm_pageout_fallback_object_lock(m, &next)) { VM_OBJECT_UNLOCK(object); + vm_page_unlock(m); m = next; continue; } @@ -1078,6 +1169,7 @@ unlock_and_continue: if ((m->busy != 0) || (m->oflags & VPO_BUSY) || (m->hold_count != 0)) { + vm_page_unlock(m); VM_OBJECT_UNLOCK(object); vm_page_requeue(m); m = next; @@ -1124,7 +1216,8 @@ unlock_and_continue: m->act_count == 0) { page_shortage--; if (object->ref_count == 0) { - pmap_remove_all(m); + KASSERT(!pmap_page_is_mapped(m), + ("vm_pageout_scan: page %p is mapped", m)); if (m->dirty == 0) vm_page_cache(m); else @@ -1136,6 +1229,7 @@ unlock_and_continue: vm_page_requeue(m); } } + vm_page_unlock(m); VM_OBJECT_UNLOCK(object); m = next; } @@ -1315,15 +1409,21 @@ vm_pageout_page_stats() ("vm_pageout_page_stats: page %p isn't active", m)); next = TAILQ_NEXT(m, pageq); - object = m->object; - if ((m->flags & PG_MARKER) != 0) { m = next; continue; } + vm_page_lock_assert(m, MA_NOTOWNED); + if (!vm_pageout_page_lock(m, &next) || + (object = m->object) == NULL) { + vm_page_unlock(m); + m = next; + 
continue; + } if (!VM_OBJECT_TRYLOCK(object) && !vm_pageout_fallback_object_lock(m, &next)) { VM_OBJECT_UNLOCK(object); + vm_page_unlock(m); m = next; continue; } @@ -1334,6 +1434,7 @@ vm_pageout_page_stats() if ((m->busy != 0) || (m->oflags & VPO_BUSY) || (m->hold_count != 0)) { + vm_page_unlock(m); VM_OBJECT_UNLOCK(object); vm_page_requeue(m); m = next; @@ -1370,6 +1471,7 @@ vm_pageout_page_stats() vm_page_requeue(m); } } + vm_page_unlock(m); VM_OBJECT_UNLOCK(object); m = next; } diff --git a/sys/vm/vm_pageout.h b/sys/vm/vm_pageout.h index 15ca5708a0c..4759dd7c0ea 100644 --- a/sys/vm/vm_pageout.h +++ b/sys/vm/vm_pageout.h @@ -104,5 +104,6 @@ extern void vm_waitpfault(void); boolean_t vm_pageout_fallback_object_lock(vm_page_t, vm_page_t *); int vm_pageout_flush(vm_page_t *, int, int); void vm_pageout_oom(int shortage); +boolean_t vm_pageout_page_lock(vm_page_t, vm_page_t *); #endif #endif /* _VM_VM_PAGEOUT_H_ */ diff --git a/sys/vm/vm_param.h b/sys/vm/vm_param.h index 2ff26036353..c404989cd25 100644 --- a/sys/vm/vm_param.h +++ b/sys/vm/vm_param.h @@ -126,6 +126,14 @@ struct xswdev { #define KERN_NOT_RECEIVER 7 #define KERN_NO_ACCESS 8 +#ifndef PA_LOCK_COUNT +#ifdef SMP +#define PA_LOCK_COUNT 32 +#else +#define PA_LOCK_COUNT 1 +#endif /* !SMP */ +#endif /* !PA_LOCK_COUNT */ + #ifndef ASSEMBLER #ifdef _KERNEL #define num_pages(x) \ diff --git a/sys/vm/vnode_pager.c b/sys/vm/vnode_pager.c index aedc794c519..cfe9f994e0e 100644 --- a/sys/vm/vnode_pager.c +++ b/sys/vm/vnode_pager.c @@ -429,9 +429,11 @@ vnode_pager_setsize(vp, nsize) * bits. This would prevent bogus_page * replacement from working properly. 
*/ + vm_page_lock(m); vm_page_lock_queues(); vm_page_clear_dirty(m, base, PAGE_SIZE - base); vm_page_unlock_queues(); + vm_page_unlock(m); } else if ((nsize & PAGE_MASK) && __predict_false(object->cache != NULL)) { vm_page_cache_free(object, OFF_TO_IDX(nsize), @@ -719,11 +721,13 @@ vnode_pager_generic_getpages(vp, m, bytecount, reqpage) error = VOP_BMAP(vp, foff / bsize, &bo, &reqblock, NULL, NULL); if (error == EOPNOTSUPP) { VM_OBJECT_LOCK(object); - vm_page_lock_queues(); + for (i = 0; i < count; i++) - if (i != reqpage) + if (i != reqpage) { + vm_page_lock(m[i]); vm_page_free(m[i]); - vm_page_unlock_queues(); + vm_page_unlock(m[i]); + } PCPU_INC(cnt.v_vnodein); PCPU_INC(cnt.v_vnodepgsin); error = vnode_pager_input_old(object, m[reqpage]); @@ -731,11 +735,12 @@ vnode_pager_generic_getpages(vp, m, bytecount, reqpage) return (error); } else if (error != 0) { VM_OBJECT_LOCK(object); - vm_page_lock_queues(); for (i = 0; i < count; i++) - if (i != reqpage) + if (i != reqpage) { + vm_page_lock(m[i]); vm_page_free(m[i]); - vm_page_unlock_queues(); + vm_page_unlock(m[i]); + } VM_OBJECT_UNLOCK(object); return (VM_PAGER_ERROR); @@ -747,11 +752,12 @@ vnode_pager_generic_getpages(vp, m, bytecount, reqpage) } else if ((PAGE_SIZE / bsize) > 1 && (vp->v_mount->mnt_stat.f_type != nfs_mount_type)) { VM_OBJECT_LOCK(object); - vm_page_lock_queues(); for (i = 0; i < count; i++) - if (i != reqpage) + if (i != reqpage) { + vm_page_lock(m[i]); vm_page_free(m[i]); - vm_page_unlock_queues(); + vm_page_unlock(m[i]); + } VM_OBJECT_UNLOCK(object); PCPU_INC(cnt.v_vnodein); PCPU_INC(cnt.v_vnodepgsin); @@ -765,11 +771,12 @@ vnode_pager_generic_getpages(vp, m, bytecount, reqpage) */ VM_OBJECT_LOCK(object); if (m[reqpage]->valid == VM_PAGE_BITS_ALL) { - vm_page_lock_queues(); for (i = 0; i < count; i++) - if (i != reqpage) + if (i != reqpage) { + vm_page_lock(m[i]); vm_page_free(m[i]); - vm_page_unlock_queues(); + vm_page_unlock(m[i]); + } VM_OBJECT_UNLOCK(object); return VM_PAGER_OK; } else if 
(reqblock == -1) { @@ -777,11 +784,12 @@ vnode_pager_generic_getpages(vp, m, bytecount, reqpage) KASSERT(m[reqpage]->dirty == 0, ("vnode_pager_generic_getpages: page %p is dirty", m)); m[reqpage]->valid = VM_PAGE_BITS_ALL; - vm_page_lock_queues(); for (i = 0; i < count; i++) - if (i != reqpage) + if (i != reqpage) { + vm_page_lock(m[i]); vm_page_free(m[i]); - vm_page_unlock_queues(); + vm_page_unlock(m[i]); + } VM_OBJECT_UNLOCK(object); return (VM_PAGER_OK); } @@ -800,11 +808,12 @@ vnode_pager_generic_getpages(vp, m, bytecount, reqpage) if (vnode_pager_addr(vp, IDX_TO_OFF(m[i]->pindex), &firstaddr, &runpg) != 0) { VM_OBJECT_LOCK(object); - vm_page_lock_queues(); for (; i < count; i++) - if (i != reqpage) + if (i != reqpage) { + vm_page_lock(m[i]); vm_page_free(m[i]); - vm_page_unlock_queues(); + vm_page_unlock(m[i]); + } VM_OBJECT_UNLOCK(object); return (VM_PAGER_ERROR); } @@ -818,9 +827,9 @@ vnode_pager_generic_getpages(vp, m, bytecount, reqpage) (object->un_pager.vnp.vnp_size >> 32), (uintmax_t)object->un_pager.vnp.vnp_size); } - vm_page_lock_queues(); + vm_page_lock(m[i]); vm_page_free(m[i]); - vm_page_unlock_queues(); + vm_page_unlock(m[i]); VM_OBJECT_UNLOCK(object); runend = i + 1; first = runend; @@ -829,18 +838,20 @@ vnode_pager_generic_getpages(vp, m, bytecount, reqpage) runend = i + runpg; if (runend <= reqpage) { VM_OBJECT_LOCK(object); - vm_page_lock_queues(); - for (j = i; j < runend; j++) + for (j = i; j < runend; j++) { + vm_page_lock(m[j]); vm_page_free(m[j]); - vm_page_unlock_queues(); + vm_page_unlock(m[j]); + } VM_OBJECT_UNLOCK(object); } else { if (runpg < (count - first)) { VM_OBJECT_LOCK(object); - vm_page_lock_queues(); - for (i = first + runpg; i < count; i++) + for (i = first + runpg; i < count; i++) { + vm_page_lock(m[i]); vm_page_free(m[i]); - vm_page_unlock_queues(); + vm_page_unlock(m[i]); + } VM_OBJECT_UNLOCK(object); count = first + runpg; } @@ -931,7 +942,6 @@ vnode_pager_generic_getpages(vp, m, bytecount, reqpage) relpbuf(bp, 
&vnode_pbuf_freecnt); VM_OBJECT_LOCK(object); - vm_page_lock_queues(); for (i = 0, tfoff = foff; i < count; i++, tfoff = nextoff) { vm_page_t mt; @@ -980,17 +990,23 @@ vnode_pager_generic_getpages(vp, m, bytecount, reqpage) * now tell them that it is ok to use */ if (!error) { - if (mt->oflags & VPO_WANTED) + if (mt->oflags & VPO_WANTED) { + vm_page_lock(mt); vm_page_activate(mt); - else + vm_page_unlock(mt); + } else { + vm_page_lock(mt); vm_page_deactivate(mt); + vm_page_unlock(mt); + } vm_page_wakeup(mt); } else { + vm_page_lock(mt); vm_page_free(mt); + vm_page_unlock(mt); } } } - vm_page_unlock_queues(); VM_OBJECT_UNLOCK(object); if (error) { printf("vnode_pager_getpages: I/O read error\n"); @@ -1113,10 +1129,12 @@ vnode_pager_generic_putpages(vp, m, bytecount, flags, rtvals) maxsize = object->un_pager.vnp.vnp_size - poffset; ncount = btoc(maxsize); if ((pgoff = (int)maxsize & PAGE_MASK) != 0) { + vm_page_lock(m[ncount - 1]); vm_page_lock_queues(); vm_page_clear_dirty(m[ncount - 1], pgoff, PAGE_SIZE - pgoff); vm_page_unlock_queues(); + vm_page_unlock(m[ncount - 1]); } } else { maxsize = 0; diff --git a/sys/x86/isa/clock.c b/sys/x86/isa/clock.c index 83026d6c258..36dc36e68c0 100644 --- a/sys/x86/isa/clock.c +++ b/sys/x86/isa/clock.c @@ -186,8 +186,8 @@ clkintr(struct trapframe *frame) * timers. */ int cpu = PCPU_GET(cpuid); - if (lapic_cyclic_clock_func[cpu] != NULL) - (*lapic_cyclic_clock_func[cpu])(frame); + if (cyclic_clock_func[cpu] != NULL) + (*cyclic_clock_func[cpu])(frame); #endif if (using_atrtc_timer) {