diff --git a/sys/amd64/amd64/bios.c b/sys/amd64/amd64/bios.c index 747a3a14394..93574385f5e 100644 --- a/sys/amd64/amd64/bios.c +++ b/sys/amd64/amd64/bios.c @@ -384,12 +384,16 @@ bios16(struct bios_args *args, char *fmt, ...) args->seg.code32.limit = 0xffff; ptd = (pd_entry_t *)rcr3(); - if (ptd == (u_int *)IdlePTD) { +#ifdef PAE + if (ptd == IdlePDPT) { +#else + if (ptd == IdlePTD) { +#endif /* * no page table, so create one and install it. */ pte = (pt_entry_t *)malloc(PAGE_SIZE, M_TEMP, M_WAITOK); - ptd = (pd_entry_t *)((u_int)ptd + KERNBASE); + ptd = (pd_entry_t *)((u_int)IdlePTD + KERNBASE); *ptd = vtophys(pte) | PG_RW | PG_V; } else { /* diff --git a/sys/amd64/amd64/locore.S b/sys/amd64/amd64/locore.S index d1bac065890..12e2e19a257 100644 --- a/sys/amd64/amd64/locore.S +++ b/sys/amd64/amd64/locore.S @@ -138,6 +138,11 @@ SMPpt: .long 0 /* relocated version */ .globl IdlePTD IdlePTD: .long 0 /* phys addr of kernel PTD */ +#ifdef PAE + .globl IdlePDPT +IdlePDPT: .long 0 /* phys addr of kernel PDPT */ +#endif + #ifdef SMP .globl KPTphys #endif @@ -323,8 +328,16 @@ NON_GPROF_ENTRY(btext) 1: /* Now enable paging */ +#ifdef PAE + movl R(IdlePDPT), %eax + movl %eax, %cr3 + movl %cr4, %eax + orl $CR4_PAE, %eax + movl %eax, %cr4 +#else movl R(IdlePTD), %eax movl %eax,%cr3 /* load ptd addr into mmu */ +#endif movl %cr0,%eax /* get control word */ orl $CR0_PE|CR0_PG,%eax /* enable paging */ movl %eax,%cr0 /* and let's page NOW! 
*/ @@ -341,7 +354,11 @@ begin: xorl %ebp,%ebp /* mark end of frames */ +#ifdef PAE + movl IdlePDPT,%esi +#else movl IdlePTD,%esi +#endif movl %esi,(KSTACK_PAGES*PAGE_SIZE-PCB_SIZE+PCB_CR3)(%eax) pushl physfree /* value of first for init386(first) */ @@ -749,6 +766,11 @@ no_kernend: movl %esi,R(KPTphys) /* Allocate Page Table Directory */ +#ifdef PAE + /* XXX only need 32 bytes (easier for now) */ + ALLOCPAGES(1) + movl %esi,R(IdlePDPT) +#endif ALLOCPAGES(NPGPTD) movl %esi,R(IdlePTD) @@ -804,6 +826,12 @@ no_kernend: fillkptphys(%edx) /* Map page directory. */ +#ifdef PAE + movl R(IdlePDPT), %eax + movl $1, %ecx + fillkptphys($PG_RW) +#endif + movl R(IdlePTD), %eax movl $NPGPTD, %ecx fillkptphys($PG_RW) @@ -889,4 +917,11 @@ no_kernend: movl $NPGPTD,%ecx fillkpt(R(IdlePTD), $PG_RW) +#ifdef PAE + movl R(IdlePTD), %eax + xorl %ebx, %ebx + movl $NPGPTD, %ecx + fillkpt(R(IdlePDPT), $0x0) +#endif + ret diff --git a/sys/amd64/amd64/locore.s b/sys/amd64/amd64/locore.s index d1bac065890..12e2e19a257 100644 --- a/sys/amd64/amd64/locore.s +++ b/sys/amd64/amd64/locore.s @@ -138,6 +138,11 @@ SMPpt: .long 0 /* relocated version */ .globl IdlePTD IdlePTD: .long 0 /* phys addr of kernel PTD */ +#ifdef PAE + .globl IdlePDPT +IdlePDPT: .long 0 /* phys addr of kernel PDPT */ +#endif + #ifdef SMP .globl KPTphys #endif @@ -323,8 +328,16 @@ NON_GPROF_ENTRY(btext) 1: /* Now enable paging */ +#ifdef PAE + movl R(IdlePDPT), %eax + movl %eax, %cr3 + movl %cr4, %eax + orl $CR4_PAE, %eax + movl %eax, %cr4 +#else movl R(IdlePTD), %eax movl %eax,%cr3 /* load ptd addr into mmu */ +#endif movl %cr0,%eax /* get control word */ orl $CR0_PE|CR0_PG,%eax /* enable paging */ movl %eax,%cr0 /* and let's page NOW! 
*/ @@ -341,7 +354,11 @@ begin: xorl %ebp,%ebp /* mark end of frames */ +#ifdef PAE + movl IdlePDPT,%esi +#else movl IdlePTD,%esi +#endif movl %esi,(KSTACK_PAGES*PAGE_SIZE-PCB_SIZE+PCB_CR3)(%eax) pushl physfree /* value of first for init386(first) */ @@ -749,6 +766,11 @@ no_kernend: movl %esi,R(KPTphys) /* Allocate Page Table Directory */ +#ifdef PAE + /* XXX only need 32 bytes (easier for now) */ + ALLOCPAGES(1) + movl %esi,R(IdlePDPT) +#endif ALLOCPAGES(NPGPTD) movl %esi,R(IdlePTD) @@ -804,6 +826,12 @@ no_kernend: fillkptphys(%edx) /* Map page directory. */ +#ifdef PAE + movl R(IdlePDPT), %eax + movl $1, %ecx + fillkptphys($PG_RW) +#endif + movl R(IdlePTD), %eax movl $NPGPTD, %ecx fillkptphys($PG_RW) @@ -889,4 +917,11 @@ no_kernend: movl $NPGPTD,%ecx fillkpt(R(IdlePTD), $PG_RW) +#ifdef PAE + movl R(IdlePTD), %eax + xorl %ebx, %ebx + movl $NPGPTD, %ecx + fillkpt(R(IdlePDPT), $0x0) +#endif + ret diff --git a/sys/amd64/amd64/machdep.c b/sys/amd64/amd64/machdep.c index 80b941e2c60..ac14f721efb 100644 --- a/sys/amd64/amd64/machdep.c +++ b/sys/amd64/amd64/machdep.c @@ -1578,11 +1578,13 @@ int15e820: if (smap->length == 0) goto next_run; +#ifndef PAE if (smap->base >= 0xffffffff) { printf("%uK of memory above 4GB ignored\n", (u_int)(smap->length / 1024)); goto next_run; } +#endif for (i = 0; i <= physmap_idx; i += 2) { if (smap->base < physmap[i + 1]) { @@ -2071,7 +2073,11 @@ init386(first) dblfault_tss.tss_esp2 = (int)&dblfault_stack[sizeof(dblfault_stack)]; dblfault_tss.tss_ss = dblfault_tss.tss_ss0 = dblfault_tss.tss_ss1 = dblfault_tss.tss_ss2 = GSEL(GDATA_SEL, SEL_KPL); +#ifdef PAE + dblfault_tss.tss_cr3 = (int)IdlePDPT; +#else dblfault_tss.tss_cr3 = (int)IdlePTD; +#endif dblfault_tss.tss_eip = (int)dblfault_handler; dblfault_tss.tss_eflags = PSL_KERNEL; dblfault_tss.tss_ds = dblfault_tss.tss_es = @@ -2115,7 +2121,11 @@ init386(first) /* setup proc 0's pcb */ thread0.td_pcb->pcb_flags = 0; /* XXXKSE */ +#ifdef PAE + thread0.td_pcb->pcb_cr3 = (int)IdlePDPT; +#else 
thread0.td_pcb->pcb_cr3 = (int)IdlePTD; +#endif thread0.td_pcb->pcb_ext = 0; thread0.td_frame = &proc0_tf; } diff --git a/sys/amd64/amd64/mpboot.S b/sys/amd64/amd64/mpboot.S index 5c26a7393d2..e2450073a3a 100644 --- a/sys/amd64/amd64/mpboot.S +++ b/sys/amd64/amd64/mpboot.S @@ -40,6 +40,8 @@ #include "assym.s" +#define R(x) ((x)-KERNBASE) + /* * this code MUST be enabled here and in mp_machdep.c * it follows the very early stages of AP boot by placing values in CMOS ram. @@ -74,8 +76,16 @@ NON_GPROF_ENTRY(MPentry) CHECKPOINT(0x36, 3) /* Now enable paging mode */ - movl IdlePTD-KERNBASE, %eax +#ifdef PAE + movl R(IdlePDPT), %eax + movl %eax, %cr3 + movl %cr4, %eax + orl $CR4_PAE, %eax + movl %eax, %cr4 +#else + movl R(IdlePTD), %eax movl %eax,%cr3 +#endif movl %cr0,%eax orl $CR0_PE|CR0_PG,%eax /* enable paging */ movl %eax,%cr0 /* let the games begin! */ diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c index 059c0076534..76b45b4219f 100644 --- a/sys/amd64/amd64/pmap.c +++ b/sys/amd64/amd64/pmap.c @@ -197,6 +197,10 @@ static int nkpt; vm_offset_t kernel_vm_end; extern u_int32_t KERNend; +#ifdef PAE +static uma_zone_t pdptzone; +#endif + /* * Data for the pv entry allocation mechanism */ @@ -248,7 +252,10 @@ static vm_page_t _pmap_allocpte(pmap_t pmap, unsigned ptepindex); static vm_page_t pmap_page_lookup(vm_object_t object, vm_pindex_t pindex); static int pmap_unuse_pt(pmap_t, vm_offset_t, vm_page_t); static vm_offset_t pmap_kmem_choose(vm_offset_t addr); -static void *pmap_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait); +static void *pmap_pv_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait); +#ifdef PAE +static void *pmap_pdpt_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait); +#endif static pd_entry_t pdir4mb; @@ -323,6 +330,9 @@ pmap_bootstrap(firstaddr, loadaddr) * Initialize the kernel pmap (which is statically allocated). 
*/ kernel_pmap->pm_pdir = (pd_entry_t *) (KERNBASE + (u_int)IdlePTD); +#ifdef PAE + kernel_pmap->pm_pdpt = (pdpt_entry_t *) (KERNBASE + (u_int)IdlePDPT); +#endif kernel_pmap->pm_active = -1; /* don't allow deactivation */ TAILQ_INIT(&kernel_pmap->pm_pvlist); LIST_INIT(&allpmaps); @@ -504,12 +514,21 @@ pmap_set_opt(void) } static void * -pmap_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait) +pmap_pv_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait) { *flags = UMA_SLAB_PRIV; return (void *)kmem_alloc(kernel_map, bytes); } +#ifdef PAE +static void * +pmap_pdpt_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait) +{ + *flags = UMA_SLAB_PRIV; + return (contigmalloc(PAGE_SIZE, NULL, 0, 0x0ULL, 0xffffffffULL, 1, 0)); +} +#endif + /* * Initialize the pmap module. * Called by vm_init, to initialize any structures that the pmap @@ -545,9 +564,15 @@ pmap_init(phys_start, phys_end) initial_pvs = MINPV; pvzone = uma_zcreate("PV ENTRY", sizeof (struct pv_entry), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM); - uma_zone_set_allocf(pvzone, pmap_allocf); + uma_zone_set_allocf(pvzone, pmap_pv_allocf); uma_prealloc(pvzone, initial_pvs); +#ifdef PAE + pdptzone = uma_zcreate("PDPT", NPGPTD * sizeof(pdpt_entry_t), NULL, + NULL, NULL, NULL, (NPGPTD * sizeof(pdpt_entry_t)) - 1, 0); + uma_zone_set_allocf(pdptzone, pmap_pdpt_allocf); +#endif + /* * Now it is safe to enable pv_table recording. */ @@ -1241,6 +1266,9 @@ pmap_pinit0(pmap) { pmap->pm_pdir = (pd_entry_t *)(KERNBASE + (vm_offset_t)IdlePTD); +#ifdef PAE + pmap->pm_pdpt = (pdpt_entry_t *)(KERNBASE + (vm_offset_t)IdlePDPT); +#endif pmap->pm_active = 0; TAILQ_INIT(&pmap->pm_pvlist); bzero(&pmap->pm_stats, sizeof pmap->pm_stats); @@ -1265,9 +1293,18 @@ pmap_pinit(pmap) * No need to allocate page table space yet but we do need a valid * page directory table. 
*/ - if (pmap->pm_pdir == NULL) + if (pmap->pm_pdir == NULL) { pmap->pm_pdir = (pd_entry_t *)kmem_alloc_pageable(kernel_map, NBPTD); +#ifdef PAE + pmap->pm_pdpt = uma_zalloc(pdptzone, M_WAITOK | M_ZERO); + KASSERT(((vm_offset_t)pmap->pm_pdpt & + ((NPGPTD * sizeof(pdpt_entry_t)) - 1)) == 0, + ("pmap_pinit: pdpt misaligned")); + KASSERT(pmap_kextract((vm_offset_t)pmap->pm_pdpt) < (4ULL<<30), + ("pmap_pinit: pdpt above 4g")); +#endif + } /* * allocate object for the ptes @@ -1310,6 +1347,9 @@ pmap_pinit(pmap) for (i = 0; i < NPGPTD; i++) { pa = VM_PAGE_TO_PHYS(ptdpg[i]); pmap->pm_pdir[PTDPTDI + i] = pa | PG_V | PG_RW | PG_A | PG_M; +#ifdef PAE + pmap->pm_pdpt[i] = pa | PG_V; +#endif } pmap->pm_active = 0; @@ -1485,6 +1525,10 @@ pmap_release(pmap_t pmap) vm_page_lock_queues(); for (i = 0; i < NPGPTD; i++) { m = TAILQ_FIRST(&object->memq); +#ifdef PAE + KASSERT(VM_PAGE_TO_PHYS(m) == (pmap->pm_pdpt[i] & PG_FRAME), + ("pmap_release: got wrong ptd page")); +#endif m->wire_count--; atomic_subtract_int(&cnt.v_wire_count, 1); vm_page_busy(m); @@ -1680,7 +1724,7 @@ pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t va) pt_entry_t oldpte; vm_page_t m; - oldpte = atomic_readandclear_int(ptq); + oldpte = pte_load_clear(ptq); if (oldpte & PG_W) pmap->pm_stats.wired_count -= 1; /* @@ -1846,7 +1890,7 @@ pmap_remove_all(vm_page_t m) while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { pv->pv_pmap->pm_stats.resident_count--; pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va); - tpte = atomic_readandclear_int(pte); + tpte = pte_load_clear(pte); if (tpte & PG_W) pv->pv_pmap->pm_stats.wired_count--; if (tpte & PG_A) @@ -3283,7 +3327,11 @@ pmap_activate(struct thread *td) #else pmap->pm_active |= 1; #endif +#ifdef PAE + cr3 = vtophys(pmap->pm_pdpt); +#else cr3 = vtophys(pmap->pm_pdir); +#endif /* XXXKSE this is wrong. 
* pmap_activate is for the current thread on the current cpu */ diff --git a/sys/amd64/amd64/vm_machdep.c b/sys/amd64/amd64/vm_machdep.c index fdd95688827..8b395b2a159 100644 --- a/sys/amd64/amd64/vm_machdep.c +++ b/sys/amd64/amd64/vm_machdep.c @@ -170,7 +170,11 @@ cpu_fork(td1, p2, td2, flags) * Set registers for trampoline to user mode. Leave space for the * return address on stack. These are the kernel mode register values. */ +#ifdef PAE + pcb2->pcb_cr3 = vtophys(vmspace_pmap(p2->p_vmspace)->pm_pdpt); +#else pcb2->pcb_cr3 = vtophys(vmspace_pmap(p2->p_vmspace)->pm_pdir); +#endif pcb2->pcb_edi = 0; pcb2->pcb_esi = (int)fork_return; /* fork_trampoline argument */ pcb2->pcb_ebp = 0; @@ -342,7 +346,11 @@ cpu_set_upcall(struct thread *td, void *pcb) * Set registers for trampoline to user mode. Leave space for the * return address on stack. These are the kernel mode register values. */ +#ifdef PAE + pcb2->pcb_cr3 = vtophys(vmspace_pmap(td->td_proc->p_vmspace)->pm_pdpt); +#else pcb2->pcb_cr3 = vtophys(vmspace_pmap(td->td_proc->p_vmspace)->pm_pdir); +#endif pcb2->pcb_edi = 0; pcb2->pcb_esi = (int)fork_return; /* trampoline arg */ pcb2->pcb_ebp = 0; diff --git a/sys/amd64/include/bus_amd64.h b/sys/amd64/include/bus_amd64.h index 20d21ec18da..e1f6e8e3a9a 100644 --- a/sys/amd64/include/bus_amd64.h +++ b/sys/amd64/include/bus_amd64.h @@ -92,15 +92,23 @@ /* * Bus address and size types */ -typedef u_int bus_addr_t; -typedef u_int bus_size_t; +#ifdef PAE +typedef uint64_t bus_addr_t; +#else +typedef uint32_t bus_addr_t; +#endif +typedef uint32_t bus_size_t; #define BUS_SPACE_MAXSIZE_24BIT 0xFFFFFF #define BUS_SPACE_MAXSIZE_32BIT 0xFFFFFFFF #define BUS_SPACE_MAXSIZE 0xFFFFFFFF #define BUS_SPACE_MAXADDR_24BIT 0xFFFFFF #define BUS_SPACE_MAXADDR_32BIT 0xFFFFFFFF +#ifdef PAE +#define BUS_SPACE_MAXADDR 0xFFFFFFFFFFFFFFFFULL +#else #define BUS_SPACE_MAXADDR 0xFFFFFFFF +#endif #define BUS_SPACE_UNRESTRICTED (~0) diff --git a/sys/amd64/include/bus_at386.h 
b/sys/amd64/include/bus_at386.h index 20d21ec18da..e1f6e8e3a9a 100644 --- a/sys/amd64/include/bus_at386.h +++ b/sys/amd64/include/bus_at386.h @@ -92,15 +92,23 @@ /* * Bus address and size types */ -typedef u_int bus_addr_t; -typedef u_int bus_size_t; +#ifdef PAE +typedef uint64_t bus_addr_t; +#else +typedef uint32_t bus_addr_t; +#endif +typedef uint32_t bus_size_t; #define BUS_SPACE_MAXSIZE_24BIT 0xFFFFFF #define BUS_SPACE_MAXSIZE_32BIT 0xFFFFFFFF #define BUS_SPACE_MAXSIZE 0xFFFFFFFF #define BUS_SPACE_MAXADDR_24BIT 0xFFFFFF #define BUS_SPACE_MAXADDR_32BIT 0xFFFFFFFF +#ifdef PAE +#define BUS_SPACE_MAXADDR 0xFFFFFFFFFFFFFFFFULL +#else #define BUS_SPACE_MAXADDR 0xFFFFFFFF +#endif #define BUS_SPACE_UNRESTRICTED (~0) diff --git a/sys/amd64/include/pmap.h b/sys/amd64/include/pmap.h index d7f0f669e8e..8fcf2cbb3c9 100644 --- a/sys/amd64/include/pmap.h +++ b/sys/amd64/include/pmap.h @@ -88,8 +88,12 @@ * This **MUST** be a multiple of 4 (eg: 252, 256, 260, etc). */ #ifndef KVA_PAGES +#ifdef PAE +#define KVA_PAGES 512 +#else #define KVA_PAGES 256 #endif +#endif /* * Pte related macros @@ -97,8 +101,12 @@ #define VADDR(pdi, pti) ((vm_offset_t)(((pdi)< -typedef u_int32_t pd_entry_t; -typedef u_int32_t pt_entry_t; +#ifdef PAE + +typedef uint64_t pdpt_entry_t; +typedef uint64_t pd_entry_t; +typedef uint64_t pt_entry_t; + +#define PTESHIFT (3) +#define PDESHIFT (3) + +#else + +typedef uint32_t pd_entry_t; +typedef uint32_t pt_entry_t; #define PTESHIFT (2) #define PDESHIFT (2) +#endif + /* * Address of current and alternate address space page table maps * and directories. 
@@ -149,6 +170,9 @@ extern pt_entry_t PTmap[], APTmap[]; extern pd_entry_t PTD[], APTD[]; extern pd_entry_t PTDpde[], APTDpde[]; +#ifdef PAE +extern pdpt_entry_t *IdlePDPT; +#endif extern pd_entry_t *IdlePTD; /* physical address of "Idle" state directory */ #endif @@ -183,6 +207,30 @@ pmap_kextract(vm_offset_t va) } #define vtophys(va) pmap_kextract(((vm_offset_t) (va))) + +#ifdef PAE + +static __inline pt_entry_t +pte_load_clear(pt_entry_t *pte) +{ + pt_entry_t r; + + r = *pte; + __asm __volatile( + "1:\n" + "\tcmpxchg8b %1\n" + "\tjnz 1b" + : "+A" (r) + : "m" (*pte), "b" (0), "c" (0)); + return (r); +} + +#else + +#define pte_load_clear(pte) atomic_readandclear_int(pte) + +#endif + #endif /* @@ -202,6 +250,10 @@ struct pmap { int pm_active; /* active on cpus */ struct pmap_statistics pm_stats; /* pmap statistics */ LIST_ENTRY(pmap) pm_list; /* List of all pmaps */ +#ifdef PAE + pdpt_entry_t *pm_pdpt; /* KVA of page directory pointer + table */ +#endif }; #define pmap_page_is_mapped(m) (!TAILQ_EMPTY(&(m)->md.pv_list)) diff --git a/sys/conf/options.i386 b/sys/conf/options.i386 index 81fd7db8da9..868cfbdb654 100644 --- a/sys/conf/options.i386 +++ b/sys/conf/options.i386 @@ -33,6 +33,9 @@ APIC_IO opt_global.h # Change KVM size. Changes things all over the kernel. KVA_PAGES opt_global.h +# Physical address extensions and support for >4G ram. As above. +PAE opt_global.h + CLK_CALIBRATION_LOOP opt_clock.h CLK_USE_I8254_CALIBRATION opt_clock.h CLK_USE_TSC_CALIBRATION opt_clock.h diff --git a/sys/i386/i386/bios.c b/sys/i386/i386/bios.c index 747a3a14394..93574385f5e 100644 --- a/sys/i386/i386/bios.c +++ b/sys/i386/i386/bios.c @@ -384,12 +384,16 @@ bios16(struct bios_args *args, char *fmt, ...) args->seg.code32.limit = 0xffff; ptd = (pd_entry_t *)rcr3(); - if (ptd == (u_int *)IdlePTD) { +#ifdef PAE + if (ptd == IdlePDPT) { +#else + if (ptd == IdlePTD) { +#endif /* * no page table, so create one and install it. 
*/ pte = (pt_entry_t *)malloc(PAGE_SIZE, M_TEMP, M_WAITOK); - ptd = (pd_entry_t *)((u_int)ptd + KERNBASE); + ptd = (pd_entry_t *)((u_int)IdlePTD + KERNBASE); *ptd = vtophys(pte) | PG_RW | PG_V; } else { /* diff --git a/sys/i386/i386/locore.s b/sys/i386/i386/locore.s index d1bac065890..12e2e19a257 100644 --- a/sys/i386/i386/locore.s +++ b/sys/i386/i386/locore.s @@ -138,6 +138,11 @@ SMPpt: .long 0 /* relocated version */ .globl IdlePTD IdlePTD: .long 0 /* phys addr of kernel PTD */ +#ifdef PAE + .globl IdlePDPT +IdlePDPT: .long 0 /* phys addr of kernel PDPT */ +#endif + #ifdef SMP .globl KPTphys #endif @@ -323,8 +328,16 @@ NON_GPROF_ENTRY(btext) 1: /* Now enable paging */ +#ifdef PAE + movl R(IdlePDPT), %eax + movl %eax, %cr3 + movl %cr4, %eax + orl $CR4_PAE, %eax + movl %eax, %cr4 +#else movl R(IdlePTD), %eax movl %eax,%cr3 /* load ptd addr into mmu */ +#endif movl %cr0,%eax /* get control word */ orl $CR0_PE|CR0_PG,%eax /* enable paging */ movl %eax,%cr0 /* and let's page NOW! */ @@ -341,7 +354,11 @@ begin: xorl %ebp,%ebp /* mark end of frames */ +#ifdef PAE + movl IdlePDPT,%esi +#else movl IdlePTD,%esi +#endif movl %esi,(KSTACK_PAGES*PAGE_SIZE-PCB_SIZE+PCB_CR3)(%eax) pushl physfree /* value of first for init386(first) */ @@ -749,6 +766,11 @@ no_kernend: movl %esi,R(KPTphys) /* Allocate Page Table Directory */ +#ifdef PAE + /* XXX only need 32 bytes (easier for now) */ + ALLOCPAGES(1) + movl %esi,R(IdlePDPT) +#endif ALLOCPAGES(NPGPTD) movl %esi,R(IdlePTD) @@ -804,6 +826,12 @@ no_kernend: fillkptphys(%edx) /* Map page directory. 
*/ +#ifdef PAE + movl R(IdlePDPT), %eax + movl $1, %ecx + fillkptphys($PG_RW) +#endif + movl R(IdlePTD), %eax movl $NPGPTD, %ecx fillkptphys($PG_RW) @@ -889,4 +917,11 @@ no_kernend: movl $NPGPTD,%ecx fillkpt(R(IdlePTD), $PG_RW) +#ifdef PAE + movl R(IdlePTD), %eax + xorl %ebx, %ebx + movl $NPGPTD, %ecx + fillkpt(R(IdlePDPT), $0x0) +#endif + ret diff --git a/sys/i386/i386/machdep.c b/sys/i386/i386/machdep.c index 80b941e2c60..ac14f721efb 100644 --- a/sys/i386/i386/machdep.c +++ b/sys/i386/i386/machdep.c @@ -1578,11 +1578,13 @@ int15e820: if (smap->length == 0) goto next_run; +#ifndef PAE if (smap->base >= 0xffffffff) { printf("%uK of memory above 4GB ignored\n", (u_int)(smap->length / 1024)); goto next_run; } +#endif for (i = 0; i <= physmap_idx; i += 2) { if (smap->base < physmap[i + 1]) { @@ -2071,7 +2073,11 @@ init386(first) dblfault_tss.tss_esp2 = (int)&dblfault_stack[sizeof(dblfault_stack)]; dblfault_tss.tss_ss = dblfault_tss.tss_ss0 = dblfault_tss.tss_ss1 = dblfault_tss.tss_ss2 = GSEL(GDATA_SEL, SEL_KPL); +#ifdef PAE + dblfault_tss.tss_cr3 = (int)IdlePDPT; +#else dblfault_tss.tss_cr3 = (int)IdlePTD; +#endif dblfault_tss.tss_eip = (int)dblfault_handler; dblfault_tss.tss_eflags = PSL_KERNEL; dblfault_tss.tss_ds = dblfault_tss.tss_es = @@ -2115,7 +2121,11 @@ init386(first) /* setup proc 0's pcb */ thread0.td_pcb->pcb_flags = 0; /* XXXKSE */ +#ifdef PAE + thread0.td_pcb->pcb_cr3 = (int)IdlePDPT; +#else thread0.td_pcb->pcb_cr3 = (int)IdlePTD; +#endif thread0.td_pcb->pcb_ext = 0; thread0.td_frame = &proc0_tf; } diff --git a/sys/i386/i386/mpboot.s b/sys/i386/i386/mpboot.s index 5c26a7393d2..e2450073a3a 100644 --- a/sys/i386/i386/mpboot.s +++ b/sys/i386/i386/mpboot.s @@ -40,6 +40,8 @@ #include "assym.s" +#define R(x) ((x)-KERNBASE) + /* * this code MUST be enabled here and in mp_machdep.c * it follows the very early stages of AP boot by placing values in CMOS ram. 
@@ -74,8 +76,16 @@ NON_GPROF_ENTRY(MPentry) CHECKPOINT(0x36, 3) /* Now enable paging mode */ - movl IdlePTD-KERNBASE, %eax +#ifdef PAE + movl R(IdlePDPT), %eax + movl %eax, %cr3 + movl %cr4, %eax + orl $CR4_PAE, %eax + movl %eax, %cr4 +#else + movl R(IdlePTD), %eax movl %eax,%cr3 +#endif movl %cr0,%eax orl $CR0_PE|CR0_PG,%eax /* enable paging */ movl %eax,%cr0 /* let the games begin! */ diff --git a/sys/i386/i386/pmap.c b/sys/i386/i386/pmap.c index 059c0076534..76b45b4219f 100644 --- a/sys/i386/i386/pmap.c +++ b/sys/i386/i386/pmap.c @@ -197,6 +197,10 @@ static int nkpt; vm_offset_t kernel_vm_end; extern u_int32_t KERNend; +#ifdef PAE +static uma_zone_t pdptzone; +#endif + /* * Data for the pv entry allocation mechanism */ @@ -248,7 +252,10 @@ static vm_page_t _pmap_allocpte(pmap_t pmap, unsigned ptepindex); static vm_page_t pmap_page_lookup(vm_object_t object, vm_pindex_t pindex); static int pmap_unuse_pt(pmap_t, vm_offset_t, vm_page_t); static vm_offset_t pmap_kmem_choose(vm_offset_t addr); -static void *pmap_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait); +static void *pmap_pv_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait); +#ifdef PAE +static void *pmap_pdpt_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait); +#endif static pd_entry_t pdir4mb; @@ -323,6 +330,9 @@ pmap_bootstrap(firstaddr, loadaddr) * Initialize the kernel pmap (which is statically allocated). 
*/ kernel_pmap->pm_pdir = (pd_entry_t *) (KERNBASE + (u_int)IdlePTD); +#ifdef PAE + kernel_pmap->pm_pdpt = (pdpt_entry_t *) (KERNBASE + (u_int)IdlePDPT); +#endif kernel_pmap->pm_active = -1; /* don't allow deactivation */ TAILQ_INIT(&kernel_pmap->pm_pvlist); LIST_INIT(&allpmaps); @@ -504,12 +514,21 @@ pmap_set_opt(void) } static void * -pmap_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait) +pmap_pv_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait) { *flags = UMA_SLAB_PRIV; return (void *)kmem_alloc(kernel_map, bytes); } +#ifdef PAE +static void * +pmap_pdpt_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait) +{ + *flags = UMA_SLAB_PRIV; + return (contigmalloc(PAGE_SIZE, NULL, 0, 0x0ULL, 0xffffffffULL, 1, 0)); +} +#endif + /* * Initialize the pmap module. * Called by vm_init, to initialize any structures that the pmap @@ -545,9 +564,15 @@ pmap_init(phys_start, phys_end) initial_pvs = MINPV; pvzone = uma_zcreate("PV ENTRY", sizeof (struct pv_entry), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM); - uma_zone_set_allocf(pvzone, pmap_allocf); + uma_zone_set_allocf(pvzone, pmap_pv_allocf); uma_prealloc(pvzone, initial_pvs); +#ifdef PAE + pdptzone = uma_zcreate("PDPT", NPGPTD * sizeof(pdpt_entry_t), NULL, + NULL, NULL, NULL, (NPGPTD * sizeof(pdpt_entry_t)) - 1, 0); + uma_zone_set_allocf(pdptzone, pmap_pdpt_allocf); +#endif + /* * Now it is safe to enable pv_table recording. */ @@ -1241,6 +1266,9 @@ pmap_pinit0(pmap) { pmap->pm_pdir = (pd_entry_t *)(KERNBASE + (vm_offset_t)IdlePTD); +#ifdef PAE + pmap->pm_pdpt = (pdpt_entry_t *)(KERNBASE + (vm_offset_t)IdlePDPT); +#endif pmap->pm_active = 0; TAILQ_INIT(&pmap->pm_pvlist); bzero(&pmap->pm_stats, sizeof pmap->pm_stats); @@ -1265,9 +1293,18 @@ pmap_pinit(pmap) * No need to allocate page table space yet but we do need a valid * page directory table. 
*/ - if (pmap->pm_pdir == NULL) + if (pmap->pm_pdir == NULL) { pmap->pm_pdir = (pd_entry_t *)kmem_alloc_pageable(kernel_map, NBPTD); +#ifdef PAE + pmap->pm_pdpt = uma_zalloc(pdptzone, M_WAITOK | M_ZERO); + KASSERT(((vm_offset_t)pmap->pm_pdpt & + ((NPGPTD * sizeof(pdpt_entry_t)) - 1)) == 0, + ("pmap_pinit: pdpt misaligned")); + KASSERT(pmap_kextract((vm_offset_t)pmap->pm_pdpt) < (4ULL<<30), + ("pmap_pinit: pdpt above 4g")); +#endif + } /* * allocate object for the ptes @@ -1310,6 +1347,9 @@ pmap_pinit(pmap) for (i = 0; i < NPGPTD; i++) { pa = VM_PAGE_TO_PHYS(ptdpg[i]); pmap->pm_pdir[PTDPTDI + i] = pa | PG_V | PG_RW | PG_A | PG_M; +#ifdef PAE + pmap->pm_pdpt[i] = pa | PG_V; +#endif } pmap->pm_active = 0; @@ -1485,6 +1525,10 @@ pmap_release(pmap_t pmap) vm_page_lock_queues(); for (i = 0; i < NPGPTD; i++) { m = TAILQ_FIRST(&object->memq); +#ifdef PAE + KASSERT(VM_PAGE_TO_PHYS(m) == (pmap->pm_pdpt[i] & PG_FRAME), + ("pmap_release: got wrong ptd page")); +#endif m->wire_count--; atomic_subtract_int(&cnt.v_wire_count, 1); vm_page_busy(m); @@ -1680,7 +1724,7 @@ pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t va) pt_entry_t oldpte; vm_page_t m; - oldpte = atomic_readandclear_int(ptq); + oldpte = pte_load_clear(ptq); if (oldpte & PG_W) pmap->pm_stats.wired_count -= 1; /* @@ -1846,7 +1890,7 @@ pmap_remove_all(vm_page_t m) while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { pv->pv_pmap->pm_stats.resident_count--; pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va); - tpte = atomic_readandclear_int(pte); + tpte = pte_load_clear(pte); if (tpte & PG_W) pv->pv_pmap->pm_stats.wired_count--; if (tpte & PG_A) @@ -3283,7 +3327,11 @@ pmap_activate(struct thread *td) #else pmap->pm_active |= 1; #endif +#ifdef PAE + cr3 = vtophys(pmap->pm_pdpt); +#else cr3 = vtophys(pmap->pm_pdir); +#endif /* XXXKSE this is wrong. 
* pmap_activate is for the current thread on the current cpu */ diff --git a/sys/i386/i386/vm86bios.s b/sys/i386/i386/vm86bios.s index 1f36e0e7b35..68b8b915eb2 100644 --- a/sys/i386/i386/vm86bios.s +++ b/sys/i386/i386/vm86bios.s @@ -123,6 +123,9 @@ ENTRY(vm86_bioscall) movl SCR_NEWPTD(%edx),%eax /* mapping for vm86 page table */ movl %eax,0(%ebx) /* ... install as PTD entry 0 */ +#ifdef PAE + movl IdlePDPT,%ecx +#endif movl %ecx,%cr3 /* new page tables */ movl SCR_VMFRAME(%edx),%esp /* switch to new stack */ diff --git a/sys/i386/i386/vm_machdep.c b/sys/i386/i386/vm_machdep.c index fdd95688827..8b395b2a159 100644 --- a/sys/i386/i386/vm_machdep.c +++ b/sys/i386/i386/vm_machdep.c @@ -170,7 +170,11 @@ cpu_fork(td1, p2, td2, flags) * Set registers for trampoline to user mode. Leave space for the * return address on stack. These are the kernel mode register values. */ +#ifdef PAE + pcb2->pcb_cr3 = vtophys(vmspace_pmap(p2->p_vmspace)->pm_pdpt); +#else pcb2->pcb_cr3 = vtophys(vmspace_pmap(p2->p_vmspace)->pm_pdir); +#endif pcb2->pcb_edi = 0; pcb2->pcb_esi = (int)fork_return; /* fork_trampoline argument */ pcb2->pcb_ebp = 0; @@ -342,7 +346,11 @@ cpu_set_upcall(struct thread *td, void *pcb) * Set registers for trampoline to user mode. Leave space for the * return address on stack. These are the kernel mode register values. 
*/ +#ifdef PAE + pcb2->pcb_cr3 = vtophys(vmspace_pmap(td->td_proc->p_vmspace)->pm_pdpt); +#else pcb2->pcb_cr3 = vtophys(vmspace_pmap(td->td_proc->p_vmspace)->pm_pdir); +#endif pcb2->pcb_edi = 0; pcb2->pcb_esi = (int)fork_return; /* trampoline arg */ pcb2->pcb_ebp = 0; diff --git a/sys/i386/include/_types.h b/sys/i386/include/_types.h index ad57438e24c..93c2a36cc82 100644 --- a/sys/i386/include/_types.h +++ b/sys/i386/include/_types.h @@ -102,7 +102,11 @@ typedef __uint64_t __uint_least64_t; typedef __uint32_t __u_register_t; typedef __uint32_t __vm_offset_t; typedef __int64_t __vm_ooffset_t; +#ifdef PAE +typedef __uint64_t __vm_paddr_t; +#else typedef __uint32_t __vm_paddr_t; +#endif typedef __uint64_t __vm_pindex_t; typedef __uint32_t __vm_size_t; diff --git a/sys/i386/include/bus_at386.h b/sys/i386/include/bus_at386.h index 20d21ec18da..e1f6e8e3a9a 100644 --- a/sys/i386/include/bus_at386.h +++ b/sys/i386/include/bus_at386.h @@ -92,15 +92,23 @@ /* * Bus address and size types */ -typedef u_int bus_addr_t; -typedef u_int bus_size_t; +#ifdef PAE +typedef uint64_t bus_addr_t; +#else +typedef uint32_t bus_addr_t; +#endif +typedef uint32_t bus_size_t; #define BUS_SPACE_MAXSIZE_24BIT 0xFFFFFF #define BUS_SPACE_MAXSIZE_32BIT 0xFFFFFFFF #define BUS_SPACE_MAXSIZE 0xFFFFFFFF #define BUS_SPACE_MAXADDR_24BIT 0xFFFFFF #define BUS_SPACE_MAXADDR_32BIT 0xFFFFFFFF +#ifdef PAE +#define BUS_SPACE_MAXADDR 0xFFFFFFFFFFFFFFFFULL +#else #define BUS_SPACE_MAXADDR 0xFFFFFFFF +#endif #define BUS_SPACE_UNRESTRICTED (~0) diff --git a/sys/i386/include/param.h b/sys/i386/include/param.h index 4da42a942c7..c3b9c597bb8 100644 --- a/sys/i386/include/param.h +++ b/sys/i386/include/param.h @@ -87,8 +87,13 @@ #define PAGE_MASK (PAGE_SIZE-1) #define NPTEPG (PAGE_SIZE/(sizeof (pt_entry_t))) +#ifdef PAE +#define NPGPTD 4 +#define PDRSHIFT 21 /* LOG2(NBPDR) */ +#else #define NPGPTD 1 #define PDRSHIFT 22 /* LOG2(NBPDR) */ +#endif #define NBPTD (NPGPTD< -typedef u_int32_t pd_entry_t; -typedef u_int32_t 
pt_entry_t; +#ifdef PAE + +typedef uint64_t pdpt_entry_t; +typedef uint64_t pd_entry_t; +typedef uint64_t pt_entry_t; + +#define PTESHIFT (3) +#define PDESHIFT (3) + +#else + +typedef uint32_t pd_entry_t; +typedef uint32_t pt_entry_t; #define PTESHIFT (2) #define PDESHIFT (2) +#endif + /* * Address of current and alternate address space page table maps * and directories. @@ -149,6 +170,9 @@ extern pt_entry_t PTmap[], APTmap[]; extern pd_entry_t PTD[], APTD[]; extern pd_entry_t PTDpde[], APTDpde[]; +#ifdef PAE +extern pdpt_entry_t *IdlePDPT; +#endif extern pd_entry_t *IdlePTD; /* physical address of "Idle" state directory */ #endif @@ -183,6 +207,30 @@ pmap_kextract(vm_offset_t va) } #define vtophys(va) pmap_kextract(((vm_offset_t) (va))) + +#ifdef PAE + +static __inline pt_entry_t +pte_load_clear(pt_entry_t *pte) +{ + pt_entry_t r; + + r = *pte; + __asm __volatile( + "1:\n" + "\tcmpxchg8b %1\n" + "\tjnz 1b" + : "+A" (r) + : "m" (*pte), "b" (0), "c" (0)); + return (r); +} + +#else + +#define pte_load_clear(pte) atomic_readandclear_int(pte) + +#endif + #endif /* @@ -202,6 +250,10 @@ struct pmap { int pm_active; /* active on cpus */ struct pmap_statistics pm_stats; /* pmap statistics */ LIST_ENTRY(pmap) pm_list; /* List of all pmaps */ +#ifdef PAE + pdpt_entry_t *pm_pdpt; /* KVA of page directory pointer + table */ +#endif }; #define pmap_page_is_mapped(m) (!TAILQ_EMPTY(&(m)->md.pv_list))