MFC 196705 and 196707:

- Improve pmap_change_attr() on i386 so that it is able to demote a large
  (2/4MB) page into 4KB pages as needed.  This should be fairly rare in
  practice.
- Simplify pmap_change_attr() a bit:
  - Always calculate the cache bits instead of doing it on-demand.
  - Always set changed to TRUE rather than only doing it if it is false.

Approved by:	re (kib)
This commit is contained in:
John Baldwin 2009-09-03 13:54:58 +00:00
parent 5b628e0c26
commit bf202eb1c6
3 changed files with 181 additions and 73 deletions

View file

@ -4476,7 +4476,8 @@ pmap_change_attr_locked(vm_offset_t va, vm_size_t size, int mode)
if (base < DMAP_MIN_ADDRESS)
return (EINVAL);
cache_bits_pde = cache_bits_pte = -1;
cache_bits_pde = pmap_cache_bits(mode, 1);
cache_bits_pte = pmap_cache_bits(mode, 0);
changed = FALSE;
/*
@ -4493,8 +4494,6 @@ pmap_change_attr_locked(vm_offset_t va, vm_size_t size, int mode)
* memory type, then we need not demote this page. Just
* increment tmpva to the next 1GB page frame.
*/
if (cache_bits_pde < 0)
cache_bits_pde = pmap_cache_bits(mode, 1);
if ((*pdpe & PG_PDE_CACHE) == cache_bits_pde) {
tmpva = trunc_1gpage(tmpva) + NBPDP;
continue;
@ -4522,8 +4521,6 @@ pmap_change_attr_locked(vm_offset_t va, vm_size_t size, int mode)
* memory type, then we need not demote this page. Just
* increment tmpva to the next 2MB page frame.
*/
if (cache_bits_pde < 0)
cache_bits_pde = pmap_cache_bits(mode, 1);
if ((*pde & PG_PDE_CACHE) == cache_bits_pde) {
tmpva = trunc_2mpage(tmpva) + NBPDR;
continue;
@ -4557,12 +4554,9 @@ pmap_change_attr_locked(vm_offset_t va, vm_size_t size, int mode)
for (tmpva = base; tmpva < base + size; ) {
pdpe = pmap_pdpe(kernel_pmap, tmpva);
if (*pdpe & PG_PS) {
if (cache_bits_pde < 0)
cache_bits_pde = pmap_cache_bits(mode, 1);
if ((*pdpe & PG_PDE_CACHE) != cache_bits_pde) {
pmap_pde_attr(pdpe, cache_bits_pde);
if (!changed)
changed = TRUE;
changed = TRUE;
}
if (tmpva >= VM_MIN_KERNEL_ADDRESS) {
if (pa_start == pa_end) {
@ -4588,12 +4582,9 @@ pmap_change_attr_locked(vm_offset_t va, vm_size_t size, int mode)
}
pde = pmap_pdpe_to_pde(pdpe, tmpva);
if (*pde & PG_PS) {
if (cache_bits_pde < 0)
cache_bits_pde = pmap_cache_bits(mode, 1);
if ((*pde & PG_PDE_CACHE) != cache_bits_pde) {
pmap_pde_attr(pde, cache_bits_pde);
if (!changed)
changed = TRUE;
changed = TRUE;
}
if (tmpva >= VM_MIN_KERNEL_ADDRESS) {
if (pa_start == pa_end) {
@ -4616,13 +4607,10 @@ pmap_change_attr_locked(vm_offset_t va, vm_size_t size, int mode)
}
tmpva = trunc_2mpage(tmpva) + NBPDR;
} else {
if (cache_bits_pte < 0)
cache_bits_pte = pmap_cache_bits(mode, 0);
pte = pmap_pde_to_pte(pde, tmpva);
if ((*pte & PG_PTE_CACHE) != cache_bits_pte) {
pmap_pte_attr(pte, cache_bits_pte);
if (!changed)
changed = TRUE;
changed = TRUE;
}
if (tmpva >= VM_MIN_KERNEL_ADDRESS) {
if (pa_start == pa_end) {

View file

@ -288,12 +288,15 @@ static boolean_t pmap_enter_pde(pmap_t pmap, vm_offset_t va, vm_page_t m,
static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va,
vm_page_t m, vm_prot_t prot, vm_page_t mpte);
static void pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte);
static void pmap_fill_ptp(pt_entry_t *firstpte, pt_entry_t newpte);
static boolean_t pmap_is_modified_pvh(struct md_page *pvh);
static void pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode);
static vm_page_t pmap_lookup_pt_page(pmap_t pmap, vm_offset_t va);
static void pmap_pde_attr(pd_entry_t *pde, int cache_bits);
static void pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va);
static boolean_t pmap_protect_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t sva,
vm_prot_t prot);
static void pmap_pte_attr(pt_entry_t *pte, int cache_bits);
static void pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva,
vm_page_t *free);
static int pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t sva,
@ -2289,32 +2292,62 @@ pmap_pv_insert_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa)
}
/*
* Tries to demote a 2- or 4MB page mapping.
* Fills a page table page with mappings to consecutive physical pages.
*/
static void
pmap_fill_ptp(pt_entry_t *firstpte, pt_entry_t newpte)
{
	int idx;

	/*
	 * Write all NPTEPG entries of the page table page.  The first
	 * entry receives newpte unchanged; each later entry is the
	 * previous value advanced by PAGE_SIZE.
	 */
	for (idx = 0; idx < NPTEPG; idx++, newpte += PAGE_SIZE)
		firstpte[idx] = newpte;
}
/*
* Tries to demote a 2- or 4MB page mapping. If demotion fails, the
* 2- or 4MB page mapping is invalidated.
*/
static boolean_t
pmap_demote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va)
{
pd_entry_t newpde, oldpde;
pmap_t allpmaps_entry;
pt_entry_t *firstpte, newpte, *pte;
pt_entry_t *firstpte, newpte;
vm_paddr_t mptepa;
vm_page_t free, mpte;
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
oldpde = *pde;
KASSERT((oldpde & (PG_PS | PG_V)) == (PG_PS | PG_V),
("pmap_demote_pde: oldpde is missing PG_PS and/or PG_V"));
mpte = pmap_lookup_pt_page(pmap, va);
if (mpte != NULL)
pmap_remove_pt_page(pmap, mpte);
else {
KASSERT((*pde & PG_W) == 0,
KASSERT((oldpde & PG_W) == 0,
("pmap_demote_pde: page table page for a wired mapping"
" is missing"));
free = NULL;
pmap_remove_pde(pmap, pde, trunc_4mpage(va), &free);
pmap_invalidate_page(pmap, trunc_4mpage(va));
pmap_free_zero_pages(free);
CTR2(KTR_PMAP, "pmap_demote_pde: failure for va %#x"
" in pmap %p", va, pmap);
return (FALSE);
/*
* Invalidate the 2- or 4MB page mapping and return
* "failure" if the mapping was never accessed or the
* allocation of the new page table page fails.
*/
if ((oldpde & PG_A) == 0 || (mpte = vm_page_alloc(NULL,
va >> PDRSHIFT, VM_ALLOC_NOOBJ | VM_ALLOC_NORMAL |
VM_ALLOC_WIRED)) == NULL) {
free = NULL;
pmap_remove_pde(pmap, pde, trunc_4mpage(va), &free);
pmap_invalidate_page(pmap, trunc_4mpage(va));
pmap_free_zero_pages(free);
CTR2(KTR_PMAP, "pmap_demote_pde: failure for va %#x"
" in pmap %p", va, pmap);
return (FALSE);
}
if (va < VM_MAXUSER_ADDRESS)
pmap->pm_stats.resident_count++;
}
mptepa = VM_PAGE_TO_PHYS(mpte);
@ -2348,30 +2381,32 @@ pmap_demote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va)
}
firstpte = PADDR2;
}
oldpde = *pde;
newpde = mptepa | PG_M | PG_A | (oldpde & PG_U) | PG_RW | PG_V;
KASSERT((oldpde & (PG_A | PG_V)) == (PG_A | PG_V),
("pmap_demote_pde: oldpde is missing PG_A and/or PG_V"));
KASSERT((oldpde & PG_A) != 0,
("pmap_demote_pde: oldpde is missing PG_A"));
KASSERT((oldpde & (PG_M | PG_RW)) != PG_RW,
("pmap_demote_pde: oldpde is missing PG_M"));
KASSERT((oldpde & PG_PS) != 0,
("pmap_demote_pde: oldpde is missing PG_PS"));
newpte = oldpde & ~PG_PS;
if ((newpte & PG_PDE_PAT) != 0)
newpte ^= PG_PDE_PAT | PG_PTE_PAT;
/*
* If the mapping has changed attributes, update the page table
* entries.
*/
* If the page table page is new, initialize it.
*/
if (mpte->wire_count == 1) {
mpte->wire_count = NPTEPG;
pmap_fill_ptp(firstpte, newpte);
}
KASSERT((*firstpte & PG_FRAME) == (newpte & PG_FRAME),
("pmap_demote_pde: firstpte and newpte map different physical"
" addresses"));
/*
* If the mapping has changed attributes, update the page table
* entries.
*/
if ((*firstpte & PG_PTE_PROMOTE) != (newpte & PG_PTE_PROMOTE))
for (pte = firstpte; pte < firstpte + NPTEPG; pte++) {
*pte = newpte;
newpte += PAGE_SIZE;
}
pmap_fill_ptp(firstpte, newpte);
/*
* Demote the mapping. This pmap is locked. The old PDE has
@ -4426,6 +4461,40 @@ pmap_clear_reference(vm_page_t m)
* Miscellaneous support routines follow
*/
/*
 * Set the cache mode bits of a 4KB page mapping (PTE) to cache_bits,
 * leaving every other bit of the entry as it was.
 */
static __inline void
pmap_pte_attr(pt_entry_t *pte, int cache_bits)
{
	u_int *lowp, cur, want;

	/*
	 * All of the cache mode bits live in the low 32 bits of the
	 * PTE, so it is enough to spin on updating that word alone.
	 * Stop as soon as the entry already holds the wanted bits or
	 * the compare-and-set goes through.
	 */
	lowp = (u_int *)pte;
	for (;;) {
		cur = *lowp;
		want = (cur & ~PG_PTE_CACHE) | cache_bits;
		if (want == cur || atomic_cmpset_int(lowp, cur, want))
			break;
	}
}
/*
 * Set the cache mode bits of a 2/4MB page mapping (PDE) to cache_bits,
 * leaving every other bit of the entry as it was.
 */
static __inline void
pmap_pde_attr(pd_entry_t *pde, int cache_bits)
{
	u_int *lowp, cur, want;

	/*
	 * All of the cache mode bits live in the low 32 bits of the
	 * PDE, so it is enough to spin on updating that word alone.
	 * Stop as soon as the entry already holds the wanted bits or
	 * the compare-and-set goes through.
	 */
	lowp = (u_int *)pde;
	for (;;) {
		cur = *lowp;
		want = (cur & ~PG_PDE_CACHE) | cache_bits;
		if (want == cur || atomic_cmpset_int(lowp, cur, want))
			break;
	}
}
/*
* Map a set of physical memory pages into the kernel virtual
* address space. Return a pointer to where it is mapped. This
@ -4537,13 +4606,23 @@ pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma)
}
}
/*
* Changes the specified virtual address range's memory type to that given by
* the parameter "mode". The specified virtual address range must be
* completely contained within the kernel map.
*
* Returns zero if the change completed successfully, and either EINVAL or
* ENOMEM if the change failed. Specifically, EINVAL is returned if some part
* of the virtual address range was not mapped, and ENOMEM is returned if
* there was insufficient memory available to complete the change.
*/
int
pmap_change_attr(vm_offset_t va, vm_size_t size, int mode)
{
vm_offset_t base, offset, tmpva;
pt_entry_t *pte;
u_int opte, npte;
pd_entry_t *pde;
pt_entry_t *pte;
int cache_bits_pte, cache_bits_pde;
boolean_t changed;
base = trunc_page(va);
@ -4556,47 +4635,84 @@ pmap_change_attr(vm_offset_t va, vm_size_t size, int mode)
if (base < VM_MIN_KERNEL_ADDRESS)
return (EINVAL);
/* 4MB pages and pages that aren't mapped aren't supported. */
for (tmpva = base; tmpva < (base + size); tmpva += PAGE_SIZE) {
pde = pmap_pde(kernel_pmap, tmpva);
if (*pde & PG_PS)
return (EINVAL);
if (*pde == 0)
return (EINVAL);
pte = vtopte(tmpva);
if (*pte == 0)
return (EINVAL);
}
cache_bits_pde = pmap_cache_bits(mode, 1);
cache_bits_pte = pmap_cache_bits(mode, 0);
changed = FALSE;
/*
* Ok, all the pages exist and are 4k, so run through them updating
* their cache mode.
* Pages that aren't mapped aren't supported. Also break down
* 2/4MB pages into 4KB pages if required.
*/
for (tmpva = base; size > 0; ) {
pte = vtopte(tmpva);
PMAP_LOCK(kernel_pmap);
for (tmpva = base; tmpva < base + size; ) {
pde = pmap_pde(kernel_pmap, tmpva);
if (*pde == 0) {
PMAP_UNLOCK(kernel_pmap);
return (EINVAL);
}
if (*pde & PG_PS) {
/*
* If the current 2/4MB page already has
* the required memory type, then we need not
* demote this page. Just increment tmpva to
* the next 2/4MB page frame.
*/
if ((*pde & PG_PDE_CACHE) == cache_bits_pde) {
tmpva = trunc_4mpage(tmpva) + NBPDR;
continue;
}
/*
* The cache mode bits are all in the low 32-bits of the
* PTE, so we can just spin on updating the low 32-bits.
*/
do {
opte = *(u_int *)pte;
npte = opte & ~(PG_PTE_PAT | PG_NC_PCD | PG_NC_PWT);
npte |= pmap_cache_bits(mode, 0);
} while (npte != opte &&
!atomic_cmpset_int((u_int *)pte, opte, npte));
if (npte != opte)
changed = TRUE;
/*
* If the current offset aligns with a 2/4MB
* page frame and there is at least 2/4MB left
* within the range, then we need not break
* down this page into 4KB pages.
*/
if ((tmpva & PDRMASK) == 0 &&
tmpva + PDRMASK < base + size) {
tmpva += NBPDR;
continue;
}
if (!pmap_demote_pde(kernel_pmap, pde, tmpva)) {
PMAP_UNLOCK(kernel_pmap);
return (ENOMEM);
}
}
pte = vtopte(tmpva);
if (*pte == 0) {
PMAP_UNLOCK(kernel_pmap);
return (EINVAL);
}
tmpva += PAGE_SIZE;
size -= PAGE_SIZE;
}
PMAP_UNLOCK(kernel_pmap);
/*
* Ok, all the pages exist, so run through them updating their
* cache mode if required.
*/
for (tmpva = base; tmpva < base + size; ) {
pde = pmap_pde(kernel_pmap, tmpva);
if (*pde & PG_PS) {
if ((*pde & PG_PDE_CACHE) != cache_bits_pde) {
pmap_pde_attr(pde, cache_bits_pde);
changed = TRUE;
}
tmpva = trunc_4mpage(tmpva) + NBPDR;
} else {
pte = vtopte(tmpva);
if ((*pte & PG_PTE_CACHE) != cache_bits_pte) {
pmap_pte_attr(pte, cache_bits_pte);
changed = TRUE;
}
tmpva += PAGE_SIZE;
}
}
/*
* Flush CPU caches to make sure any data isn't cached that shouldn't
* be, etc.
*/
* Flush CPU caches to make sure any data isn't cached that
* shouldn't be, etc.
*/
if (changed) {
pmap_invalidate_range(kernel_pmap, base, tmpva);
pmap_invalidate_cache_range(base, tmpva);

View file

@ -81,6 +81,10 @@
#define PG_PROT (PG_RW|PG_U) /* all protection bits . */
#define PG_N (PG_NC_PWT|PG_NC_PCD) /* Non-cacheable */
/*
 * Page level cache control fields used to determine the PAT type.
 * PG_PDE_CACHE is the set of cache mode bits for a 2/4MB page mapped via
 * a PDE; PG_PTE_CACHE is the equivalent set for a 4KB page mapped via a
 * PTE.  The two differ only in which PAT bit they include.
 */
#define PG_PDE_CACHE (PG_PDE_PAT | PG_NC_PWT | PG_NC_PCD)
#define PG_PTE_CACHE (PG_PTE_PAT | PG_NC_PWT | PG_NC_PCD)
/*
* Promotion to a 2 or 4MB (PDE) page mapping requires that the corresponding
* 4KB (PTE) page mappings have identical settings for the following fields: