Remove all calls to cpu_dcache_wb_range from the arm64 pmap code. These
were unneeded as we tell the tlb the pagetables are in cached memory. This
gives us a small, but statistically significant improvement over just
removing the PTE_SYNC cases.

While here remove PTE_SYNC, it's now unneeded.

Sponsored by:	DARPA, AFRL
This commit is contained in:
Andrew Turner 2017-07-01 09:38:52 +00:00
parent eb36b1d0bc
commit 567dec3d2b

View file

@ -504,12 +504,6 @@ pmap_l3_valid(pt_entry_t l3)
CTASSERT(L1_BLOCK == L2_BLOCK);
#if 0
#define PTE_SYNC(pte) cpu_dcache_wb_range((vm_offset_t)pte, sizeof(*pte))
#else
#define PTE_SYNC(pte) (void)0
#endif
/*
* Checks if the page is dirty. We currently lack proper tracking of this on
* arm64 so for now assume is a page mapped as rw was accessed it is.
@ -594,8 +588,6 @@ pmap_bootstrap_dmap(vm_offset_t kern_l1, vm_paddr_t min_pa, vm_paddr_t max_pa)
dmap_phys_max = pa;
dmap_max_addr = va;
cpu_dcache_wb_range((vm_offset_t)pagetable_dmap,
PAGE_SIZE * DMAP_TABLES);
cpu_tlb_flushID();
}
@ -624,10 +616,6 @@ pmap_bootstrap_l2(vm_offset_t l1pt, vm_offset_t va, vm_offset_t l2_start)
/* Clean the L2 page table */
memset((void *)l2_start, 0, l2pt - l2_start);
cpu_dcache_wb_range(l2_start, l2pt - l2_start);
/* Flush the l1 table to ram */
cpu_dcache_wb_range((vm_offset_t)l1, PAGE_SIZE);
return l2pt;
}
@ -659,9 +647,6 @@ pmap_bootstrap_l3(vm_offset_t l1pt, vm_offset_t va, vm_offset_t l3_start)
/* Clean the L2 page table */
memset((void *)l3_start, 0, l3pt - l3_start);
cpu_dcache_wb_range(l3_start, l3pt - l3_start);
cpu_dcache_wb_range((vm_offset_t)l2, PAGE_SIZE);
return l3pt;
}
@ -1131,7 +1116,6 @@ pmap_kenter(vm_offset_t sva, vm_size_t size, vm_paddr_t pa, int mode)
pte = pmap_l2_to_l3(pde, va);
pmap_load_store(pte, (pa & ~L3_OFFSET) | attr);
PTE_SYNC(pte);
va += PAGE_SIZE;
pa += PAGE_SIZE;
@ -1161,7 +1145,6 @@ pmap_kremove(vm_offset_t va)
KASSERT(lvl == 3, ("pmap_kremove: Invalid pte level %d", lvl));
pmap_load_clear(pte);
PTE_SYNC(pte);
pmap_invalidate_page(kernel_pmap, va);
}
@ -1184,7 +1167,6 @@ pmap_kremove_device(vm_offset_t sva, vm_size_t size)
KASSERT(lvl == 3,
("Invalid device pagetable level: %d != 3", lvl));
pmap_load_clear(pte);
PTE_SYNC(pte);
va += PAGE_SIZE;
size -= PAGE_SIZE;
@ -1244,7 +1226,6 @@ pmap_qenter(vm_offset_t sva, vm_page_t *ma, int count)
pa |= ATTR_XN;
pte = pmap_l2_to_l3(pde, va);
pmap_load_store(pte, pa);
PTE_SYNC(pte);
va += L3_SIZE;
}
@ -1271,7 +1252,6 @@ pmap_qremove(vm_offset_t sva, int count)
("Invalid device pagetable level: %d != 3", lvl));
if (pte != NULL) {
pmap_load_clear(pte);
PTE_SYNC(pte);
}
va += PAGE_SIZE;
@ -1343,21 +1323,18 @@ _pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free)
l0 = pmap_l0(pmap, va);
pmap_load_clear(l0);
PTE_SYNC(l0);
} else if (m->pindex >= NUL2E) {
/* l2 page */
pd_entry_t *l1;
l1 = pmap_l1(pmap, va);
pmap_load_clear(l1);
PTE_SYNC(l1);
} else {
/* l3 page */
pd_entry_t *l2;
l2 = pmap_l2(pmap, va);
pmap_load_clear(l2);
PTE_SYNC(l2);
}
pmap_resident_count_dec(pmap, 1);
if (m->pindex < NUL2E) {
@ -1498,7 +1475,6 @@ _pmap_alloc_l3(pmap_t pmap, vm_pindex_t ptepindex, struct rwlock **lockp)
l0index = ptepindex - (NUL2E + NUL1E);
l0 = &pmap->pm_l0[l0index];
pmap_load_store(l0, VM_PAGE_TO_PHYS(m) | L0_TABLE);
PTE_SYNC(l0);
} else if (ptepindex >= NUL2E) {
vm_pindex_t l0index, l1index;
pd_entry_t *l0, *l1;
@ -1527,7 +1503,6 @@ _pmap_alloc_l3(pmap_t pmap, vm_pindex_t ptepindex, struct rwlock **lockp)
l1 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l0) & ~ATTR_MASK);
l1 = &l1[ptepindex & Ln_ADDR_MASK];
pmap_load_store(l1, VM_PAGE_TO_PHYS(m) | L1_TABLE);
PTE_SYNC(l1);
} else {
vm_pindex_t l0index, l1index;
pd_entry_t *l0, *l1, *l2;
@ -1574,7 +1549,6 @@ _pmap_alloc_l3(pmap_t pmap, vm_pindex_t ptepindex, struct rwlock **lockp)
l2 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l1) & ~ATTR_MASK);
l2 = &l2[ptepindex & Ln_ADDR_MASK];
pmap_load_store(l2, VM_PAGE_TO_PHYS(m) | L2_TABLE);
PTE_SYNC(l2);
}
pmap_resident_count_inc(pmap, 1);
@ -1727,7 +1701,6 @@ pmap_growkernel(vm_offset_t addr)
pmap_zero_page(nkpg);
paddr = VM_PAGE_TO_PHYS(nkpg);
pmap_load_store(l1, paddr | L1_TABLE);
PTE_SYNC(l1);
continue; /* try again */
}
l2 = pmap_l1_to_l2(l1, kernel_vm_end);
@ -1749,7 +1722,6 @@ pmap_growkernel(vm_offset_t addr)
pmap_zero_page(nkpg);
paddr = VM_PAGE_TO_PHYS(nkpg);
pmap_load_store(l2, paddr | L2_TABLE);
PTE_SYNC(l2);
pmap_invalidate_page(kernel_pmap, kernel_vm_end);
kernel_vm_end = (kernel_vm_end + L2_SIZE) & ~L2_OFFSET;
@ -1883,7 +1855,6 @@ reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp)
if ((tpte & ATTR_SW_WIRED) != 0)
continue;
tpte = pmap_load_clear(pte);
PTE_SYNC(pte);
pmap_invalidate_page(pmap, va);
m = PHYS_TO_VM_PAGE(tpte & ~ATTR_MASK);
if (pmap_page_dirty(tpte))
@ -2272,7 +2243,6 @@ pmap_remove_l3(pmap_t pmap, pt_entry_t *l3, vm_offset_t va,
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
old_l3 = pmap_load_clear(l3);
PTE_SYNC(l3);
pmap_invalidate_page(pmap, va);
if (old_l3 & ATTR_SW_WIRED)
pmap->pm_stats.wired_count -= 1;
@ -2493,7 +2463,6 @@ retry:
pte = pmap_l2_to_l3(pde, pv->pv_va);
tpte = pmap_load(pte);
pmap_load_clear(pte);
PTE_SYNC(pte);
pmap_invalidate_page(pmap, pv->pv_va);
if (tpte & ATTR_SW_WIRED)
pmap->pm_stats.wired_count--;
@ -2595,7 +2564,6 @@ pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
nbits |= ATTR_XN;
pmap_set(l3p, nbits);
PTE_SYNC(l3p);
/* XXX: Use pmap_invalidate_range */
pmap_invalidate_page(pmap, va);
}
@ -2657,12 +2625,10 @@ pmap_update_entry(pmap_t pmap, pd_entry_t *pte, pd_entry_t newpte,
/* Clear the old mapping */
pmap_load_clear(pte);
PTE_SYNC(pte);
pmap_invalidate_range(pmap, va, va + size);
/* Create the new mapping */
pmap_load_store(pte, newpte);
PTE_SYNC(pte);
critical_exit();
intr_restore(intr);
@ -2886,7 +2852,6 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
l1_pa = VM_PAGE_TO_PHYS(l1_m);
pmap_load_store(pde, l1_pa | L0_TABLE);
PTE_SYNC(pde);
/* FALLTHROUGH */
case 0:
/* Get the l1 pde to update */
@ -2903,7 +2868,6 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
l2_pa = VM_PAGE_TO_PHYS(l2_m);
pmap_load_store(pde, l2_pa | L1_TABLE);
PTE_SYNC(pde);
/* FALLTHROUGH */
case 1:
/* Get the l2 pde to update */
@ -2919,7 +2883,6 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
l3_pa = VM_PAGE_TO_PHYS(l3_m);
pmap_load_store(pde, l3_pa | L2_TABLE);
PTE_SYNC(pde);
break;
}
}
@ -3023,7 +2986,6 @@ validate:
}
} else {
pmap_load_store(l3, new_l3);
PTE_SYNC(l3);
pmap_invalidate_page(pmap, va);
if (pmap_page_dirty(orig_l3) &&
(orig_l3 & ATTR_SW_MANAGED) != 0)
@ -3033,7 +2995,6 @@ validate:
pmap_load_store(l3, new_l3);
}
PTE_SYNC(l3);
pmap_invalidate_page(pmap, va);
if (pmap != pmap_kernel()) {
@ -3231,7 +3192,6 @@ pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m,
if ((m->oflags & VPO_UNMANAGED) == 0)
pa |= ATTR_SW_MANAGED;
pmap_load_store(l3, pa);
PTE_SYNC(l3);
pmap_invalidate_page(pmap, va);
return (mpte);
}
@ -3641,7 +3601,6 @@ pmap_remove_pages(pmap_t pmap)
(uintmax_t)tpte));
pmap_load_clear(pte);
PTE_SYNC(pte);
/*
* Update the vm_page_t clean/reference bits.
@ -4416,7 +4375,6 @@ pmap_demote_l1(pmap_t pmap, pt_entry_t *l1, vm_offset_t va)
l2[i] = newl2 | phys;
phys += L2_SIZE;
}
cpu_dcache_wb_range((vm_offset_t)l2, PAGE_SIZE);
KASSERT(l2[0] == ((oldl1 & ~ATTR_DESCR_MASK) | L2_BLOCK),
("Invalid l2 page (%lx != %lx)", l2[0],
(oldl1 & ~ATTR_DESCR_MASK) | L2_BLOCK));
@ -4494,7 +4452,6 @@ pmap_demote_l2_locked(pmap_t pmap, pt_entry_t *l2, vm_offset_t va,
l3[i] = newl3 | phys;
phys += L3_SIZE;
}
cpu_dcache_wb_range((vm_offset_t)l3, PAGE_SIZE);
}
KASSERT(l3[0] == ((oldl2 & ~ATTR_DESCR_MASK) | L3_PAGE),
("Invalid l3 page (%lx != %lx)", l3[0],