Use the VIS block copy/zero functions for pmap_copy_page and pmap_zero_page.

These are called through function pointers so that different implementations
can be provided for Cheetah, where the block load instructions may or may
not be a win, and so that they can be disabled with the machdep.use_vis
tunable. In terms of raw bandwidth, the integer versions are faster, but not
allocating lines in the L2 cache for useless data gives a measurable
improvement in user time for the benchmarks I tested (mostly buildworld with -j8).

As far as I can tell, the instructions used are implemented on everything
back to UltraSPARC I, so there should not be a problem with different CPU
types.
This commit is contained in:
Jake Burkholder 2003-04-06 17:05:26 +00:00
parent 3e9a6ab3a1
commit 92fed30a07
4 changed files with 29 additions and 13 deletions

View file

@ -196,9 +196,6 @@ void ascopyfrom(u_long sasi, vm_offset_t src, caddr_t dst, size_t len);
void ascopyto(caddr_t src, u_long dasi, vm_offset_t dst, size_t len);
void aszero(u_long asi, vm_offset_t dst, size_t len);
void spitfire_block_copy(void *src, void *dst, size_t len);
void spitfire_block_zero(void *dst, size_t len);
/*
* Ultrasparc II doesn't implement popc in hardware. Suck.
*/

View file

@ -33,6 +33,9 @@
#ifndef _MACHINE_MD_VAR_H_
#define _MACHINE_MD_VAR_H_
typedef void cpu_block_copy_t(const void *src, void *dst, size_t len);
typedef void cpu_block_zero_t(void *dst, size_t len);
extern char tl0_base[];
extern char _end[];
@ -41,10 +44,6 @@ extern long Maxmem;
extern vm_offset_t kstack0;
extern vm_offset_t kstack0_phys;
struct dbreg;
struct fpreg;
struct thread;
struct reg;
struct pcpu;
void cpu_halt(void);
@ -54,4 +53,10 @@ void cpu_setregs(struct pcpu *pc);
int is_physical_memory(vm_offset_t addr);
void swi_vm(void *v);
cpu_block_copy_t spitfire_block_copy;
cpu_block_zero_t spitfire_block_zero;
extern cpu_block_copy_t *cpu_block_copy;
extern cpu_block_zero_t *cpu_block_zero;
#endif /* !_MACHINE_MD_VAR_H_ */

View file

@ -132,6 +132,11 @@ static struct timecounter tick_tc;
char sparc64_model[32];
static int cpu_use_vis = 1;
cpu_block_copy_t *cpu_block_copy;
cpu_block_zero_t *cpu_block_zero;
static timecounter_get_t tick_get_timecount;
void sparc64_init(caddr_t mdp, u_long o1, u_long o2, u_long o3,
ofw_vec_t *vec);
@ -280,6 +285,15 @@ sparc64_init(caddr_t mdp, u_long o1, u_long o2, u_long o3, ofw_vec_t *vec)
cache_init(child);
getenv_int("machdep.use_vis", &cpu_use_vis);
if (cpu_use_vis) {
cpu_block_copy = spitfire_block_copy;
cpu_block_zero = spitfire_block_zero;
} else {
cpu_block_copy = bcopy;
cpu_block_zero = bzero;
}
#ifdef DDB
kdb_init();
#endif

View file

@ -1645,14 +1645,14 @@ pmap_zero_page(vm_page_t m)
} else if (m->md.color == DCACHE_COLOR(pa)) {
PMAP_STATS_INC(pmap_nzero_page_c);
va = TLB_PHYS_TO_DIRECT(pa);
bzero((void *)va, PAGE_SIZE);
cpu_block_zero((void *)va, PAGE_SIZE);
} else {
PMAP_STATS_INC(pmap_nzero_page_oc);
va = pmap_temp_map_1 + (m->md.color * PAGE_SIZE);
tp = tsb_kvtotte(va);
tp->tte_data = TD_V | TD_8K | TD_PA(pa) | TD_CP | TD_CV | TD_W;
tp->tte_vpn = TV_VPN(va, TS_8K);
bzero((void *)va, PAGE_SIZE);
cpu_block_zero((void *)va, PAGE_SIZE);
tlb_page_demap(kernel_pmap, va);
}
}
@ -1704,14 +1704,14 @@ pmap_zero_page_idle(vm_page_t m)
} else if (m->md.color == DCACHE_COLOR(pa)) {
PMAP_STATS_INC(pmap_nzero_page_idle_c);
va = TLB_PHYS_TO_DIRECT(pa);
bzero((void *)va, PAGE_SIZE);
cpu_block_zero((void *)va, PAGE_SIZE);
} else {
PMAP_STATS_INC(pmap_nzero_page_idle_oc);
va = pmap_idle_map + (m->md.color * PAGE_SIZE);
tp = tsb_kvtotte(va);
tp->tte_data = TD_V | TD_8K | TD_PA(pa) | TD_CP | TD_CV | TD_W;
tp->tte_vpn = TV_VPN(va, TS_8K);
bzero((void *)va, PAGE_SIZE);
cpu_block_zero((void *)va, PAGE_SIZE);
tlb_page_demap(kernel_pmap, va);
}
}
@ -1740,7 +1740,7 @@ pmap_copy_page(vm_page_t msrc, vm_page_t mdst)
PMAP_STATS_INC(pmap_ncopy_page_c);
vdst = TLB_PHYS_TO_DIRECT(pdst);
vsrc = TLB_PHYS_TO_DIRECT(psrc);
bcopy((void *)vsrc, (void *)vdst, PAGE_SIZE);
cpu_block_copy((void *)vsrc, (void *)vdst, PAGE_SIZE);
} else if (msrc->md.color == -1) {
if (mdst->md.color == DCACHE_COLOR(pdst)) {
PMAP_STATS_INC(pmap_ncopy_page_dc);
@ -1787,7 +1787,7 @@ pmap_copy_page(vm_page_t msrc, vm_page_t mdst)
tp->tte_data =
TD_V | TD_8K | TD_PA(psrc) | TD_CP | TD_CV | TD_W;
tp->tte_vpn = TV_VPN(vsrc, TS_8K);
bcopy((void *)vsrc, (void *)vdst, PAGE_SIZE);
cpu_block_copy((void *)vsrc, (void *)vdst, PAGE_SIZE);
tlb_page_demap(kernel_pmap, vdst);
tlb_page_demap(kernel_pmap, vsrc);
}