Add 64-byte line size cache flushing routines for the L1 instruction, L1 data,
and L2 data caches.

Sponsored by:	HEIF5
This commit is contained in:
Ruslan Bukin 2015-03-26 14:51:24 +00:00
parent a4879be402
commit 0276459325
4 changed files with 423 additions and 11 deletions

View file

@ -43,37 +43,50 @@ void mipsNN_cache_init(struct mips_cpuinfo *);
/*
 * Cache maintenance routines, one variant per supported cache line size
 * (the numeric suffix is the line size in bytes).  Each prototype is
 * declared exactly once; the list is grouped by cache and operation.
 */

/* Primary instruction cache. */
void mipsNN_icache_sync_all_16(void);
void mipsNN_icache_sync_all_32(void);
void mipsNN_icache_sync_all_64(void);
void mipsNN_icache_sync_all_128(void);
void mipsNN_icache_sync_range_16(vm_offset_t, vm_size_t);
void mipsNN_icache_sync_range_32(vm_offset_t, vm_size_t);
void mipsNN_icache_sync_range_64(vm_offset_t, vm_size_t);
void mipsNN_icache_sync_range_128(vm_offset_t, vm_size_t);
void mipsNN_icache_sync_range_index_16(vm_offset_t, vm_size_t);
void mipsNN_icache_sync_range_index_32(vm_offset_t, vm_size_t);
void mipsNN_icache_sync_range_index_64(vm_offset_t, vm_size_t);
void mipsNN_icache_sync_range_index_128(vm_offset_t, vm_size_t);

/* Primary data cache. */
void mipsNN_pdcache_wbinv_all_16(void);
void mipsNN_pdcache_wbinv_all_32(void);
void mipsNN_pdcache_wbinv_all_64(void);
void mipsNN_pdcache_wbinv_all_128(void);
void mipsNN_pdcache_wbinv_range_16(vm_offset_t, vm_size_t);
void mipsNN_pdcache_wbinv_range_32(vm_offset_t, vm_size_t);
void mipsNN_pdcache_wbinv_range_64(vm_offset_t, vm_size_t);
void mipsNN_pdcache_wbinv_range_128(vm_offset_t, vm_size_t);
void mipsNN_pdcache_wbinv_range_index_16(vm_offset_t, vm_size_t);
void mipsNN_pdcache_wbinv_range_index_32(vm_offset_t, vm_size_t);
void mipsNN_pdcache_wbinv_range_index_64(vm_offset_t, vm_size_t);
void mipsNN_pdcache_wbinv_range_index_128(vm_offset_t, vm_size_t);
void mipsNN_pdcache_inv_range_16(vm_offset_t, vm_size_t);
void mipsNN_pdcache_inv_range_32(vm_offset_t, vm_size_t);
void mipsNN_pdcache_inv_range_64(vm_offset_t, vm_size_t);
void mipsNN_pdcache_inv_range_128(vm_offset_t, vm_size_t);
void mipsNN_pdcache_wb_range_16(vm_offset_t, vm_size_t);
void mipsNN_pdcache_wb_range_32(vm_offset_t, vm_size_t);
void mipsNN_pdcache_wb_range_64(vm_offset_t, vm_size_t);
void mipsNN_pdcache_wb_range_128(vm_offset_t, vm_size_t);

/* Secondary data cache. */
void mipsNN_sdcache_wbinv_all_32(void);
void mipsNN_sdcache_wbinv_all_64(void);
void mipsNN_sdcache_wbinv_all_128(void);
void mipsNN_sdcache_wbinv_range_32(vm_offset_t, vm_size_t);
void mipsNN_sdcache_wbinv_range_64(vm_offset_t, vm_size_t);
void mipsNN_sdcache_wbinv_range_128(vm_offset_t, vm_size_t);
void mipsNN_sdcache_wbinv_range_index_32(vm_offset_t, vm_size_t);
void mipsNN_sdcache_wbinv_range_index_64(vm_offset_t, vm_size_t);
void mipsNN_sdcache_wbinv_range_index_128(vm_offset_t, vm_size_t);
void mipsNN_sdcache_inv_range_32(vm_offset_t, vm_size_t);
void mipsNN_sdcache_inv_range_64(vm_offset_t, vm_size_t);
void mipsNN_sdcache_inv_range_128(vm_offset_t, vm_size_t);
void mipsNN_sdcache_wb_range_32(vm_offset_t, vm_size_t);
void mipsNN_sdcache_wb_range_64(vm_offset_t, vm_size_t);
void mipsNN_sdcache_wb_range_128(vm_offset_t, vm_size_t);
#endif /* _MACHINE_CACHE_MIPSNN_H_ */

View file

@ -113,6 +113,25 @@ do { \
: "memory"); \
} while (/*CONSTCOND*/0)
/*
 * cache_r4k_op_8lines_64:
 *
 *	Issue the "cache" instruction with operation `op' on 8 consecutive
 *	64-byte cache lines, i.e. at offsets 0x000 through 0x1c0 (in steps
 *	of 0x40) from the base address `va'.  One invocation therefore
 *	covers 8 * 64 = 512 bytes.
 *
 *	`va' is handed to the assembler in a register ("r" constraint);
 *	`op' is an immediate operand ("i" constraint), so it must be a
 *	compile-time constant.  The sequence is bracketed by
 *	".set noreorder" / ".set reorder" and declares a "memory" clobber.
 */
#define cache_r4k_op_8lines_64(va, op) \
do { \
__asm __volatile( \
".set noreorder \n\t" \
"cache %1, 0x000(%0); cache %1, 0x040(%0) \n\t" \
"cache %1, 0x080(%0); cache %1, 0x0c0(%0) \n\t" \
"cache %1, 0x100(%0); cache %1, 0x140(%0) \n\t" \
"cache %1, 0x180(%0); cache %1, 0x1c0(%0) \n\t" \
".set reorder" \
: \
: "r" (va), "i" (op) \
: "memory"); \
} while (/*CONSTCOND*/0)
/*
* cache_r4k_op_32lines_16:
*
@ -177,6 +196,38 @@ do { \
: "memory"); \
} while (/*CONSTCOND*/0)
/*
 * cache_r4k_op_32lines_64:
 *
 *	Issue the "cache" instruction with operation `op' on 32
 *	consecutive 64-byte cache lines, i.e. at offsets 0x000 through
 *	0x7c0 (in steps of 0x40) from the base address `va'.  One
 *	invocation therefore covers 32 * 64 = 2048 bytes.
 *
 *	`va' is handed to the assembler in a register ("r" constraint);
 *	`op' is an immediate operand ("i" constraint), so it must be a
 *	compile-time constant.  The sequence is bracketed by
 *	".set noreorder" / ".set reorder" and declares a "memory" clobber.
 */
#define cache_r4k_op_32lines_64(va, op) \
do { \
__asm __volatile( \
".set noreorder \n\t" \
"cache %1, 0x000(%0); cache %1, 0x040(%0); \n\t" \
"cache %1, 0x080(%0); cache %1, 0x0c0(%0); \n\t" \
"cache %1, 0x100(%0); cache %1, 0x140(%0); \n\t" \
"cache %1, 0x180(%0); cache %1, 0x1c0(%0); \n\t" \
"cache %1, 0x200(%0); cache %1, 0x240(%0); \n\t" \
"cache %1, 0x280(%0); cache %1, 0x2c0(%0); \n\t" \
"cache %1, 0x300(%0); cache %1, 0x340(%0); \n\t" \
"cache %1, 0x380(%0); cache %1, 0x3c0(%0); \n\t" \
"cache %1, 0x400(%0); cache %1, 0x440(%0); \n\t" \
"cache %1, 0x480(%0); cache %1, 0x4c0(%0); \n\t" \
"cache %1, 0x500(%0); cache %1, 0x540(%0); \n\t" \
"cache %1, 0x580(%0); cache %1, 0x5c0(%0); \n\t" \
"cache %1, 0x600(%0); cache %1, 0x640(%0); \n\t" \
"cache %1, 0x680(%0); cache %1, 0x6c0(%0); \n\t" \
"cache %1, 0x700(%0); cache %1, 0x740(%0); \n\t" \
"cache %1, 0x780(%0); cache %1, 0x7c0(%0); \n\t" \
".set reorder" \
: \
: "r" (va), "i" (op) \
: "memory"); \
} while (/*CONSTCOND*/0)
/*
* cache_r4k_op_32lines_128:
*

View file

@ -104,6 +104,13 @@ mips_config_cache(struct mips_cpuinfo * cpuinfo)
mips_cache_ops.mco_icache_sync_range_index =
mipsNN_icache_sync_range_index_32;
break;
case 64:
mips_cache_ops.mco_icache_sync_all = mipsNN_icache_sync_all_64;
mips_cache_ops.mco_icache_sync_range =
mipsNN_icache_sync_range_64;
mips_cache_ops.mco_icache_sync_range_index =
mipsNN_icache_sync_range_index_64;
break;
case 128:
mips_cache_ops.mco_icache_sync_all = mipsNN_icache_sync_all_128;
mips_cache_ops.mco_icache_sync_range =
@ -170,6 +177,21 @@ mips_config_cache(struct mips_cpuinfo * cpuinfo)
mipsNN_pdcache_wb_range_32;
#endif
break;
case 64:
mips_cache_ops.mco_pdcache_wbinv_all =
mips_cache_ops.mco_intern_pdcache_wbinv_all =
mipsNN_pdcache_wbinv_all_64;
mips_cache_ops.mco_pdcache_wbinv_range =
mipsNN_pdcache_wbinv_range_64;
mips_cache_ops.mco_pdcache_wbinv_range_index =
mips_cache_ops.mco_intern_pdcache_wbinv_range_index =
mipsNN_pdcache_wbinv_range_index_64;
mips_cache_ops.mco_pdcache_inv_range =
mipsNN_pdcache_inv_range_64;
mips_cache_ops.mco_pdcache_wb_range =
mips_cache_ops.mco_intern_pdcache_wb_range =
mipsNN_pdcache_wb_range_64;
break;
case 128:
mips_cache_ops.mco_pdcache_wbinv_all =
mips_cache_ops.mco_intern_pdcache_wbinv_all =
@ -275,6 +297,18 @@ mips_config_cache(struct mips_cpuinfo * cpuinfo)
mips_cache_ops.mco_sdcache_wb_range =
mipsNN_sdcache_wb_range_32;
break;
case 64:
mips_cache_ops.mco_sdcache_wbinv_all =
mipsNN_sdcache_wbinv_all_64;
mips_cache_ops.mco_sdcache_wbinv_range =
mipsNN_sdcache_wbinv_range_64;
mips_cache_ops.mco_sdcache_wbinv_range_index =
mipsNN_sdcache_wbinv_range_index_64;
mips_cache_ops.mco_sdcache_inv_range =
mipsNN_sdcache_inv_range_64;
mips_cache_ops.mco_sdcache_wb_range =
mipsNN_sdcache_wb_range_64;
break;
case 128:
mips_cache_ops.mco_sdcache_wbinv_all =
mipsNN_sdcache_wbinv_all_128;

View file

@ -52,6 +52,9 @@ __FBSDID("$FreeBSD$");
/*
 * Cache line alignment helpers.  trunc_lineN() clears the low bits of x
 * so the result is a multiple of N; round_lineN() first adds N - 1 and
 * then truncates, giving the next multiple of N at or above x.
 */
#define	trunc_line32(x)		((x) & ~0x1f)
#define	round_line32(x)		trunc_line32((x) + 0x1f)

#define	trunc_line64(x)		((x) & ~0x3f)
#define	round_line64(x)		trunc_line64((x) + 0x3f)

#define	trunc_line128(x)	((x) & ~0x7f)
#define	round_line128(x)	trunc_line128((x) + 0x7f)
@ -212,6 +215,29 @@ mipsNN_icache_sync_all_32(void)
SYNC;
}
/*
 * Synchronize the whole primary instruction cache (64-byte lines).
 *
 * The data cache is written back and invalidated first through
 * mips_intern_dcache_wbinv_all().  The instruction cache is then
 * index-invalidated 32 lines (2048 bytes) per step across picache_size
 * bytes of KSEG0, followed by a SYNC.
 */
void
mipsNN_icache_sync_all_64(void)
{
	vm_offset_t va, end;

	va = MIPS_PHYS_TO_KSEG0(0);
	end = va + picache_size;

	/*
	 * Every index of the cache is visited, so the individual
	 * "ways" need no separate treatment here.
	 */

	mips_intern_dcache_wbinv_all();

	for (; va < end; va += (32 * 64)) {
		cache_r4k_op_32lines_64(va, CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
	}

	SYNC;
}
void
mipsNN_icache_sync_range_16(vm_offset_t va, vm_size_t size)
{
@ -258,6 +284,29 @@ mipsNN_icache_sync_range_32(vm_offset_t va, vm_size_t size)
SYNC;
}
/*
 * Synchronize the primary instruction cache for the range
 * [va, va + size) on a cache with 64-byte lines.
 *
 * The range is widened to whole lines, the same range is written back
 * from the data cache via mips_intern_dcache_wb_range(), and the
 * instruction cache lines are then hit-invalidated: 32 lines per step
 * while at least that many remain, one line per step for the tail.
 */
void
mipsNN_icache_sync_range_64(vm_offset_t va, vm_size_t size)
{
	vm_offset_t end;

	end = round_line64(va + size);
	va = trunc_line64(va);

	mips_intern_dcache_wb_range(va, (end - va));

	/* Bulk part: 32 lines (2048 bytes) at a time. */
	for (; (end - va) >= (32 * 64); va += (32 * 64)) {
		cache_r4k_op_32lines_64(va, CACHE_R4K_I|CACHEOP_R4K_HIT_INV);
	}

	/* Remainder: one 64-byte line at a time. */
	for (; va < end; va += 64) {
		cache_op_r4k_line(va, CACHE_R4K_I|CACHEOP_R4K_HIT_INV);
	}

	SYNC;
}
void
mipsNN_icache_sync_range_index_16(vm_offset_t va, vm_size_t size)
{
@ -344,6 +393,49 @@ mipsNN_icache_sync_range_index_32(vm_offset_t va, vm_size_t size)
}
}
/*
 * Index-based instruction cache synchronization for a range, for a
 * cache with 64-byte lines.
 *
 * The data cache is handled first through
 * mips_intern_dcache_wbinv_range_index().  The instruction cache is
 * then index-invalidated 8 lines per step (single lines for the tail);
 * each step is repeated picache_loopcount times at addresses
 * picache_stride bytes apart.
 */
void
mipsNN_icache_sync_range_index_64(vm_offset_t va, vm_size_t size)
{
	vm_offset_t end, cur;
	int pass, npasses, step;

	/*
	 * Index operations are used because the address handed in may
	 * not be accessible.  Keep only the bits that select the cache
	 * index and rebase them into KSEG0.
	 */
	va = MIPS_PHYS_TO_KSEG0(va & picache_way_mask);

	end = round_line64(va + size);
	va = trunc_line64(va);

	/*
	 * Local copies of the globals let GCC generate better code
	 * for the loops below.
	 */
	step = picache_stride;
	npasses = picache_loopcount;

	mips_intern_dcache_wbinv_range_index(va, (end - va));

	/* Bulk part: 8 lines (512 bytes) per outer iteration. */
	for (; (end - va) >= (8 * 64); va += 8 * 64) {
		cur = va;
		for (pass = 0; pass < npasses; pass++, cur += step) {
			cache_r4k_op_8lines_64(cur,
			    CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
		}
	}

	/* Remainder: one 64-byte line per outer iteration. */
	for (; va < end; va += 64) {
		cur = va;
		for (pass = 0; pass < npasses; pass++, cur += step) {
			cache_op_r4k_line(cur,
			    CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
		}
	}
}
void
mipsNN_pdcache_wbinv_all_16(void)
{
@ -388,6 +480,28 @@ mipsNN_pdcache_wbinv_all_32(void)
SYNC;
}
/*
 * Write back and invalidate the whole primary data cache (64-byte
 * lines) using index operations, 32 lines (2048 bytes) per step across
 * pdcache_size bytes of KSEG0, followed by a SYNC.
 */
void
mipsNN_pdcache_wbinv_all_64(void)
{
	vm_offset_t va, end;

	va = MIPS_PHYS_TO_KSEG0(0);
	end = va + pdcache_size;

	/*
	 * Every index of the cache is visited, so the individual
	 * "ways" need no separate treatment here.
	 */

	for (; va < end; va += (32 * 64)) {
		cache_r4k_op_32lines_64(va,
		    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
	}

	SYNC;
}
void
mipsNN_pdcache_wbinv_range_16(vm_offset_t va, vm_size_t size)
{
@ -432,6 +546,28 @@ mipsNN_pdcache_wbinv_range_32(vm_offset_t va, vm_size_t size)
SYNC;
}
/*
 * Write back and invalidate the primary data cache lines covering
 * [va, va + size) on a cache with 64-byte lines.
 *
 * The range is widened to whole lines and processed with hit
 * operations: 32 lines per step while at least that many remain, one
 * line per step for the tail.  A SYNC follows.
 */
void
mipsNN_pdcache_wbinv_range_64(vm_offset_t va, vm_size_t size)
{
	vm_offset_t end;

	end = round_line64(va + size);
	va = trunc_line64(va);

	/* Bulk part: 32 lines (2048 bytes) at a time. */
	for (; (end - va) >= (32 * 64); va += (32 * 64)) {
		cache_r4k_op_32lines_64(va,
		    CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
	}

	/* Remainder: one 64-byte line at a time. */
	for (; va < end; va += 64) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
	}

	SYNC;
}
void
mipsNN_pdcache_wbinv_range_index_16(vm_offset_t va, vm_size_t size)
{
@ -513,6 +649,47 @@ mipsNN_pdcache_wbinv_range_index_32(vm_offset_t va, vm_size_t size)
va += 32;
}
}
/*
 * Index-based write-back-and-invalidate of the primary data cache for
 * a range, for a cache with 64-byte lines.
 *
 * The range is processed 8 lines per step (single lines for the tail);
 * each step is repeated pdcache_loopcount times at addresses
 * pdcache_stride bytes apart.
 */
void
mipsNN_pdcache_wbinv_range_index_64(vm_offset_t va, vm_size_t size)
{
	vm_offset_t end, cur;
	int pass, npasses, step;

	/*
	 * Index operations are used because the address handed in may
	 * not be accessible.  Keep only the bits that select the cache
	 * index and rebase them into KSEG0.
	 */
	va = MIPS_PHYS_TO_KSEG0(va & pdcache_way_mask);

	end = round_line64(va + size);
	va = trunc_line64(va);

	/*
	 * Local copies of the globals let GCC generate better code
	 * for the loops below.
	 */
	step = pdcache_stride;
	npasses = pdcache_loopcount;

	/* Bulk part: 8 lines (512 bytes) per outer iteration. */
	for (; (end - va) >= (8 * 64); va += 8 * 64) {
		cur = va;
		for (pass = 0; pass < npasses; pass++, cur += step) {
			cache_r4k_op_8lines_64(cur,
			    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		}
	}

	/* Remainder: one 64-byte line per outer iteration. */
	for (; va < end; va += 64) {
		cur = va;
		for (pass = 0; pass < npasses; pass++, cur += step) {
			cache_op_r4k_line(cur,
			    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		}
	}
}
void
mipsNN_pdcache_inv_range_16(vm_offset_t va, vm_size_t size)
@ -556,6 +733,27 @@ mipsNN_pdcache_inv_range_32(vm_offset_t va, vm_size_t size)
SYNC;
}
/*
 * Invalidate the primary data cache lines covering [va, va + size) on
 * a cache with 64-byte lines.
 *
 * The range is widened to whole lines and processed with hit-invalidate
 * operations: 32 lines per step while at least that many remain, one
 * line per step for the tail.  A SYNC follows.
 */
void
mipsNN_pdcache_inv_range_64(vm_offset_t va, vm_size_t size)
{
	vm_offset_t end;

	end = round_line64(va + size);
	va = trunc_line64(va);

	/* Bulk part: 32 lines (2048 bytes) at a time. */
	for (; (end - va) >= (32 * 64); va += (32 * 64)) {
		cache_r4k_op_32lines_64(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
	}

	/* Remainder: one 64-byte line at a time. */
	for (; va < end; va += 64) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
	}

	SYNC;
}
void
mipsNN_pdcache_wb_range_16(vm_offset_t va, vm_size_t size)
{
@ -598,6 +796,26 @@ mipsNN_pdcache_wb_range_32(vm_offset_t va, vm_size_t size)
SYNC;
}
/*
 * Write back the primary data cache lines covering [va, va + size) on
 * a cache with 64-byte lines.
 *
 * The range is widened to whole lines and processed with hit-writeback
 * operations: 32 lines per step while at least that many remain, one
 * line per step for the tail.  A SYNC follows.
 */
void
mipsNN_pdcache_wb_range_64(vm_offset_t va, vm_size_t size)
{
	vm_offset_t end;

	end = round_line64(va + size);
	va = trunc_line64(va);

	/* Bulk part: 32 lines (2048 bytes) at a time. */
	for (; (end - va) >= (32 * 64); va += (32 * 64)) {
		cache_r4k_op_32lines_64(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
	}

	/* Remainder: one 64-byte line at a time. */
	for (; va < end; va += 64) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
	}

	SYNC;
}
#ifdef CPU_CNMIPS
@ -881,6 +1099,19 @@ mipsNN_sdcache_wbinv_all_32(void)
}
}
/*
 * Write back and invalidate the whole secondary cache (64-byte lines)
 * using index operations, 32 lines (2048 bytes) per step across
 * sdcache_size bytes of KSEG0.
 */
void
mipsNN_sdcache_wbinv_all_64(void)
{
	vm_offset_t va, end;

	va = MIPS_PHYS_TO_KSEG0(0);
	end = va + sdcache_size;

	for (; va < end; va += (32 * 64)) {
		cache_r4k_op_32lines_64(va,
		    CACHE_R4K_SD|CACHEOP_R4K_INDEX_WB_INV);
	}
}
void
mipsNN_sdcache_wbinv_range_32(vm_offset_t va, vm_size_t size)
{
@ -900,6 +1131,25 @@ mipsNN_sdcache_wbinv_range_32(vm_offset_t va, vm_size_t size)
}
}
/*
 * Write back and invalidate the secondary cache lines covering
 * [va, va + size) on a cache with 64-byte lines.
 *
 * The range is widened to whole lines and processed with hit
 * operations: 32 lines per step while at least that many remain, one
 * line per step for the tail.
 */
void
mipsNN_sdcache_wbinv_range_64(vm_offset_t va, vm_size_t size)
{
	vm_offset_t end;

	end = round_line64(va + size);
	va = trunc_line64(va);

	/* Bulk part: 32 lines (2048 bytes) at a time. */
	for (; (end - va) >= (32 * 64); va += (32 * 64)) {
		cache_r4k_op_32lines_64(va,
		    CACHE_R4K_SD|CACHEOP_R4K_HIT_WB_INV);
	}

	/* Remainder: one 64-byte line at a time. */
	for (; va < end; va += 64) {
		cache_op_r4k_line(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_WB_INV);
	}
}
void
mipsNN_sdcache_wbinv_range_index_32(vm_offset_t va, vm_size_t size)
{
@ -928,6 +1178,34 @@ mipsNN_sdcache_wbinv_range_index_32(vm_offset_t va, vm_size_t size)
}
}
/*
 * Index-based write-back-and-invalidate of the secondary cache for a
 * range, for a cache with 64-byte lines.
 *
 * The range is processed 32 lines per step while at least that many
 * remain, then one line per step for the tail.
 */
void
mipsNN_sdcache_wbinv_range_index_64(vm_offset_t va, vm_size_t size)
{
	vm_offset_t end;

	/*
	 * Index operations are used because the address handed in may
	 * not be accessible.  Keep only the bits that select the cache
	 * index (va modulo sdcache_size) and rebase them into KSEG0.
	 */
	va = MIPS_PHYS_TO_KSEG0(va & (sdcache_size - 1));

	end = round_line64(va + size);
	va = trunc_line64(va);

	/* Bulk part: 32 lines (2048 bytes) at a time. */
	for (; (end - va) >= (32 * 64); va += (32 * 64)) {
		cache_r4k_op_32lines_64(va,
		    CACHE_R4K_SD|CACHEOP_R4K_INDEX_WB_INV);
	}

	/* Remainder: one 64-byte line at a time. */
	for (; va < end; va += 64) {
		cache_op_r4k_line(va, CACHE_R4K_SD|CACHEOP_R4K_INDEX_WB_INV);
	}
}
void
mipsNN_sdcache_inv_range_32(vm_offset_t va, vm_size_t size)
{
@ -946,6 +1224,24 @@ mipsNN_sdcache_inv_range_32(vm_offset_t va, vm_size_t size)
}
}
/*
 * Invalidate the secondary cache lines covering [va, va + size) on a
 * cache with 64-byte lines.
 *
 * The range is widened to whole lines and processed with hit-invalidate
 * operations: 32 lines per step while at least that many remain, one
 * line per step for the tail.
 */
void
mipsNN_sdcache_inv_range_64(vm_offset_t va, vm_size_t size)
{
	vm_offset_t end;

	end = round_line64(va + size);
	va = trunc_line64(va);

	/* Bulk part: 32 lines (2048 bytes) at a time. */
	for (; (end - va) >= (32 * 64); va += (32 * 64)) {
		cache_r4k_op_32lines_64(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_INV);
	}

	/* Remainder: one 64-byte line at a time. */
	for (; va < end; va += 64) {
		cache_op_r4k_line(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_INV);
	}
}
void
mipsNN_sdcache_wb_range_32(vm_offset_t va, vm_size_t size)
{
@ -964,6 +1260,24 @@ mipsNN_sdcache_wb_range_32(vm_offset_t va, vm_size_t size)
}
}
/*
 * Write back the secondary cache lines covering [va, va + size) on a
 * cache with 64-byte lines.
 *
 * The range is widened to whole lines and processed with hit-writeback
 * operations: 32 lines per step while at least that many remain, one
 * line per step for the tail.
 */
void
mipsNN_sdcache_wb_range_64(vm_offset_t va, vm_size_t size)
{
	vm_offset_t end;

	end = round_line64(va + size);
	va = trunc_line64(va);

	/* Bulk part: 32 lines (2048 bytes) at a time. */
	for (; (end - va) >= (32 * 64); va += (32 * 64)) {
		cache_r4k_op_32lines_64(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_WB);
	}

	/* Remainder: one 64-byte line at a time. */
	for (; va < end; va += 64) {
		cache_op_r4k_line(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_WB);
	}
}
void
mipsNN_sdcache_wbinv_all_128(void)
{