diff --git a/sys/arm64/arm64/locore.S b/sys/arm64/arm64/locore.S index 204306d063c..5a61c4a1f7c 100644 --- a/sys/arm64/arm64/locore.S +++ b/sys/arm64/arm64/locore.S @@ -100,6 +100,16 @@ _start: br x15 virtdone: + /* + * Now that we are in virtual address space, + * we don't need the identity mapping in TTBR0 and + * can load the regular tcr value in place of tcr_early. + */ + ldr x2, tcr + mrs x3, id_aa64mmfr0_el1 + bfi x2, x3, #32, #3 + msr tcr_el1, x2 + /* Set up the stack */ adr x25, initstack_end mov sp, x25 @@ -167,7 +177,7 @@ ENTRY(mpentry) /* Load the kernel page table */ adr x26, pagetable_l1_ttbr1 /* Load the identity page table */ - adr x27, pagetable_l1_ttbr0 + adr x27, pagetable_l0_ttbr0 /* Enable the mmu */ bl start_mmu @@ -177,6 +187,16 @@ ENTRY(mpentry) br x15 mp_virtdone: + /* + * Now that we are in virtual address space, + * we don't need the identity mapping in TTBR0 and + * can load the regular tcr value in place of tcr_early. + */ + ldr x2, tcr + mrs x3, id_aa64mmfr0_el1 + bfi x2, x3, #32, #3 + msr tcr_el1, x2 + ldr x4, =secondary_stacks mov x5, #(PAGE_SIZE * KSTACK_PAGES) mul x5, x0, x5 @@ -373,9 +393,9 @@ create_pagetables: */ add x27, x26, #PAGE_SIZE + mov x6, x27 /* The initial page table */ #if defined(SOCDEV_PA) && defined(SOCDEV_VA) /* Create a table for the UART */ - mov x6, x27 /* The initial page table */ mov x7, #DEVICE_MEM mov x8, #(SOCDEV_VA) /* VA start */ mov x9, #(SOCDEV_PA) /* PA start */ @@ -384,17 +404,55 @@ create_pagetables: #endif /* Create the VA = PA map */ - mov x6, x27 /* The initial page table */ mov x7, #NORMAL_UNCACHED /* Uncached as it's only needed early on */ mov x9, x27 mov x8, x9 /* VA start (== PA start) */ mov x10, #1 bl build_l1_block_pagetable + /* Move to the l0 table */ + add x27, x27, #PAGE_SIZE + + /* Link the l0 -> l1 table */ + mov x9, x6 + mov x6, x27 + bl link_l0_pagetable + /* Restore the Link register */ mov x30, x5 ret +/* + * Builds an L0 -> L1 table descriptor + * + * This is a link for a 512GiB block of memory with up to 
1GiB regions mapped + * within it by build_l1_block_pagetable. + * + * x6 = L0 table + * x8 = Virtual Address + * x9 = L1 PA (trashed) + * x11 and x12 are trashed + */ +link_l0_pagetable: + /* + * Link an L0 -> L1 table entry. + */ + /* Find the table index from the VA in x8 */ + lsr x11, x8, #L0_SHIFT + and x11, x11, #Ln_ADDR_MASK + + /* Build the L0 table entry */ + mov x12, #L0_TABLE + + /* Only use the output address bits (clear the low 12 bits) */ + lsr x9, x9, #12 + orr x12, x12, x9, lsl #12 + + /* Store the entry at index x11 (8 bytes per entry) */ + str x12, [x6, x11, lsl #3] + + ret + /* * Builds an L1 -> L2 table descriptor * @@ -535,8 +593,13 @@ start_mmu: ldr x2, mair msr mair_el1, x2 - /* Setup TCR according to PARange bits from ID_AA64MMFR0_EL1 */ - ldr x2, tcr + /* + * Setup TCR according to PARange bits from ID_AA64MMFR0_EL1. + * Some machines have physical memory mapped above 512GiB, which cannot + * be identity-mapped using the default 39 VA bits. Thus, use + * 48 VA bits for TTBR0 for now and switch back to 39 after the VA jump. + */ + ldr x2, tcr_early mrs x3, id_aa64mmfr0_el1 bfi x2, x3, #32, #3 msr tcr_el1, x2 @@ -559,6 +622,9 @@ mair: tcr: .quad (TCR_TxSZ(64 - VIRT_BITS) | TCR_ASID_16 | TCR_TG1_4K | \ TCR_CACHE_ATTRS | TCR_SMP_ATTRS) +tcr_early: + .quad (TCR_T1SZ(64 - VIRT_BITS) | TCR_T0SZ(64 - 48) | \ + TCR_ASID_16 | TCR_TG1_4K | TCR_CACHE_ATTRS | TCR_SMP_ATTRS) sctlr_set: /* Bits to set */ .quad (SCTLR_UCI | SCTLR_nTWE | SCTLR_nTWI | SCTLR_UCT | SCTLR_DZE | \ @@ -586,6 +652,8 @@ pagetable_l1_ttbr1: .space PAGE_SIZE pagetable_l1_ttbr0: .space PAGE_SIZE +pagetable_l0_ttbr0: + .space PAGE_SIZE pagetable_end: el2_pagetable: diff --git a/sys/arm64/include/armreg.h b/sys/arm64/include/armreg.h index c70194257c2..de8ce40c5f2 100644 --- a/sys/arm64/include/armreg.h +++ b/sys/arm64/include/armreg.h @@ -231,7 +231,9 @@ #define TCR_T1SZ_SHIFT 16 #define TCR_T0SZ_SHIFT 0 -#define TCR_TxSZ(x) (((x) << TCR_T1SZ_SHIFT) | ((x) << TCR_T0SZ_SHIFT)) +#define TCR_T1SZ(x) ((x) << TCR_T1SZ_SHIFT) +#define TCR_T0SZ(x) ((x) << TCR_T0SZ_SHIFT) 
+#define TCR_TxSZ(x) (TCR_T1SZ(x) | TCR_T0SZ(x)) /* Saved Program Status Register */ #define DBG_SPSR_SS (0x1 << 21)