From e011dc962cfe5acc567ccd7242355b0797c64e65 Mon Sep 17 00:00:00 2001 From: Neel Natu Date: Mon, 20 Oct 2014 18:09:33 +0000 Subject: [PATCH] Merge from projects/bhyve_svm all the changes outside vmm.ko or bhyve utilities: Add support for AMD's nested page tables in pmap.c: - Provide the correct bit mask for various bit fields in a PTE (e.g. valid bit) for a pmap of type PT_RVI. - Add a function 'pmap_type_guest(pmap)' that returns TRUE if the pmap is of type PT_EPT or PT_RVI. Add CPU_SET_ATOMIC_ACQ(num, cpuset): This is used when activating a vcpu in the nested pmap. Using the 'acquire' variant guarantees that the load of the 'pm_eptgen' will happen only after the vcpu is activated in 'pm_active'. Add defines for various AMD-specific MSRs. Submitted by: Anish Gupta (akgupt3@gmail.com) --- sys/amd64/amd64/pmap.c | 29 ++++++++++++++++++++++++----- sys/sys/bitset.h | 4 ++++ sys/sys/cpuset.h | 1 + sys/x86/include/specialreg.h | 14 ++++++++++++++ 4 files changed, 43 insertions(+), 5 deletions(-) diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c index fadde28941c..291fd7647e4 100644 --- a/sys/amd64/amd64/pmap.c +++ b/sys/amd64/amd64/pmap.c @@ -145,6 +145,13 @@ __FBSDID("$FreeBSD$"); #include #endif +static __inline boolean_t +pmap_type_guest(pmap_t pmap) +{ + + return ((pmap->pm_type == PT_EPT) || (pmap->pm_type == PT_RVI)); +} + static __inline boolean_t pmap_emulate_ad_bits(pmap_t pmap) { @@ -159,6 +166,7 @@ pmap_valid_bit(pmap_t pmap) switch (pmap->pm_type) { case PT_X86: + case PT_RVI: mask = X86_PG_V; break; case PT_EPT: @@ -181,6 +189,7 @@ pmap_rw_bit(pmap_t pmap) switch (pmap->pm_type) { case PT_X86: + case PT_RVI: mask = X86_PG_RW; break; case PT_EPT: @@ -205,6 +214,7 @@ pmap_global_bit(pmap_t pmap) case PT_X86: mask = X86_PG_G; break; + case PT_RVI: case PT_EPT: mask = 0; break; @@ -222,6 +232,7 @@ pmap_accessed_bit(pmap_t pmap) switch (pmap->pm_type) { case PT_X86: + case PT_RVI: mask = X86_PG_A; break; case PT_EPT: @@ -244,6 +255,7 @@ pmap_modified_bit(pmap_t pmap) switch (pmap->pm_type) { case PT_X86: + case PT_RVI: mask = X86_PG_M; break; case PT_EPT: @@ -1103,6 +1115,7 @@ pmap_swap_pat(pmap_t pmap, pt_entry_t entry) switch (pmap->pm_type) { case PT_X86: + case PT_RVI: /* Verify that both PAT bits are not set at the same time */ KASSERT((entry & x86_pat_bits) != x86_pat_bits, ("Invalid PAT bits in entry %#lx", entry)); @@ -1138,6 +1151,7 @@ pmap_cache_bits(pmap_t pmap, int mode, boolean_t is_pde) switch (pmap->pm_type) { case PT_X86: + case PT_RVI: /* The PAT bit is different for PTE's and PDE's. */ pat_flag = is_pde ? X86_PG_PDE_PAT : X86_PG_PTE_PAT; @@ -1172,6 +1186,7 @@ pmap_cache_mask(pmap_t pmap, boolean_t is_pde) switch (pmap->pm_type) { case PT_X86: + case PT_RVI: mask = is_pde ? X86_PG_PDE_CACHE : X86_PG_PTE_CACHE; break; case PT_EPT: @@ -1198,6 +1213,7 @@ pmap_update_pde_store(pmap_t pmap, pd_entry_t *pde, pd_entry_t newpde) switch (pmap->pm_type) { case PT_X86: break; + case PT_RVI: case PT_EPT: /* * XXX @@ -1233,7 +1249,7 @@ pmap_update_pde_invalidate(pmap_t pmap, vm_offset_t va, pd_entry_t newpde) { pt_entry_t PG_G; - if (pmap->pm_type == PT_EPT) + if (pmap_type_guest(pmap)) return; KASSERT(pmap->pm_type == PT_X86, @@ -1347,7 +1363,7 @@ pmap_invalidate_page(pmap_t pmap, vm_offset_t va) cpuset_t other_cpus; u_int cpuid; - if (pmap->pm_type == PT_EPT) { + if (pmap_type_guest(pmap)) { pmap_invalidate_ept(pmap); return; } @@ -1425,7 +1441,7 @@ pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) vm_offset_t addr; u_int cpuid; - if (pmap->pm_type == PT_EPT) { + if (pmap_type_guest(pmap)) { pmap_invalidate_ept(pmap); return; } @@ -1484,7 +1500,7 @@ pmap_invalidate_all(pmap_t pmap) uint64_t cr3; u_int cpuid; - if (pmap->pm_type == PT_EPT) { + if (pmap_type_guest(pmap)) { pmap_invalidate_ept(pmap); return; } @@ -1606,7 +1622,7 @@ pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde) cpuid = PCPU_GET(cpuid); other_cpus = all_cpus; CPU_CLR(cpuid, &other_cpus); - if (pmap == kernel_pmap || pmap->pm_type == PT_EPT) + if (pmap == kernel_pmap || pmap_type_guest(pmap)) active = all_cpus; else { active = pmap->pm_active; @@ -1644,6 +1660,7 @@ pmap_invalidate_page(pmap_t pmap, vm_offset_t va) if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active)) invlpg(va); break; + case PT_RVI: case PT_EPT: pmap->pm_eptgen++; break; @@ -1663,6 +1680,7 @@ pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) for (addr = sva; addr < eva; addr += PAGE_SIZE) invlpg(addr); break; + case PT_RVI: case PT_EPT: pmap->pm_eptgen++; break; @@ -1680,6 +1698,7 @@ pmap_invalidate_all(pmap_t pmap) if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active)) invltlb(); break; + case PT_RVI: case PT_EPT: pmap->pm_eptgen++; break; diff --git a/sys/sys/bitset.h b/sys/sys/bitset.h index 7c24ecd0443..e6c4dc374a3 100644 --- a/sys/sys/bitset.h +++ b/sys/sys/bitset.h @@ -135,6 +135,10 @@ atomic_set_long(&(p)->__bits[__bitset_word(_s, n)], \ __bitset_mask((_s), n)) +#define BIT_SET_ATOMIC_ACQ(_s, n, p) \ + atomic_set_acq_long(&(p)->__bits[__bitset_word(_s, n)], \ + __bitset_mask((_s), n)) + /* Convenience functions catering special cases. */ #define BIT_AND_ATOMIC(_s, d, s) do { \ __size_t __i; \ diff --git a/sys/sys/cpuset.h b/sys/sys/cpuset.h index ba2b7ceaa98..d8e7450ee43 100644 --- a/sys/sys/cpuset.h +++ b/sys/sys/cpuset.h @@ -55,6 +55,7 @@ #define CPU_NAND(d, s) BIT_NAND(CPU_SETSIZE, d, s) #define CPU_CLR_ATOMIC(n, p) BIT_CLR_ATOMIC(CPU_SETSIZE, n, p) #define CPU_SET_ATOMIC(n, p) BIT_SET_ATOMIC(CPU_SETSIZE, n, p) +#define CPU_SET_ATOMIC_ACQ(n, p) BIT_SET_ATOMIC_ACQ(CPU_SETSIZE, n, p) #define CPU_AND_ATOMIC(n, p) BIT_AND_ATOMIC(CPU_SETSIZE, n, p) #define CPU_OR_ATOMIC(d, s) BIT_OR_ATOMIC(CPU_SETSIZE, d, s) #define CPU_COPY_STORE_REL(f, t) BIT_COPY_STORE_REL(CPU_SETSIZE, f, t) diff --git a/sys/x86/include/specialreg.h b/sys/x86/include/specialreg.h index 349166f740d..e6079e77e79 100644 --- a/sys/x86/include/specialreg.h +++ b/sys/x86/include/specialreg.h @@ -81,6 +81,7 @@ #define EFER_LME 0x000000100 /* Long mode enable (R/W) */ #define EFER_LMA 0x000000400 /* Long mode active (R) */ #define EFER_NXE 0x000000800 /* PTE No-Execute bit enable (R/W) */ +#define EFER_SVM 0x000001000 /* SVM enable bit for AMD, reserved for Intel */ /* * Intel Extended Features registers @@ -783,8 +784,21 @@ #define MSR_IORRMASK1 0xc0010019 #define MSR_TOP_MEM 0xc001001a /* boundary for ram below 4G */ #define MSR_TOP_MEM2 0xc001001d /* boundary for ram above 4G */ +#define MSR_NB_CFG1 0xc001001f /* NB configuration 1 */ +#define MSR_P_STATE_LIMIT 0xc0010061 /* P-state Current Limit Register */ +#define MSR_P_STATE_CONTROL 0xc0010062 /* P-state Control Register */ +#define MSR_P_STATE_STATUS 0xc0010063 /* P-state Status Register */ +#define MSR_P_STATE_CONFIG(n) (0xc0010064 + (n)) /* P-state Config */ +#define MSR_SMM_ADDR 0xc0010112 /* SMM TSEG base address */ +#define MSR_SMM_MASK 0xc0010113 /* SMM TSEG address mask */ +#define MSR_IC_CFG 0xc0011021 /* Instruction Cache Configuration */ #define MSR_K8_UCODE_UPDATE 0xc0010020 /* update microcode */ #define MSR_MC0_CTL_MASK 0xc0010044 +#define MSR_VM_CR 0xc0010114 /* SVM: feature control */ +#define MSR_VM_HSAVE_PA 0xc0010117 /* SVM: host save area address */ + +/* MSR_VM_CR related */ +#define VM_CR_SVMDIS 0x10 /* SVM: disabled by BIOS */ /* VIA ACE crypto featureset: for via_feature_rng */ #define VIA_HAS_RNG 1 /* cpu has RNG */