diff options
Diffstat (limited to 'sys/amd64/vmm/intel/ept.c')
-rw-r--r-- | sys/amd64/vmm/intel/ept.c | 304 |
1 files changed, 54 insertions, 250 deletions
diff --git a/sys/amd64/vmm/intel/ept.c b/sys/amd64/vmm/intel/ept.c index 1ff158025f8d..18e90f369be3 100644 --- a/sys/amd64/vmm/intel/ept.c +++ b/sys/amd64/vmm/intel/ept.c @@ -29,32 +29,31 @@ #include <sys/cdefs.h> __FBSDID("$FreeBSD$"); +#include <sys/param.h> +#include <sys/kernel.h> #include <sys/types.h> -#include <sys/errno.h> #include <sys/systm.h> -#include <sys/malloc.h> #include <sys/smp.h> +#include <sys/sysctl.h> #include <vm/vm.h> #include <vm/pmap.h> - -#include <machine/param.h> -#include <machine/cpufunc.h> -#include <machine/pmap.h> -#include <machine/vmparam.h> +#include <vm/vm_extern.h> #include <machine/vmm.h> + #include "vmx_cpufunc.h" #include "vmx_msr.h" -#include "vmx.h" #include "ept.h" +#define EPT_SUPPORTS_EXEC_ONLY(cap) ((cap) & (1UL << 0)) #define EPT_PWL4(cap) ((cap) & (1UL << 6)) #define EPT_MEMORY_TYPE_WB(cap) ((cap) & (1UL << 14)) #define EPT_PDE_SUPERPAGE(cap) ((cap) & (1UL << 16)) /* 2MB pages */ #define EPT_PDPTE_SUPERPAGE(cap) ((cap) & (1UL << 17)) /* 1GB pages */ -#define INVVPID_SUPPORTED(cap) ((cap) & (1UL << 32)) #define INVEPT_SUPPORTED(cap) ((cap) & (1UL << 20)) +#define AD_BITS_SUPPORTED(cap) ((cap) & (1UL << 21)) +#define INVVPID_SUPPORTED(cap) ((cap) & (1UL << 32)) #define INVVPID_ALL_TYPES_MASK 0xF0000000000UL #define INVVPID_ALL_TYPES_SUPPORTED(cap) \ @@ -64,28 +63,22 @@ __FBSDID("$FreeBSD$"); #define INVEPT_ALL_TYPES_SUPPORTED(cap) \ (((cap) & INVEPT_ALL_TYPES_MASK) == INVEPT_ALL_TYPES_MASK) -#define EPT_PG_RD (1 << 0) -#define EPT_PG_WR (1 << 1) -#define EPT_PG_EX (1 << 2) -#define EPT_PG_MEMORY_TYPE(x) ((x) << 3) -#define EPT_PG_IGNORE_PAT (1 << 6) -#define EPT_PG_SUPERPAGE (1 << 7) - -#define EPT_ADDR_MASK ((uint64_t)-1 << 12) +#define EPT_PWLEVELS 4 /* page walk levels */ +#define EPT_ENABLE_AD_BITS (1 << 6) -MALLOC_DECLARE(M_VMX); +SYSCTL_DECL(_hw_vmm); +SYSCTL_NODE(_hw_vmm, OID_AUTO, ept, CTLFLAG_RW, NULL, NULL); -static uint64_t page_sizes_mask; +static int ept_enable_ad_bits; -/* - * Set this to 1 to have the EPT tables respect the guest PAT settings - */ -static int ept_pat_passthru; +static int ept_pmap_flags; +SYSCTL_INT(_hw_vmm_ept, OID_AUTO, pmap_flags, CTLFLAG_RD, + &ept_pmap_flags, 0, NULL); int ept_init(void) { - int page_shift; + int use_hw_ad_bits, use_superpages, use_exec_only; uint64_t cap; cap = rdmsr(MSR_VMX_EPT_VPID_CAP); @@ -105,17 +98,22 @@ ept_init(void) !INVEPT_ALL_TYPES_SUPPORTED(cap)) return (EINVAL); - /* Set bits in 'page_sizes_mask' for each valid page size */ - page_shift = PAGE_SHIFT; - page_sizes_mask = 1UL << page_shift; /* 4KB page */ + use_superpages = 1; + TUNABLE_INT_FETCH("hw.vmm.ept.use_superpages", &use_superpages); + if (use_superpages && EPT_PDE_SUPERPAGE(cap)) + ept_pmap_flags |= PMAP_PDE_SUPERPAGE; /* 2MB superpage */ - page_shift += 9; - if (EPT_PDE_SUPERPAGE(cap)) - page_sizes_mask |= 1UL << page_shift; /* 2MB superpage */ + use_hw_ad_bits = 1; + TUNABLE_INT_FETCH("hw.vmm.ept.use_hw_ad_bits", &use_hw_ad_bits); + if (use_hw_ad_bits && AD_BITS_SUPPORTED(cap)) + ept_enable_ad_bits = 1; + else + ept_pmap_flags |= PMAP_EMULATE_AD_BITS; - page_shift += 9; - if (EPT_PDPTE_SUPERPAGE(cap)) - page_sizes_mask |= 1UL << page_shift; /* 1GB superpage */ + use_exec_only = 1; + TUNABLE_INT_FETCH("hw.vmm.ept.use_exec_only", &use_exec_only); + if (use_exec_only && EPT_SUPPORTS_EXEC_ONLY(cap)) + ept_pmap_flags |= PMAP_SUPPORTS_EXEC_ONLY; return (0); } @@ -154,247 +152,53 @@ ept_dump(uint64_t *ptp, int nlevels) } #endif -static size_t -ept_create_mapping(uint64_t *ptp, vm_paddr_t gpa, vm_paddr_t hpa, size_t length, - vm_memattr_t attr, vm_prot_t prot, boolean_t spok) -{ - int spshift, ptpshift, ptpindex, nlevels; - - /* - * Compute the size of the mapping that we can accomodate. - * - * This is based on three factors: - * - super page sizes supported by the processor - * - alignment of the region starting at 'gpa' and 'hpa' - * - length of the region 'len' - */ - spshift = PAGE_SHIFT; - if (spok) - spshift += (EPT_PWLEVELS - 1) * 9; - while (spshift >= PAGE_SHIFT) { - uint64_t spsize = 1UL << spshift; - if ((page_sizes_mask & spsize) != 0 && - (gpa & (spsize - 1)) == 0 && - (hpa & (spsize - 1)) == 0 && - length >= spsize) { - break; - } - spshift -= 9; - } - - if (spshift < PAGE_SHIFT) { - panic("Invalid spshift for gpa 0x%016lx, hpa 0x%016lx, " - "length 0x%016lx, page_sizes_mask 0x%016lx", - gpa, hpa, length, page_sizes_mask); - } - - nlevels = EPT_PWLEVELS; - while (--nlevels >= 0) { - ptpshift = PAGE_SHIFT + nlevels * 9; - ptpindex = (gpa >> ptpshift) & 0x1FF; - - /* We have reached the leaf mapping */ - if (spshift >= ptpshift) - break; - - /* - * We are working on a non-leaf page table page. - * - * Create the next level page table page if necessary and point - * to it from the current page table. - */ - if (ptp[ptpindex] == 0) { - void *nlp = malloc(PAGE_SIZE, M_VMX, M_WAITOK | M_ZERO); - ptp[ptpindex] = vtophys(nlp); - ptp[ptpindex] |= EPT_PG_RD | EPT_PG_WR | EPT_PG_EX; - } - - /* Work our way down to the next level page table page */ - ptp = (uint64_t *)PHYS_TO_DMAP(ptp[ptpindex] & EPT_ADDR_MASK); - } - - if ((gpa & ((1UL << ptpshift) - 1)) != 0) { - panic("ept_create_mapping: gpa 0x%016lx and ptpshift %d " - "mismatch\n", gpa, ptpshift); - } - - if (prot != VM_PROT_NONE) { - /* Do the mapping */ - ptp[ptpindex] = hpa; - - /* Apply the access controls */ - if (prot & VM_PROT_READ) - ptp[ptpindex] |= EPT_PG_RD; - if (prot & VM_PROT_WRITE) - ptp[ptpindex] |= EPT_PG_WR; - if (prot & VM_PROT_EXECUTE) - ptp[ptpindex] |= EPT_PG_EX; - - /* - * By default the PAT type is ignored - this appears to - * be how other hypervisors handle EPT. Allow this to be - * overridden. - */ - ptp[ptpindex] |= EPT_PG_MEMORY_TYPE(attr); - if (!ept_pat_passthru) - ptp[ptpindex] |= EPT_PG_IGNORE_PAT; - - if (nlevels > 0) - ptp[ptpindex] |= EPT_PG_SUPERPAGE; - } else { - /* Remove the mapping */ - ptp[ptpindex] = 0; - } - - return (1UL << ptpshift); -} - -static vm_paddr_t -ept_lookup_mapping(uint64_t *ptp, vm_paddr_t gpa) -{ - int nlevels, ptpshift, ptpindex; - uint64_t ptpval, hpabase, pgmask; - - nlevels = EPT_PWLEVELS; - while (--nlevels >= 0) { - ptpshift = PAGE_SHIFT + nlevels * 9; - ptpindex = (gpa >> ptpshift) & 0x1FF; - - ptpval = ptp[ptpindex]; - - /* Cannot make progress beyond this point */ - if ((ptpval & (EPT_PG_RD | EPT_PG_WR | EPT_PG_EX)) == 0) - break; - - if (nlevels == 0 || (ptpval & EPT_PG_SUPERPAGE)) { - pgmask = (1UL << ptpshift) - 1; - hpabase = ptpval & ~pgmask; - return (hpabase | (gpa & pgmask)); - } - - /* Work our way down to the next level page table page */ - ptp = (uint64_t *)PHYS_TO_DMAP(ptpval & EPT_ADDR_MASK); - } - - return ((vm_paddr_t)-1); -} - static void -ept_free_pt_entry(pt_entry_t pte) +invept_single_context(void *arg) { - if (pte == 0) - return; - - /* sanity check */ - if ((pte & EPT_PG_SUPERPAGE) != 0) - panic("ept_free_pt_entry: pte cannot have superpage bit"); + struct invept_desc desc = *(struct invept_desc *)arg; - return; + invept(INVEPT_TYPE_SINGLE_CONTEXT, desc); } -static void -ept_free_pd_entry(pd_entry_t pde) +void +ept_invalidate_mappings(u_long eptp) { - pt_entry_t *pt; - int i; + struct invept_desc invept_desc = { 0 }; - if (pde == 0) - return; + invept_desc.eptp = eptp; - if ((pde & EPT_PG_SUPERPAGE) == 0) { - pt = (pt_entry_t *)PHYS_TO_DMAP(pde & EPT_ADDR_MASK); - for (i = 0; i < NPTEPG; i++) - ept_free_pt_entry(pt[i]); - free(pt, M_VMX); /* free the page table page */ - } + smp_rendezvous(NULL, invept_single_context, NULL, &invept_desc); } -static void -ept_free_pdp_entry(pdp_entry_t pdpe) +static int +ept_pinit(pmap_t pmap) { - pd_entry_t *pd; - int i; - if (pdpe == 0) - return; - - if ((pdpe & EPT_PG_SUPERPAGE) == 0) { - pd = (pd_entry_t *)PHYS_TO_DMAP(pdpe & EPT_ADDR_MASK); - for (i = 0; i < NPDEPG; i++) - ept_free_pd_entry(pd[i]); - free(pd, M_VMX); /* free the page directory page */ - } + return (pmap_pinit_type(pmap, PT_EPT, ept_pmap_flags)); } -static void -ept_free_pml4_entry(pml4_entry_t pml4e) +struct vmspace * +ept_vmspace_alloc(vm_offset_t min, vm_offset_t max) { - pdp_entry_t *pdp; - int i; - - if (pml4e == 0) - return; - if ((pml4e & EPT_PG_SUPERPAGE) == 0) { - pdp = (pdp_entry_t *)PHYS_TO_DMAP(pml4e & EPT_ADDR_MASK); - for (i = 0; i < NPDPEPG; i++) - ept_free_pdp_entry(pdp[i]); - free(pdp, M_VMX); /* free the page directory ptr page */ - } + return (vmspace_alloc(min, max, ept_pinit)); } void -ept_vmcleanup(struct vmx *vmx) -{ - int i; - - for (i = 0; i < NPML4EPG; i++) - ept_free_pml4_entry(vmx->pml4ept[i]); -} - -int -ept_vmmmap_set(void *arg, vm_paddr_t gpa, vm_paddr_t hpa, size_t len, - vm_memattr_t attr, int prot, boolean_t spok) +ept_vmspace_free(struct vmspace *vmspace) { - size_t n; - struct vmx *vmx = arg; - - while (len > 0) { - n = ept_create_mapping(vmx->pml4ept, gpa, hpa, len, attr, - prot, spok); - len -= n; - gpa += n; - hpa += n; - } - return (0); + vmspace_free(vmspace); } -vm_paddr_t -ept_vmmmap_get(void *arg, vm_paddr_t gpa) +uint64_t +eptp(uint64_t pml4) { - vm_paddr_t hpa; - struct vmx *vmx; - - vmx = arg; - hpa = ept_lookup_mapping(vmx->pml4ept, gpa); - return (hpa); -} + uint64_t eptp_val; -static void -invept_single_context(void *arg) -{ - struct invept_desc desc = *(struct invept_desc *)arg; + eptp_val = pml4 | (EPT_PWLEVELS - 1) << 3 | PAT_WRITE_BACK; + if (ept_enable_ad_bits) + eptp_val |= EPT_ENABLE_AD_BITS; - invept(INVEPT_TYPE_SINGLE_CONTEXT, desc); -} - -void -ept_invalidate_mappings(u_long pml4ept) -{ - struct invept_desc invept_desc = { 0 }; - - invept_desc.eptp = EPTP(pml4ept); - - smp_rendezvous(NULL, invept_single_context, NULL, &invept_desc); + return (eptp_val); } |