Diffstat (limited to 'sys/arm64/vmm/vmm_arm64.c')
-rw-r--r-- | sys/arm64/vmm/vmm_arm64.c | 369
1 file changed, 221 insertions(+), 148 deletions(-)
diff --git a/sys/arm64/vmm/vmm_arm64.c b/sys/arm64/vmm/vmm_arm64.c
index e71761f9ccef..de2425aae0a1 100644
--- a/sys/arm64/vmm/vmm_arm64.c
+++ b/sys/arm64/vmm/vmm_arm64.c
@@ -58,6 +58,8 @@
 #include <machine/hypervisor.h>
 #include <machine/pmap.h>
 
+#include <dev/vmm/vmm_mem.h>
+
 #include "mmu.h"
 #include "arm64.h"
 #include "hyp.h"
@@ -65,6 +67,7 @@
 #include "io/vgic.h"
 #include "io/vgic_v3.h"
 #include "io/vtimer.h"
+#include "vmm_handlers.h"
 #include "vmm_stat.h"
 
 #define	HANDLED		1
@@ -101,9 +104,6 @@ static vm_offset_t stack_hyp_va[MAXCPU];
 
 static vmem_t *el2_mem_alloc;
 
 static void arm_setup_vectors(void *arg);
-static void vmm_pmap_clean_stage2_tlbi(void);
-static void vmm_pmap_invalidate_range(uint64_t, vm_offset_t, vm_offset_t, bool);
-static void vmm_pmap_invalidate_all(uint64_t);
 
 DPCPU_DEFINE_STATIC(struct hypctx *, vcpu);
@@ -130,20 +130,6 @@ arm_setup_vectors(void *arg)
 	el2_regs = arg;
 	arm64_set_active_vcpu(NULL);
 
-	daif = intr_disable();
-
-	/*
-	 * Install the temporary vectors which will be responsible for
-	 * initializing the VMM when we next trap into EL2.
-	 *
-	 * x0: the exception vector table responsible for hypervisor
-	 * initialization on the next call.
-	 */
-	vmm_call_hyp(vtophys(&vmm_hyp_code));
-
-	/* Create and map the hypervisor stack */
-	stack_top = stack_hyp_va[PCPU_GET(cpuid)] + VMM_STACK_SIZE;
-
 	/*
 	 * Configure the system control register for EL2:
 	 *
@@ -161,9 +147,27 @@ arm_setup_vectors(void *arg)
 	sctlr_el2 |= SCTLR_EL2_WXN;
 	sctlr_el2 &= ~SCTLR_EL2_EE;
 
-	/* Special call to initialize EL2 */
-	vmm_call_hyp(vmmpmap_to_ttbr0(), stack_top, el2_regs->tcr_el2,
-	    sctlr_el2, el2_regs->vtcr_el2);
+	daif = intr_disable();
+
+	if (in_vhe()) {
+		WRITE_SPECIALREG(vtcr_el2, el2_regs->vtcr_el2);
+	} else {
+		/*
+		 * Install the temporary vectors which will be responsible for
+		 * initializing the VMM when we next trap into EL2.
+		 *
+		 * x0: the exception vector table responsible for hypervisor
+		 * initialization on the next call.
+		 */
+		vmm_call_hyp(vtophys(&vmm_hyp_code));
+
+		/* Create and map the hypervisor stack */
+		stack_top = stack_hyp_va[PCPU_GET(cpuid)] + VMM_STACK_SIZE;
+
+		/* Special call to initialize EL2 */
+		vmm_call_hyp(vmmpmap_to_ttbr0(), stack_top, el2_regs->tcr_el2,
+		    sctlr_el2, el2_regs->vtcr_el2);
+	}
 
 	intr_restore(daif);
 }
@@ -235,22 +239,15 @@ vmmops_modinit(int ipinum)
 	vm_paddr_t vmm_base;
 	uint64_t id_aa64mmfr0_el1, pa_range_bits, pa_range_field;
 	uint64_t cnthctl_el2;
-	register_t daif;
 	int cpu, i;
 	bool rv __diagused;
 
-	if (!virt_enabled()) {
+	if (!has_hyp()) {
 		printf(
 		    "vmm: Processor doesn't have support for virtualization\n");
 		return (ENXIO);
 	}
 
-	/* TODO: Support VHE */
-	if (in_vhe()) {
-		printf("vmm: VHE is unsupported\n");
-		return (ENXIO);
-	}
-
 	if (!vgic_present()) {
 		printf("vmm: No vgic found\n");
 		return (ENODEV);
@@ -283,67 +280,72 @@ vmmops_modinit(int ipinum)
 	}
 	pa_range_bits = pa_range_field >> ID_AA64MMFR0_PARange_SHIFT;
 
-	/* Initialise the EL2 MMU */
-	if (!vmmpmap_init()) {
-		printf("vmm: Failed to init the EL2 MMU\n");
-		return (ENOMEM);
+	if (!in_vhe()) {
+		/* Initialise the EL2 MMU */
+		if (!vmmpmap_init()) {
+			printf("vmm: Failed to init the EL2 MMU\n");
+			return (ENOMEM);
+		}
 	}
 
 	/* Set up the stage 2 pmap callbacks */
 	MPASS(pmap_clean_stage2_tlbi == NULL);
-	pmap_clean_stage2_tlbi = vmm_pmap_clean_stage2_tlbi;
-	pmap_stage2_invalidate_range = vmm_pmap_invalidate_range;
-	pmap_stage2_invalidate_all = vmm_pmap_invalidate_all;
-
-	/*
-	 * Create an allocator for the virtual address space used by EL2.
-	 * EL2 code is identity-mapped; the allocator is used to find space for
-	 * VM structures.
-	 */
-	el2_mem_alloc = vmem_create("VMM EL2", 0, 0, PAGE_SIZE, 0, M_WAITOK);
-
-	/* Create the mappings for the hypervisor translation table. */
-	hyp_code_len = round_page(&vmm_hyp_code_end - &vmm_hyp_code);
-
-	/* We need a physical identity mapping for when we activate the MMU */
-	hyp_code_base = vmm_base = vtophys(&vmm_hyp_code);
-	rv = vmmpmap_enter(vmm_base, hyp_code_len, vmm_base,
-	    VM_PROT_READ | VM_PROT_EXECUTE);
-	MPASS(rv);
+	pmap_clean_stage2_tlbi = vmm_clean_s2_tlbi;
+	pmap_stage2_invalidate_range = vmm_s2_tlbi_range;
+	pmap_stage2_invalidate_all = vmm_s2_tlbi_all;
 
-	next_hyp_va = roundup2(vmm_base + hyp_code_len, L2_SIZE);
-
-	/* Create a per-CPU hypervisor stack */
-	CPU_FOREACH(cpu) {
-		stack[cpu] = malloc(VMM_STACK_SIZE, M_HYP, M_WAITOK | M_ZERO);
-		stack_hyp_va[cpu] = next_hyp_va;
-
-		for (i = 0; i < VMM_STACK_PAGES; i++) {
-			rv = vmmpmap_enter(stack_hyp_va[cpu] + ptoa(i),
-			    PAGE_SIZE, vtophys(stack[cpu] + ptoa(i)),
-			    VM_PROT_READ | VM_PROT_WRITE);
-			MPASS(rv);
+	if (!in_vhe()) {
+		/*
+		 * Create an allocator for the virtual address space used by
+		 * EL2. EL2 code is identity-mapped; the allocator is used to
+		 * find space for VM structures.
+		 */
+		el2_mem_alloc = vmem_create("VMM EL2", 0, 0, PAGE_SIZE, 0,
+		    M_WAITOK);
+
+		/* Create the mappings for the hypervisor translation table. */
+		hyp_code_len = round_page(&vmm_hyp_code_end - &vmm_hyp_code);
+
+		/* We need a physical identity mapping for when we activate the MMU */
+		hyp_code_base = vmm_base = vtophys(&vmm_hyp_code);
+		rv = vmmpmap_enter(vmm_base, hyp_code_len, vmm_base,
+		    VM_PROT_READ | VM_PROT_EXECUTE);
+		MPASS(rv);
+
+		next_hyp_va = roundup2(vmm_base + hyp_code_len, L2_SIZE);
+
+		/* Create a per-CPU hypervisor stack */
+		CPU_FOREACH(cpu) {
+			stack[cpu] = malloc(VMM_STACK_SIZE, M_HYP, M_WAITOK | M_ZERO);
+			stack_hyp_va[cpu] = next_hyp_va;
+
+			for (i = 0; i < VMM_STACK_PAGES; i++) {
+				rv = vmmpmap_enter(stack_hyp_va[cpu] + ptoa(i),
+				    PAGE_SIZE, vtophys(stack[cpu] + ptoa(i)),
+				    VM_PROT_READ | VM_PROT_WRITE);
+				MPASS(rv);
+			}
+			next_hyp_va += L2_SIZE;
 		}
-		next_hyp_va += L2_SIZE;
-	}
 
-	el2_regs.tcr_el2 = TCR_EL2_RES1;
-	el2_regs.tcr_el2 |= min(pa_range_bits << TCR_EL2_PS_SHIFT,
-	    TCR_EL2_PS_52BITS);
-	el2_regs.tcr_el2 |= TCR_EL2_T0SZ(64 - EL2_VIRT_BITS);
-	el2_regs.tcr_el2 |= TCR_EL2_IRGN0_WBWA | TCR_EL2_ORGN0_WBWA;
+		el2_regs.tcr_el2 = TCR_EL2_RES1;
+		el2_regs.tcr_el2 |= min(pa_range_bits << TCR_EL2_PS_SHIFT,
+		    TCR_EL2_PS_52BITS);
+		el2_regs.tcr_el2 |= TCR_EL2_T0SZ(64 - EL2_VIRT_BITS);
+		el2_regs.tcr_el2 |= TCR_EL2_IRGN0_WBWA | TCR_EL2_ORGN0_WBWA;
 #if PAGE_SIZE == PAGE_SIZE_4K
-	el2_regs.tcr_el2 |= TCR_EL2_TG0_4K;
+		el2_regs.tcr_el2 |= TCR_EL2_TG0_4K;
 #elif PAGE_SIZE == PAGE_SIZE_16K
-	el2_regs.tcr_el2 |= TCR_EL2_TG0_16K;
+		el2_regs.tcr_el2 |= TCR_EL2_TG0_16K;
 #else
 #error Unsupported page size
 #endif
 #ifdef SMP
-	el2_regs.tcr_el2 |= TCR_EL2_SH0_IS;
+		el2_regs.tcr_el2 |= TCR_EL2_SH0_IS;
 #endif
+	}
 
-	switch (el2_regs.tcr_el2 & TCR_EL2_PS_MASK) {
+	switch (pa_range_bits << TCR_EL2_PS_SHIFT) {
 	case TCR_EL2_PS_32BITS:
 		vmm_max_ipa_bits = 32;
 		break;
@@ -381,8 +383,6 @@ vmmops_modinit(int ipinum)
 	 * shareable
 	 */
 	el2_regs.vtcr_el2 = VTCR_EL2_RES1;
-	el2_regs.vtcr_el2 |=
-	    min(pa_range_bits << VTCR_EL2_PS_SHIFT, VTCR_EL2_PS_48BIT);
 	el2_regs.vtcr_el2 |= VTCR_EL2_IRGN0_WBWA | VTCR_EL2_ORGN0_WBWA;
 	el2_regs.vtcr_el2 |= VTCR_EL2_T0SZ(64 - vmm_virt_bits);
 	el2_regs.vtcr_el2 |= vmm_vtcr_el2_sl(vmm_pmap_levels);
@@ -396,42 +396,55 @@ vmmops_modinit(int ipinum)
 #ifdef SMP
 	el2_regs.vtcr_el2 |= VTCR_EL2_SH0_IS;
 #endif
+	/*
+	 * If FEAT_LPA2 is enabled in the host then we need to enable it here
+	 * so the page tables created by pmap.c are correct. The meaning of
+	 * the shareability field changes to become address bits when this
+	 * is set.
+	 */
+	if ((READ_SPECIALREG(tcr_el1) & TCR_DS) != 0) {
+		el2_regs.vtcr_el2 |= VTCR_EL2_DS;
+		el2_regs.vtcr_el2 |=
+		    min(pa_range_bits << VTCR_EL2_PS_SHIFT, VTCR_EL2_PS_52BIT);
+	} else {
+		el2_regs.vtcr_el2 |=
+		    min(pa_range_bits << VTCR_EL2_PS_SHIFT, VTCR_EL2_PS_48BIT);
+	}
 
 	smp_rendezvous(NULL, arm_setup_vectors, NULL, &el2_regs);
 
-	/* Add memory to the vmem allocator (checking there is space) */
-	if (vmm_base > (L2_SIZE + PAGE_SIZE)) {
-		/*
-		 * Ensure there is an L2 block before the vmm code to check
-		 * for buffer overflows on earlier data. Include the PAGE_SIZE
-		 * of the minimum we can allocate.
-		 */
-		vmm_base -= L2_SIZE + PAGE_SIZE;
-		vmm_base = rounddown2(vmm_base, L2_SIZE);
+	if (!in_vhe()) {
+		/* Add memory to the vmem allocator (checking there is space) */
+		if (vmm_base > (L2_SIZE + PAGE_SIZE)) {
+			/*
+			 * Ensure there is an L2 block before the vmm code to check
+			 * for buffer overflows on earlier data. Include the PAGE_SIZE
+			 * of the minimum we can allocate.
+			 */
+			vmm_base -= L2_SIZE + PAGE_SIZE;
+			vmm_base = rounddown2(vmm_base, L2_SIZE);
+
+			/*
+			 * Check there is memory before the vmm code to add.
+			 *
+			 * Reserve the L2 block at address 0 so NULL dereference will
+			 * raise an exception.
+			 */
+			if (vmm_base > L2_SIZE)
+				vmem_add(el2_mem_alloc, L2_SIZE, vmm_base - L2_SIZE,
+				    M_WAITOK);
+		}
 
 		/*
-		 * Check there is memory before the vmm code to add.
-		 *
-		 * Reserve the L2 block at address 0 so NULL dereference will
-		 * raise an exception.
+		 * Add the memory after the stacks. There is most of an L2 block
+		 * between the last stack and the first allocation so this should
+		 * be safe without adding more padding.
 		 */
-		if (vmm_base > L2_SIZE)
-			vmem_add(el2_mem_alloc, L2_SIZE, vmm_base - L2_SIZE,
-			    M_WAITOK);
+		if (next_hyp_va < HYP_VM_MAX_ADDRESS - PAGE_SIZE)
+			vmem_add(el2_mem_alloc, next_hyp_va,
+			    HYP_VM_MAX_ADDRESS - next_hyp_va, M_WAITOK);
 	}
-
-	/*
-	 * Add the memory after the stacks. There is most of an L2 block
-	 * between the last stack and the first allocation so this should
-	 * be safe without adding more padding.
-	 */
-	if (next_hyp_va < HYP_VM_MAX_ADDRESS - PAGE_SIZE)
-		vmem_add(el2_mem_alloc, next_hyp_va,
-		    HYP_VM_MAX_ADDRESS - next_hyp_va, M_WAITOK);
-
-	daif = intr_disable();
-	cnthctl_el2 = vmm_call_hyp(HYP_READ_REGISTER, HYP_REG_CNTHCTL);
-	intr_restore(daif);
+	cnthctl_el2 = vmm_read_reg(HYP_REG_CNTHCTL);
 
 	vgic_init();
 	vtimer_init(cnthctl_el2);
@@ -444,21 +457,25 @@ vmmops_modcleanup(void)
 {
 	int cpu;
 
-	smp_rendezvous(NULL, arm_teardown_vectors, NULL, NULL);
+	if (!in_vhe()) {
+		smp_rendezvous(NULL, arm_teardown_vectors, NULL, NULL);
 
-	CPU_FOREACH(cpu) {
-		vmmpmap_remove(stack_hyp_va[cpu], VMM_STACK_PAGES * PAGE_SIZE,
-		    false);
-	}
+		CPU_FOREACH(cpu) {
+			vmmpmap_remove(stack_hyp_va[cpu],
+			    VMM_STACK_PAGES * PAGE_SIZE, false);
+		}
 
-	vmmpmap_remove(hyp_code_base, hyp_code_len, false);
+		vmmpmap_remove(hyp_code_base, hyp_code_len, false);
+	}
 
 	vtimer_cleanup();
 
-	vmmpmap_fini();
+	if (!in_vhe()) {
+		vmmpmap_fini();
 
-	CPU_FOREACH(cpu)
-		free(stack[cpu], M_HYP);
+		CPU_FOREACH(cpu)
+			free(stack[cpu], M_HYP);
+	}
 
 	pmap_clean_stage2_tlbi = NULL;
 	pmap_stage2_invalidate_range = NULL;
@@ -510,8 +527,9 @@ vmmops_init(struct vm *vm, pmap_t pmap)
 	vtimer_vminit(hyp);
 	vgic_vminit(hyp);
 
-	hyp->el2_addr = el2_map_enter((vm_offset_t)hyp, size,
-	    VM_PROT_READ | VM_PROT_WRITE);
+	if (!in_vhe())
+		hyp->el2_addr = el2_map_enter((vm_offset_t)hyp, size,
+		    VM_PROT_READ | VM_PROT_WRITE);
 
 	return (hyp);
 }
@@ -539,8 +557,9 @@ vmmops_vcpu_init(void *vmi, struct vcpu *vcpu1, int vcpuid)
 	vtimer_cpuinit(hypctx);
 	vgic_cpuinit(hypctx);
 
-	hypctx->el2_addr = el2_map_enter((vm_offset_t)hypctx, size,
-	    VM_PROT_READ | VM_PROT_WRITE);
+	if (!in_vhe())
+		hypctx->el2_addr = el2_map_enter((vm_offset_t)hypctx, size,
+		    VM_PROT_READ | VM_PROT_WRITE);
 
 	return (hypctx);
 }
@@ -567,26 +586,6 @@ vmmops_vmspace_free(struct vmspace *vmspace)
 	vmspace_free(vmspace);
 }
 
-static void
-vmm_pmap_clean_stage2_tlbi(void)
-{
-	vmm_call_hyp(HYP_CLEAN_S2_TLBI);
-}
-
-static void
-vmm_pmap_invalidate_range(uint64_t vttbr, vm_offset_t sva, vm_offset_t eva,
-    bool final_only)
-{
-	MPASS(eva > sva);
-	vmm_call_hyp(HYP_S2_TLBI_RANGE, vttbr, sva, eva, final_only);
-}
-
-static void
-vmm_pmap_invalidate_all(uint64_t vttbr)
-{
-	vmm_call_hyp(HYP_S2_TLBI_ALL, vttbr);
-}
-
 static inline void
 arm64_print_hyp_regs(struct vm_exit *vme)
 {
@@ -700,7 +699,14 @@ handle_el1_sync_excp(struct hypctx *hypctx, struct vm_exit *vme_ret,
 		arm64_gen_reg_emul_data(esr_iss, vme_ret);
 		vme_ret->exitcode = VM_EXITCODE_REG_EMUL;
 		break;
-
+	case EXCP_BRK:
+		vmm_stat_incr(hypctx->vcpu, VMEXIT_BRK, 1);
+		vme_ret->exitcode = VM_EXITCODE_BRK;
+		break;
+	case EXCP_SOFTSTP_EL0:
+		vmm_stat_incr(hypctx->vcpu, VMEXIT_SS, 1);
+		vme_ret->exitcode = VM_EXITCODE_SS;
+		break;
 	case EXCP_INSN_ABORT_L:
 	case EXCP_DATA_ABORT_L:
 		vmm_stat_incr(hypctx->vcpu, esr_ec == EXCP_DATA_ABORT_L ?
@@ -1101,7 +1107,7 @@ vmmops_run(void *vcpui, register_t pc, pmap_t pmap, struct vm_eventinfo *evinfo)
 		 * Update fields that may change on exception entry
		 * based on how sctlr_el1 is configured.
 		 */
-		if ((hypctx->sctlr_el1 & SCTLR_SPAN) != 0)
+		if ((hypctx->sctlr_el1 & SCTLR_SPAN) == 0)
 			hypctx->tf.tf_spsr |= PSR_PAN;
 		if ((hypctx->sctlr_el1 & SCTLR_DSSBS) == 0)
 			hypctx->tf.tf_spsr &= ~PSR_SSBS;
@@ -1136,16 +1142,13 @@ vmmops_run(void *vcpui, register_t pc, pmap_t pmap, struct vm_eventinfo *evinfo)
 		vgic_flush_hwstate(hypctx);
 
 		/* Call into EL2 to switch to the guest */
-		excp_type = vmm_call_hyp(HYP_ENTER_GUEST,
-		    hyp->el2_addr, hypctx->el2_addr);
+		excp_type = vmm_enter_guest(hyp, hypctx);
 
 		vgic_sync_hwstate(hypctx);
 		vtimer_sync_hwstate(hypctx);
 
 		/*
-		 * Deactivate the stage2 pmap. vmm_pmap_clean_stage2_tlbi
-		 * depends on this meaning we activate the VM before entering
-		 * the vm again
+		 * Deactivate the stage2 pmap.
 		 */
 		PCPU_SET(curvmpmap, NULL);
 		intr_restore(daif);
@@ -1198,7 +1201,8 @@ vmmops_vcpu_cleanup(void *vcpui)
 	vtimer_cpucleanup(hypctx);
 	vgic_cpucleanup(hypctx);
 
-	vmmpmap_remove(hypctx->el2_addr, el2_hypctx_size(), true);
+	if (!in_vhe())
+		vmmpmap_remove(hypctx->el2_addr, el2_hypctx_size(), true);
 
 	free(hypctx, M_HYP);
 }
@@ -1213,7 +1217,8 @@ vmmops_cleanup(void *vmi)
 
 	smp_rendezvous(NULL, arm_pcpu_vmcleanup, NULL, hyp);
 
-	vmmpmap_remove(hyp->el2_addr, el2_hyp_size(hyp->vm), true);
+	if (!in_vhe())
+		vmmpmap_remove(hyp->el2_addr, el2_hyp_size(hyp->vm), true);
 
 	free(hyp, M_HYP);
 }
@@ -1313,6 +1318,7 @@ vmmops_exception(void *vcpui, uint64_t esr, uint64_t far)
 int
 vmmops_getcap(void *vcpui, int num, int *retval)
 {
+	struct hypctx *hypctx = vcpui;
 	int ret;
 
 	ret = ENOENT;
@@ -1322,6 +1328,11 @@ vmmops_getcap(void *vcpui, int num, int *retval)
 		*retval = 1;
 		ret = 0;
 		break;
+	case VM_CAP_BRK_EXIT:
+	case VM_CAP_SS_EXIT:
+	case VM_CAP_MASK_HWINTR:
+		*retval = (hypctx->setcaps & (1ul << num)) != 0;
+		break;
 	default:
 		break;
 	}
@@ -1332,6 +1343,68 @@ vmmops_getcap(void *vcpui, int num, int *retval)
 int
 vmmops_setcap(void *vcpui, int num, int val)
 {
+	struct hypctx *hypctx = vcpui;
+	int ret;
+
+	ret = 0;
 
-	return (ENOENT);
+	switch (num) {
+	case VM_CAP_BRK_EXIT:
+		if ((val != 0) == ((hypctx->setcaps & (1ul << num)) != 0))
+			break;
+		if (val != 0)
+			hypctx->mdcr_el2 |= MDCR_EL2_TDE;
+		else
+			hypctx->mdcr_el2 &= ~MDCR_EL2_TDE;
+		break;
+	case VM_CAP_SS_EXIT:
+		if ((val != 0) == ((hypctx->setcaps & (1ul << num)) != 0))
+			break;
+
+		if (val != 0) {
+			hypctx->debug_spsr |= (hypctx->tf.tf_spsr & PSR_SS);
+			hypctx->debug_mdscr |= hypctx->mdscr_el1 &
+			    (MDSCR_SS | MDSCR_KDE);
+
+			hypctx->tf.tf_spsr |= PSR_SS;
+			hypctx->mdscr_el1 |= MDSCR_SS | MDSCR_KDE;
+			hypctx->mdcr_el2 |= MDCR_EL2_TDE;
+		} else {
+			hypctx->tf.tf_spsr &= ~PSR_SS;
+			hypctx->tf.tf_spsr |= hypctx->debug_spsr;
+			hypctx->debug_spsr &= ~PSR_SS;
+			hypctx->mdscr_el1 &= ~(MDSCR_SS | MDSCR_KDE);
+			hypctx->mdscr_el1 |= hypctx->debug_mdscr;
+			hypctx->debug_mdscr &= ~(MDSCR_SS | MDSCR_KDE);
+			hypctx->mdcr_el2 &= ~MDCR_EL2_TDE;
+		}
+		break;
+	case VM_CAP_MASK_HWINTR:
+		if ((val != 0) == ((hypctx->setcaps & (1ul << num)) != 0))
+			break;
+
+		if (val != 0) {
+			hypctx->debug_spsr |= (hypctx->tf.tf_spsr &
+			    (PSR_I | PSR_F));
+			hypctx->tf.tf_spsr |= PSR_I | PSR_F;
+		} else {
+			hypctx->tf.tf_spsr &= ~(PSR_I | PSR_F);
+			hypctx->tf.tf_spsr |= (hypctx->debug_spsr &
+			    (PSR_I | PSR_F));
+			hypctx->debug_spsr &= ~(PSR_I | PSR_F);
+		}
+		break;
+	default:
+		ret = ENOENT;
+		break;
+	}
+
+	if (ret == 0) {
+		if (val == 0)
+			hypctx->setcaps &= ~(1ul << num);
+		else
+			hypctx->setcaps |= (1ul << num);
+	}
+
+	return (ret);
 }
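
Note on the capability handling added above: vmmops_setcap() and vmmops_getcap() track which capabilities are active in a per-vCPU setcaps bitmask, and any guest PSTATE bits a capability forces (PSR_I/PSR_F for VM_CAP_MASK_HWINTR, PSR_SS for VM_CAP_SS_EXIT) are first saved in debug_spsr so they can be restored when the capability is switched off. The program below is a minimal, standalone sketch of that save/force/restore pattern for the VM_CAP_MASK_HWINTR case; the struct, field, and function names are invented for illustration and are not the kernel's.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define PSR_I	(1u << 7)	/* IRQ mask bit in the PSTATE image */
#define PSR_F	(1u << 6)	/* FIQ mask bit in the PSTATE image */

/* Illustrative stand-in for the per-vCPU hypctx state. */
struct vcpu_state {
	uint64_t spsr;		/* guest PSTATE to load on entry */
	uint64_t saved_spsr;	/* guest bits saved while a cap is on */
	uint64_t setcaps;	/* bitmask of enabled capabilities */
};

enum { CAP_MASK_HWINTR = 0 };

static void
set_mask_hwintr(struct vcpu_state *v, bool enable)
{
	/* Nothing to do if the capability is already in this state. */
	if (enable == ((v->setcaps & (1ul << CAP_MASK_HWINTR)) != 0))
		return;

	if (enable) {
		/* Save the guest's own I/F bits, then force both on. */
		v->saved_spsr |= v->spsr & (PSR_I | PSR_F);
		v->spsr |= PSR_I | PSR_F;
		v->setcaps |= 1ul << CAP_MASK_HWINTR;
	} else {
		/* Drop the forced bits and restore what the guest had. */
		v->spsr &= ~(uint64_t)(PSR_I | PSR_F);
		v->spsr |= v->saved_spsr & (PSR_I | PSR_F);
		v->saved_spsr &= ~(uint64_t)(PSR_I | PSR_F);
		v->setcaps &= ~(1ul << CAP_MASK_HWINTR);
	}
}

int
main(void)
{
	/* The guest happened to be running with FIQs masked. */
	struct vcpu_state v = { .spsr = PSR_F };

	set_mask_hwintr(&v, true);
	printf("enabled:  spsr=%#llx\n", (unsigned long long)v.spsr);
	set_mask_hwintr(&v, false);
	printf("disabled: spsr=%#llx\n", (unsigned long long)v.spsr);
	return (0);
}

VM_CAP_SS_EXIT in the patch follows the same idiom, additionally saving and restoring the MDSCR_EL1 SS/KDE bits and setting MDCR_EL2_TDE so the resulting debug exceptions trap to the hypervisor rather than the guest.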