Diffstat (limited to 'sys/arm64/vmm/vmm_arm64.c')
-rw-r--r--	sys/arm64/vmm/vmm_arm64.c	369
1 file changed, 221 insertions(+), 148 deletions(-)
diff --git a/sys/arm64/vmm/vmm_arm64.c b/sys/arm64/vmm/vmm_arm64.c
index e71761f9ccef..de2425aae0a1 100644
--- a/sys/arm64/vmm/vmm_arm64.c
+++ b/sys/arm64/vmm/vmm_arm64.c
@@ -58,6 +58,8 @@
#include <machine/hypervisor.h>
#include <machine/pmap.h>
+#include <dev/vmm/vmm_mem.h>
+
#include "mmu.h"
#include "arm64.h"
#include "hyp.h"
@@ -65,6 +67,7 @@
#include "io/vgic.h"
#include "io/vgic_v3.h"
#include "io/vtimer.h"
+#include "vmm_handlers.h"
#include "vmm_stat.h"
#define HANDLED 1
@@ -101,9 +104,6 @@ static vm_offset_t stack_hyp_va[MAXCPU];
static vmem_t *el2_mem_alloc;
static void arm_setup_vectors(void *arg);
-static void vmm_pmap_clean_stage2_tlbi(void);
-static void vmm_pmap_invalidate_range(uint64_t, vm_offset_t, vm_offset_t, bool);
-static void vmm_pmap_invalidate_all(uint64_t);
DPCPU_DEFINE_STATIC(struct hypctx *, vcpu);
@@ -130,20 +130,6 @@ arm_setup_vectors(void *arg)
el2_regs = arg;
arm64_set_active_vcpu(NULL);
- daif = intr_disable();
-
- /*
- * Install the temporary vectors which will be responsible for
- * initializing the VMM when we next trap into EL2.
- *
- * x0: the exception vector table responsible for hypervisor
- * initialization on the next call.
- */
- vmm_call_hyp(vtophys(&vmm_hyp_code));
-
- /* Create and map the hypervisor stack */
- stack_top = stack_hyp_va[PCPU_GET(cpuid)] + VMM_STACK_SIZE;
-
/*
* Configure the system control register for EL2:
*
@@ -161,9 +147,27 @@ arm_setup_vectors(void *arg)
sctlr_el2 |= SCTLR_EL2_WXN;
sctlr_el2 &= ~SCTLR_EL2_EE;
- /* Special call to initialize EL2 */
- vmm_call_hyp(vmmpmap_to_ttbr0(), stack_top, el2_regs->tcr_el2,
- sctlr_el2, el2_regs->vtcr_el2);
+ daif = intr_disable();
+
+ if (in_vhe()) {
+ WRITE_SPECIALREG(vtcr_el2, el2_regs->vtcr_el2);
+ } else {
+ /*
+ * Install the temporary vectors which will be responsible for
+ * initializing the VMM when we next trap into EL2.
+ *
+ * x0: the exception vector table responsible for hypervisor
+ * initialization on the next call.
+ */
+ vmm_call_hyp(vtophys(&vmm_hyp_code));
+
+ /* Create and map the hypervisor stack */
+ stack_top = stack_hyp_va[PCPU_GET(cpuid)] + VMM_STACK_SIZE;
+
+ /* Special call to initialize EL2 */
+ vmm_call_hyp(vmmpmap_to_ttbr0(), stack_top, el2_regs->tcr_el2,
+ sctlr_el2, el2_regs->vtcr_el2);
+ }
intr_restore(daif);
}
@@ -235,22 +239,15 @@ vmmops_modinit(int ipinum)
vm_paddr_t vmm_base;
uint64_t id_aa64mmfr0_el1, pa_range_bits, pa_range_field;
uint64_t cnthctl_el2;
- register_t daif;
int cpu, i;
bool rv __diagused;
- if (!virt_enabled()) {
+ if (!has_hyp()) {
printf(
"vmm: Processor doesn't have support for virtualization\n");
return (ENXIO);
}
- /* TODO: Support VHE */
- if (in_vhe()) {
- printf("vmm: VHE is unsupported\n");
- return (ENXIO);
- }
-
if (!vgic_present()) {
printf("vmm: No vgic found\n");
return (ENODEV);
@@ -283,67 +280,72 @@ vmmops_modinit(int ipinum)
}
pa_range_bits = pa_range_field >> ID_AA64MMFR0_PARange_SHIFT;
- /* Initialise the EL2 MMU */
- if (!vmmpmap_init()) {
- printf("vmm: Failed to init the EL2 MMU\n");
- return (ENOMEM);
+ if (!in_vhe()) {
+ /* Initialise the EL2 MMU */
+ if (!vmmpmap_init()) {
+ printf("vmm: Failed to init the EL2 MMU\n");
+ return (ENOMEM);
+ }
}
/* Set up the stage 2 pmap callbacks */
MPASS(pmap_clean_stage2_tlbi == NULL);
- pmap_clean_stage2_tlbi = vmm_pmap_clean_stage2_tlbi;
- pmap_stage2_invalidate_range = vmm_pmap_invalidate_range;
- pmap_stage2_invalidate_all = vmm_pmap_invalidate_all;
-
- /*
- * Create an allocator for the virtual address space used by EL2.
- * EL2 code is identity-mapped; the allocator is used to find space for
- * VM structures.
- */
- el2_mem_alloc = vmem_create("VMM EL2", 0, 0, PAGE_SIZE, 0, M_WAITOK);
-
- /* Create the mappings for the hypervisor translation table. */
- hyp_code_len = round_page(&vmm_hyp_code_end - &vmm_hyp_code);
-
- /* We need an physical identity mapping for when we activate the MMU */
- hyp_code_base = vmm_base = vtophys(&vmm_hyp_code);
- rv = vmmpmap_enter(vmm_base, hyp_code_len, vmm_base,
- VM_PROT_READ | VM_PROT_EXECUTE);
- MPASS(rv);
+ pmap_clean_stage2_tlbi = vmm_clean_s2_tlbi;
+ pmap_stage2_invalidate_range = vmm_s2_tlbi_range;
+ pmap_stage2_invalidate_all = vmm_s2_tlbi_all;
- next_hyp_va = roundup2(vmm_base + hyp_code_len, L2_SIZE);
-
- /* Create a per-CPU hypervisor stack */
- CPU_FOREACH(cpu) {
- stack[cpu] = malloc(VMM_STACK_SIZE, M_HYP, M_WAITOK | M_ZERO);
- stack_hyp_va[cpu] = next_hyp_va;
-
- for (i = 0; i < VMM_STACK_PAGES; i++) {
- rv = vmmpmap_enter(stack_hyp_va[cpu] + ptoa(i),
- PAGE_SIZE, vtophys(stack[cpu] + ptoa(i)),
- VM_PROT_READ | VM_PROT_WRITE);
- MPASS(rv);
+ if (!in_vhe()) {
+ /*
+ * Create an allocator for the virtual address space used by
+ * EL2. EL2 code is identity-mapped; the allocator is used to
+ * find space for VM structures.
+ */
+ el2_mem_alloc = vmem_create("VMM EL2", 0, 0, PAGE_SIZE, 0,
+ M_WAITOK);
+
+ /* Create the mappings for the hypervisor translation table. */
+ hyp_code_len = round_page(&vmm_hyp_code_end - &vmm_hyp_code);
+
+	/* We need a physical identity mapping for when we activate the MMU */
+ hyp_code_base = vmm_base = vtophys(&vmm_hyp_code);
+ rv = vmmpmap_enter(vmm_base, hyp_code_len, vmm_base,
+ VM_PROT_READ | VM_PROT_EXECUTE);
+ MPASS(rv);
+
+ next_hyp_va = roundup2(vmm_base + hyp_code_len, L2_SIZE);
+
+ /* Create a per-CPU hypervisor stack */
+ CPU_FOREACH(cpu) {
+ stack[cpu] = malloc(VMM_STACK_SIZE, M_HYP, M_WAITOK | M_ZERO);
+ stack_hyp_va[cpu] = next_hyp_va;
+
+ for (i = 0; i < VMM_STACK_PAGES; i++) {
+ rv = vmmpmap_enter(stack_hyp_va[cpu] + ptoa(i),
+ PAGE_SIZE, vtophys(stack[cpu] + ptoa(i)),
+ VM_PROT_READ | VM_PROT_WRITE);
+ MPASS(rv);
+ }
+ next_hyp_va += L2_SIZE;
}
- next_hyp_va += L2_SIZE;
- }
- el2_regs.tcr_el2 = TCR_EL2_RES1;
- el2_regs.tcr_el2 |= min(pa_range_bits << TCR_EL2_PS_SHIFT,
- TCR_EL2_PS_52BITS);
- el2_regs.tcr_el2 |= TCR_EL2_T0SZ(64 - EL2_VIRT_BITS);
- el2_regs.tcr_el2 |= TCR_EL2_IRGN0_WBWA | TCR_EL2_ORGN0_WBWA;
+ el2_regs.tcr_el2 = TCR_EL2_RES1;
+ el2_regs.tcr_el2 |= min(pa_range_bits << TCR_EL2_PS_SHIFT,
+ TCR_EL2_PS_52BITS);
+ el2_regs.tcr_el2 |= TCR_EL2_T0SZ(64 - EL2_VIRT_BITS);
+ el2_regs.tcr_el2 |= TCR_EL2_IRGN0_WBWA | TCR_EL2_ORGN0_WBWA;
#if PAGE_SIZE == PAGE_SIZE_4K
- el2_regs.tcr_el2 |= TCR_EL2_TG0_4K;
+ el2_regs.tcr_el2 |= TCR_EL2_TG0_4K;
#elif PAGE_SIZE == PAGE_SIZE_16K
- el2_regs.tcr_el2 |= TCR_EL2_TG0_16K;
+ el2_regs.tcr_el2 |= TCR_EL2_TG0_16K;
#else
#error Unsupported page size
#endif
#ifdef SMP
- el2_regs.tcr_el2 |= TCR_EL2_SH0_IS;
+ el2_regs.tcr_el2 |= TCR_EL2_SH0_IS;
#endif
+ }
- switch (el2_regs.tcr_el2 & TCR_EL2_PS_MASK) {
+ switch (pa_range_bits << TCR_EL2_PS_SHIFT) {
case TCR_EL2_PS_32BITS:
vmm_max_ipa_bits = 32;
break;
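
The switch above now keys on pa_range_bits shifted into the PS field position rather than on the masked tcr_el2 value, because tcr_el2 is only assembled on non-VHE hosts. The encoding it compares against is architectural: ID_AA64MMFR0_EL1.PARange maps to a physical address width as in the standalone sketch below, which uses plain constants in place of the kernel's TCR_EL2_PS_* macros.

#include <stdint.h>
#include <stdio.h>

/*
 * Decode ID_AA64MMFR0_EL1.PARange (bits [3:0]) into a physical address
 * width in bits.  Encodings per the Arm ARM; anything else is treated
 * as unknown here.
 */
static int
parange_to_bits(uint64_t parange)
{
    switch (parange & 0xf) {
    case 0x0: return (32);
    case 0x1: return (36);
    case 0x2: return (40);
    case 0x3: return (42);
    case 0x4: return (44);
    case 0x5: return (48);
    case 0x6: return (52);
    default:  return (-1);    /* reserved/unknown encoding */
    }
}

int
main(void)
{
    /* A CPU reporting PARange = 5 supports 48-bit physical addresses. */
    printf("PARange 5 -> %d bits\n", parange_to_bits(5));
    return (0);
}
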
@@ -381,8 +383,6 @@ vmmops_modinit(int ipinum)
* shareable
*/
el2_regs.vtcr_el2 = VTCR_EL2_RES1;
- el2_regs.vtcr_el2 |=
- min(pa_range_bits << VTCR_EL2_PS_SHIFT, VTCR_EL2_PS_48BIT);
el2_regs.vtcr_el2 |= VTCR_EL2_IRGN0_WBWA | VTCR_EL2_ORGN0_WBWA;
el2_regs.vtcr_el2 |= VTCR_EL2_T0SZ(64 - vmm_virt_bits);
el2_regs.vtcr_el2 |= vmm_vtcr_el2_sl(vmm_pmap_levels);
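
VTCR_EL2_T0SZ(64 - vmm_virt_bits) encodes the size of the guest IPA space: the T0SZ field holds 64 minus the number of input address bits, so the stage-2 translation covers 2^(64 - T0SZ) bytes. A one-function sketch of that relationship:

#include <stdio.h>

/*
 * T0SZ encodes a translation region of 2^(64 - T0SZ) bytes, so the
 * field is simply 64 minus the number of guest IPA bits.
 */
static unsigned int
t0sz_for_ipa_bits(unsigned int ipa_bits)
{
    return (64 - ipa_bits);
}

int
main(void)
{
    printf("39 IPA bits -> T0SZ %u\n", t0sz_for_ipa_bits(39));
    return (0);
}
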
@@ -396,42 +396,55 @@ vmmops_modinit(int ipinum)
#ifdef SMP
el2_regs.vtcr_el2 |= VTCR_EL2_SH0_IS;
#endif
+ /*
+ * If FEAT_LPA2 is enabled in the host then we need to enable it here
+ * so the page tables created by pmap.c are correct. The meaning of
+ * the shareability field changes to become address bits when this
+ * is set.
+ */
+ if ((READ_SPECIALREG(tcr_el1) & TCR_DS) != 0) {
+ el2_regs.vtcr_el2 |= VTCR_EL2_DS;
+ el2_regs.vtcr_el2 |=
+ min(pa_range_bits << VTCR_EL2_PS_SHIFT, VTCR_EL2_PS_52BIT);
+ } else {
+ el2_regs.vtcr_el2 |=
+ min(pa_range_bits << VTCR_EL2_PS_SHIFT, VTCR_EL2_PS_48BIT);
+ }
smp_rendezvous(NULL, arm_setup_vectors, NULL, &el2_regs);
- /* Add memory to the vmem allocator (checking there is space) */
- if (vmm_base > (L2_SIZE + PAGE_SIZE)) {
- /*
- * Ensure there is an L2 block before the vmm code to check
- * for buffer overflows on earlier data. Include the PAGE_SIZE
- * of the minimum we can allocate.
- */
- vmm_base -= L2_SIZE + PAGE_SIZE;
- vmm_base = rounddown2(vmm_base, L2_SIZE);
+ if (!in_vhe()) {
+ /* Add memory to the vmem allocator (checking there is space) */
+ if (vmm_base > (L2_SIZE + PAGE_SIZE)) {
+ /*
+ * Ensure there is an L2 block before the vmm code to check
+ * for buffer overflows on earlier data. Include the PAGE_SIZE
+ * of the minimum we can allocate.
+ */
+ vmm_base -= L2_SIZE + PAGE_SIZE;
+ vmm_base = rounddown2(vmm_base, L2_SIZE);
+
+ /*
+ * Check there is memory before the vmm code to add.
+ *
+ * Reserve the L2 block at address 0 so NULL dereference will
+ * raise an exception.
+ */
+ if (vmm_base > L2_SIZE)
+ vmem_add(el2_mem_alloc, L2_SIZE, vmm_base - L2_SIZE,
+ M_WAITOK);
+ }
/*
- * Check there is memory before the vmm code to add.
- *
- * Reserve the L2 block at address 0 so NULL dereference will
- * raise an exception.
+ * Add the memory after the stacks. There is most of an L2 block
+ * between the last stack and the first allocation so this should
+ * be safe without adding more padding.
*/
- if (vmm_base > L2_SIZE)
- vmem_add(el2_mem_alloc, L2_SIZE, vmm_base - L2_SIZE,
- M_WAITOK);
+ if (next_hyp_va < HYP_VM_MAX_ADDRESS - PAGE_SIZE)
+ vmem_add(el2_mem_alloc, next_hyp_va,
+ HYP_VM_MAX_ADDRESS - next_hyp_va, M_WAITOK);
}
-
- /*
- * Add the memory after the stacks. There is most of an L2 block
- * between the last stack and the first allocation so this should
- * be safe without adding more padding.
- */
- if (next_hyp_va < HYP_VM_MAX_ADDRESS - PAGE_SIZE)
- vmem_add(el2_mem_alloc, next_hyp_va,
- HYP_VM_MAX_ADDRESS - next_hyp_va, M_WAITOK);
-
- daif = intr_disable();
- cnthctl_el2 = vmm_call_hyp(HYP_READ_REGISTER, HYP_REG_CNTHCTL);
- intr_restore(daif);
+ cnthctl_el2 = vmm_read_reg(HYP_REG_CNTHCTL);
vgic_init();
vtimer_init(cnthctl_el2);
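
As the new comment in this hunk explains, the stage-2 output size is clamped to 48 bits unless the host runs with FEAT_LPA2 (TCR_EL1.DS set), in which case 52-bit output addressing is permitted and VTCR_EL2.DS must also be set so the tables built by pmap.c are interpreted correctly. A minimal sketch of that clamping, assuming made-up shift and field constants in place of the kernel's VTCR_EL2_* macros:

#include <stdint.h>
#include <stdio.h>

/* Illustrative stand-ins for the kernel's VTCR_EL2_* macros. */
#define PS_SHIFT    16
#define PS_48BIT    ((uint64_t)0x5 << PS_SHIFT)
#define PS_52BIT    ((uint64_t)0x6 << PS_SHIFT)
#define DS_BIT      ((uint64_t)1 << 32)

static uint64_t
min64(uint64_t a, uint64_t b)
{
    return (a < b ? a : b);
}

/*
 * Pick the stage-2 output size: without FEAT_LPA2 the largest
 * encodable PS is 48 bits; with it, 52 bits are allowed and the DS
 * bit must also be set.
 */
static uint64_t
vtcr_ps(uint64_t pa_range_bits, int host_lpa2)
{
    uint64_t v = 0;

    if (host_lpa2) {
        v |= DS_BIT;
        v |= min64(pa_range_bits << PS_SHIFT, PS_52BIT);
    } else {
        v |= min64(pa_range_bits << PS_SHIFT, PS_48BIT);
    }
    return (v);
}

int
main(void)
{
    /* A 52-bit capable CPU (PARange 6) on a non-LPA2 host is capped at 48. */
    printf("%#llx\n", (unsigned long long)vtcr_ps(6, 0));
    return (0);
}
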
@@ -444,21 +457,25 @@ vmmops_modcleanup(void)
{
int cpu;
- smp_rendezvous(NULL, arm_teardown_vectors, NULL, NULL);
+ if (!in_vhe()) {
+ smp_rendezvous(NULL, arm_teardown_vectors, NULL, NULL);
- CPU_FOREACH(cpu) {
- vmmpmap_remove(stack_hyp_va[cpu], VMM_STACK_PAGES * PAGE_SIZE,
- false);
- }
+ CPU_FOREACH(cpu) {
+ vmmpmap_remove(stack_hyp_va[cpu],
+ VMM_STACK_PAGES * PAGE_SIZE, false);
+ }
- vmmpmap_remove(hyp_code_base, hyp_code_len, false);
+ vmmpmap_remove(hyp_code_base, hyp_code_len, false);
+ }
vtimer_cleanup();
- vmmpmap_fini();
+ if (!in_vhe()) {
+ vmmpmap_fini();
- CPU_FOREACH(cpu)
- free(stack[cpu], M_HYP);
+ CPU_FOREACH(cpu)
+ free(stack[cpu], M_HYP);
+ }
pmap_clean_stage2_tlbi = NULL;
pmap_stage2_invalidate_range = NULL;
@@ -510,8 +527,9 @@ vmmops_init(struct vm *vm, pmap_t pmap)
vtimer_vminit(hyp);
vgic_vminit(hyp);
- hyp->el2_addr = el2_map_enter((vm_offset_t)hyp, size,
- VM_PROT_READ | VM_PROT_WRITE);
+ if (!in_vhe())
+ hyp->el2_addr = el2_map_enter((vm_offset_t)hyp, size,
+ VM_PROT_READ | VM_PROT_WRITE);
return (hyp);
}
@@ -539,8 +557,9 @@ vmmops_vcpu_init(void *vmi, struct vcpu *vcpu1, int vcpuid)
vtimer_cpuinit(hypctx);
vgic_cpuinit(hypctx);
- hypctx->el2_addr = el2_map_enter((vm_offset_t)hypctx, size,
- VM_PROT_READ | VM_PROT_WRITE);
+ if (!in_vhe())
+ hypctx->el2_addr = el2_map_enter((vm_offset_t)hypctx, size,
+ VM_PROT_READ | VM_PROT_WRITE);
return (hypctx);
}
@@ -567,26 +586,6 @@ vmmops_vmspace_free(struct vmspace *vmspace)
vmspace_free(vmspace);
}
-static void
-vmm_pmap_clean_stage2_tlbi(void)
-{
- vmm_call_hyp(HYP_CLEAN_S2_TLBI);
-}
-
-static void
-vmm_pmap_invalidate_range(uint64_t vttbr, vm_offset_t sva, vm_offset_t eva,
- bool final_only)
-{
- MPASS(eva > sva);
- vmm_call_hyp(HYP_S2_TLBI_RANGE, vttbr, sva, eva, final_only);
-}
-
-static void
-vmm_pmap_invalidate_all(uint64_t vttbr)
-{
- vmm_call_hyp(HYP_S2_TLBI_ALL, vttbr);
-}
-
static inline void
arm64_print_hyp_regs(struct vm_exit *vme)
{
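
The removed wrappers above are superseded by vmm_clean_s2_tlbi(), vmm_s2_tlbi_range() and vmm_s2_tlbi_all() from the newly included vmm_handlers.h, which presumably hide whether the maintenance is done by a direct call (VHE, kernel already at EL2) or by trapping into EL2 via vmm_call_hyp() (non-VHE). A hedged sketch of that kind of dispatch, with hypothetical function names standing in for the real handlers:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Hypothetical backends; the real ones live behind vmm_handlers.h. */
static void
tlbi_all_direct(uint64_t vttbr)
{
    printf("direct TLBI for vttbr %#llx\n", (unsigned long long)vttbr);
}

static void
tlbi_all_hypcall(uint64_t vttbr)
{
    printf("EL2 call TLBI for vttbr %#llx\n", (unsigned long long)vttbr);
}

static bool vhe_mode;    /* stand-in for in_vhe() */

/* Single entry point that the stage-2 pmap callbacks can be pointed at. */
static void
s2_tlbi_all(uint64_t vttbr)
{
    if (vhe_mode)
        tlbi_all_direct(vttbr);
    else
        tlbi_all_hypcall(vttbr);
}

int
main(void)
{
    vhe_mode = true;
    s2_tlbi_all(0x1234);
    return (0);
}
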
@@ -700,7 +699,14 @@ handle_el1_sync_excp(struct hypctx *hypctx, struct vm_exit *vme_ret,
arm64_gen_reg_emul_data(esr_iss, vme_ret);
vme_ret->exitcode = VM_EXITCODE_REG_EMUL;
break;
-
+ case EXCP_BRK:
+ vmm_stat_incr(hypctx->vcpu, VMEXIT_BRK, 1);
+ vme_ret->exitcode = VM_EXITCODE_BRK;
+ break;
+ case EXCP_SOFTSTP_EL0:
+ vmm_stat_incr(hypctx->vcpu, VMEXIT_SS, 1);
+ vme_ret->exitcode = VM_EXITCODE_SS;
+ break;
case EXCP_INSN_ABORT_L:
case EXCP_DATA_ABORT_L:
vmm_stat_incr(hypctx->vcpu, esr_ec == EXCP_DATA_ABORT_L ?
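
The two new cases turn guest BRK instructions and single-step debug exceptions into explicit VM exits that userland can handle. The dispatch is driven by the ESR_EL2 exception class; a standalone sketch with the EC values written out as constants (the kernel uses EXCP_BRK and EXCP_SOFTSTP_EL0 from armreg.h):

#include <stdint.h>
#include <stdio.h>

#define ESR_EC_SHIFT        26
#define ESR_EC_MASK         0x3fu
/* EC encodings from the Arm ARM. */
#define EC_SOFTSTP_LOWER    0x32    /* software step from a lower EL */
#define EC_BRK_A64          0x3c    /* BRK instruction, AArch64 */

enum exitcode { EXIT_OTHER, EXIT_BRK, EXIT_SS };

static enum exitcode
classify_exit(uint64_t esr_el2)
{
    switch ((esr_el2 >> ESR_EC_SHIFT) & ESR_EC_MASK) {
    case EC_BRK_A64:
        return (EXIT_BRK);
    case EC_SOFTSTP_LOWER:
        return (EXIT_SS);
    default:
        return (EXIT_OTHER);
    }
}

int
main(void)
{
    uint64_t esr = (uint64_t)EC_BRK_A64 << ESR_EC_SHIFT;

    printf("%d\n", classify_exit(esr));    /* 1 == EXIT_BRK */
    return (0);
}
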
@@ -1101,7 +1107,7 @@ vmmops_run(void *vcpui, register_t pc, pmap_t pmap, struct vm_eventinfo *evinfo)
* Update fields that may change on exeption entry
* based on how sctlr_el1 is configured.
*/
- if ((hypctx->sctlr_el1 & SCTLR_SPAN) != 0)
+ if ((hypctx->sctlr_el1 & SCTLR_SPAN) == 0)
hypctx->tf.tf_spsr |= PSR_PAN;
if ((hypctx->sctlr_el1 & SCTLR_DSSBS) == 0)
hypctx->tf.tf_spsr &= ~PSR_SSBS;
@@ -1136,16 +1142,13 @@ vmmops_run(void *vcpui, register_t pc, pmap_t pmap, struct vm_eventinfo *evinfo)
vgic_flush_hwstate(hypctx);
/* Call into EL2 to switch to the guest */
- excp_type = vmm_call_hyp(HYP_ENTER_GUEST,
- hyp->el2_addr, hypctx->el2_addr);
+ excp_type = vmm_enter_guest(hyp, hypctx);
vgic_sync_hwstate(hypctx);
vtimer_sync_hwstate(hypctx);
/*
- * Deactivate the stage2 pmap. vmm_pmap_clean_stage2_tlbi
- * depends on this meaning we activate the VM before entering
- * the vm again
+ * Deactivate the stage2 pmap.
*/
PCPU_SET(curvmpmap, NULL);
intr_restore(daif);
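
The inverted test is the actual bug fix in this hunk: SCTLR_EL1.SPAN=0 means the hardware sets PSTATE.PAN on an exception taken to EL1, while SPAN=1 leaves PAN unchanged, so the guest's SPSR should only gain PSR_PAN when SPAN is clear. A small standalone sketch of that rule, with illustrative bit positions:

#include <stdint.h>
#include <stdio.h>

/* Illustrative bit positions; the kernel uses SCTLR_SPAN and PSR_PAN. */
#define SCTLR_SPAN_BIT    ((uint64_t)1 << 23)
#define PSR_PAN_BIT       ((uint64_t)1 << 22)

/*
 * Model what the hardware does to the saved PSTATE on an exception
 * taken to EL1: when SCTLR_EL1.SPAN is clear, PAN is forced to 1;
 * when SPAN is set, PAN is left unchanged.
 */
static uint64_t
spsr_on_entry(uint64_t sctlr_el1, uint64_t spsr)
{
    if ((sctlr_el1 & SCTLR_SPAN_BIT) == 0)
        spsr |= PSR_PAN_BIT;
    return (spsr);
}

int
main(void)
{
    printf("%#llx\n", (unsigned long long)spsr_on_entry(0, 0));
    return (0);
}
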
@@ -1198,7 +1201,8 @@ vmmops_vcpu_cleanup(void *vcpui)
vtimer_cpucleanup(hypctx);
vgic_cpucleanup(hypctx);
- vmmpmap_remove(hypctx->el2_addr, el2_hypctx_size(), true);
+ if (!in_vhe())
+ vmmpmap_remove(hypctx->el2_addr, el2_hypctx_size(), true);
free(hypctx, M_HYP);
}
@@ -1213,7 +1217,8 @@ vmmops_cleanup(void *vmi)
smp_rendezvous(NULL, arm_pcpu_vmcleanup, NULL, hyp);
- vmmpmap_remove(hyp->el2_addr, el2_hyp_size(hyp->vm), true);
+ if (!in_vhe())
+ vmmpmap_remove(hyp->el2_addr, el2_hyp_size(hyp->vm), true);
free(hyp, M_HYP);
}
@@ -1313,6 +1318,7 @@ vmmops_exception(void *vcpui, uint64_t esr, uint64_t far)
int
vmmops_getcap(void *vcpui, int num, int *retval)
{
+ struct hypctx *hypctx = vcpui;
int ret;
ret = ENOENT;
@@ -1322,6 +1328,11 @@ vmmops_getcap(void *vcpui, int num, int *retval)
*retval = 1;
ret = 0;
break;
+ case VM_CAP_BRK_EXIT:
+ case VM_CAP_SS_EXIT:
+ case VM_CAP_MASK_HWINTR:
+ *retval = (hypctx->setcaps & (1ul << num)) != 0;
+ break;
default:
break;
}
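
For the three new capabilities, getcap reports whatever was last configured through setcap by testing the per-vCPU setcaps bitmask, with the capability number used directly as a bit index. A tiny standalone sketch of that bookkeeping (the capability numbers here are placeholders, not the real VM_CAP_* values):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Placeholder capability numbers; the kernel uses the VM_CAP_* values. */
enum { CAP_BRK_EXIT = 3, CAP_SS_EXIT = 4, CAP_MASK_HWINTR = 5 };

struct caps {
    uint64_t setcaps;    /* one bit per capability number */
};

static bool
cap_enabled(const struct caps *c, int num)
{
    return ((c->setcaps & (1ul << num)) != 0);
}

static void
cap_record(struct caps *c, int num, int val)
{
    if (val == 0)
        c->setcaps &= ~(1ul << num);
    else
        c->setcaps |= (1ul << num);
}

int
main(void)
{
    struct caps c = { .setcaps = 0 };

    cap_record(&c, CAP_SS_EXIT, 1);
    printf("SS exit enabled: %d\n", cap_enabled(&c, CAP_SS_EXIT));
    return (0);
}
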
@@ -1332,6 +1343,68 @@ vmmops_getcap(void *vcpui, int num, int *retval)
int
vmmops_setcap(void *vcpui, int num, int val)
{
+ struct hypctx *hypctx = vcpui;
+ int ret;
+
+ ret = 0;
- return (ENOENT);
+ switch (num) {
+ case VM_CAP_BRK_EXIT:
+ if ((val != 0) == ((hypctx->setcaps & (1ul << num)) != 0))
+ break;
+ if (val != 0)
+ hypctx->mdcr_el2 |= MDCR_EL2_TDE;
+ else
+ hypctx->mdcr_el2 &= ~MDCR_EL2_TDE;
+ break;
+ case VM_CAP_SS_EXIT:
+ if ((val != 0) == ((hypctx->setcaps & (1ul << num)) != 0))
+ break;
+
+ if (val != 0) {
+ hypctx->debug_spsr |= (hypctx->tf.tf_spsr & PSR_SS);
+ hypctx->debug_mdscr |= hypctx->mdscr_el1 &
+ (MDSCR_SS | MDSCR_KDE);
+
+ hypctx->tf.tf_spsr |= PSR_SS;
+ hypctx->mdscr_el1 |= MDSCR_SS | MDSCR_KDE;
+ hypctx->mdcr_el2 |= MDCR_EL2_TDE;
+ } else {
+ hypctx->tf.tf_spsr &= ~PSR_SS;
+ hypctx->tf.tf_spsr |= hypctx->debug_spsr;
+ hypctx->debug_spsr &= ~PSR_SS;
+ hypctx->mdscr_el1 &= ~(MDSCR_SS | MDSCR_KDE);
+ hypctx->mdscr_el1 |= hypctx->debug_mdscr;
+ hypctx->debug_mdscr &= ~(MDSCR_SS | MDSCR_KDE);
+ hypctx->mdcr_el2 &= ~MDCR_EL2_TDE;
+ }
+ break;
+ case VM_CAP_MASK_HWINTR:
+ if ((val != 0) == ((hypctx->setcaps & (1ul << num)) != 0))
+ break;
+
+ if (val != 0) {
+ hypctx->debug_spsr |= (hypctx->tf.tf_spsr &
+ (PSR_I | PSR_F));
+ hypctx->tf.tf_spsr |= PSR_I | PSR_F;
+ } else {
+ hypctx->tf.tf_spsr &= ~(PSR_I | PSR_F);
+ hypctx->tf.tf_spsr |= (hypctx->debug_spsr &
+ (PSR_I | PSR_F));
+ hypctx->debug_spsr &= ~(PSR_I | PSR_F);
+ }
+ break;
+ default:
+ ret = ENOENT;
+ break;
+ }
+
+ if (ret == 0) {
+ if (val == 0)
+ hypctx->setcaps &= ~(1ul << num);
+ else
+ hypctx->setcaps |= (1ul << num);
+ }
+
+ return (ret);
}
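
Both VM_CAP_SS_EXIT and VM_CAP_MASK_HWINTR follow the same save/override/restore pattern: the bits about to be forced (PSR_SS, or PSR_I|PSR_F) are first stashed in a per-vCPU debug_* shadow so the guest's original state can be put back when the capability is disabled again. A standalone sketch of that pattern on a single flags word, with illustrative bit values:

#include <stdint.h>
#include <stdio.h>

#define FLAG_I    ((uint64_t)1 << 7)    /* illustrative: IRQ mask bit */
#define FLAG_F    ((uint64_t)1 << 6)    /* illustrative: FIQ mask bit */
#define MASK      (FLAG_I | FLAG_F)

struct vcpu_state {
    uint64_t spsr;     /* live guest flags */
    uint64_t saved;    /* shadow of the bits we overrode */
};

/* Force the mask bits on, remembering what the guest had set. */
static void
mask_on(struct vcpu_state *v)
{
    v->saved |= (v->spsr & MASK);
    v->spsr |= MASK;
}

/* Drop the forced bits and restore whatever the guest originally had. */
static void
mask_off(struct vcpu_state *v)
{
    v->spsr &= ~MASK;
    v->spsr |= (v->saved & MASK);
    v->saved &= ~MASK;
}

int
main(void)
{
    struct vcpu_state v = { .spsr = FLAG_F, .saved = 0 };

    mask_on(&v);     /* guest now runs with both IRQ and FIQ masked */
    mask_off(&v);    /* the original FIQ-only masking is restored */
    printf("%#llx\n", (unsigned long long)v.spsr);    /* prints 0x40 */
    return (0);
}
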