Diffstat (limited to 'sys/arm64/vmm/vmm.c')
-rw-r--r--    sys/arm64/vmm/vmm.c    487
1 file changed, 86 insertions, 401 deletions
diff --git a/sys/arm64/vmm/vmm.c b/sys/arm64/vmm/vmm.c
index a2cc63448f19..3082d2941221 100644
--- a/sys/arm64/vmm/vmm.c
+++ b/sys/arm64/vmm/vmm.c
@@ -60,13 +60,14 @@
 #include <machine/vm.h>
 #include <machine/vmparam.h>
 #include <machine/vmm.h>
-#include <machine/vmm_dev.h>
 #include <machine/vmm_instruction_emul.h>
 
 #include <dev/pci/pcireg.h>
 
+#include <dev/vmm/vmm_dev.h>
+#include <dev/vmm/vmm_ktr.h>
+#include <dev/vmm/vmm_mem.h>
+#include <dev/vmm/vmm_stat.h>
+
-#include "vmm_ktr.h"
-#include "vmm_stat.h"
-
 #include "arm64.h"
 #include "mmu.h"
@@ -94,25 +95,6 @@ struct vcpu {
 #define vcpu_unlock(v)          mtx_unlock_spin(&((v)->mtx))
 #define vcpu_assert_locked(v)   mtx_assert(&((v)->mtx), MA_OWNED)
 
-struct mem_seg {
-        uint64_t        gpa;
-        size_t          len;
-        bool            wired;
-        bool            sysmem;
-        vm_object_t     object;
-};
-#define VM_MAX_MEMSEGS  3
-
-struct mem_map {
-        vm_paddr_t      gpa;
-        size_t          len;
-        vm_ooffset_t    segoff;
-        int             segid;
-        int             prot;
-        int             flags;
-};
-#define VM_MAX_MEMMAPS  4
-
 struct vmm_mmio_region {
         uint64_t start;
         uint64_t end;
@@ -141,11 +123,11 @@ struct vm {
         volatile cpuset_t active_cpus;  /* (i) active vcpus */
         volatile cpuset_t debug_cpus;   /* (i) vcpus stopped for debug */
         int             suspend;        /* (i) stop VM execution */
+        bool            dying;          /* (o) is dying */
         volatile cpuset_t suspended_cpus; /* (i) suspended vcpus */
         volatile cpuset_t halted_cpus;  /* (x) cpus in a hard halt */
-        struct mem_map  mem_maps[VM_MAX_MEMMAPS]; /* (i) guest address space */
-        struct mem_seg  mem_segs[VM_MAX_MEMSEGS]; /* (o) guest memory regions */
         struct vmspace  *vmspace;       /* (o) guest's address space */
+        struct vm_mem   mem;            /* (i) guest memory */
         char            name[VM_MAX_NAMELEN]; /* (o) virtual machine name */
         struct vcpu     **vcpu;         /* (i) guest vcpus */
         struct vmm_mmio_region mmio_region[VM_MAX_MMIO_REGIONS];
@@ -156,7 +138,6 @@ struct vm {
         uint16_t        cores;          /* (o) num of cores/socket */
         uint16_t        threads;        /* (o) num of threads/core */
         uint16_t        maxcpus;        /* (o) max pluggable cpus */
-        struct sx       mem_segs_lock;  /* (o) */
         struct sx       vcpus_init_lock; /* (o) */
 };
 
@@ -234,10 +215,25 @@ u_int vm_maxcpu;
 SYSCTL_UINT(_hw_vmm, OID_AUTO, maxcpu, CTLFLAG_RDTUN | CTLFLAG_NOFETCH,
     &vm_maxcpu, 0, "Maximum number of vCPUs");
 
-static void vm_free_memmap(struct vm *vm, int ident);
-static bool sysmem_mapping(struct vm *vm, struct mem_map *mm);
 static void vcpu_notify_event_locked(struct vcpu *vcpu);
 
+/* global statistics */
+VMM_STAT(VMEXIT_COUNT, "total number of vm exits");
+VMM_STAT(VMEXIT_UNKNOWN, "number of vmexits for the unknown exception");
+VMM_STAT(VMEXIT_WFI, "number of times wfi was intercepted");
+VMM_STAT(VMEXIT_WFE, "number of times wfe was intercepted");
+VMM_STAT(VMEXIT_HVC, "number of times hvc was intercepted");
+VMM_STAT(VMEXIT_MSR, "number of times msr/mrs was intercepted");
+VMM_STAT(VMEXIT_DATA_ABORT, "number of vmexits for a data abort");
+VMM_STAT(VMEXIT_INSN_ABORT, "number of vmexits for an instruction abort");
+VMM_STAT(VMEXIT_UNHANDLED_SYNC, "number of vmexits for an unhandled synchronous exception");
+VMM_STAT(VMEXIT_IRQ, "number of vmexits for an irq");
+VMM_STAT(VMEXIT_FIQ, "number of vmexits for an interrupt");
+VMM_STAT(VMEXIT_BRK, "number of vmexits for a breakpoint exception");
+VMM_STAT(VMEXIT_SS, "number of vmexits for a single-step exception");
+VMM_STAT(VMEXIT_UNHANDLED_EL2, "number of vmexits for an unhandled EL2 exception");
+VMM_STAT(VMEXIT_UNHANDLED, "number of vmexits for an unhandled exception");
+
 /*
  * Upper limit on vm_maxcpu. We could increase this to 28 bits, but this
  * is a safe value for now.
@@ -249,7 +245,8 @@ vmm_regs_init(struct vmm_regs *regs, const struct vmm_regs *masks)
 {
 #define _FETCH_KERN_REG(reg, field) do {                                \
         regs->field = vmm_arch_regs_masks.field;                        \
-        if (!get_kernel_reg_masked(reg, &regs->field, masks->field))    \
+        if (!get_kernel_reg_iss_masked(reg ## _ISS, &regs->field,       \
+            masks->field))                                              \
                 regs->field = 0;                                        \
 } while (0)
         _FETCH_KERN_REG(ID_AA64AFR0_EL1, id_aa64afr0);
@@ -315,6 +312,20 @@ vm_exitinfo(struct vcpu *vcpu)
 }
 
 static int
+vmm_unsupported_quirk(void)
+{
+        /*
+         * Known to not load on Ampere eMAG
+         * https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=285051
+         */
+        if (CPU_MATCH(CPU_IMPL_MASK | CPU_PART_MASK, CPU_IMPL_APM,
+            CPU_PART_EMAG8180, 0, 0))
+                return (ENXIO);
+
+        return (0);
+}
+
+static int
 vmm_init(void)
 {
         int error;
@@ -343,19 +354,29 @@ vmm_handler(module_t mod, int what, void *arg)
 
         switch (what) {
         case MOD_LOAD:
-                /* TODO: if (vmm_is_hw_supported()) { */
-                vmmdev_init();
+                error = vmm_unsupported_quirk();
+                if (error != 0)
+                        break;
+                error = vmmdev_init();
+                if (error != 0)
+                        break;
                 error = vmm_init();
                 if (error == 0)
                         vmm_initialized = true;
+                else
+                        (void)vmmdev_cleanup();
                 break;
         case MOD_UNLOAD:
-                /* TODO: if (vmm_is_hw_supported()) { */
                 error = vmmdev_cleanup();
                 if (error == 0 && vmm_initialized) {
                         error = vmmops_modcleanup();
-                        if (error)
+                        if (error) {
+                                /*
+                                 * Something bad happened - prevent new
+                                 * VMs from being created
+                                 */
                                 vmm_initialized = false;
+                        }
                 }
                 break;
         default:
@@ -376,8 +397,9 @@ static moduledata_t vmm_kmod = {
  *
  * - HYP initialization requires smp_rendezvous() and therefore must happen
  *   after SMP is fully functional (after SI_SUB_SMP).
+ * - vmm device initialization requires an initialized devfs.
  */
-DECLARE_MODULE(vmm, vmm_kmod, SI_SUB_SMP + 1, SI_ORDER_ANY);
+DECLARE_MODULE(vmm, vmm_kmod, MAX(SI_SUB_SMP, SI_SUB_DEVFS) + 1, SI_ORDER_ANY);
 MODULE_VERSION(vmm, 1);
 
 static void
@@ -405,6 +427,14 @@ vm_init(struct vm *vm, bool create)
         }
 }
 
+void
+vm_disable_vcpu_creation(struct vm *vm)
+{
+        sx_xlock(&vm->vcpus_init_lock);
+        vm->dying = true;
+        sx_xunlock(&vm->vcpus_init_lock);
+}
+
 struct vcpu *
 vm_alloc_vcpu(struct vm *vm, int vcpuid)
 {
@@ -417,13 +447,14 @@ vm_alloc_vcpu(struct vm *vm, int vcpuid)
         if (vcpuid >= vgic_max_cpu_count(vm->cookie))
                 return (NULL);
 
-        vcpu = atomic_load_ptr(&vm->vcpu[vcpuid]);
+        vcpu = (struct vcpu *)
+            atomic_load_acq_ptr((uintptr_t *)&vm->vcpu[vcpuid]);
         if (__predict_true(vcpu != NULL))
                 return (vcpu);
 
         sx_xlock(&vm->vcpus_init_lock);
         vcpu = vm->vcpu[vcpuid];
-        if (vcpu == NULL/* && !vm->dying*/) {
+        if (vcpu == NULL && !vm->dying) {
                 vcpu = vcpu_alloc(vm, vcpuid);
                 vcpu_init(vcpu);
 
@@ -473,7 +504,7 @@ vm_create(const char *name, struct vm **retvm)
         vm = malloc(sizeof(struct vm), M_VMM, M_WAITOK | M_ZERO);
         strcpy(vm->name, name);
         vm->vmspace = vmspace;
-        sx_init(&vm->mem_segs_lock, "vm mem_segs");
+        vm_mem_init(&vm->mem);
         sx_init(&vm->vcpus_init_lock, "vm vcpus");
 
         vm->sockets = 1;
@@ -522,11 +553,11 @@ vm_set_topology(struct vm *vm, uint16_t sockets, uint16_t cores,
 static void
 vm_cleanup(struct vm *vm, bool destroy)
 {
-        struct mem_map *mm;
         pmap_t pmap __diagused;
         int i;
 
         if (destroy) {
+                vm_xlock_memsegs(vm);
                 pmap = vmspace_pmap(vm->vmspace);
                 sched_pin();
                 PCPU_SET(curvmpmap, NULL);
@@ -534,7 +565,9 @@ vm_cleanup(struct vm *vm, bool destroy)
                 CPU_FOREACH(i) {
                         MPASS(cpuid_to_pcpu[i]->pc_curvmpmap != pmap);
                 }
-        }
+        } else
+                vm_assert_memseg_xlocked(vm);
+
         vgic_detach_from_vm(vm->cookie);
@@ -545,25 +578,9 @@ vm_cleanup(struct vm *vm, bool destroy)
 
         vmmops_cleanup(vm->cookie);
 
-        /*
-         * System memory is removed from the guest address space only when
-         * the VM is destroyed. This is because the mapping remains the same
-         * across VM reset.
-         *
-         * Device memory can be relocated by the guest (e.g. using PCI BARs)
-         * so those mappings are removed on a VM reset.
-         */
-        if (!destroy) {
-                for (i = 0; i < VM_MAX_MEMMAPS; i++) {
-                        mm = &vm->mem_maps[i];
-                        if (destroy || !sysmem_mapping(vm, mm))
-                                vm_free_memmap(vm, i);
-                }
-        }
-
+        vm_mem_cleanup(vm);
         if (destroy) {
-                for (i = 0; i < VM_MAX_MEMSEGS; i++)
-                        vm_free_memseg(vm, i);
+                vm_mem_destroy(vm);
 
                 vmmops_vmspace_free(vm->vmspace);
                 vm->vmspace = NULL;
@@ -572,7 +589,6 @@ vm_cleanup(struct vm *vm, bool destroy)
                         free(vm->vcpu[i], M_VMM);
                 free(vm->vcpu, M_VMM);
                 sx_destroy(&vm->vcpus_init_lock);
-                sx_destroy(&vm->mem_segs_lock);
         }
 }
 
@@ -608,290 +624,11 @@ vm_name(struct vm *vm)
         return (vm->name);
 }
 
-void
-vm_slock_memsegs(struct vm *vm)
-{
-        sx_slock(&vm->mem_segs_lock);
-}
-
-void
-vm_xlock_memsegs(struct vm *vm)
-{
-        sx_xlock(&vm->mem_segs_lock);
-}
-
-void
-vm_unlock_memsegs(struct vm *vm)
-{
-        sx_unlock(&vm->mem_segs_lock);
-}
-
-/*
- * Return 'true' if 'gpa' is allocated in the guest address space.
- *
- * This function is called in the context of a running vcpu which acts as
- * an implicit lock on 'vm->mem_maps[]'.
- */
-bool
-vm_mem_allocated(struct vcpu *vcpu, vm_paddr_t gpa)
-{
-        struct vm *vm = vcpu->vm;
-        struct mem_map *mm;
-        int i;
-
-#ifdef INVARIANTS
-        int hostcpu, state;
-        state = vcpu_get_state(vcpu, &hostcpu);
-        KASSERT(state == VCPU_RUNNING && hostcpu == curcpu,
-            ("%s: invalid vcpu state %d/%d", __func__, state, hostcpu));
-#endif
-
-        for (i = 0; i < VM_MAX_MEMMAPS; i++) {
-                mm = &vm->mem_maps[i];
-                if (mm->len != 0 && gpa >= mm->gpa && gpa < mm->gpa + mm->len)
-                        return (true);  /* 'gpa' is sysmem or devmem */
-        }
-
-        return (false);
-}
-
-int
-vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem)
-{
-        struct mem_seg *seg;
-        vm_object_t obj;
-
-        sx_assert(&vm->mem_segs_lock, SX_XLOCKED);
-
-        if (ident < 0 || ident >= VM_MAX_MEMSEGS)
-                return (EINVAL);
-
-        if (len == 0 || (len & PAGE_MASK))
-                return (EINVAL);
-
-        seg = &vm->mem_segs[ident];
-        if (seg->object != NULL) {
-                if (seg->len == len && seg->sysmem == sysmem)
-                        return (EEXIST);
-                else
-                        return (EINVAL);
-        }
-
-        obj = vm_object_allocate(OBJT_DEFAULT, len >> PAGE_SHIFT);
-        if (obj == NULL)
-                return (ENOMEM);
-
-        seg->len = len;
-        seg->object = obj;
-        seg->sysmem = sysmem;
-        return (0);
-}
-
-int
-vm_get_memseg(struct vm *vm, int ident, size_t *len, bool *sysmem,
-    vm_object_t *objptr)
-{
-        struct mem_seg *seg;
-
-        sx_assert(&vm->mem_segs_lock, SX_LOCKED);
-
-        if (ident < 0 || ident >= VM_MAX_MEMSEGS)
-                return (EINVAL);
-
-        seg = &vm->mem_segs[ident];
-        if (len)
-                *len = seg->len;
-        if (sysmem)
-                *sysmem = seg->sysmem;
-        if (objptr)
-                *objptr = seg->object;
-        return (0);
-}
-
-void
-vm_free_memseg(struct vm *vm, int ident)
-{
-        struct mem_seg *seg;
-
-        KASSERT(ident >= 0 && ident < VM_MAX_MEMSEGS,
-            ("%s: invalid memseg ident %d", __func__, ident));
-
-        seg = &vm->mem_segs[ident];
-        if (seg->object != NULL) {
-                vm_object_deallocate(seg->object);
-                bzero(seg, sizeof(struct mem_seg));
-        }
-}
-
-int
-vm_mmap_memseg(struct vm *vm, vm_paddr_t gpa, int segid, vm_ooffset_t first,
-    size_t len, int prot, int flags)
-{
-        struct mem_seg *seg;
-        struct mem_map *m, *map;
-        vm_ooffset_t last;
-        int i, error;
-
-        if (prot == 0 || (prot & ~(VM_PROT_ALL)) != 0)
-                return (EINVAL);
-
-        if (flags & ~VM_MEMMAP_F_WIRED)
-                return (EINVAL);
-
-        if (segid < 0 || segid >= VM_MAX_MEMSEGS)
-                return (EINVAL);
-
-        seg = &vm->mem_segs[segid];
-        if (seg->object == NULL)
-                return (EINVAL);
-
-        last = first + len;
-        if (first < 0 || first >= last || last > seg->len)
-                return (EINVAL);
-
-        if ((gpa | first | last) & PAGE_MASK)
-                return (EINVAL);
-
-        map = NULL;
-        for (i = 0; i < VM_MAX_MEMMAPS; i++) {
-                m = &vm->mem_maps[i];
-                if (m->len == 0) {
-                        map = m;
-                        break;
-                }
-        }
-
-        if (map == NULL)
-                return (ENOSPC);
-
-        error = vm_map_find(&vm->vmspace->vm_map, seg->object, first, &gpa,
-            len, 0, VMFS_NO_SPACE, prot, prot, 0);
-        if (error != KERN_SUCCESS)
-                return (EFAULT);
-
-        vm_object_reference(seg->object);
-
-        if (flags & VM_MEMMAP_F_WIRED) {
-                error = vm_map_wire(&vm->vmspace->vm_map, gpa, gpa + len,
-                    VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
-                if (error != KERN_SUCCESS) {
-                        vm_map_remove(&vm->vmspace->vm_map, gpa, gpa + len);
-                        return (error == KERN_RESOURCE_SHORTAGE ? ENOMEM :
-                            EFAULT);
-                }
-        }
-
-        map->gpa = gpa;
-        map->len = len;
-        map->segoff = first;
-        map->segid = segid;
-        map->prot = prot;
-        map->flags = flags;
-        return (0);
-}
-
-int
-vm_munmap_memseg(struct vm *vm, vm_paddr_t gpa, size_t len)
-{
-        struct mem_map *m;
-        int i;
-
-        for (i = 0; i < VM_MAX_MEMMAPS; i++) {
-                m = &vm->mem_maps[i];
-                if (m->gpa == gpa && m->len == len) {
-                        vm_free_memmap(vm, i);
-                        return (0);
-                }
-        }
-
-        return (EINVAL);
-}
-
-int
-vm_mmap_getnext(struct vm *vm, vm_paddr_t *gpa, int *segid,
-    vm_ooffset_t *segoff, size_t *len, int *prot, int *flags)
-{
-        struct mem_map *mm, *mmnext;
-        int i;
-
-        mmnext = NULL;
-        for (i = 0; i < VM_MAX_MEMMAPS; i++) {
-                mm = &vm->mem_maps[i];
-                if (mm->len == 0 || mm->gpa < *gpa)
-                        continue;
-                if (mmnext == NULL || mm->gpa < mmnext->gpa)
-                        mmnext = mm;
-        }
-
-        if (mmnext != NULL) {
-                *gpa = mmnext->gpa;
-                if (segid)
-                        *segid = mmnext->segid;
-                if (segoff)
-                        *segoff = mmnext->segoff;
-                if (len)
-                        *len = mmnext->len;
-                if (prot)
-                        *prot = mmnext->prot;
-                if (flags)
-                        *flags = mmnext->flags;
-                return (0);
-        } else {
-                return (ENOENT);
-        }
-}
-
-static void
-vm_free_memmap(struct vm *vm, int ident)
-{
-        struct mem_map *mm;
-        int error __diagused;
-
-        mm = &vm->mem_maps[ident];
-        if (mm->len) {
-                error = vm_map_remove(&vm->vmspace->vm_map, mm->gpa,
-                    mm->gpa + mm->len);
-                KASSERT(error == KERN_SUCCESS, ("%s: vm_map_remove error %d",
-                    __func__, error));
-                bzero(mm, sizeof(struct mem_map));
-        }
-}
-
-static __inline bool
-sysmem_mapping(struct vm *vm, struct mem_map *mm)
-{
-
-        if (mm->len != 0 && vm->mem_segs[mm->segid].sysmem)
-                return (true);
-        else
-                return (false);
-}
-
-vm_paddr_t
-vmm_sysmem_maxaddr(struct vm *vm)
-{
-        struct mem_map *mm;
-        vm_paddr_t maxaddr;
-        int i;
-
-        maxaddr = 0;
-        for (i = 0; i < VM_MAX_MEMMAPS; i++) {
-                mm = &vm->mem_maps[i];
-                if (sysmem_mapping(vm, mm)) {
-                        if (maxaddr < mm->gpa + mm->len)
-                                maxaddr = mm->gpa + mm->len;
-                }
-        }
-        return (maxaddr);
-}
-
 int
 vm_gla2gpa_nofault(struct vcpu *vcpu, struct vm_guest_paging *paging,
     uint64_t gla, int prot, uint64_t *gpa, int *is_fault)
 {
-
-        vmmops_gla2gpa(vcpu->cookie, paging, gla, prot, gpa, is_fault);
-        return (0);
+        return (vmmops_gla2gpa(vcpu->cookie, paging, gla, prot, gpa, is_fault));
 }
 
 static int
@@ -1319,6 +1056,18 @@ vcpu_notify_event(struct vcpu *vcpu)
         vcpu_unlock(vcpu);
 }
 
+struct vmspace *
+vm_vmspace(struct vm *vm)
+{
+        return (vm->vmspace);
+}
+
+struct vm_mem *
+vm_mem(struct vm *vm)
+{
+        return (&vm->mem);
+}
+
 static void
 restore_guest_fpustate(struct vcpu *vcpu)
 {
@@ -1506,70 +1255,6 @@ vcpu_get_state(struct vcpu *vcpu, int *hostcpu)
         return (state);
 }
 
-static void *
-_vm_gpa_hold(struct vm *vm, vm_paddr_t gpa, size_t len, int reqprot,
-    void **cookie)
-{
-        int i, count, pageoff;
-        struct mem_map *mm;
-        vm_page_t m;
-
-        pageoff = gpa & PAGE_MASK;
-        if (len > PAGE_SIZE - pageoff)
-                panic("vm_gpa_hold: invalid gpa/len: 0x%016lx/%lu", gpa, len);
-
-        count = 0;
-        for (i = 0; i < VM_MAX_MEMMAPS; i++) {
-                mm = &vm->mem_maps[i];
-                if (sysmem_mapping(vm, mm) && gpa >= mm->gpa &&
-                    gpa < mm->gpa + mm->len) {
-                        count = vm_fault_quick_hold_pages(&vm->vmspace->vm_map,
-                            trunc_page(gpa), PAGE_SIZE, reqprot, &m, 1);
-                        break;
-                }
-        }
-
-        if (count == 1) {
-                *cookie = m;
-                return ((void *)(PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)) + pageoff));
-        } else {
-                *cookie = NULL;
-                return (NULL);
-        }
-}
-
-void *
-vm_gpa_hold(struct vcpu *vcpu, vm_paddr_t gpa, size_t len, int reqprot,
-    void **cookie)
-{
-#ifdef INVARIANTS
-        /*
-         * The current vcpu should be frozen to ensure 'vm_memmap[]'
-         * stability.
-         */
-        int state = vcpu_get_state(vcpu, NULL);
-        KASSERT(state == VCPU_FROZEN, ("%s: invalid vcpu state %d",
-            __func__, state));
-#endif
-        return (_vm_gpa_hold(vcpu->vm, gpa, len, reqprot, cookie));
-}
-
-void *
-vm_gpa_hold_global(struct vm *vm, vm_paddr_t gpa, size_t len, int reqprot,
-    void **cookie)
-{
-        sx_assert(&vm->mem_segs_lock, SX_LOCKED);
-        return (_vm_gpa_hold(vm, gpa, len, reqprot, cookie));
-}
-
-void
-vm_gpa_release(void *cookie)
-{
-        vm_page_t m = cookie;
-
-        vm_page_unwire(m, PQ_ACTIVE);
-}
-
 int
 vm_get_register(struct vcpu *vcpu, int reg, uint64_t *retval)
 {
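The new vm_alloc_vcpu() fast path is a double-checked publication pattern: an acquire load on the lock-free path pairs with a releasing store made under vm->vcpus_init_lock, and the vm->dying flag set by vm_disable_vcpu_creation() keeps new vCPUs from appearing while the VM is torn down. Below is a minimal userspace sketch of the same pattern using C11 atomics and a pthread mutex; widget_get() and friends are illustrative names invented for the example, not vmm code.

        #include <pthread.h>
        #include <stdatomic.h>
        #include <stdbool.h>
        #include <stdlib.h>

        #define NSLOTS 8

        struct widget { int id; };

        static _Atomic(struct widget *) widget_table[NSLOTS];
        static pthread_mutex_t init_lock = PTHREAD_MUTEX_INITIALIZER;
        static bool dying;

        /* Lazily allocate the widget for a slot; refuse once dying is set. */
        struct widget *
        widget_get(int slot)
        {
                struct widget *w;

                /* Fast path: acquire pairs with the releasing store below. */
                w = atomic_load_explicit(&widget_table[slot],
                    memory_order_acquire);
                if (w != NULL)
                        return (w);

                pthread_mutex_lock(&init_lock);
                w = atomic_load_explicit(&widget_table[slot],
                    memory_order_relaxed);
                if (w == NULL && !dying) {
                        w = malloc(sizeof(*w));
                        if (w != NULL) {
                                w->id = slot;
                                /* Publish only after full construction. */
                                atomic_store_explicit(&widget_table[slot], w,
                                    memory_order_release);
                        }
                }
                pthread_mutex_unlock(&init_lock);
                return (w);
        }

        /* Analogue of vm_disable_vcpu_creation(): block new allocations. */
        void
        widget_disable_creation(void)
        {
                pthread_mutex_lock(&init_lock);
                dying = true;
                pthread_mutex_unlock(&init_lock);
        }

The relaxed re-load under the mutex is safe because the mutex orders it against other writers; only the unlocked fast path needs acquire semantics, which is what the (struct vcpu *) cast around atomic_load_acq_ptr() buys in the patch.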
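MOD_LOAD now unwinds partial initialization: vmmdev_init() can fail, and if vmm_init() fails after vmmdev_init() succeeded, the device state is torn down before the handler returns. A standalone sketch of that error-handling shape, with subsys_a/subsys_b as placeholders rather than vmm functions:

        #include <stdio.h>

        static int subsys_a_init(void)    { return (0); }
        static int subsys_a_cleanup(void) { return (0); }
        static int subsys_b_init(void)    { return (-1); /* simulate failure */ }

        static int
        mod_load(void)
        {
                int error;

                error = subsys_a_init();
                if (error != 0)
                        return (error); /* nothing to unwind yet */
                error = subsys_b_init();
                if (error != 0)
                        (void)subsys_a_cleanup(); /* unwind partial init */
                return (error);
        }

        int
        main(void)
        {
                printf("mod_load: %d\n", mod_load());
                return (0);
        }

The same reasoning explains the MOD_UNLOAD hunk: if vmmops_modcleanup() fails after the device nodes are gone, the handler clears vmm_initialized so no new VMs can be created against half-torn-down state.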
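The VMM_STAT() counters move into this file now that the stat machinery lives in dev/vmm/vmm_stat.h; the macro both defines a counter and registers it so it can be enumerated later. A standalone analogue of that define-and-register idiom, using a GCC/Clang constructor in place of the kernel's registration machinery (MY_STAT, stat_list, and the counter names are invented for the example):

        #include <stdint.h>
        #include <stdio.h>

        struct stat_entry {
                const char *desc;
                uint64_t count;
                struct stat_entry *next;
        };

        static struct stat_entry *stat_list;

        /*
         * Define a counter and hook it onto stat_list before main() runs.
         * The tentative definition lets the constructor reference the
         * counter, and the final definition consumes the trailing ';'.
         */
        #define MY_STAT(var, desc_str)                                  \
                static struct stat_entry var;                           \
                __attribute__((constructor))                            \
                static void var##_register(void)                        \
                {                                                       \
                        var.next = stat_list;                           \
                        stat_list = &var;                               \
                }                                                       \
                static struct stat_entry var = { .desc = (desc_str) }

        MY_STAT(EXIT_COUNT, "total number of exits");
        MY_STAT(EXIT_IRQ, "number of exits for an irq");

        int
        main(void)
        {
                struct stat_entry *e;

                EXIT_COUNT.count++;
                EXIT_IRQ.count += 2;
                for (e = stat_list; e != NULL; e = e->next)
                        printf("%s: %llu\n", e->desc,
                            (unsigned long long)e->count);
                return (0);
        }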