Diffstat (limited to 'sys/arm64/vmm/vmm.c')
-rw-r--r--    sys/arm64/vmm/vmm.c    487
1 file changed, 86 insertions, 401 deletions
diff --git a/sys/arm64/vmm/vmm.c b/sys/arm64/vmm/vmm.c
index a2cc63448f19..3082d2941221 100644
--- a/sys/arm64/vmm/vmm.c
+++ b/sys/arm64/vmm/vmm.c
@@ -60,13 +60,14 @@
 #include <machine/vm.h>
 #include <machine/vmparam.h>
 #include <machine/vmm.h>
-#include <machine/vmm_dev.h>
 #include <machine/vmm_instruction_emul.h>
 
 #include <dev/pci/pcireg.h>
 
+#include <dev/vmm/vmm_dev.h>
+#include <dev/vmm/vmm_ktr.h>
+#include <dev/vmm/vmm_mem.h>
+#include <dev/vmm/vmm_stat.h>
+
-#include "vmm_ktr.h"
-#include "vmm_stat.h"
-
 #include "arm64.h"
 #include "mmu.h"
@@ -94,25 +95,6 @@ struct vcpu {
 #define vcpu_unlock(v)          mtx_unlock_spin(&((v)->mtx))
 #define vcpu_assert_locked(v)   mtx_assert(&((v)->mtx), MA_OWNED)
 
-struct mem_seg {
-        uint64_t        gpa;
-        size_t          len;
-        bool            wired;
-        bool            sysmem;
-        vm_object_t     object;
-};
-#define VM_MAX_MEMSEGS  3
-
-struct mem_map {
-        vm_paddr_t      gpa;
-        size_t          len;
-        vm_ooffset_t    segoff;
-        int             segid;
-        int             prot;
-        int             flags;
-};
-#define VM_MAX_MEMMAPS  4
-
 struct vmm_mmio_region {
         uint64_t start;
         uint64_t end;
@@ -141,11 +123,11 @@ struct vm {
         volatile cpuset_t active_cpus;  /* (i) active vcpus */
         volatile cpuset_t debug_cpus;   /* (i) vcpus stopped for debug */
         int             suspend;        /* (i) stop VM execution */
+        bool            dying;          /* (o) is dying */
         volatile cpuset_t suspended_cpus; /* (i) suspended vcpus */
         volatile cpuset_t halted_cpus;  /* (x) cpus in a hard halt */
-        struct mem_map  mem_maps[VM_MAX_MEMMAPS]; /* (i) guest address space */
-        struct mem_seg  mem_segs[VM_MAX_MEMSEGS]; /* (o) guest memory regions */
         struct vmspace  *vmspace;       /* (o) guest's address space */
+        struct vm_mem   mem;            /* (i) guest memory */
         char            name[VM_MAX_NAMELEN]; /* (o) virtual machine name */
         struct vcpu     **vcpu;         /* (i) guest vcpus */
         struct vmm_mmio_region mmio_region[VM_MAX_MMIO_REGIONS];
@@ -156,7 +138,6 @@ struct vm {
         uint16_t        cores;          /* (o) num of cores/socket */
         uint16_t        threads;        /* (o) num of threads/core */
         uint16_t        maxcpus;        /* (o) max pluggable cpus */
-        struct sx       mem_segs_lock;  /* (o) */
         struct sx       vcpus_init_lock; /* (o) */
 };
 
@@ -234,10 +215,25 @@ u_int vm_maxcpu;
 SYSCTL_UINT(_hw_vmm, OID_AUTO, maxcpu, CTLFLAG_RDTUN | CTLFLAG_NOFETCH,
     &vm_maxcpu, 0, "Maximum number of vCPUs");
 
-static void vm_free_memmap(struct vm *vm, int ident);
-static bool sysmem_mapping(struct vm *vm, struct mem_map *mm);
 static void vcpu_notify_event_locked(struct vcpu *vcpu);
 
+/* global statistics */
+VMM_STAT(VMEXIT_COUNT, "total number of vm exits");
+VMM_STAT(VMEXIT_UNKNOWN, "number of vmexits for the unknown exception");
+VMM_STAT(VMEXIT_WFI, "number of times wfi was intercepted");
+VMM_STAT(VMEXIT_WFE, "number of times wfe was intercepted");
+VMM_STAT(VMEXIT_HVC, "number of times hvc was intercepted");
+VMM_STAT(VMEXIT_MSR, "number of times msr/mrs was intercepted");
+VMM_STAT(VMEXIT_DATA_ABORT, "number of vmexits for a data abort");
+VMM_STAT(VMEXIT_INSN_ABORT, "number of vmexits for an instruction abort");
+VMM_STAT(VMEXIT_UNHANDLED_SYNC, "number of vmexits for an unhandled synchronous exception");
+VMM_STAT(VMEXIT_IRQ, "number of vmexits for an irq");
+VMM_STAT(VMEXIT_FIQ, "number of vmexits for an interrupt");
+VMM_STAT(VMEXIT_BRK, "number of vmexits for a breakpoint exception");
+VMM_STAT(VMEXIT_SS, "number of vmexits for a single-step exception");
+VMM_STAT(VMEXIT_UNHANDLED_EL2, "number of vmexits for an unhandled EL2 exception");
+VMM_STAT(VMEXIT_UNHANDLED, "number of vmexits for an unhandled exception");
+
 /*
  * Upper limit on vm_maxcpu. We could increase this to 28 bits, but this
  * is a safe value for now.
@@ -249,7 +245,8 @@ vmm_regs_init(struct vmm_regs *regs, const struct vmm_regs *masks)
 {
 #define _FETCH_KERN_REG(reg, field) do {                                \
         regs->field = vmm_arch_regs_masks.field;                        \
-        if (!get_kernel_reg_masked(reg, &regs->field, masks->field))    \
+        if (!get_kernel_reg_iss_masked(reg ## _ISS, &regs->field,       \
+            masks->field))                                              \
                 regs->field = 0;                                        \
 } while (0)
         _FETCH_KERN_REG(ID_AA64AFR0_EL1, id_aa64afr0);
@@ -315,6 +312,20 @@ vm_exitinfo(struct vcpu *vcpu)
 }
 
 static int
+vmm_unsupported_quirk(void)
+{
+        /*
+         * Known to not load on Ampere eMAG
+         * https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=285051
+         */
+        if (CPU_MATCH(CPU_IMPL_MASK | CPU_PART_MASK, CPU_IMPL_APM,
+            CPU_PART_EMAG8180, 0, 0))
+                return (ENXIO);
+
+        return (0);
+}
+
+static int
 vmm_init(void)
 {
         int error;
@@ -343,19 +354,29 @@ vmm_handler(module_t mod, int what, void *arg)
 
         switch (what) {
         case MOD_LOAD:
-                /* TODO: if (vmm_is_hw_supported()) { */
-                vmmdev_init();
+                error = vmm_unsupported_quirk();
+                if (error != 0)
+                        break;
+                error = vmmdev_init();
+                if (error != 0)
+                        break;
                 error = vmm_init();
                 if (error == 0)
                         vmm_initialized = true;
+                else
+                        (void)vmmdev_cleanup();
                 break;
         case MOD_UNLOAD:
-                /* TODO: if (vmm_is_hw_supported()) { */
                 error = vmmdev_cleanup();
                 if (error == 0 && vmm_initialized) {
                         error = vmmops_modcleanup();
-                        if (error)
+                        if (error) {
+                                /*
+                                 * Something bad happened - prevent new
+                                 * VMs from being created
+                                 */
                                 vmm_initialized = false;
+                        }
                 }
                 break;
         default:
@@ -376,8 +397,9 @@ static moduledata_t vmm_kmod = {
  *
  * - HYP initialization requires smp_rendezvous() and therefore must happen
  *   after SMP is fully functional (after SI_SUB_SMP).
+ * - vmm device initialization requires an initialized devfs.
  */
-DECLARE_MODULE(vmm, vmm_kmod, SI_SUB_SMP + 1, SI_ORDER_ANY);
+DECLARE_MODULE(vmm, vmm_kmod, MAX(SI_SUB_SMP, SI_SUB_DEVFS) + 1, SI_ORDER_ANY);
 MODULE_VERSION(vmm, 1);
 
 static void
@@ -405,6 +427,14 @@ vm_init(struct vm *vm, bool create)
         }
 }
 
+void
+vm_disable_vcpu_creation(struct vm *vm)
+{
+        sx_xlock(&vm->vcpus_init_lock);
+        vm->dying = true;
+        sx_xunlock(&vm->vcpus_init_lock);
+}
+
 struct vcpu *
 vm_alloc_vcpu(struct vm *vm, int vcpuid)
 {
@@ -417,13 +447,14 @@ vm_alloc_vcpu(struct vm *vm, int vcpuid)
         if (vcpuid >= vgic_max_cpu_count(vm->cookie))
                 return (NULL);
 
-        vcpu = atomic_load_ptr(&vm->vcpu[vcpuid]);
+        vcpu = (struct vcpu *)
+            atomic_load_acq_ptr((uintptr_t *)&vm->vcpu[vcpuid]);
         if (__predict_true(vcpu != NULL))
                 return (vcpu);
 
         sx_xlock(&vm->vcpus_init_lock);
         vcpu = vm->vcpu[vcpuid];
-        if (vcpu == NULL/* && !vm->dying*/) {
+        if (vcpu == NULL && !vm->dying) {
                 vcpu = vcpu_alloc(vm, vcpuid);
                 vcpu_init(vcpu);
 
@@ -473,7 +504,7 @@ vm_create(const char *name, struct vm **retvm)
         vm = malloc(sizeof(struct vm), M_VMM, M_WAITOK | M_ZERO);
         strcpy(vm->name, name);
         vm->vmspace = vmspace;
-        sx_init(&vm->mem_segs_lock, "vm mem_segs");
+        vm_mem_init(&vm->mem);
         sx_init(&vm->vcpus_init_lock, "vm vcpus");
 
         vm->sockets = 1;
@@ -522,11 +553,11 @@ vm_set_topology(struct vm *vm, uint16_t sockets, uint16_t cores,
 static void
 vm_cleanup(struct vm *vm, bool destroy)
 {
-        struct mem_map *mm;
         pmap_t pmap __diagused;
         int i;
 
         if (destroy) {
+                vm_xlock_memsegs(vm);
                 pmap = vmspace_pmap(vm->vmspace);
                 sched_pin();
                 PCPU_SET(curvmpmap, NULL);
@@ -534,7 +565,9 @@ vm_cleanup(struct vm *vm, bool destroy)
                 CPU_FOREACH(i) {
                         MPASS(cpuid_to_pcpu[i]->pc_curvmpmap != pmap);
                 }
-        }
+        } else
+                vm_assert_memseg_xlocked(vm);
+
         vgic_detach_from_vm(vm->cookie);
@@ -545,25 +578,9 @@ vm_cleanup(struct vm *vm, bool destroy)
 
         vmmops_cleanup(vm->cookie);
 
-        /*
-         * System memory is removed from the guest address space only when
-         * the VM is destroyed. This is because the mapping remains the same
-         * across VM reset.
-         *
-         * Device memory can be relocated by the guest (e.g. using PCI BARs)
-         * so those mappings are removed on a VM reset.
-         */
-        if (!destroy) {
-                for (i = 0; i < VM_MAX_MEMMAPS; i++) {
-                        mm = &vm->mem_maps[i];
-                        if (destroy || !sysmem_mapping(vm, mm))
-                                vm_free_memmap(vm, i);
-                }
-        }
-
+        vm_mem_cleanup(vm);
         if (destroy) {
-                for (i = 0; i < VM_MAX_MEMSEGS; i++)
-                        vm_free_memseg(vm, i);
+                vm_mem_destroy(vm);
 
                 vmmops_vmspace_free(vm->vmspace);
                 vm->vmspace = NULL;
@@ -572,7 +589,6 @@ vm_cleanup(struct vm *vm, bool destroy)
                         free(vm->vcpu[i], M_VMM);
                 free(vm->vcpu, M_VMM);
                 sx_destroy(&vm->vcpus_init_lock);
-                sx_destroy(&vm->mem_segs_lock);
         }
 }
 
@@ -608,290 +624,11 @@ vm_name(struct vm *vm)
         return (vm->name);
 }
 
-void
-vm_slock_memsegs(struct vm *vm)
-{
-        sx_slock(&vm->mem_segs_lock);
-}
-
-void
-vm_xlock_memsegs(struct vm *vm)
-{
-        sx_xlock(&vm->mem_segs_lock);
-}
-
-void
-vm_unlock_memsegs(struct vm *vm)
-{
-        sx_unlock(&vm->mem_segs_lock);
-}
-
-/*
- * Return 'true' if 'gpa' is allocated in the guest address space.
- *
- * This function is called in the context of a running vcpu which acts as
- * an implicit lock on 'vm->mem_maps[]'.
- */
-bool
-vm_mem_allocated(struct vcpu *vcpu, vm_paddr_t gpa)
-{
-        struct vm *vm = vcpu->vm;
-        struct mem_map *mm;
-        int i;
-
-#ifdef INVARIANTS
-        int hostcpu, state;
-        state = vcpu_get_state(vcpu, &hostcpu);
-        KASSERT(state == VCPU_RUNNING && hostcpu == curcpu,
-            ("%s: invalid vcpu state %d/%d", __func__, state, hostcpu));
-#endif
-
-        for (i = 0; i < VM_MAX_MEMMAPS; i++) {
-                mm = &vm->mem_maps[i];
-                if (mm->len != 0 && gpa >= mm->gpa && gpa < mm->gpa + mm->len)
-                        return (true);  /* 'gpa' is sysmem or devmem */
-        }
-
-        return (false);
-}
-
-int
-vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem)
-{
-        struct mem_seg *seg;
-        vm_object_t obj;
-
-        sx_assert(&vm->mem_segs_lock, SX_XLOCKED);
-
-        if (ident < 0 || ident >= VM_MAX_MEMSEGS)
-                return (EINVAL);
-
-        if (len == 0 || (len & PAGE_MASK))
-                return (EINVAL);
-
-        seg = &vm->mem_segs[ident];
-        if (seg->object != NULL) {
-                if (seg->len == len && seg->sysmem == sysmem)
-                        return (EEXIST);
-                else
-                        return (EINVAL);
-        }
-
-        obj = vm_object_allocate(OBJT_DEFAULT, len >> PAGE_SHIFT);
-        if (obj == NULL)
-                return (ENOMEM);
-
-        seg->len = len;
-        seg->object = obj;
-        seg->sysmem = sysmem;
-        return (0);
-}
-
-int
-vm_get_memseg(struct vm *vm, int ident, size_t *len, bool *sysmem,
-    vm_object_t *objptr)
-{
-        struct mem_seg *seg;
-
-        sx_assert(&vm->mem_segs_lock, SX_LOCKED);
-
-        if (ident < 0 || ident >= VM_MAX_MEMSEGS)
-                return (EINVAL);
-
-        seg = &vm->mem_segs[ident];
-        if (len)
-                *len = seg->len;
-        if (sysmem)
-                *sysmem = seg->sysmem;
-        if (objptr)
-                *objptr = seg->object;
-        return (0);
-}
-
-void
-vm_free_memseg(struct vm *vm, int ident)
-{
-        struct mem_seg *seg;
-
-        KASSERT(ident >= 0 && ident < VM_MAX_MEMSEGS,
-            ("%s: invalid memseg ident %d", __func__, ident));
-
-        seg = &vm->mem_segs[ident];
-        if (seg->object != NULL) {
-                vm_object_deallocate(seg->object);
-                bzero(seg, sizeof(struct mem_seg));
-        }
-}
-
-int
-vm_mmap_memseg(struct vm *vm, vm_paddr_t gpa, int segid, vm_ooffset_t first,
-    size_t len, int prot, int flags)
-{
-        struct mem_seg *seg;
-        struct mem_map *m, *map;
-        vm_ooffset_t last;
-        int i, error;
-
-        if (prot == 0 || (prot & ~(VM_PROT_ALL)) != 0)
-                return (EINVAL);
-
-        if (flags & ~VM_MEMMAP_F_WIRED)
-                return (EINVAL);
-
-        if (segid < 0 || segid >= VM_MAX_MEMSEGS)
-                return (EINVAL);
-
-        seg = &vm->mem_segs[segid];
-        if (seg->object == NULL)
-                return (EINVAL);
-
-        last = first + len;
-        if (first < 0 || first >= last || last > seg->len)
-                return (EINVAL);
-
-        if ((gpa | first | last) & PAGE_MASK)
-                return (EINVAL);
-
-        map = NULL;
-        for (i = 0; i < VM_MAX_MEMMAPS; i++) {
-                m = &vm->mem_maps[i];
-                if (m->len == 0) {
-                        map = m;
-                        break;
-                }
-        }
-
-        if (map == NULL)
-                return (ENOSPC);
-
-        error = vm_map_find(&vm->vmspace->vm_map, seg->object, first, &gpa,
-            len, 0, VMFS_NO_SPACE, prot, prot, 0);
-        if (error != KERN_SUCCESS)
-                return (EFAULT);
-
-        vm_object_reference(seg->object);
-
-        if (flags & VM_MEMMAP_F_WIRED) {
-                error = vm_map_wire(&vm->vmspace->vm_map, gpa, gpa + len,
-                    VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
-                if (error != KERN_SUCCESS) {
-                        vm_map_remove(&vm->vmspace->vm_map, gpa, gpa + len);
-                        return (error == KERN_RESOURCE_SHORTAGE ? ENOMEM :
-                            EFAULT);
-                }
-        }
-
-        map->gpa = gpa;
-        map->len = len;
-        map->segoff = first;
-        map->segid = segid;
-        map->prot = prot;
-        map->flags = flags;
-        return (0);
-}
-
-int
-vm_munmap_memseg(struct vm *vm, vm_paddr_t gpa, size_t len)
-{
-        struct mem_map *m;
-        int i;
-
-        for (i = 0; i < VM_MAX_MEMMAPS; i++) {
-                m = &vm->mem_maps[i];
-                if (m->gpa == gpa && m->len == len) {
-                        vm_free_memmap(vm, i);
-                        return (0);
-                }
-        }
-
-        return (EINVAL);
-}
-
-int
-vm_mmap_getnext(struct vm *vm, vm_paddr_t *gpa, int *segid,
-    vm_ooffset_t *segoff, size_t *len, int *prot, int *flags)
-{
-        struct mem_map *mm, *mmnext;
-        int i;
-
-        mmnext = NULL;
-        for (i = 0; i < VM_MAX_MEMMAPS; i++) {
-                mm = &vm->mem_maps[i];
-                if (mm->len == 0 || mm->gpa < *gpa)
-                        continue;
-                if (mmnext == NULL || mm->gpa < mmnext->gpa)
-                        mmnext = mm;
-        }
-
-        if (mmnext != NULL) {
-                *gpa = mmnext->gpa;
-                if (segid)
-                        *segid = mmnext->segid;
-                if (segoff)
-                        *segoff = mmnext->segoff;
-                if (len)
-                        *len = mmnext->len;
-                if (prot)
-                        *prot = mmnext->prot;
-                if (flags)
-                        *flags = mmnext->flags;
-                return (0);
-        } else {
-                return (ENOENT);
-        }
-}
-
-static void
-vm_free_memmap(struct vm *vm, int ident)
-{
-        struct mem_map *mm;
-        int error __diagused;
-
-        mm = &vm->mem_maps[ident];
-        if (mm->len) {
-                error = vm_map_remove(&vm->vmspace->vm_map, mm->gpa,
-                    mm->gpa + mm->len);
-                KASSERT(error == KERN_SUCCESS, ("%s: vm_map_remove error %d",
-                    __func__, error));
-                bzero(mm, sizeof(struct mem_map));
-        }
-}
-
-static __inline bool
-sysmem_mapping(struct vm *vm, struct mem_map *mm)
-{
-
-        if (mm->len != 0 && vm->mem_segs[mm->segid].sysmem)
-                return (true);
-        else
-                return (false);
-}
-
-vm_paddr_t
-vmm_sysmem_maxaddr(struct vm *vm)
-{
-        struct mem_map *mm;
-        vm_paddr_t maxaddr;
-        int i;
-
-        maxaddr = 0;
-        for (i = 0; i < VM_MAX_MEMMAPS; i++) {
-                mm = &vm->mem_maps[i];
-                if (sysmem_mapping(vm, mm)) {
-                        if (maxaddr < mm->gpa + mm->len)
-                                maxaddr = mm->gpa + mm->len;
-                }
-        }
-        return (maxaddr);
-}
-
 int
 vm_gla2gpa_nofault(struct vcpu *vcpu, struct vm_guest_paging *paging,
     uint64_t gla, int prot, uint64_t *gpa, int *is_fault)
 {
-
-        vmmops_gla2gpa(vcpu->cookie, paging, gla, prot, gpa, is_fault);
-        return (0);
+        return (vmmops_gla2gpa(vcpu->cookie, paging, gla, prot, gpa, is_fault));
 }
 
 static int
@@ -1319,6 +1056,18 @@ vcpu_notify_event(struct vcpu *vcpu)
         vcpu_unlock(vcpu);
 }
 
+struct vmspace *
+vm_vmspace(struct vm *vm)
+{
+        return (vm->vmspace);
+}
+
+struct vm_mem *
+vm_mem(struct vm *vm)
+{
+        return (&vm->mem);
+}
+
 static void
 restore_guest_fpustate(struct vcpu *vcpu)
 {
@@ -1506,70 +1255,6 @@ vcpu_get_state(struct vcpu *vcpu, int *hostcpu)
         return (state);
 }
 
-static void *
-_vm_gpa_hold(struct vm *vm, vm_paddr_t gpa, size_t len, int reqprot,
-    void **cookie)
-{
-        int i, count, pageoff;
-        struct mem_map *mm;
-        vm_page_t m;
-
-        pageoff = gpa & PAGE_MASK;
-        if (len > PAGE_SIZE - pageoff)
-                panic("vm_gpa_hold: invalid gpa/len: 0x%016lx/%lu", gpa, len);
-
-        count = 0;
-        for (i = 0; i < VM_MAX_MEMMAPS; i++) {
-                mm = &vm->mem_maps[i];
-                if (sysmem_mapping(vm, mm) && gpa >= mm->gpa &&
-                    gpa < mm->gpa + mm->len) {
-                        count = vm_fault_quick_hold_pages(&vm->vmspace->vm_map,
-                            trunc_page(gpa), PAGE_SIZE, reqprot, &m, 1);
-                        break;
-                }
-        }
-
-        if (count == 1) {
-                *cookie = m;
-                return ((void *)(PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)) + pageoff));
-        } else {
-                *cookie = NULL;
-                return (NULL);
-        }
-}
-
-void *
-vm_gpa_hold(struct vcpu *vcpu, vm_paddr_t gpa, size_t len, int reqprot,
-    void **cookie)
-{
-#ifdef INVARIANTS
-        /*
-         * The current vcpu should be frozen to ensure 'vm_memmap[]'
-         * stability.
-         */
-        int state = vcpu_get_state(vcpu, NULL);
-        KASSERT(state == VCPU_FROZEN, ("%s: invalid vcpu state %d",
-            __func__, state));
-#endif
-        return (_vm_gpa_hold(vcpu->vm, gpa, len, reqprot, cookie));
-}
-
-void *
-vm_gpa_hold_global(struct vm *vm, vm_paddr_t gpa, size_t len, int reqprot,
-    void **cookie)
-{
-        sx_assert(&vm->mem_segs_lock, SX_LOCKED);
-        return (_vm_gpa_hold(vm, gpa, len, reqprot, cookie));
-}
-
-void
-vm_gpa_release(void *cookie)
-{
-        vm_page_t m = cookie;
-
-        vm_page_unwire(m, PQ_ACTIVE);
-}
-
 int
 vm_get_register(struct vcpu *vcpu, int reg, uint64_t *retval)
 {
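The new vm_alloc_vcpu() fast path is a double-checked publication pattern: an acquire load on the lock-free path pairs with a releasing store made under vm->vcpus_init_lock, and the vm->dying flag set by vm_disable_vcpu_creation() keeps new vCPUs from appearing while the VM is torn down. Below is a minimal userspace sketch of the same pattern using C11 atomics and a pthread mutex; widget_get() and friends are illustrative names invented for the example, not vmm code.

        #include <pthread.h>
        #include <stdatomic.h>
        #include <stdbool.h>
        #include <stdlib.h>

        #define NSLOTS 8

        struct widget { int id; };

        static _Atomic(struct widget *) widget_table[NSLOTS];
        static pthread_mutex_t init_lock = PTHREAD_MUTEX_INITIALIZER;
        static bool dying;

        /* Lazily allocate the widget for a slot; refuse once dying is set. */
        struct widget *
        widget_get(int slot)
        {
                struct widget *w;

                /* Fast path: acquire pairs with the releasing store below. */
                w = atomic_load_explicit(&widget_table[slot],
                    memory_order_acquire);
                if (w != NULL)
                        return (w);

                pthread_mutex_lock(&init_lock);
                w = atomic_load_explicit(&widget_table[slot],
                    memory_order_relaxed);
                if (w == NULL && !dying) {
                        w = malloc(sizeof(*w));
                        if (w != NULL) {
                                w->id = slot;
                                /* Publish only after full construction. */
                                atomic_store_explicit(&widget_table[slot], w,
                                    memory_order_release);
                        }
                }
                pthread_mutex_unlock(&init_lock);
                return (w);
        }

        /* Analogue of vm_disable_vcpu_creation(): block new allocations. */
        void
        widget_disable_creation(void)
        {
                pthread_mutex_lock(&init_lock);
                dying = true;
                pthread_mutex_unlock(&init_lock);
        }

The relaxed re-load under the mutex is safe because the mutex orders it against other writers; only the unlocked fast path needs acquire semantics, which is what the (struct vcpu *) cast around atomic_load_acq_ptr() buys in the patch.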
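MOD_LOAD now unwinds partial initialization: vmmdev_init() can fail, and if vmm_init() fails after vmmdev_init() succeeded, the device state is torn down before the handler returns. A standalone sketch of that error-handling shape, with subsys_a/subsys_b as placeholders rather than vmm functions:

        #include <stdio.h>

        static int subsys_a_init(void)    { return (0); }
        static int subsys_a_cleanup(void) { return (0); }
        static int subsys_b_init(void)    { return (-1); /* simulate failure */ }

        static int
        mod_load(void)
        {
                int error;

                error = subsys_a_init();
                if (error != 0)
                        return (error); /* nothing to unwind yet */
                error = subsys_b_init();
                if (error != 0)
                        (void)subsys_a_cleanup(); /* unwind partial init */
                return (error);
        }

        int
        main(void)
        {
                printf("mod_load: %d\n", mod_load());
                return (0);
        }

The same reasoning explains the MOD_UNLOAD hunk: if vmmops_modcleanup() fails after the device nodes are gone, the handler clears vmm_initialized so no new VMs can be created against half-torn-down state.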
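The VMM_STAT() counters move into this file now that the stat machinery lives in dev/vmm/vmm_stat.h; the macro both defines a counter and registers it so it can be enumerated later. A standalone analogue of that define-and-register idiom, using a GCC/Clang constructor in place of the kernel's registration machinery (MY_STAT, stat_list, and the counter names are invented for the example):

        #include <stdint.h>
        #include <stdio.h>

        struct stat_entry {
                const char *desc;
                uint64_t count;
                struct stat_entry *next;
        };

        static struct stat_entry *stat_list;

        /*
         * Define a counter and hook it onto stat_list before main() runs.
         * The tentative definition lets the constructor reference the
         * counter, and the final definition consumes the trailing ';'.
         */
        #define MY_STAT(var, desc_str)                                  \
                static struct stat_entry var;                           \
                __attribute__((constructor))                            \
                static void var##_register(void)                        \
                {                                                       \
                        var.next = stat_list;                           \
                        stat_list = &var;                               \
                }                                                       \
                static struct stat_entry var = { .desc = (desc_str) }

        MY_STAT(EXIT_COUNT, "total number of exits");
        MY_STAT(EXIT_IRQ, "number of exits for an irq");

        int
        main(void)
        {
                struct stat_entry *e;

                EXIT_COUNT.count++;
                EXIT_IRQ.count += 2;
                for (e = stat_list; e != NULL; e = e->next)
                        printf("%s: %llu\n", e->desc,
                            (unsigned long long)e->count);
                return (0);
        }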