aboutsummaryrefslogtreecommitdiff
path: root/sys/amd64/vmm/vmm.c
diff options
context:
space:
mode:
Diffstat (limited to 'sys/amd64/vmm/vmm.c')
-rw-r--r--sys/amd64/vmm/vmm.c224
1 files changed, 167 insertions, 57 deletions
diff --git a/sys/amd64/vmm/vmm.c b/sys/amd64/vmm/vmm.c
index c42da02d0bf6..f7c59847140b 100644
--- a/sys/amd64/vmm/vmm.c
+++ b/sys/amd64/vmm/vmm.c
@@ -163,7 +163,6 @@ struct vm {
void *rendezvous_arg; /* (x) [r] rendezvous func/arg */
vm_rendezvous_func_t rendezvous_func;
struct mtx rendezvous_mtx; /* (o) rendezvous lock */
- struct vmspace *vmspace; /* (o) guest's address space */
struct vm_mem mem; /* (i) [m+v] guest memory */
char name[VM_MAX_NAMELEN+1]; /* (o) virtual machine name */
struct vcpu **vcpu; /* (o) guest vcpus */
@@ -201,7 +200,7 @@ vmmops_panic(void)
}
#define DEFINE_VMMOPS_IFUNC(ret_type, opname, args) \
- DEFINE_IFUNC(static, ret_type, vmmops_##opname, args) \
+ DEFINE_IFUNC(, ret_type, vmmops_##opname, args) \
{ \
if (vmm_is_intel()) \
return (vmm_ops_intel.opname); \
@@ -499,7 +498,7 @@ MODULE_VERSION(vmm, 1);
static void
vm_init(struct vm *vm, bool create)
{
- vm->cookie = vmmops_init(vm, vmspace_pmap(vm->vmspace));
+ vm->cookie = vmmops_init(vm, vmspace_pmap(vm_vmspace(vm)));
vm->iommu = NULL;
vm->vioapic = vioapic_init(vm);
vm->vhpet = vhpet_init(vm);
@@ -563,9 +562,9 @@ vm_alloc_vcpu(struct vm *vm, int vcpuid)
}
void
-vm_slock_vcpus(struct vm *vm)
+vm_lock_vcpus(struct vm *vm)
{
- sx_slock(&vm->vcpus_init_lock);
+ sx_xlock(&vm->vcpus_init_lock);
}
void
@@ -584,7 +583,7 @@ int
vm_create(const char *name, struct vm **retvm)
{
struct vm *vm;
- struct vmspace *vmspace;
+ int error;
/*
* If vmm.ko could not be successfully initialized then don't attempt
@@ -597,14 +596,13 @@ vm_create(const char *name, struct vm **retvm)
VM_MAX_NAMELEN + 1)
return (EINVAL);
- vmspace = vmmops_vmspace_alloc(0, VM_MAXUSER_ADDRESS_LA48);
- if (vmspace == NULL)
- return (ENOMEM);
-
vm = malloc(sizeof(struct vm), M_VM, M_WAITOK | M_ZERO);
+ error = vm_mem_init(&vm->mem, 0, VM_MAXUSER_ADDRESS_LA48);
+ if (error != 0) {
+ free(vm, M_VM);
+ return (error);
+ }
strcpy(vm->name, name);
- vm->vmspace = vmspace;
- vm_mem_init(&vm->mem);
mtx_init(&vm->rendezvous_mtx, "vm rendezvous lock", 0, MTX_DEF);
sx_init(&vm->vcpus_init_lock, "vm vcpus");
vm->vcpu = malloc(sizeof(*vm->vcpu) * vm_maxcpu, M_VM, M_WAITOK |
@@ -685,9 +683,6 @@ vm_cleanup(struct vm *vm, bool destroy)
if (destroy) {
vm_mem_destroy(vm);
- vmmops_vmspace_free(vm->vmspace);
- vm->vmspace = NULL;
-
free(vm->vcpu, M_VM);
sx_destroy(&vm->vcpus_init_lock);
mtx_destroy(&vm->rendezvous_mtx);
@@ -731,7 +726,7 @@ vm_map_mmio(struct vm *vm, vm_paddr_t gpa, size_t len, vm_paddr_t hpa)
{
vm_object_t obj;
- if ((obj = vmm_mmio_alloc(vm->vmspace, gpa, len, hpa)) == NULL)
+ if ((obj = vmm_mmio_alloc(vm_vmspace(vm), gpa, len, hpa)) == NULL)
return (ENOMEM);
else
return (0);
@@ -741,19 +736,21 @@ int
vm_unmap_mmio(struct vm *vm, vm_paddr_t gpa, size_t len)
{
- vmm_mmio_free(vm->vmspace, gpa, len);
+ vmm_mmio_free(vm_vmspace(vm), gpa, len);
return (0);
}
static int
vm_iommu_map(struct vm *vm)
{
+ pmap_t pmap;
vm_paddr_t gpa, hpa;
struct vm_mem_map *mm;
int error, i;
sx_assert(&vm->mem.mem_segs_lock, SX_LOCKED);
+ pmap = vmspace_pmap(vm_vmspace(vm));
for (i = 0; i < VM_MAX_MEMMAPS; i++) {
if (!vm_memseg_sysmem(vm, i))
continue;
@@ -767,7 +764,7 @@ vm_iommu_map(struct vm *vm)
mm->flags |= VM_MEMMAP_F_IOMMU;
for (gpa = mm->gpa; gpa < mm->gpa + mm->len; gpa += PAGE_SIZE) {
- hpa = pmap_extract(vmspace_pmap(vm->vmspace), gpa);
+ hpa = pmap_extract(pmap, gpa);
/*
* All mappings in the vmm vmspace must be
@@ -816,7 +813,7 @@ vm_iommu_unmap(struct vm *vm)
for (gpa = mm->gpa; gpa < mm->gpa + mm->len; gpa += PAGE_SIZE) {
KASSERT(vm_page_wired(PHYS_TO_VM_PAGE(pmap_extract(
- vmspace_pmap(vm->vmspace), gpa))),
+ vmspace_pmap(vm_vmspace(vm)), gpa))),
("vm_iommu_unmap: vm %p gpa %jx not wired",
vm, (uintmax_t)gpa));
iommu_remove_mapping(vm->iommu, gpa, PAGE_SIZE);
@@ -993,6 +990,54 @@ save_guest_fpustate(struct vcpu *vcpu)
static VMM_STAT(VCPU_IDLE_TICKS, "number of ticks vcpu was idle");
+/*
+ * Invoke the rendezvous function on the specified vcpu if applicable. Return
+ * true if the rendezvous is finished, false otherwise.
+ */
+static bool
+vm_rendezvous(struct vcpu *vcpu)
+{
+ struct vm *vm = vcpu->vm;
+ int vcpuid;
+
+ mtx_assert(&vcpu->vm->rendezvous_mtx, MA_OWNED);
+ KASSERT(vcpu->vm->rendezvous_func != NULL,
+ ("vm_rendezvous: no rendezvous pending"));
+
+ /* 'rendezvous_req_cpus' must be a subset of 'active_cpus' */
+ CPU_AND(&vm->rendezvous_req_cpus, &vm->rendezvous_req_cpus,
+ &vm->active_cpus);
+
+ vcpuid = vcpu->vcpuid;
+ if (CPU_ISSET(vcpuid, &vm->rendezvous_req_cpus) &&
+ !CPU_ISSET(vcpuid, &vm->rendezvous_done_cpus)) {
+ VMM_CTR0(vcpu, "Calling rendezvous func");
+ (*vm->rendezvous_func)(vcpu, vm->rendezvous_arg);
+ CPU_SET(vcpuid, &vm->rendezvous_done_cpus);
+ }
+ if (CPU_CMP(&vm->rendezvous_req_cpus,
+ &vm->rendezvous_done_cpus) == 0) {
+ VMM_CTR0(vcpu, "Rendezvous completed");
+ CPU_ZERO(&vm->rendezvous_req_cpus);
+ vm->rendezvous_func = NULL;
+ wakeup(&vm->rendezvous_func);
+ return (true);
+ }
+ return (false);
+}
+
+static void
+vcpu_wait_idle(struct vcpu *vcpu)
+{
+ KASSERT(vcpu->state != VCPU_IDLE, ("vcpu already idle"));
+
+ vcpu->reqidle = 1;
+ vcpu_notify_event_locked(vcpu, false);
+ VMM_CTR1(vcpu, "vcpu state change from %s to "
+ "idle requested", vcpu_state2str(vcpu->state));
+ msleep_spin(&vcpu->state, &vcpu->mtx, "vmstat", hz);
+}
+
static int
vcpu_set_state_locked(struct vcpu *vcpu, enum vcpu_state newstate,
bool from_idle)
@@ -1007,13 +1052,8 @@ vcpu_set_state_locked(struct vcpu *vcpu, enum vcpu_state newstate,
* ioctl() operating on a vcpu at any point.
*/
if (from_idle) {
- while (vcpu->state != VCPU_IDLE) {
- vcpu->reqidle = 1;
- vcpu_notify_event_locked(vcpu, false);
- VMM_CTR1(vcpu, "vcpu state change from %s to "
- "idle requested", vcpu_state2str(vcpu->state));
- msleep_spin(&vcpu->state, &vcpu->mtx, "vmstat", hz);
- }
+ while (vcpu->state != VCPU_IDLE)
+ vcpu_wait_idle(vcpu);
} else {
KASSERT(vcpu->state != VCPU_IDLE, ("invalid transition from "
"vcpu idle state"));
@@ -1065,6 +1105,95 @@ vcpu_set_state_locked(struct vcpu *vcpu, enum vcpu_state newstate,
return (0);
}
+/*
+ * Try to lock all of the vCPUs in the VM while taking care to avoid deadlocks
+ * with vm_smp_rendezvous().
+ *
+ * The complexity here suggests that the rendezvous mechanism needs a rethink.
+ */
+int
+vcpu_set_state_all(struct vm *vm, enum vcpu_state newstate)
+{
+ cpuset_t locked;
+ struct vcpu *vcpu;
+ int error, i;
+ uint16_t maxcpus;
+
+ KASSERT(newstate != VCPU_IDLE,
+ ("vcpu_set_state_all: invalid target state %d", newstate));
+
+ error = 0;
+ CPU_ZERO(&locked);
+ maxcpus = vm->maxcpus;
+
+ mtx_lock(&vm->rendezvous_mtx);
+restart:
+ if (vm->rendezvous_func != NULL) {
+ /*
+ * If we have a pending rendezvous, then the initiator may be
+ * blocked waiting for other vCPUs to execute the callback. The
+ * current thread may be a vCPU thread so we must not block
+ * waiting for the initiator, otherwise we get a deadlock.
+ * Thus, execute the callback on behalf of any idle vCPUs.
+ */
+ for (i = 0; i < maxcpus; i++) {
+ vcpu = vm_vcpu(vm, i);
+ if (vcpu == NULL)
+ continue;
+ vcpu_lock(vcpu);
+ if (vcpu->state == VCPU_IDLE) {
+ (void)vcpu_set_state_locked(vcpu, VCPU_FROZEN,
+ true);
+ CPU_SET(i, &locked);
+ }
+ if (CPU_ISSET(i, &locked)) {
+ /*
+ * We can safely execute the callback on this
+ * vCPU's behalf.
+ */
+ vcpu_unlock(vcpu);
+ (void)vm_rendezvous(vcpu);
+ vcpu_lock(vcpu);
+ }
+ vcpu_unlock(vcpu);
+ }
+ }
+
+ /*
+ * Now wait for remaining vCPUs to become idle. This may include the
+ * initiator of a rendezvous that is currently blocked on the rendezvous
+ * mutex.
+ */
+ CPU_FOREACH_ISCLR(i, &locked) {
+ if (i >= maxcpus)
+ break;
+ vcpu = vm_vcpu(vm, i);
+ if (vcpu == NULL)
+ continue;
+ vcpu_lock(vcpu);
+ while (vcpu->state != VCPU_IDLE) {
+ mtx_unlock(&vm->rendezvous_mtx);
+ vcpu_wait_idle(vcpu);
+ vcpu_unlock(vcpu);
+ mtx_lock(&vm->rendezvous_mtx);
+ if (vm->rendezvous_func != NULL)
+ goto restart;
+ vcpu_lock(vcpu);
+ }
+ error = vcpu_set_state_locked(vcpu, newstate, true);
+ vcpu_unlock(vcpu);
+ if (error != 0) {
+ /* Roll back state changes. */
+ CPU_FOREACH_ISSET(i, &locked)
+ (void)vcpu_set_state(vcpu, VCPU_IDLE, false);
+ break;
+ }
+ CPU_SET(i, &locked);
+ }
+ mtx_unlock(&vm->rendezvous_mtx);
+ return (error);
+}
+
static void
vcpu_require_state(struct vcpu *vcpu, enum vcpu_state newstate)
{
@@ -1086,36 +1215,23 @@ vcpu_require_state_locked(struct vcpu *vcpu, enum vcpu_state newstate)
static int
vm_handle_rendezvous(struct vcpu *vcpu)
{
- struct vm *vm = vcpu->vm;
+ struct vm *vm;
struct thread *td;
- int error, vcpuid;
- error = 0;
- vcpuid = vcpu->vcpuid;
td = curthread;
+ vm = vcpu->vm;
+
mtx_lock(&vm->rendezvous_mtx);
while (vm->rendezvous_func != NULL) {
- /* 'rendezvous_req_cpus' must be a subset of 'active_cpus' */
- CPU_AND(&vm->rendezvous_req_cpus, &vm->rendezvous_req_cpus, &vm->active_cpus);
-
- if (CPU_ISSET(vcpuid, &vm->rendezvous_req_cpus) &&
- !CPU_ISSET(vcpuid, &vm->rendezvous_done_cpus)) {
- VMM_CTR0(vcpu, "Calling rendezvous func");
- (*vm->rendezvous_func)(vcpu, vm->rendezvous_arg);
- CPU_SET(vcpuid, &vm->rendezvous_done_cpus);
- }
- if (CPU_CMP(&vm->rendezvous_req_cpus,
- &vm->rendezvous_done_cpus) == 0) {
- VMM_CTR0(vcpu, "Rendezvous completed");
- CPU_ZERO(&vm->rendezvous_req_cpus);
- vm->rendezvous_func = NULL;
- wakeup(&vm->rendezvous_func);
+ if (vm_rendezvous(vcpu))
break;
- }
+
VMM_CTR0(vcpu, "Wait for rendezvous completion");
mtx_sleep(&vm->rendezvous_func, &vm->rendezvous_mtx, 0,
"vmrndv", hz);
if (td_ast_pending(td, TDA_SUSPEND)) {
+ int error;
+
mtx_unlock(&vm->rendezvous_mtx);
error = thread_check_susp(td, true);
if (error != 0)
@@ -1249,7 +1365,7 @@ vm_handle_paging(struct vcpu *vcpu, bool *retu)
("vm_handle_paging: invalid fault_type %d", ftype));
if (ftype == VM_PROT_READ || ftype == VM_PROT_WRITE) {
- rv = pmap_emulate_accessed_dirty(vmspace_pmap(vm->vmspace),
+ rv = pmap_emulate_accessed_dirty(vmspace_pmap(vm_vmspace(vm)),
vme->u.paging.gpa, ftype);
if (rv == 0) {
VMM_CTR2(vcpu, "%s bit emulation for gpa %#lx",
@@ -1259,7 +1375,7 @@ vm_handle_paging(struct vcpu *vcpu, bool *retu)
}
}
- map = &vm->vmspace->vm_map;
+ map = &vm_vmspace(vm)->vm_map;
rv = vm_fault(map, vme->u.paging.gpa, ftype, VM_FAULT_NORMAL, NULL);
VMM_CTR3(vcpu, "vm_handle_paging rv = %d, gpa = %#lx, "
@@ -1560,7 +1676,7 @@ vm_run(struct vcpu *vcpu)
if (CPU_ISSET(vcpuid, &vm->suspended_cpus))
return (EINVAL);
- pmap = vmspace_pmap(vm->vmspace);
+ pmap = vmspace_pmap(vm_vmspace(vm));
vme = &vcpu->exitinfo;
evinfo.rptr = &vm->rendezvous_req_cpus;
evinfo.sptr = &vm->suspend;
@@ -2302,12 +2418,6 @@ vcpu_notify_event(struct vcpu *vcpu, bool lapic_intr)
vcpu_unlock(vcpu);
}
-struct vmspace *
-vm_vmspace(struct vm *vm)
-{
- return (vm->vmspace);
-}
-
struct vm_mem *
vm_mem(struct vm *vm)
{
@@ -2519,7 +2629,7 @@ vm_get_rescnt(struct vcpu *vcpu, struct vmm_stat_type *stat)
if (vcpu->vcpuid == 0) {
vmm_stat_set(vcpu, VMM_MEM_RESIDENT, PAGE_SIZE *
- vmspace_resident_count(vcpu->vm->vmspace));
+ vmspace_resident_count(vm_vmspace(vcpu->vm)));
}
}
@@ -2529,7 +2639,7 @@ vm_get_wiredcnt(struct vcpu *vcpu, struct vmm_stat_type *stat)
if (vcpu->vcpuid == 0) {
vmm_stat_set(vcpu, VMM_MEM_WIRED, PAGE_SIZE *
- pmap_wired_count(vmspace_pmap(vcpu->vm->vmspace)));
+ pmap_wired_count(vmspace_pmap(vm_vmspace(vcpu->vm))));
}
}