Diffstat (limited to 'sys/amd64/vmm/vmm.c')
-rw-r--r-- | sys/amd64/vmm/vmm.c | 224 |
1 file changed, 167 insertions, 57 deletions
diff --git a/sys/amd64/vmm/vmm.c b/sys/amd64/vmm/vmm.c
index c42da02d0bf6..f7c59847140b 100644
--- a/sys/amd64/vmm/vmm.c
+++ b/sys/amd64/vmm/vmm.c
@@ -163,7 +163,6 @@ struct vm {
 	void		*rendezvous_arg;	/* (x) [r] rendezvous func/arg */
 	vm_rendezvous_func_t rendezvous_func;
 	struct mtx	rendezvous_mtx;		/* (o) rendezvous lock */
-	struct vmspace	*vmspace;		/* (o) guest's address space */
 	struct vm_mem	mem;			/* (i) [m+v] guest memory */
 	char		name[VM_MAX_NAMELEN+1];	/* (o) virtual machine name */
 	struct vcpu	**vcpu;			/* (o) guest vcpus */
@@ -201,7 +200,7 @@ vmmops_panic(void)
 }
 
 #define	DEFINE_VMMOPS_IFUNC(ret_type, opname, args)			\
-    DEFINE_IFUNC(static, ret_type, vmmops_##opname, args)		\
+    DEFINE_IFUNC(, ret_type, vmmops_##opname, args)			\
     {									\
 	    if (vmm_is_intel())						\
 		    return (vmm_ops_intel.opname);			\
@@ -499,7 +498,7 @@ MODULE_VERSION(vmm, 1);
 static void
 vm_init(struct vm *vm, bool create)
 {
-	vm->cookie = vmmops_init(vm, vmspace_pmap(vm->vmspace));
+	vm->cookie = vmmops_init(vm, vmspace_pmap(vm_vmspace(vm)));
 	vm->iommu = NULL;
 	vm->vioapic = vioapic_init(vm);
 	vm->vhpet = vhpet_init(vm);
@@ -563,9 +562,9 @@ vm_alloc_vcpu(struct vm *vm, int vcpuid)
 }
 
 void
-vm_slock_vcpus(struct vm *vm)
+vm_lock_vcpus(struct vm *vm)
 {
-	sx_slock(&vm->vcpus_init_lock);
+	sx_xlock(&vm->vcpus_init_lock);
 }
 
 void
@@ -584,7 +583,7 @@ int
 vm_create(const char *name, struct vm **retvm)
 {
 	struct vm *vm;
-	struct vmspace *vmspace;
+	int error;
 
 	/*
 	 * If vmm.ko could not be successfully initialized then don't attempt
@@ -597,14 +596,13 @@ vm_create(const char *name, struct vm **retvm)
 	    VM_MAX_NAMELEN + 1)
 		return (EINVAL);
 
-	vmspace = vmmops_vmspace_alloc(0, VM_MAXUSER_ADDRESS_LA48);
-	if (vmspace == NULL)
-		return (ENOMEM);
-
 	vm = malloc(sizeof(struct vm), M_VM, M_WAITOK | M_ZERO);
+	error = vm_mem_init(&vm->mem, 0, VM_MAXUSER_ADDRESS_LA48);
+	if (error != 0) {
+		free(vm, M_VM);
+		return (error);
+	}
 	strcpy(vm->name, name);
-	vm->vmspace = vmspace;
-	vm_mem_init(&vm->mem);
 	mtx_init(&vm->rendezvous_mtx, "vm rendezvous lock", 0, MTX_DEF);
 	sx_init(&vm->vcpus_init_lock, "vm vcpus");
 	vm->vcpu = malloc(sizeof(*vm->vcpu) * vm_maxcpu, M_VM, M_WAITOK |
@@ -685,9 +683,6 @@ vm_cleanup(struct vm *vm, bool destroy)
 	if (destroy) {
 		vm_mem_destroy(vm);
 
-		vmmops_vmspace_free(vm->vmspace);
-		vm->vmspace = NULL;
-
 		free(vm->vcpu, M_VM);
 		sx_destroy(&vm->vcpus_init_lock);
 		mtx_destroy(&vm->rendezvous_mtx);
@@ -731,7 +726,7 @@ vm_map_mmio(struct vm *vm, vm_paddr_t gpa, size_t len, vm_paddr_t hpa)
 {
 	vm_object_t obj;
 
-	if ((obj = vmm_mmio_alloc(vm->vmspace, gpa, len, hpa)) == NULL)
+	if ((obj = vmm_mmio_alloc(vm_vmspace(vm), gpa, len, hpa)) == NULL)
 		return (ENOMEM);
 	else
 		return (0);
@@ -741,19 +736,21 @@ int
 vm_unmap_mmio(struct vm *vm, vm_paddr_t gpa, size_t len)
 {
 
-	vmm_mmio_free(vm->vmspace, gpa, len);
+	vmm_mmio_free(vm_vmspace(vm), gpa, len);
 	return (0);
 }
 
 static int
 vm_iommu_map(struct vm *vm)
 {
+	pmap_t pmap;
 	vm_paddr_t gpa, hpa;
 	struct vm_mem_map *mm;
 	int error, i;
 
 	sx_assert(&vm->mem.mem_segs_lock, SX_LOCKED);
 
+	pmap = vmspace_pmap(vm_vmspace(vm));
 	for (i = 0; i < VM_MAX_MEMMAPS; i++) {
 		if (!vm_memseg_sysmem(vm, i))
 			continue;
@@ -767,7 +764,7 @@ vm_iommu_map(struct vm *vm)
 		mm->flags |= VM_MEMMAP_F_IOMMU;
 
 		for (gpa = mm->gpa; gpa < mm->gpa + mm->len; gpa += PAGE_SIZE) {
-			hpa = pmap_extract(vmspace_pmap(vm->vmspace), gpa);
+			hpa = pmap_extract(pmap, gpa);
 
 			/*
 			 * All mappings in the vmm vmspace must be
@@ -816,7 +813,7 @@ vm_iommu_unmap(struct vm *vm)
 
 		for (gpa = mm->gpa; gpa < mm->gpa + mm->len; gpa += PAGE_SIZE) {
 			KASSERT(vm_page_wired(PHYS_TO_VM_PAGE(pmap_extract(
-			    vmspace_pmap(vm->vmspace), gpa))),
+			    vmspace_pmap(vm_vmspace(vm)), gpa))),
 			    ("vm_iommu_unmap: vm %p gpa %jx not wired",
 			    vm, (uintmax_t)gpa));
 			iommu_remove_mapping(vm->iommu, gpa, PAGE_SIZE);
@@ -993,6 +990,54 @@ save_guest_fpustate(struct vcpu *vcpu)
 
 static VMM_STAT(VCPU_IDLE_TICKS, "number of ticks vcpu was idle");
 
+/*
+ * Invoke the rendezvous function on the specified vcpu if applicable.  Return
+ * true if the rendezvous is finished, false otherwise.
+ */
+static bool
+vm_rendezvous(struct vcpu *vcpu)
+{
+	struct vm *vm = vcpu->vm;
+	int vcpuid;
+
+	mtx_assert(&vcpu->vm->rendezvous_mtx, MA_OWNED);
+	KASSERT(vcpu->vm->rendezvous_func != NULL,
+	    ("vm_rendezvous: no rendezvous pending"));
+
+	/* 'rendezvous_req_cpus' must be a subset of 'active_cpus' */
+	CPU_AND(&vm->rendezvous_req_cpus, &vm->rendezvous_req_cpus,
+	    &vm->active_cpus);
+
+	vcpuid = vcpu->vcpuid;
+	if (CPU_ISSET(vcpuid, &vm->rendezvous_req_cpus) &&
+	    !CPU_ISSET(vcpuid, &vm->rendezvous_done_cpus)) {
+		VMM_CTR0(vcpu, "Calling rendezvous func");
+		(*vm->rendezvous_func)(vcpu, vm->rendezvous_arg);
+		CPU_SET(vcpuid, &vm->rendezvous_done_cpus);
+	}
+	if (CPU_CMP(&vm->rendezvous_req_cpus,
+	    &vm->rendezvous_done_cpus) == 0) {
+		VMM_CTR0(vcpu, "Rendezvous completed");
+		CPU_ZERO(&vm->rendezvous_req_cpus);
+		vm->rendezvous_func = NULL;
+		wakeup(&vm->rendezvous_func);
+		return (true);
+	}
+	return (false);
+}
+
+static void
+vcpu_wait_idle(struct vcpu *vcpu)
+{
+	KASSERT(vcpu->state != VCPU_IDLE, ("vcpu already idle"));
+
+	vcpu->reqidle = 1;
+	vcpu_notify_event_locked(vcpu, false);
+	VMM_CTR1(vcpu, "vcpu state change from %s to "
+	    "idle requested", vcpu_state2str(vcpu->state));
+	msleep_spin(&vcpu->state, &vcpu->mtx, "vmstat", hz);
+}
+
 static int
 vcpu_set_state_locked(struct vcpu *vcpu, enum vcpu_state newstate,
     bool from_idle)
@@ -1007,13 +1052,8 @@ vcpu_set_state_locked(struct vcpu *vcpu, enum vcpu_state newstate,
 	 * ioctl() operating on a vcpu at any point.
 	 */
 	if (from_idle) {
-		while (vcpu->state != VCPU_IDLE) {
-			vcpu->reqidle = 1;
-			vcpu_notify_event_locked(vcpu, false);
-			VMM_CTR1(vcpu, "vcpu state change from %s to "
-			    "idle requested", vcpu_state2str(vcpu->state));
-			msleep_spin(&vcpu->state, &vcpu->mtx, "vmstat", hz);
-		}
+		while (vcpu->state != VCPU_IDLE)
+			vcpu_wait_idle(vcpu);
 	} else {
 		KASSERT(vcpu->state != VCPU_IDLE, ("invalid transition from "
 		    "vcpu idle state"));
@@ -1065,6 +1105,95 @@ vcpu_set_state_locked(struct vcpu *vcpu, enum vcpu_state newstate,
 	return (0);
 }
 
+/*
+ * Try to lock all of the vCPUs in the VM while taking care to avoid deadlocks
+ * with vm_smp_rendezvous().
+ *
+ * The complexity here suggests that the rendezvous mechanism needs a rethink.
+ */
+int
+vcpu_set_state_all(struct vm *vm, enum vcpu_state newstate)
+{
+	cpuset_t locked;
+	struct vcpu *vcpu;
+	int error, i;
+	uint16_t maxcpus;
+
+	KASSERT(newstate != VCPU_IDLE,
+	    ("vcpu_set_state_all: invalid target state %d", newstate));
+
+	error = 0;
+	CPU_ZERO(&locked);
+	maxcpus = vm->maxcpus;
+
+	mtx_lock(&vm->rendezvous_mtx);
+restart:
+	if (vm->rendezvous_func != NULL) {
+		/*
+		 * If we have a pending rendezvous, then the initiator may be
+		 * blocked waiting for other vCPUs to execute the callback.  The
+		 * current thread may be a vCPU thread so we must not block
+		 * waiting for the initiator, otherwise we get a deadlock.
+		 * Thus, execute the callback on behalf of any idle vCPUs.
+		 */
+		for (i = 0; i < maxcpus; i++) {
+			vcpu = vm_vcpu(vm, i);
+			if (vcpu == NULL)
+				continue;
+			vcpu_lock(vcpu);
+			if (vcpu->state == VCPU_IDLE) {
+				(void)vcpu_set_state_locked(vcpu, VCPU_FROZEN,
+				    true);
+				CPU_SET(i, &locked);
+			}
+			if (CPU_ISSET(i, &locked)) {
+				/*
+				 * We can safely execute the callback on this
+				 * vCPU's behalf.
+				 */
+				vcpu_unlock(vcpu);
+				(void)vm_rendezvous(vcpu);
+				vcpu_lock(vcpu);
+			}
+			vcpu_unlock(vcpu);
+		}
+	}
+
+	/*
+	 * Now wait for remaining vCPUs to become idle.  This may include the
+	 * initiator of a rendezvous that is currently blocked on the rendezvous
+	 * mutex.
+	 */
+	CPU_FOREACH_ISCLR(i, &locked) {
+		if (i >= maxcpus)
+			break;
+		vcpu = vm_vcpu(vm, i);
+		if (vcpu == NULL)
+			continue;
+		vcpu_lock(vcpu);
+		while (vcpu->state != VCPU_IDLE) {
+			mtx_unlock(&vm->rendezvous_mtx);
+			vcpu_wait_idle(vcpu);
+			vcpu_unlock(vcpu);
+			mtx_lock(&vm->rendezvous_mtx);
+			if (vm->rendezvous_func != NULL)
+				goto restart;
+			vcpu_lock(vcpu);
+		}
+		error = vcpu_set_state_locked(vcpu, newstate, true);
+		vcpu_unlock(vcpu);
+		if (error != 0) {
+			/* Roll back state changes. */
+			CPU_FOREACH_ISSET(i, &locked)
+				(void)vcpu_set_state(vcpu, VCPU_IDLE, false);
+			break;
+		}
+		CPU_SET(i, &locked);
+	}
+	mtx_unlock(&vm->rendezvous_mtx);
+	return (error);
+}
+
 static void
 vcpu_require_state(struct vcpu *vcpu, enum vcpu_state newstate)
 {
@@ -1086,36 +1215,23 @@ vcpu_require_state_locked(struct vcpu *vcpu, enum vcpu_state newstate)
 static int
 vm_handle_rendezvous(struct vcpu *vcpu)
 {
-	struct vm *vm = vcpu->vm;
+	struct vm *vm;
 	struct thread *td;
-	int error, vcpuid;
 
-	error = 0;
-	vcpuid = vcpu->vcpuid;
 	td = curthread;
+	vm = vcpu->vm;
+
 	mtx_lock(&vm->rendezvous_mtx);
 	while (vm->rendezvous_func != NULL) {
-		/* 'rendezvous_req_cpus' must be a subset of 'active_cpus' */
-		CPU_AND(&vm->rendezvous_req_cpus, &vm->rendezvous_req_cpus, &vm->active_cpus);
-
-		if (CPU_ISSET(vcpuid, &vm->rendezvous_req_cpus) &&
-		    !CPU_ISSET(vcpuid, &vm->rendezvous_done_cpus)) {
-			VMM_CTR0(vcpu, "Calling rendezvous func");
-			(*vm->rendezvous_func)(vcpu, vm->rendezvous_arg);
-			CPU_SET(vcpuid, &vm->rendezvous_done_cpus);
-		}
-		if (CPU_CMP(&vm->rendezvous_req_cpus,
-		    &vm->rendezvous_done_cpus) == 0) {
-			VMM_CTR0(vcpu, "Rendezvous completed");
-			CPU_ZERO(&vm->rendezvous_req_cpus);
-			vm->rendezvous_func = NULL;
-			wakeup(&vm->rendezvous_func);
+		if (vm_rendezvous(vcpu))
 			break;
-		}
+
 		VMM_CTR0(vcpu, "Wait for rendezvous completion");
 		mtx_sleep(&vm->rendezvous_func, &vm->rendezvous_mtx, 0,
 		    "vmrndv", hz);
 		if (td_ast_pending(td, TDA_SUSPEND)) {
+			int error;
+
 			mtx_unlock(&vm->rendezvous_mtx);
 			error = thread_check_susp(td, true);
 			if (error != 0)
@@ -1249,7 +1365,7 @@ vm_handle_paging(struct vcpu *vcpu, bool *retu)
 	    ("vm_handle_paging: invalid fault_type %d", ftype));
 
 	if (ftype == VM_PROT_READ || ftype == VM_PROT_WRITE) {
-		rv = pmap_emulate_accessed_dirty(vmspace_pmap(vm->vmspace),
+		rv = pmap_emulate_accessed_dirty(vmspace_pmap(vm_vmspace(vm)),
 		    vme->u.paging.gpa, ftype);
 		if (rv == 0) {
 			VMM_CTR2(vcpu, "%s bit emulation for gpa %#lx",
@@ -1259,7 +1375,7 @@ vm_handle_paging(struct vcpu *vcpu, bool *retu)
 		}
 	}
 
-	map = &vm->vmspace->vm_map;
+	map = &vm_vmspace(vm)->vm_map;
 	rv = vm_fault(map, vme->u.paging.gpa, ftype, VM_FAULT_NORMAL, NULL);
 
 	VMM_CTR3(vcpu, "vm_handle_paging rv = %d, gpa = %#lx, "
@@ -1560,7 +1676,7 @@ vm_run(struct vcpu *vcpu)
 	if (CPU_ISSET(vcpuid, &vm->suspended_cpus))
 		return (EINVAL);
 
-	pmap = vmspace_pmap(vm->vmspace);
+	pmap = vmspace_pmap(vm_vmspace(vm));
 	vme = &vcpu->exitinfo;
 	evinfo.rptr = &vm->rendezvous_req_cpus;
 	evinfo.sptr = &vm->suspend;
@@ -2302,12 +2418,6 @@ vcpu_notify_event(struct vcpu *vcpu, bool lapic_intr)
 	vcpu_unlock(vcpu);
 }
 
-struct vmspace *
-vm_vmspace(struct vm *vm)
-{
-	return (vm->vmspace);
-}
-
 struct vm_mem *
 vm_mem(struct vm *vm)
 {
@@ -2519,7 +2629,7 @@ vm_get_rescnt(struct vcpu *vcpu, struct vmm_stat_type *stat)
 
 	if (vcpu->vcpuid == 0) {
 		vmm_stat_set(vcpu, VMM_MEM_RESIDENT, PAGE_SIZE *
-		    vmspace_resident_count(vcpu->vm->vmspace));
+		    vmspace_resident_count(vm_vmspace(vcpu->vm)));
 	}
 }
 
@@ -2529,7 +2639,7 @@ vm_get_wiredcnt(struct vcpu *vcpu, struct vmm_stat_type *stat)
 
 	if (vcpu->vcpuid == 0) {
 		vmm_stat_set(vcpu, VMM_MEM_WIRED, PAGE_SIZE *
-		    pmap_wired_count(vmspace_pmap(vcpu->vm->vmspace)));
+		    pmap_wired_count(vmspace_pmap(vm_vmspace(vcpu->vm))));
 	}
 }
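
The heart of the refactoring above is vm_rendezvous(), which centralizes the bookkeeping that vm_handle_rendezvous() previously did inline: clip 'rendezvous_req_cpus' to 'active_cpus', run the callback once per requested vCPU, and declare the rendezvous complete when the done set covers the request set. As a minimal sketch of that set logic, the standalone userspace program below models it with uint64_t masks in place of cpuset_t and omits the kernel's locking and wakeups; 'struct rendezvous' and 'rendezvous_step' are hypothetical names for illustration, not part of vmm(4).

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/*
 * Model of vm_rendezvous()'s bookkeeping.  uint64_t masks stand in for
 * cpuset_t; bit i represents vCPU i.
 */
struct rendezvous {
	uint64_t active;	/* active_cpus */
	uint64_t req;		/* rendezvous_req_cpus */
	uint64_t done;		/* rendezvous_done_cpus */
};

/* One vCPU visits the rendezvous; returns true once it is finished. */
static bool
rendezvous_step(struct rendezvous *r, int vcpuid)
{
	uint64_t bit = UINT64_C(1) << vcpuid;

	/* 'rendezvous_req_cpus' must be a subset of 'active_cpus'. */
	r->req &= r->active;

	/* Run the callback at most once per requested vCPU. */
	if ((r->req & bit) != 0 && (r->done & bit) == 0) {
		/* ... the kernel calls vm->rendezvous_func here ... */
		r->done |= bit;
	}

	/* Complete once every requested vCPU has run the callback. */
	return ((r->req & ~r->done) == 0);
}

int
main(void)
{
	/* Three active vCPUs; the rendezvous targets vCPUs 1 and 2. */
	struct rendezvous r = { .active = 0x7, .req = 0x6, .done = 0x0 };
	bool finished;

	finished = rendezvous_step(&r, 1);	/* vCPU 1 runs the callback */
	assert(!finished);
	finished = rendezvous_step(&r, 2);	/* vCPU 2 completes it */
	assert(finished);
	printf("rendezvous complete\n");
	return (0);
}

In the kernel, the completion path additionally clears the request set, sets rendezvous_func to NULL, and wakes sleepers on &vm->rendezvous_func. Because that logic now lives in one function that runs with rendezvous_mtx held, the new vcpu_set_state_all() can execute a pending rendezvous callback on behalf of idle vCPUs instead of blocking on the initiator, which is how it avoids the deadlock described in its comment.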