8 files changed, 366 insertions, 224 deletions
diff --git a/sys/amd64/acpica/acpi_wakeup.c b/sys/amd64/acpica/acpi_wakeup.c
index 99565fbb69ca..8cada2f4f911 100644
--- a/sys/amd64/acpica/acpi_wakeup.c
+++ b/sys/amd64/acpica/acpi_wakeup.c
@@ -74,7 +74,7 @@ extern int		acpi_susp_bounce;
 extern struct susppcb	**susppcbs;
 static cpuset_t		suspcpus;
 
-static void		acpi_stop_beep(void *);
+static void		acpi_stop_beep(void *, enum power_stype);
 
 static int		acpi_wakeup_ap(struct acpi_softc *, int);
 static void		acpi_wakeup_cpus(struct acpi_softc *);
@@ -88,7 +88,7 @@ static void		acpi_wakeup_cpus(struct acpi_softc *);
 } while (0)
 
 static void
-acpi_stop_beep(void *arg)
+acpi_stop_beep(void *arg, enum power_stype stype)
 {
 
 	if (acpi_resume_beep != 0)
diff --git a/sys/amd64/amd64/elf_machdep.c b/sys/amd64/amd64/elf_machdep.c
index 6cc2d58bbbcc..933f1ac0051f 100644
--- a/sys/amd64/amd64/elf_machdep.c
+++ b/sys/amd64/amd64/elf_machdep.c
@@ -179,7 +179,7 @@ freebsd_brand_info_la57_img_compat(const struct image_params *imgp,
 	return (!prefer_uva_la48);
 }
 
-static Elf64_Brandinfo freebsd_brand_info_la48 = {
+static const Elf64_Brandinfo freebsd_brand_info_la48 = {
 	.brand		= ELFOSABI_FREEBSD,
 	.machine	= EM_X86_64,
 	.compat_3_brand	= "FreeBSD",
@@ -190,7 +190,7 @@ static Elf64_Brandinfo freebsd_brand_info_la48 = {
 	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE,
 };
 
-static Elf64_Brandinfo freebsd_brand_info_la57 = {
+static const Elf64_Brandinfo freebsd_brand_info_la57 = {
 	.brand		= ELFOSABI_FREEBSD,
 	.machine	= EM_X86_64,
 	.compat_3_brand	= "FreeBSD",
@@ -216,7 +216,7 @@ sysinit_register_elf64_brand_entries(void *arg __unused)
 SYSINIT(elf64, SI_SUB_EXEC, SI_ORDER_FIRST,
     sysinit_register_elf64_brand_entries, NULL);
 
-static Elf64_Brandinfo freebsd_brand_oinfo = {
+static const Elf64_Brandinfo freebsd_brand_oinfo = {
 	.brand		= ELFOSABI_FREEBSD,
 	.machine	= EM_X86_64,
 	.compat_3_brand	= "FreeBSD",
@@ -226,11 +226,10 @@ static Elf64_Brandinfo freebsd_brand_oinfo = {
 	.brand_note	= &elf64_freebsd_brandnote,
 	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE
 };
-
-SYSINIT(oelf64, SI_SUB_EXEC, SI_ORDER_ANY,
+C_SYSINIT(oelf64, SI_SUB_EXEC, SI_ORDER_ANY,
     (sysinit_cfunc_t)elf64_insert_brand_entry, &freebsd_brand_oinfo);
 
-static Elf64_Brandinfo kfreebsd_brand_info = {
+static const Elf64_Brandinfo kfreebsd_brand_info = {
 	.brand		= ELFOSABI_FREEBSD,
 	.machine	= EM_X86_64,
 	.compat_3_brand	= "FreeBSD",
@@ -240,8 +239,7 @@ static Elf64_Brandinfo kfreebsd_brand_info = {
 	.brand_note	= &elf64_kfreebsd_brandnote,
 	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE_MANDATORY
 };
-
-SYSINIT(kelf64, SI_SUB_EXEC, SI_ORDER_ANY,
+C_SYSINIT(kelf64, SI_SUB_EXEC, SI_ORDER_ANY,
     (sysinit_cfunc_t)elf64_insert_brand_entry, &kfreebsd_brand_info);
 
 void
diff --git a/sys/amd64/include/vmm.h b/sys/amd64/include/vmm.h
index e35119af8572..ad67510fecf3 100644
--- a/sys/amd64/include/vmm.h
+++ b/sys/amd64/include/vmm.h
@@ -170,55 +170,63 @@ struct vm_eventinfo {
 	int	*iptr;		/* reqidle cookie */
 };
 
-typedef int	(*vmm_init_func_t)(int ipinum);
-typedef int	(*vmm_cleanup_func_t)(void);
-typedef void	(*vmm_suspend_func_t)(void);
-typedef void	(*vmm_resume_func_t)(void);
-typedef void *	(*vmi_init_func_t)(struct vm *vm, struct pmap *pmap);
-typedef int	(*vmi_run_func_t)(void *vcpui, register_t rip,
-		    struct pmap *pmap, struct vm_eventinfo *info);
-typedef void	(*vmi_cleanup_func_t)(void *vmi);
-typedef void *	(*vmi_vcpu_init_func_t)(void *vmi, struct vcpu *vcpu,
-		    int vcpu_id);
-typedef void	(*vmi_vcpu_cleanup_func_t)(void *vcpui);
-typedef int	(*vmi_get_register_t)(void *vcpui, int num, uint64_t *retval);
-typedef int	(*vmi_set_register_t)(void *vcpui, int num, uint64_t val);
-typedef int	(*vmi_get_desc_t)(void *vcpui, int num, struct seg_desc *desc);
-typedef int	(*vmi_set_desc_t)(void *vcpui, int num, struct seg_desc *desc);
-typedef int	(*vmi_get_cap_t)(void *vcpui, int num, int *retval);
-typedef int	(*vmi_set_cap_t)(void *vcpui, int num, int val);
-typedef struct vmspace * (*vmi_vmspace_alloc)(vm_offset_t min, vm_offset_t max);
-typedef void	(*vmi_vmspace_free)(struct vmspace *vmspace);
-typedef struct vlapic * (*vmi_vlapic_init)(void *vcpui);
-typedef void	(*vmi_vlapic_cleanup)(struct vlapic *vlapic);
-typedef int	(*vmi_snapshot_vcpu_t)(void *vcpui, struct vm_snapshot_meta *meta);
-typedef int	(*vmi_restore_tsc_t)(void *vcpui, uint64_t now);
+#define	DECLARE_VMMOPS_FUNC(ret_type, opname, args)		\
+	typedef ret_type (*vmmops_##opname##_t) args;		\
+	ret_type vmmops_##opname args
+
+DECLARE_VMMOPS_FUNC(int, modinit, (int ipinum));
+DECLARE_VMMOPS_FUNC(int, modcleanup, (void));
+DECLARE_VMMOPS_FUNC(void, modresume, (void));
+DECLARE_VMMOPS_FUNC(void, modsuspend, (void));
+DECLARE_VMMOPS_FUNC(void *, init, (struct vm *vm, struct pmap *pmap));
+DECLARE_VMMOPS_FUNC(int, run, (void *vcpui, register_t pc,
+    struct pmap *pmap, struct vm_eventinfo *info));
+DECLARE_VMMOPS_FUNC(void, cleanup, (void *vmi));
+DECLARE_VMMOPS_FUNC(void *, vcpu_init, (void *vmi, struct vcpu *vcpu,
+    int vcpu_id));
+DECLARE_VMMOPS_FUNC(void, vcpu_cleanup, (void *vcpui));
+DECLARE_VMMOPS_FUNC(int, getreg, (void *vcpui, int num, uint64_t *retval));
+DECLARE_VMMOPS_FUNC(int, setreg, (void *vcpui, int num, uint64_t val));
+DECLARE_VMMOPS_FUNC(int, getdesc, (void *vcpui, int num,
+    struct seg_desc *desc));
+DECLARE_VMMOPS_FUNC(int, setdesc, (void *vcpui, int num,
+    struct seg_desc *desc));
+DECLARE_VMMOPS_FUNC(int, getcap, (void *vcpui, int num, int *retval));
+DECLARE_VMMOPS_FUNC(int, setcap, (void *vcpui, int num, int val));
+DECLARE_VMMOPS_FUNC(struct vmspace *, vmspace_alloc,
+    (vm_offset_t min, vm_offset_t max));
+DECLARE_VMMOPS_FUNC(void, vmspace_free, (struct vmspace *vmspace));
+DECLARE_VMMOPS_FUNC(struct vlapic *, vlapic_init, (void *vcpui));
+DECLARE_VMMOPS_FUNC(void, vlapic_cleanup, (struct vlapic *vlapic));
+DECLARE_VMMOPS_FUNC(int, vcpu_snapshot, (void *vcpui,
+    struct vm_snapshot_meta *meta));
+DECLARE_VMMOPS_FUNC(int, restore_tsc, (void *vcpui, uint64_t now));
 
 struct vmm_ops {
-	vmm_init_func_t		modinit;	/* module wide initialization */
-	vmm_cleanup_func_t	modcleanup;
-	vmm_resume_func_t	modsuspend;
-	vmm_resume_func_t	modresume;
-
-	vmi_init_func_t		init;		/* vm-specific initialization */
-	vmi_run_func_t		run;
-	vmi_cleanup_func_t	cleanup;
-	vmi_vcpu_init_func_t	vcpu_init;
-	vmi_vcpu_cleanup_func_t	vcpu_cleanup;
-	vmi_get_register_t	getreg;
-	vmi_set_register_t	setreg;
-	vmi_get_desc_t		getdesc;
-	vmi_set_desc_t		setdesc;
-	vmi_get_cap_t		getcap;
-	vmi_set_cap_t		setcap;
-	vmi_vmspace_alloc	vmspace_alloc;
-	vmi_vmspace_free	vmspace_free;
-	vmi_vlapic_init		vlapic_init;
-	vmi_vlapic_cleanup	vlapic_cleanup;
+	vmmops_modinit_t	modinit;	/* module wide initialization */
+	vmmops_modcleanup_t	modcleanup;
+	vmmops_modsuspend_t	modsuspend;
+	vmmops_modresume_t	modresume;
+
+	vmmops_init_t		init;		/* vm-specific initialization */
+	vmmops_run_t		run;
+	vmmops_cleanup_t	cleanup;
+	vmmops_vcpu_init_t	vcpu_init;
+	vmmops_vcpu_cleanup_t	vcpu_cleanup;
+	vmmops_getreg_t		getreg;
+	vmmops_setreg_t		setreg;
+	vmmops_getdesc_t	getdesc;
+	vmmops_setdesc_t	setdesc;
+	vmmops_getcap_t		getcap;
+	vmmops_setcap_t		setcap;
+	vmmops_vmspace_alloc_t	vmspace_alloc;
+	vmmops_vmspace_free_t	vmspace_free;
+	vmmops_vlapic_init_t	vlapic_init;
+	vmmops_vlapic_cleanup_t	vlapic_cleanup;
 
 	/* checkpoint operations */
-	vmi_snapshot_vcpu_t	vcpu_snapshot;
-	vmi_restore_tsc_t	restore_tsc;
+	vmmops_vcpu_snapshot_t	vcpu_snapshot;
+	vmmops_restore_tsc_t	restore_tsc;
 };
 
 extern const struct vmm_ops vmm_ops_intel;
@@ -229,7 +237,7 @@ extern u_int vm_maxcpu;			/* maximum virtual cpus */
 int vm_create(const char *name, struct vm **retvm);
 struct vcpu *vm_alloc_vcpu(struct vm *vm, int vcpuid);
 void vm_disable_vcpu_creation(struct vm *vm);
-void vm_slock_vcpus(struct vm *vm);
+void vm_lock_vcpus(struct vm *vm);
 void vm_unlock_vcpus(struct vm *vm);
 void vm_destroy(struct vm *vm);
 int vm_reinit(struct vm *vm);
@@ -354,6 +362,7 @@ enum vcpu_state {
 };
 
 int vcpu_set_state(struct vcpu *vcpu, enum vcpu_state state, bool from_idle);
+int vcpu_set_state_all(struct vm *vm, enum vcpu_state state);
 enum vcpu_state vcpu_get_state(struct vcpu *vcpu, int *hostcpu);
 
 static int __inline
@@ -375,7 +384,6 @@ vcpu_should_yield(struct vcpu *vcpu)
 
 void *vcpu_stats(struct vcpu *vcpu);
 void vcpu_notify_event(struct vcpu *vcpu, bool lapic_intr);
-struct vmspace *vm_vmspace(struct vm *vm);
 struct vm_mem *vm_mem(struct vm *vm);
 struct vatpic *vm_atpic(struct vm *vm);
 struct vatpit *vm_atpit(struct vm *vm);
diff --git a/sys/amd64/linux/linux_sysvec.c b/sys/amd64/linux/linux_sysvec.c
index c8579c5da4ad..890cf01c46a0 100644
--- a/sys/amd64/linux/linux_sysvec.c
+++ b/sys/amd64/linux/linux_sysvec.c
@@ -857,7 +857,7 @@ linux_vdso_reloc(char *mapping, Elf_Addr offset)
 	}
 }
 
-static Elf_Brandnote linux64_brandnote = {
+static const Elf_Brandnote linux64_brandnote = {
 	.hdr.n_namesz	= sizeof(GNU_ABI_VENDOR),
 	.hdr.n_descsz	= 16,
 	.hdr.n_type	= 1,
@@ -866,7 +866,7 @@ static Elf_Brandnote linux64_brandnote = {
 	.trans_osrel	= linux_trans_osrel
 };
 
-static Elf64_Brandinfo linux_glibc2brand = {
+static const Elf64_Brandinfo linux_glibc2brand = {
 	.brand		= ELFOSABI_LINUX,
 	.machine	= EM_X86_64,
 	.compat_3_brand	= "Linux",
@@ -877,7 +877,7 @@ static Elf64_Brandinfo linux_glibc2brand = {
 	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE
 };
 
-static Elf64_Brandinfo linux_glibc2brandshort = {
+static const Elf64_Brandinfo linux_glibc2brandshort = {
 	.brand		= ELFOSABI_LINUX,
 	.machine	= EM_X86_64,
 	.compat_3_brand	= "Linux",
@@ -888,7 +888,7 @@ static Elf64_Brandinfo linux_glibc2brandshort = {
 	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE
 };
 
-static Elf64_Brandinfo linux_muslbrand = {
+static const Elf64_Brandinfo linux_muslbrand = {
 	.brand		= ELFOSABI_LINUX,
 	.machine	= EM_X86_64,
 	.compat_3_brand	= "Linux",
@@ -900,7 +900,7 @@ static Elf64_Brandinfo linux_muslbrand = {
 			    LINUX_BI_FUTEX_REQUEUE
 };
 
-static Elf64_Brandinfo *linux_brandlist[] = {
+static const Elf64_Brandinfo *linux_brandlist[] = {
 	&linux_glibc2brand,
 	&linux_glibc2brandshort,
 	&linux_muslbrand,
@@ -910,7 +910,7 @@ static Elf64_Brandinfo *linux_brandlist[] = {
 static int
 linux64_elf_modevent(module_t mod, int type, void *data)
 {
-	Elf64_Brandinfo **brandinfo;
+	const Elf64_Brandinfo **brandinfo;
 	int error;
 	struct linux_ioctl_handler **lihp;
 
diff --git a/sys/amd64/linux32/linux32_sysvec.c b/sys/amd64/linux32/linux32_sysvec.c
index 8fac626f9053..735ebb151017 100644
--- a/sys/amd64/linux32/linux32_sysvec.c
+++ b/sys/amd64/linux32/linux32_sysvec.c
@@ -954,7 +954,7 @@ linux_vdso_reloc(char *mapping, Elf_Addr offset)
 	}
 }
 
-static Elf_Brandnote linux32_brandnote = {
+static const Elf_Brandnote linux32_brandnote = {
 	.hdr.n_namesz	= sizeof(GNU_ABI_VENDOR),
 	.hdr.n_descsz	= 16,	/* XXX at least 16 */
 	.hdr.n_type	= 1,
@@ -963,7 +963,7 @@ static Elf_Brandnote linux32_brandnote = {
 	.trans_osrel	= linux_trans_osrel
 };
 
-static Elf32_Brandinfo linux_brand = {
+static const Elf32_Brandinfo linux_brand = {
 	.brand		= ELFOSABI_LINUX,
 	.machine	= EM_386,
 	.compat_3_brand	= "Linux",
@@ -974,7 +974,7 @@ static Elf32_Brandinfo linux_brand = {
 	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE
 };
 
-static Elf32_Brandinfo linux_glibc2brand = {
+static const Elf32_Brandinfo linux_glibc2brand = {
 	.brand		= ELFOSABI_LINUX,
 	.machine	= EM_386,
 	.compat_3_brand	= "Linux",
@@ -985,7 +985,7 @@ static Elf32_Brandinfo linux_glibc2brand = {
 	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE
 };
 
-static Elf32_Brandinfo linux_muslbrand = {
+static const Elf32_Brandinfo linux_muslbrand = {
 	.brand		= ELFOSABI_LINUX,
 	.machine	= EM_386,
 	.compat_3_brand	= "Linux",
@@ -997,7 +997,7 @@ static Elf32_Brandinfo linux_muslbrand = {
 			    LINUX_BI_FUTEX_REQUEUE
 };
 
-static Elf32_Brandinfo *linux_brandlist[] = {
+static const Elf32_Brandinfo *linux_brandlist[] = {
 	&linux_brand,
 	&linux_glibc2brand,
 	&linux_muslbrand,
@@ -1007,7 +1007,7 @@ static Elf32_Brandinfo *linux_brandlist[] = {
 static int
 linux_elf_modevent(module_t mod, int type, void *data)
 {
-	Elf32_Brandinfo **brandinfo;
+	const Elf32_Brandinfo **brandinfo;
 	int error;
 	struct linux_ioctl_handler **lihp;
 
diff --git a/sys/amd64/pt/pt.c b/sys/amd64/pt/pt.c
index c7b75767680a..6b2296de049c 100644
--- a/sys/amd64/pt/pt.c
+++ b/sys/amd64/pt/pt.c
@@ -42,15 +42,15 @@
  */
 
 #include <sys/systm.h>
+#include <sys/bus.h>
 #include <sys/hwt.h>
+#include <sys/interrupt.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/module.h>
 #include <sys/mutex.h>
-#include <sys/sdt.h>
 #include <sys/smp.h>
-#include <sys/taskqueue.h>
 
 #include <vm/vm.h>
 #include <vm/vm_page.h>
@@ -94,12 +94,7 @@
 
 MALLOC_DEFINE(M_PT, "pt", "Intel Processor Trace");
 
-SDT_PROVIDER_DEFINE(pt);
-SDT_PROBE_DEFINE(pt, , , topa__intr);
-
-TASKQUEUE_FAST_DEFINE_THREAD(pt);
-
-static void pt_send_buffer_record(void *arg, int pending __unused);
+static void pt_send_buffer_record(void *arg);
 static int pt_topa_intr(struct trapframe *tf);
 
 /*
@@ -122,29 +117,24 @@ struct pt_buffer {
 	size_t size;
 	struct mtx lock; /* Lock for fields below. */
 	vm_offset_t offset;
-	uint64_t wrap_count;
-	int curpage;
 };
 
 struct pt_ctx {
 	int id;
 	struct pt_buffer buf; /* ToPA buffer metadata */
-	struct task task;     /* ToPA buffer notification task */
 	struct hwt_context *hwt_ctx;
 	uint8_t *save_area; /* PT XSAVE area */
 };
 /* PT tracing contexts used for CPU mode. */
 static struct pt_ctx *pt_pcpu_ctx;
 
-enum pt_cpu_state {
-	PT_DISABLED = 0,
-	PT_STOPPED,
-	PT_ACTIVE
-};
+enum pt_cpu_state { PT_INACTIVE = 0, PT_ACTIVE };
 
 static struct pt_cpu {
 	struct pt_ctx *ctx;	 /* active PT tracing context */
 	enum pt_cpu_state state; /* used as part of trace stop protocol */
+	void *swi_cookie;	 /* Software interrupt handler context */
+	int in_pcint_handler;	 /* ToPA PMI handler in-progress flag */
 } *pt_pcpu;
 
 /*
@@ -199,31 +189,28 @@ static __inline void
 pt_update_buffer(struct pt_buffer *buf)
 {
 	uint64_t reg;
-	int curpage;
+	uint64_t offset;
 
 	/* Update buffer offset. */
 	reg = rdmsr(MSR_IA32_RTIT_OUTPUT_MASK_PTRS);
-	curpage = (reg & PT_TOPA_PAGE_MASK) >> PT_TOPA_PAGE_SHIFT;
-	mtx_lock_spin(&buf->lock);
-	/* Check if the output wrapped. */
-	if (buf->curpage > curpage)
-		buf->wrap_count++;
-	buf->curpage = curpage;
-	buf->offset = reg >> 32;
-	mtx_unlock_spin(&buf->lock);
-
-	dprintf("%s: wrap_cnt: %lu, curpage: %d, offset: %zu\n", __func__,
-	    buf->wrap_count, buf->curpage, buf->offset);
+	offset = ((reg & PT_TOPA_PAGE_MASK) >> PT_TOPA_PAGE_SHIFT) * PAGE_SIZE;
+	offset += (reg >> 32);
+
+	atomic_store_rel_64(&buf->offset, offset);
 }
 
 static __inline void
 pt_fill_buffer_record(int id, struct pt_buffer *buf,
     struct hwt_record_entry *rec)
 {
+	vm_offset_t offset;
+
+	offset = atomic_load_acq_64(&buf->offset);
+
 	rec->record_type = HWT_RECORD_BUFFER;
 	rec->buf_id = id;
-	rec->curpage = buf->curpage;
-	rec->offset = buf->offset + (buf->wrap_count * buf->size);
+	rec->curpage = offset / PAGE_SIZE;
+	rec->offset = offset & PAGE_MASK;
 }
 
 /*
@@ -273,9 +260,9 @@ pt_cpu_start(void *dummy)
 	MPASS(cpu->ctx != NULL);
 
 	dprintf("%s: curcpu %d\n", __func__, curcpu);
+	pt_cpu_set_state(curcpu, PT_ACTIVE);
 	load_cr4(rcr4() | CR4_XSAVE);
 	wrmsr(MSR_IA32_RTIT_STATUS, 0);
-	pt_cpu_set_state(curcpu, PT_ACTIVE);
 	pt_cpu_toggle_local(cpu->ctx->save_area, true);
 }
 
@@ -291,16 +278,16 @@ pt_cpu_stop(void *dummy)
 	struct pt_cpu *cpu;
 	struct pt_ctx *ctx;
 
-	/* Shutdown may occur before PT gets properly configured. */
-	if (pt_cpu_get_state(curcpu) == PT_DISABLED)
-		return;
-
 	cpu = &pt_pcpu[curcpu];
 	ctx = cpu->ctx;
-	MPASS(ctx != NULL);
-	dprintf("%s: curcpu %d\n", __func__, curcpu);
 
-	pt_cpu_set_state(curcpu, PT_STOPPED);
+	dprintf("%s: curcpu %d\n", __func__, curcpu);
+	/* Shutdown may occur before PT gets properly configured. */
+	if (ctx == NULL) {
+		dprintf("%s: missing context on cpu %d; bailing\n", __func__,
+		    curcpu);
+		return;
+	}
 	pt_cpu_toggle_local(cpu->ctx->save_area, false);
 	pt_update_buffer(&ctx->buf);
 }
@@ -406,13 +393,11 @@ pt_init_ctx(struct pt_ctx *pt_ctx, struct hwt_vm *vm, int ctx_id)
 		return (ENOMEM);
 	dprintf("%s: preparing ToPA buffer\n", __func__);
 	if (pt_topa_prepare(pt_ctx, vm) != 0) {
-		dprintf("%s: failed to prepare ToPA buffer\n", __func__);
 		free(pt_ctx->save_area, M_PT);
 		return (ENOMEM);
 	}
 
 	pt_ctx->id = ctx_id;
-	TASK_INIT(&pt_ctx->task, 0, pt_send_buffer_record, pt_ctx);
 
 	return (0);
 }
@@ -426,7 +411,6 @@ pt_deinit_ctx(struct pt_ctx *pt_ctx)
 	if (pt_ctx->save_area != NULL)
 		free(pt_ctx->save_area, M_PT);
 	memset(pt_ctx, 0, sizeof(*pt_ctx));
-	pt_ctx->buf.topa_hw = NULL;
 }
 
 /*
@@ -519,7 +503,6 @@ pt_backend_configure(struct hwt_context *ctx, int cpu_id, int thread_id)
 	    XSTATE_XCOMP_BV_COMPACT;
 	pt_ext->rtit_ctl |= RTIT_CTL_TRACEEN;
 	pt_pcpu[cpu_id].ctx = pt_ctx;
-	pt_cpu_set_state(cpu_id, PT_STOPPED);
 
 	return (0);
 }
@@ -549,12 +532,19 @@ pt_backend_disable(struct hwt_context *ctx, int cpu_id)
 
 	if (ctx->mode == HWT_MODE_CPU)
 		return;
-
 	KASSERT(curcpu == cpu_id,
 	    ("%s: attempting to disable PT on another cpu", __func__));
+
+	cpu = &pt_pcpu[cpu_id];
+
+	dprintf("%s: waiting for cpu %d to exit interrupt handler\n", __func__,
+	    cpu_id);
+	pt_cpu_set_state(cpu_id, PT_INACTIVE);
+	while (atomic_cmpset_int(&cpu->in_pcint_handler, 1, 0))
+		;
+
 	pt_cpu_stop(NULL);
 	CPU_CLR(cpu_id, &ctx->cpu_map);
-	cpu = &pt_pcpu[cpu_id];
 	cpu->ctx = NULL;
 }
 
@@ -564,14 +554,14 @@ pt_backend_disable(struct hwt_context *ctx, int cpu_id)
 static int
 pt_backend_enable_smp(struct hwt_context *ctx)
 {
-
 	dprintf("%s\n", __func__);
+
+	KASSERT(ctx->mode == HWT_MODE_CPU,
+	    ("%s: should only be used for CPU mode", __func__));
 	if (ctx->mode == HWT_MODE_CPU &&
 	    atomic_swap_32(&cpu_mode_ctr, 1) != 0)
 		return (-1);
 
-	KASSERT(ctx->mode == HWT_MODE_CPU,
-	    ("%s: should only be used for CPU mode", __func__));
 	smp_rendezvous_cpus(ctx->cpu_map, NULL, pt_cpu_start, NULL, NULL);
 
 	return (0);
@@ -583,6 +573,7 @@ pt_backend_enable_smp(struct hwt_context *ctx)
 static int
 pt_backend_disable_smp(struct hwt_context *ctx)
 {
+	struct pt_cpu *cpu;
 
 	dprintf("%s\n", __func__);
 	if (ctx->mode == HWT_MODE_CPU &&
@@ -593,6 +584,14 @@ pt_backend_disable_smp(struct hwt_context *ctx)
 		dprintf("%s: empty cpu map\n", __func__);
 		return (-1);
 	}
+	CPU_FOREACH_ISSET(cpu_id, &ctx->cpu_map) {
+		cpu = &pt_pcpu[cpu_id];
+		dprintf("%s: waiting for cpu %d to exit interrupt handler\n",
+		    __func__, cpu_id);
+		pt_cpu_set_state(cpu_id, PT_INACTIVE);
+		while (atomic_cmpset_int(&cpu->in_pcint_handler, 1, 0))
+			;
+	}
 	smp_rendezvous_cpus(ctx->cpu_map, NULL, pt_cpu_stop, NULL, NULL);
 
 	return (0);
@@ -611,13 +610,13 @@ pt_backend_init(struct hwt_context *ctx)
 	int error;
 
 	dprintf("%s\n", __func__);
-	if (ctx->mode == HWT_MODE_CPU) {
-		TAILQ_FOREACH(hwt_cpu, &ctx->cpus, next) {
-			error = pt_init_ctx(&pt_pcpu_ctx[hwt_cpu->cpu_id],
-			    hwt_cpu->vm, hwt_cpu->cpu_id);
-			if (error)
-				return (error);
-		}
+	if (ctx->mode != HWT_MODE_CPU)
+		return (0);
+	TAILQ_FOREACH(hwt_cpu, &ctx->cpus, next) {
+		error = pt_init_ctx(&pt_pcpu_ctx[hwt_cpu->cpu_id], hwt_cpu->vm,
+		    hwt_cpu->cpu_id);
+		if (error)
+			return (error);
 	}
 
 	return (0);
@@ -647,20 +646,16 @@ pt_backend_deinit(struct hwt_context *ctx)
 			pt_deinit_ctx(pt_ctx);
 		}
 	} else {
-		CPU_FOREACH(cpu_id) {
-			if (!CPU_ISSET(cpu_id, &ctx->cpu_map))
+		CPU_FOREACH_ISSET(cpu_id, &ctx->cpu_map) {
+			if (pt_pcpu[cpu_id].ctx == NULL)
 				continue;
-			if (pt_pcpu[cpu_id].ctx != NULL) {
-				KASSERT(pt_pcpu[cpu_id].ctx ==
-					&pt_pcpu_ctx[cpu_id],
-				    ("%s: CPU mode tracing with non-cpu mode PT"
-				     "context active",
-					__func__));
-				pt_pcpu[cpu_id].ctx = NULL;
-			}
-			pt_ctx = &pt_pcpu_ctx[cpu_id];
-			pt_deinit_ctx(pt_ctx);
-			memset(&pt_pcpu[cpu_id], 0, sizeof(struct pt_cpu));
+			KASSERT(pt_pcpu[cpu_id].ctx == &pt_pcpu_ctx[cpu_id],
+			    ("%s: CPU mode tracing with non-cpu mode PT"
+			     "context active",
+				__func__));
+			pt_deinit_ctx(pt_pcpu[cpu_id].ctx);
+			pt_pcpu[cpu_id].ctx = NULL;
+			atomic_set_int(&pt_pcpu[cpu_id].in_pcint_handler, 0);
 		}
 	}
 
@@ -675,15 +670,15 @@ pt_backend_read(struct hwt_vm *vm, int *curpage, vm_offset_t *curpage_offset,
     uint64_t *data)
 {
 	struct pt_buffer *buf;
+	uint64_t offset;
 
 	if (vm->ctx->mode == HWT_MODE_THREAD)
 		buf = &((struct pt_ctx *)vm->thr->private)->buf;
 	else
 		buf = &pt_pcpu[vm->cpu->cpu_id].ctx->buf;
-	mtx_lock_spin(&buf->lock);
-	*curpage = buf->curpage;
-	*curpage_offset = buf->offset + (buf->wrap_count * vm->ctx->bufsize);
-	mtx_unlock_spin(&buf->lock);
+	offset = atomic_load_acq_64(&buf->offset);
+	*curpage = offset / PAGE_SIZE;
+	*curpage_offset = offset & PAGE_MASK;
 
 	return (0);
 }
@@ -762,15 +757,13 @@ static struct hwt_backend backend = {
- * Used as a taskqueue routine from the ToPA interrupt handler.
+ * Software interrupt handler scheduled from the ToPA interrupt handler.
  */
 static void
-pt_send_buffer_record(void *arg, int pending __unused)
+pt_send_buffer_record(void *arg)
 {
+	struct pt_cpu *cpu = (struct pt_cpu *)arg;
 	struct hwt_record_entry record;
-	struct pt_ctx *ctx = (struct pt_ctx *)arg;
 
-	/* Prepare buffer record. */
-	mtx_lock_spin(&ctx->buf.lock);
+	struct pt_ctx *ctx = cpu->ctx;
 	pt_fill_buffer_record(ctx->id, &ctx->buf, &record);
-	mtx_unlock_spin(&ctx->buf.lock);
 	hwt_record_ctx(ctx->hwt_ctx, &record, M_ZERO | M_NOWAIT);
 }
 static void
@@ -795,36 +788,40 @@ static int
 pt_topa_intr(struct trapframe *tf)
 {
 	struct pt_buffer *buf;
+	struct pt_cpu *cpu;
 	struct pt_ctx *ctx;
 	uint64_t reg;
 
-	SDT_PROBE0(pt, , , topa__intr);
-
-	if (pt_cpu_get_state(curcpu) != PT_ACTIVE) {
-		return (0);
-	}
+	cpu = &pt_pcpu[curcpu];
 	reg = rdmsr(MSR_IA_GLOBAL_STATUS);
 	if ((reg & GLOBAL_STATUS_FLAG_TRACETOPAPMI) == 0) {
-		/* ACK spurious or leftover interrupt. */
 		pt_topa_status_clear();
+		return (0);
+	}
+
+	if (pt_cpu_get_state(curcpu) != PT_ACTIVE) {
 		return (1);
 	}
+	atomic_set_int(&cpu->in_pcint_handler, 1);
 
-	ctx = pt_pcpu[curcpu].ctx;
+	ctx = cpu->ctx;
+	KASSERT(ctx != NULL,
+	    ("%s: cpu %d: ToPA PMI interrupt without an active context",
+		__func__, curcpu));
 	buf = &ctx->buf;
 	KASSERT(buf->topa_hw != NULL,
-	    ("%s: ToPA PMI interrupt with invalid buffer", __func__));
-
+	    ("%s: cpu %d: ToPA PMI interrupt with invalid buffer", __func__,
+		curcpu));
 	pt_cpu_toggle_local(ctx->save_area, false);
 	pt_update_buffer(buf);
 	pt_topa_status_clear();
-	taskqueue_enqueue_flags(taskqueue_pt, &ctx->task,
-	    TASKQUEUE_FAIL_IF_PENDING);
 
 	if (pt_cpu_get_state(curcpu) == PT_ACTIVE) {
+		swi_sched(cpu->swi_cookie, SWI_FROMNMI);
 		pt_cpu_toggle_local(ctx->save_area, true);
 		lapic_reenable_pcint();
 	}
+	atomic_store_rel_int(&cpu->in_pcint_handler, 0);
 	return (1);
 }
 
@@ -839,7 +836,7 @@ static int
 pt_init(void)
 {
 	u_int cp[4];
-	int error;
+	int error, i;
 
 	dprintf("pt: Enumerating part 1\n");
 	cpuid_count(CPUID_PT_LEAF, 0, cp);
@@ -869,20 +866,38 @@ pt_init(void)
 	pt_pcpu_ctx = mallocarray(mp_ncpus, sizeof(struct pt_ctx), M_PT,
 	    M_ZERO | M_WAITOK);
 
+	for (i = 0; i < mp_ncpus; i++) {
+		error = swi_add(&clk_intr_event, "pt", pt_send_buffer_record,
+		    &pt_pcpu[i], SWI_CLOCK, INTR_MPSAFE,
+		    &pt_pcpu[i].swi_cookie);
+		if (error != 0) {
+			dprintf(
+			    "%s: failed to add interrupt handler for cpu: %d\n",
+			    __func__, error);
+			goto err;
+		}
+	}
+
 	nmi_register_handler(pt_topa_intr);
-	if (!lapic_enable_pcint()) {
-		nmi_remove_handler(pt_topa_intr);
-		hwt_backend_unregister(&backend);
-		free(pt_pcpu, M_PT);
-		free(pt_pcpu_ctx, M_PT);
-		pt_pcpu = NULL;
-		pt_pcpu_ctx = NULL;
+	if (lapic_enable_pcint()) {
+		initialized = true;
+		return (0);
+	} else
 		printf("pt: failed to setup interrupt line\n");
-		return (error);
+err:
+	nmi_remove_handler(pt_topa_intr);
+	hwt_backend_unregister(&backend);
+
+	for (i = 0; i < mp_ncpus; i++) {
+		if (pt_pcpu[i].swi_cookie != 0)
+			swi_remove(pt_pcpu[i].swi_cookie);
 	}
-	initialized = true;
+	free(pt_pcpu, M_PT);
+	free(pt_pcpu_ctx, M_PT);
+	pt_pcpu = NULL;
+	pt_pcpu_ctx = NULL;
 
-	return (0);
+	return (error);
 }
 
 /*
@@ -941,14 +956,24 @@ pt_supported(void)
 static void
 pt_deinit(void)
 {
+	int i;
+	struct pt_cpu *cpu;
+
 	if (!initialized)
 		return;
 	nmi_remove_handler(pt_topa_intr);
 	lapic_disable_pcint();
 	hwt_backend_unregister(&backend);
+
+	for (i = 0; i < mp_ncpus; i++) {
+		cpu = &pt_pcpu[i];
+		swi_remove(cpu->swi_cookie);
+	}
+
 	free(pt_pcpu, M_PT);
 	free(pt_pcpu_ctx, M_PT);
 	pt_pcpu = NULL;
+	pt_pcpu_ctx = NULL;
 	initialized = false;
 }
 
diff --git a/sys/amd64/vmm/vmm.c b/sys/amd64/vmm/vmm.c
index c42da02d0bf6..f7c59847140b 100644
--- a/sys/amd64/vmm/vmm.c
+++ b/sys/amd64/vmm/vmm.c
@@ -163,7 +163,6 @@ struct vm {
 	void		*rendezvous_arg;	/* (x) [r] rendezvous func/arg */
 	vm_rendezvous_func_t rendezvous_func;
 	struct mtx	rendezvous_mtx;		/* (o) rendezvous lock */
-	struct vmspace	*vmspace;		/* (o) guest's address space */
 	struct vm_mem	mem;			/* (i) [m+v] guest memory */
 	char		name[VM_MAX_NAMELEN+1];	/* (o) virtual machine name */
 	struct vcpu	**vcpu;			/* (o) guest vcpus */
@@ -201,7 +200,7 @@ vmmops_panic(void)
 }
 
 #define	DEFINE_VMMOPS_IFUNC(ret_type, opname, args)			\
-    DEFINE_IFUNC(static, ret_type, vmmops_##opname, args)		\
+    DEFINE_IFUNC(, ret_type, vmmops_##opname, args)			\
     {									\
     	if (vmm_is_intel())						\
     		return (vmm_ops_intel.opname);				\
@@ -499,7 +498,7 @@ MODULE_VERSION(vmm, 1);
 static void
 vm_init(struct vm *vm, bool create)
 {
-	vm->cookie = vmmops_init(vm, vmspace_pmap(vm->vmspace));
+	vm->cookie = vmmops_init(vm, vmspace_pmap(vm_vmspace(vm)));
 	vm->iommu = NULL;
 	vm->vioapic = vioapic_init(vm);
 	vm->vhpet = vhpet_init(vm);
@@ -563,9 +562,9 @@ vm_alloc_vcpu(struct vm *vm, int vcpuid)
 }
 
 void
-vm_slock_vcpus(struct vm *vm)
+vm_lock_vcpus(struct vm *vm)
 {
-	sx_slock(&vm->vcpus_init_lock);
+	sx_xlock(&vm->vcpus_init_lock);
 }
 
 void
@@ -584,7 +583,7 @@ int
 vm_create(const char *name, struct vm **retvm)
 {
 	struct vm *vm;
-	struct vmspace *vmspace;
+	int error;
 
 	/*
 	 * If vmm.ko could not be successfully initialized then don't attempt
@@ -597,14 +596,13 @@ vm_create(const char *name, struct vm **retvm)
 	    VM_MAX_NAMELEN + 1)
 		return (EINVAL);
 
-	vmspace = vmmops_vmspace_alloc(0, VM_MAXUSER_ADDRESS_LA48);
-	if (vmspace == NULL)
-		return (ENOMEM);
-
 	vm = malloc(sizeof(struct vm), M_VM, M_WAITOK | M_ZERO);
+	error = vm_mem_init(&vm->mem, 0, VM_MAXUSER_ADDRESS_LA48);
+	if (error != 0) {
+		free(vm, M_VM);
+		return (error);
+	}
 	strcpy(vm->name, name);
-	vm->vmspace = vmspace;
-	vm_mem_init(&vm->mem);
 	mtx_init(&vm->rendezvous_mtx, "vm rendezvous lock", 0, MTX_DEF);
 	sx_init(&vm->vcpus_init_lock, "vm vcpus");
 	vm->vcpu = malloc(sizeof(*vm->vcpu) * vm_maxcpu, M_VM, M_WAITOK |
@@ -685,9 +683,6 @@ vm_cleanup(struct vm *vm, bool destroy)
 	if (destroy) {
 		vm_mem_destroy(vm);
 
-		vmmops_vmspace_free(vm->vmspace);
-		vm->vmspace = NULL;
-
 		free(vm->vcpu, M_VM);
 		sx_destroy(&vm->vcpus_init_lock);
 		mtx_destroy(&vm->rendezvous_mtx);
@@ -731,7 +726,7 @@ vm_map_mmio(struct vm *vm, vm_paddr_t gpa, size_t len, vm_paddr_t hpa)
 {
 	vm_object_t obj;
 
-	if ((obj = vmm_mmio_alloc(vm->vmspace, gpa, len, hpa)) == NULL)
+	if ((obj = vmm_mmio_alloc(vm_vmspace(vm), gpa, len, hpa)) == NULL)
 		return (ENOMEM);
 	else
 		return (0);
@@ -741,19 +736,21 @@ int
 vm_unmap_mmio(struct vm *vm, vm_paddr_t gpa, size_t len)
 {
 
-	vmm_mmio_free(vm->vmspace, gpa, len);
+	vmm_mmio_free(vm_vmspace(vm), gpa, len);
 	return (0);
 }
 
 static int
 vm_iommu_map(struct vm *vm)
 {
+	pmap_t pmap;
 	vm_paddr_t gpa, hpa;
 	struct vm_mem_map *mm;
 	int error, i;
 
 	sx_assert(&vm->mem.mem_segs_lock, SX_LOCKED);
 
+	pmap = vmspace_pmap(vm_vmspace(vm));
 	for (i = 0; i < VM_MAX_MEMMAPS; i++) {
 		if (!vm_memseg_sysmem(vm, i))
 			continue;
@@ -767,7 +764,7 @@ vm_iommu_map(struct vm *vm)
 		mm->flags |= VM_MEMMAP_F_IOMMU;
 
 		for (gpa = mm->gpa; gpa < mm->gpa + mm->len; gpa += PAGE_SIZE) {
-			hpa = pmap_extract(vmspace_pmap(vm->vmspace), gpa);
+			hpa = pmap_extract(pmap, gpa);
 
 			/*
 			 * All mappings in the vmm vmspace must be
@@ -816,7 +813,7 @@ vm_iommu_unmap(struct vm *vm)
 
 		for (gpa = mm->gpa; gpa < mm->gpa + mm->len; gpa += PAGE_SIZE) {
 			KASSERT(vm_page_wired(PHYS_TO_VM_PAGE(pmap_extract(
-			    vmspace_pmap(vm->vmspace), gpa))),
+			    vmspace_pmap(vm_vmspace(vm)), gpa))),
 			    ("vm_iommu_unmap: vm %p gpa %jx not wired",
 			    vm, (uintmax_t)gpa));
 			iommu_remove_mapping(vm->iommu, gpa, PAGE_SIZE);
@@ -993,6 +990,54 @@ save_guest_fpustate(struct vcpu *vcpu)
 
 static VMM_STAT(VCPU_IDLE_TICKS, "number of ticks vcpu was idle");
 
+/*
+ * Invoke the rendezvous function on the specified vcpu if applicable.  Return
+ * true if the rendezvous is finished, false otherwise.  Needs rendezvous_mtx.
+ */
+static bool
+vm_rendezvous(struct vcpu *vcpu)
+{
+	struct vm *vm = vcpu->vm;
+	int vcpuid;
+
+	mtx_assert(&vcpu->vm->rendezvous_mtx, MA_OWNED);
+	KASSERT(vcpu->vm->rendezvous_func != NULL,
+	    ("vm_rendezvous: no rendezvous pending"));
+
+	/* 'rendezvous_req_cpus' must be a subset of 'active_cpus' */
+	CPU_AND(&vm->rendezvous_req_cpus, &vm->rendezvous_req_cpus,
+	    &vm->active_cpus);
+
+	vcpuid = vcpu->vcpuid;
+	if (CPU_ISSET(vcpuid, &vm->rendezvous_req_cpus) &&
+	    !CPU_ISSET(vcpuid, &vm->rendezvous_done_cpus)) {
+		VMM_CTR0(vcpu, "Calling rendezvous func");
+		(*vm->rendezvous_func)(vcpu, vm->rendezvous_arg);
+		CPU_SET(vcpuid, &vm->rendezvous_done_cpus);
+	}
+	if (CPU_CMP(&vm->rendezvous_req_cpus,
+	    &vm->rendezvous_done_cpus) == 0) {
+		VMM_CTR0(vcpu, "Rendezvous completed");
+		CPU_ZERO(&vm->rendezvous_req_cpus);
+		vm->rendezvous_func = NULL;
+		wakeup(&vm->rendezvous_func);
+		return (true);
+	}
+	return (false);
+}
+
+static void
+vcpu_wait_idle(struct vcpu *vcpu)
+{
+	KASSERT(vcpu->state != VCPU_IDLE, ("vcpu already idle"));
+
+	vcpu->reqidle = 1;
+	vcpu_notify_event_locked(vcpu, false);
+	VMM_CTR1(vcpu, "vcpu state change from %s to "
+	    "idle requested", vcpu_state2str(vcpu->state));
+	msleep_spin(&vcpu->state, &vcpu->mtx, "vmstat", hz);
+}
+
 static int
 vcpu_set_state_locked(struct vcpu *vcpu, enum vcpu_state newstate,
     bool from_idle)
@@ -1007,13 +1052,8 @@ vcpu_set_state_locked(struct vcpu *vcpu, enum vcpu_state newstate,
 	 * ioctl() operating on a vcpu at any point.
 	 */
 	if (from_idle) {
-		while (vcpu->state != VCPU_IDLE) {
-			vcpu->reqidle = 1;
-			vcpu_notify_event_locked(vcpu, false);
-			VMM_CTR1(vcpu, "vcpu state change from %s to "
-			    "idle requested", vcpu_state2str(vcpu->state));
-			msleep_spin(&vcpu->state, &vcpu->mtx, "vmstat", hz);
-		}
+		while (vcpu->state != VCPU_IDLE)
+			vcpu_wait_idle(vcpu);
 	} else {
 		KASSERT(vcpu->state != VCPU_IDLE, ("invalid transition from "
 		    "vcpu idle state"));
@@ -1065,6 +1105,95 @@ vcpu_set_state_locked(struct vcpu *vcpu, enum vcpu_state newstate,
 	return (0);
 }
 
+/*
+ * Try to lock all of the vCPUs in the VM by moving each from idle to
+ * 'newstate', taking care to avoid deadlocks with vm_smp_rendezvous().
+ * Returns 0 on success; on error the vCPUs locked so far are put back to idle.
+ *
+ * The complexity here suggests that the rendezvous mechanism needs a rethink.
+ */
+int
+vcpu_set_state_all(struct vm *vm, enum vcpu_state newstate)
+{
+	cpuset_t locked;
+	struct vcpu *vcpu;
+	int error, i;
+	uint16_t maxcpus;
+
+	KASSERT(newstate != VCPU_IDLE,
+	    ("vcpu_set_state_all: invalid target state %d", newstate));
+
+	error = 0;
+	CPU_ZERO(&locked);
+	maxcpus = vm->maxcpus;
+
+	mtx_lock(&vm->rendezvous_mtx);
+restart:
+	if (vm->rendezvous_func != NULL) {
+		/*
+		 * If we have a pending rendezvous, then the initiator may be
+		 * blocked waiting for other vCPUs to execute the callback.  The
+		 * current thread may be a vCPU thread so we must not block
+		 * waiting for the initiator, otherwise we get a deadlock.
+		 * Thus, execute the callback on behalf of any idle vCPUs.
+		 */
+		for (i = 0; i < maxcpus; i++) {
+			vcpu = vm_vcpu(vm, i);
+			if (vcpu == NULL)
+				continue;
+			vcpu_lock(vcpu);
+			if (vcpu->state == VCPU_IDLE) {
+				(void)vcpu_set_state_locked(vcpu, VCPU_FROZEN,
+				    true);
+				CPU_SET(i, &locked);
+			}
+			vcpu_unlock(vcpu);
+			/*
+			 * Run the callback on behalf of a vCPU we hold locked.
+			 * Stop once the rendezvous completes: vm_rendezvous()
+			 * asserts that one is still pending.
+			 */
+			if (CPU_ISSET(i, &locked) && vm_rendezvous(vcpu))
+				break;
+		}
+	}
+
+	/*
+	 * Now wait for remaining vCPUs to become idle.  This may include the
+	 * initiator of a rendezvous that is currently blocked on the rendezvous
+	 * mutex.
+	 */
+	CPU_FOREACH_ISCLR(i, &locked) {
+		if (i >= maxcpus)
+			break;
+		vcpu = vm_vcpu(vm, i);
+		if (vcpu == NULL)
+			continue;
+		vcpu_lock(vcpu);
+		while (vcpu->state != VCPU_IDLE) {
+			mtx_unlock(&vm->rendezvous_mtx);
+			vcpu_wait_idle(vcpu);
+			vcpu_unlock(vcpu);
+			mtx_lock(&vm->rendezvous_mtx);
+			if (vm->rendezvous_func != NULL)
+				goto restart;
+			vcpu_lock(vcpu);
+		}
+		error = vcpu_set_state_locked(vcpu, newstate, true);
+		vcpu_unlock(vcpu);
+		if (error != 0) {
+			/* Roll back every vCPU locked so far. */
+			CPU_FOREACH_ISSET(i, &locked)
+				(void)vcpu_set_state(vm_vcpu(vm, i), VCPU_IDLE,
+				    false);
+			break;
+		}
+		CPU_SET(i, &locked);
+	}
+	mtx_unlock(&vm->rendezvous_mtx);
+	return (error);
+}
+
 static void
 vcpu_require_state(struct vcpu *vcpu, enum vcpu_state newstate)
 {
@@ -1086,36 +1215,23 @@ vcpu_require_state_locked(struct vcpu *vcpu, enum vcpu_state newstate)
 static int
 vm_handle_rendezvous(struct vcpu *vcpu)
 {
-	struct vm *vm = vcpu->vm;
+	struct vm *vm;
 	struct thread *td;
-	int error, vcpuid;
 
-	error = 0;
-	vcpuid = vcpu->vcpuid;
 	td = curthread;
+	vm = vcpu->vm;
+
 	mtx_lock(&vm->rendezvous_mtx);
 	while (vm->rendezvous_func != NULL) {
-		/* 'rendezvous_req_cpus' must be a subset of 'active_cpus' */
-		CPU_AND(&vm->rendezvous_req_cpus, &vm->rendezvous_req_cpus, &vm->active_cpus);
-
-		if (CPU_ISSET(vcpuid, &vm->rendezvous_req_cpus) &&
-		    !CPU_ISSET(vcpuid, &vm->rendezvous_done_cpus)) {
-			VMM_CTR0(vcpu, "Calling rendezvous func");
-			(*vm->rendezvous_func)(vcpu, vm->rendezvous_arg);
-			CPU_SET(vcpuid, &vm->rendezvous_done_cpus);
-		}
-		if (CPU_CMP(&vm->rendezvous_req_cpus,
-		    &vm->rendezvous_done_cpus) == 0) {
-			VMM_CTR0(vcpu, "Rendezvous completed");
-			CPU_ZERO(&vm->rendezvous_req_cpus);
-			vm->rendezvous_func = NULL;
-			wakeup(&vm->rendezvous_func);
+		if (vm_rendezvous(vcpu))
 			break;
-		}
+
 		VMM_CTR0(vcpu, "Wait for rendezvous completion");
 		mtx_sleep(&vm->rendezvous_func, &vm->rendezvous_mtx, 0,
 		    "vmrndv", hz);
 		if (td_ast_pending(td, TDA_SUSPEND)) {
+			int error;
+
 			mtx_unlock(&vm->rendezvous_mtx);
 			error = thread_check_susp(td, true);
 			if (error != 0)
@@ -1249,7 +1365,7 @@ vm_handle_paging(struct vcpu *vcpu, bool *retu)
 	    ("vm_handle_paging: invalid fault_type %d", ftype));
 
 	if (ftype == VM_PROT_READ || ftype == VM_PROT_WRITE) {
-		rv = pmap_emulate_accessed_dirty(vmspace_pmap(vm->vmspace),
+		rv = pmap_emulate_accessed_dirty(vmspace_pmap(vm_vmspace(vm)),
 		    vme->u.paging.gpa, ftype);
 		if (rv == 0) {
 			VMM_CTR2(vcpu, "%s bit emulation for gpa %#lx",
@@ -1259,7 +1375,7 @@ vm_handle_paging(struct vcpu *vcpu, bool *retu)
 		}
 	}
 
-	map = &vm->vmspace->vm_map;
+	map = &vm_vmspace(vm)->vm_map;
 	rv = vm_fault(map, vme->u.paging.gpa, ftype, VM_FAULT_NORMAL, NULL);
 
 	VMM_CTR3(vcpu, "vm_handle_paging rv = %d, gpa = %#lx, "
@@ -1560,7 +1676,7 @@ vm_run(struct vcpu *vcpu)
 	if (CPU_ISSET(vcpuid, &vm->suspended_cpus))
 		return (EINVAL);
 
-	pmap = vmspace_pmap(vm->vmspace);
+	pmap = vmspace_pmap(vm_vmspace(vm));
 	vme = &vcpu->exitinfo;
 	evinfo.rptr = &vm->rendezvous_req_cpus;
 	evinfo.sptr = &vm->suspend;
@@ -2302,12 +2418,6 @@ vcpu_notify_event(struct vcpu *vcpu, bool lapic_intr)
 	vcpu_unlock(vcpu);
 }
 
-struct vmspace *
-vm_vmspace(struct vm *vm)
-{
-	return (vm->vmspace);
-}
-
 struct vm_mem *
 vm_mem(struct vm *vm)
 {
@@ -2519,7 +2629,7 @@ vm_get_rescnt(struct vcpu *vcpu, struct vmm_stat_type *stat)
 
 	if (vcpu->vcpuid == 0) {
 		vmm_stat_set(vcpu, VMM_MEM_RESIDENT, PAGE_SIZE *
-		    vmspace_resident_count(vcpu->vm->vmspace));
+		    vmspace_resident_count(vm_vmspace(vcpu->vm)));
 	}
 }
 
@@ -2529,7 +2639,7 @@ vm_get_wiredcnt(struct vcpu *vcpu, struct vmm_stat_type *stat)
 
 	if (vcpu->vcpuid == 0) {
 		vmm_stat_set(vcpu, VMM_MEM_WIRED, PAGE_SIZE *
-		    pmap_wired_count(vmspace_pmap(vcpu->vm->vmspace)));
+		    pmap_wired_count(vmspace_pmap(vm_vmspace(vcpu->vm))));
 	}
 }
 
diff --git a/sys/amd64/vmm/vmm_dev_machdep.c b/sys/amd64/vmm/vmm_dev_machdep.c
index d8d2b460404c..dfebc9dcadbf 100644
--- a/sys/amd64/vmm/vmm_dev_machdep.c
+++ b/sys/amd64/vmm/vmm_dev_machdep.c
@@ -48,6 +48,7 @@
 #include <x86/apicreg.h>
 
 #include <dev/vmm/vmm_dev.h>
+#include <dev/vmm/vmm_mem.h>
 #include <dev/vmm/vmm_stat.h>
 
 #include "vmm_lapic.h"