Diffstat (limited to 'sys')
216 files changed, 5204 insertions, 1859 deletions
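A recurring change below: the vmm(4) headers replace a long list of per-operation function-pointer typedefs with a DECLARE_VMMOPS_FUNC() macro that emits both the typedef and the extern prototype from one invocation (the prototypes are needed because vmm.c now defines the vmmops_*() ifunc resolvers with external linkage via DEFINE_IFUNC(, ...)). As an illustrative expansion, not kernel text itself, the amd64 form of the macro behaves as follows for the getreg operation:

	#define DECLARE_VMMOPS_FUNC(ret_type, opname, args)		\
		typedef ret_type (*vmmops_##opname##_t) args;		\
		ret_type vmmops_##opname args

	/*
	 * DECLARE_VMMOPS_FUNC(int, getreg, (void *vcpui, int num,
	 *     uint64_t *retval));
	 * expands to:
	 */
	typedef int (*vmmops_getreg_t)(void *vcpui, int num, uint64_t *retval);
	int vmmops_getreg(void *vcpui, int num, uint64_t *retval);

Generating the struct vmm_ops member type and the wrapper prototype from one macro keeps the two from drifting apart as operations are added or renamed.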
diff --git a/sys/amd64/amd64/elf_machdep.c b/sys/amd64/amd64/elf_machdep.c index 6cc2d58bbbcc..933f1ac0051f 100644 --- a/sys/amd64/amd64/elf_machdep.c +++ b/sys/amd64/amd64/elf_machdep.c @@ -179,7 +179,7 @@ freebsd_brand_info_la57_img_compat(const struct image_params *imgp, return (!prefer_uva_la48); } -static Elf64_Brandinfo freebsd_brand_info_la48 = { +static const Elf64_Brandinfo freebsd_brand_info_la48 = { .brand = ELFOSABI_FREEBSD, .machine = EM_X86_64, .compat_3_brand = "FreeBSD", @@ -190,7 +190,7 @@ static Elf64_Brandinfo freebsd_brand_info_la48 = { .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE, }; -static Elf64_Brandinfo freebsd_brand_info_la57 = { +static const Elf64_Brandinfo freebsd_brand_info_la57 = { .brand = ELFOSABI_FREEBSD, .machine = EM_X86_64, .compat_3_brand = "FreeBSD", @@ -216,7 +216,7 @@ sysinit_register_elf64_brand_entries(void *arg __unused) SYSINIT(elf64, SI_SUB_EXEC, SI_ORDER_FIRST, sysinit_register_elf64_brand_entries, NULL); -static Elf64_Brandinfo freebsd_brand_oinfo = { +static const Elf64_Brandinfo freebsd_brand_oinfo = { .brand = ELFOSABI_FREEBSD, .machine = EM_X86_64, .compat_3_brand = "FreeBSD", @@ -226,11 +226,10 @@ static Elf64_Brandinfo freebsd_brand_oinfo = { .brand_note = &elf64_freebsd_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; - -SYSINIT(oelf64, SI_SUB_EXEC, SI_ORDER_ANY, +C_SYSINIT(oelf64, SI_SUB_EXEC, SI_ORDER_ANY, (sysinit_cfunc_t)elf64_insert_brand_entry, &freebsd_brand_oinfo); -static Elf64_Brandinfo kfreebsd_brand_info = { +static const Elf64_Brandinfo kfreebsd_brand_info = { .brand = ELFOSABI_FREEBSD, .machine = EM_X86_64, .compat_3_brand = "FreeBSD", @@ -240,8 +239,7 @@ static Elf64_Brandinfo kfreebsd_brand_info = { .brand_note = &elf64_kfreebsd_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE_MANDATORY }; - -SYSINIT(kelf64, SI_SUB_EXEC, SI_ORDER_ANY, +C_SYSINIT(kelf64, SI_SUB_EXEC, SI_ORDER_ANY, (sysinit_cfunc_t)elf64_insert_brand_entry, &kfreebsd_brand_info); void diff --git a/sys/amd64/include/vmm.h b/sys/amd64/include/vmm.h index e35119af8572..ad67510fecf3 100644 --- a/sys/amd64/include/vmm.h +++ b/sys/amd64/include/vmm.h @@ -170,55 +170,63 @@ struct vm_eventinfo { int *iptr; /* reqidle cookie */ }; -typedef int (*vmm_init_func_t)(int ipinum); -typedef int (*vmm_cleanup_func_t)(void); -typedef void (*vmm_suspend_func_t)(void); -typedef void (*vmm_resume_func_t)(void); -typedef void * (*vmi_init_func_t)(struct vm *vm, struct pmap *pmap); -typedef int (*vmi_run_func_t)(void *vcpui, register_t rip, - struct pmap *pmap, struct vm_eventinfo *info); -typedef void (*vmi_cleanup_func_t)(void *vmi); -typedef void * (*vmi_vcpu_init_func_t)(void *vmi, struct vcpu *vcpu, - int vcpu_id); -typedef void (*vmi_vcpu_cleanup_func_t)(void *vcpui); -typedef int (*vmi_get_register_t)(void *vcpui, int num, uint64_t *retval); -typedef int (*vmi_set_register_t)(void *vcpui, int num, uint64_t val); -typedef int (*vmi_get_desc_t)(void *vcpui, int num, struct seg_desc *desc); -typedef int (*vmi_set_desc_t)(void *vcpui, int num, struct seg_desc *desc); -typedef int (*vmi_get_cap_t)(void *vcpui, int num, int *retval); -typedef int (*vmi_set_cap_t)(void *vcpui, int num, int val); -typedef struct vmspace * (*vmi_vmspace_alloc)(vm_offset_t min, vm_offset_t max); -typedef void (*vmi_vmspace_free)(struct vmspace *vmspace); -typedef struct vlapic * (*vmi_vlapic_init)(void *vcpui); -typedef void (*vmi_vlapic_cleanup)(struct vlapic *vlapic); -typedef int (*vmi_snapshot_vcpu_t)(void *vcpui, struct vm_snapshot_meta *meta); -typedef int (*vmi_restore_tsc_t)(void 
*vcpui, uint64_t now); +#define DECLARE_VMMOPS_FUNC(ret_type, opname, args) \ + typedef ret_type (*vmmops_##opname##_t) args; \ + ret_type vmmops_##opname args + +DECLARE_VMMOPS_FUNC(int, modinit, (int ipinum)); +DECLARE_VMMOPS_FUNC(int, modcleanup, (void)); +DECLARE_VMMOPS_FUNC(void, modresume, (void)); +DECLARE_VMMOPS_FUNC(void, modsuspend, (void)); +DECLARE_VMMOPS_FUNC(void *, init, (struct vm *vm, struct pmap *pmap)); +DECLARE_VMMOPS_FUNC(int, run, (void *vcpui, register_t pc, + struct pmap *pmap, struct vm_eventinfo *info)); +DECLARE_VMMOPS_FUNC(void, cleanup, (void *vmi)); +DECLARE_VMMOPS_FUNC(void *, vcpu_init, (void *vmi, struct vcpu *vcpu, + int vcpu_id)); +DECLARE_VMMOPS_FUNC(void, vcpu_cleanup, (void *vcpui)); +DECLARE_VMMOPS_FUNC(int, getreg, (void *vcpui, int num, uint64_t *retval)); +DECLARE_VMMOPS_FUNC(int, setreg, (void *vcpui, int num, uint64_t val)); +DECLARE_VMMOPS_FUNC(int, getdesc, (void *vcpui, int num, + struct seg_desc *desc)); +DECLARE_VMMOPS_FUNC(int, setdesc, (void *vcpui, int num, + struct seg_desc *desc)); +DECLARE_VMMOPS_FUNC(int, getcap, (void *vcpui, int num, int *retval)); +DECLARE_VMMOPS_FUNC(int, setcap, (void *vcpui, int num, int val)); +DECLARE_VMMOPS_FUNC(struct vmspace *, vmspace_alloc, + (vm_offset_t min, vm_offset_t max)); +DECLARE_VMMOPS_FUNC(void, vmspace_free, (struct vmspace *vmspace)); +DECLARE_VMMOPS_FUNC(struct vlapic *, vlapic_init, (void *vcpui)); +DECLARE_VMMOPS_FUNC(void, vlapic_cleanup, (struct vlapic *vlapic)); +DECLARE_VMMOPS_FUNC(int, vcpu_snapshot, (void *vcpui, + struct vm_snapshot_meta *meta)); +DECLARE_VMMOPS_FUNC(int, restore_tsc, (void *vcpui, uint64_t now)); struct vmm_ops { - vmm_init_func_t modinit; /* module wide initialization */ - vmm_cleanup_func_t modcleanup; - vmm_resume_func_t modsuspend; - vmm_resume_func_t modresume; - - vmi_init_func_t init; /* vm-specific initialization */ - vmi_run_func_t run; - vmi_cleanup_func_t cleanup; - vmi_vcpu_init_func_t vcpu_init; - vmi_vcpu_cleanup_func_t vcpu_cleanup; - vmi_get_register_t getreg; - vmi_set_register_t setreg; - vmi_get_desc_t getdesc; - vmi_set_desc_t setdesc; - vmi_get_cap_t getcap; - vmi_set_cap_t setcap; - vmi_vmspace_alloc vmspace_alloc; - vmi_vmspace_free vmspace_free; - vmi_vlapic_init vlapic_init; - vmi_vlapic_cleanup vlapic_cleanup; + vmmops_modinit_t modinit; /* module wide initialization */ + vmmops_modcleanup_t modcleanup; + vmmops_modresume_t modsuspend; + vmmops_modresume_t modresume; + + vmmops_init_t init; /* vm-specific initialization */ + vmmops_run_t run; + vmmops_cleanup_t cleanup; + vmmops_vcpu_init_t vcpu_init; + vmmops_vcpu_cleanup_t vcpu_cleanup; + vmmops_getreg_t getreg; + vmmops_setreg_t setreg; + vmmops_getdesc_t getdesc; + vmmops_setdesc_t setdesc; + vmmops_getcap_t getcap; + vmmops_setcap_t setcap; + vmmops_vmspace_alloc_t vmspace_alloc; + vmmops_vmspace_free_t vmspace_free; + vmmops_vlapic_init_t vlapic_init; + vmmops_vlapic_cleanup_t vlapic_cleanup; /* checkpoint operations */ - vmi_snapshot_vcpu_t vcpu_snapshot; - vmi_restore_tsc_t restore_tsc; + vmmops_vcpu_snapshot_t vcpu_snapshot; + vmmops_restore_tsc_t restore_tsc; }; extern const struct vmm_ops vmm_ops_intel; @@ -229,7 +237,7 @@ extern u_int vm_maxcpu; /* maximum virtual cpus */ int vm_create(const char *name, struct vm **retvm); struct vcpu *vm_alloc_vcpu(struct vm *vm, int vcpuid); void vm_disable_vcpu_creation(struct vm *vm); -void vm_slock_vcpus(struct vm *vm); +void vm_lock_vcpus(struct vm *vm); void vm_unlock_vcpus(struct vm *vm); void vm_destroy(struct vm *vm); int 
vm_reinit(struct vm *vm); @@ -354,6 +362,7 @@ enum vcpu_state { }; int vcpu_set_state(struct vcpu *vcpu, enum vcpu_state state, bool from_idle); +int vcpu_set_state_all(struct vm *vm, enum vcpu_state state); enum vcpu_state vcpu_get_state(struct vcpu *vcpu, int *hostcpu); static int __inline @@ -375,7 +384,6 @@ vcpu_should_yield(struct vcpu *vcpu) void *vcpu_stats(struct vcpu *vcpu); void vcpu_notify_event(struct vcpu *vcpu, bool lapic_intr); -struct vmspace *vm_vmspace(struct vm *vm); struct vm_mem *vm_mem(struct vm *vm); struct vatpic *vm_atpic(struct vm *vm); struct vatpit *vm_atpit(struct vm *vm); diff --git a/sys/amd64/linux/linux_sysvec.c b/sys/amd64/linux/linux_sysvec.c index c8579c5da4ad..890cf01c46a0 100644 --- a/sys/amd64/linux/linux_sysvec.c +++ b/sys/amd64/linux/linux_sysvec.c @@ -857,7 +857,7 @@ linux_vdso_reloc(char *mapping, Elf_Addr offset) } } -static Elf_Brandnote linux64_brandnote = { +static const Elf_Brandnote linux64_brandnote = { .hdr.n_namesz = sizeof(GNU_ABI_VENDOR), .hdr.n_descsz = 16, .hdr.n_type = 1, @@ -866,7 +866,7 @@ static Elf_Brandnote linux64_brandnote = { .trans_osrel = linux_trans_osrel }; -static Elf64_Brandinfo linux_glibc2brand = { +static const Elf64_Brandinfo linux_glibc2brand = { .brand = ELFOSABI_LINUX, .machine = EM_X86_64, .compat_3_brand = "Linux", @@ -877,7 +877,7 @@ static Elf64_Brandinfo linux_glibc2brand = { .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; -static Elf64_Brandinfo linux_glibc2brandshort = { +static const Elf64_Brandinfo linux_glibc2brandshort = { .brand = ELFOSABI_LINUX, .machine = EM_X86_64, .compat_3_brand = "Linux", @@ -888,7 +888,7 @@ static Elf64_Brandinfo linux_glibc2brandshort = { .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; -static Elf64_Brandinfo linux_muslbrand = { +static const Elf64_Brandinfo linux_muslbrand = { .brand = ELFOSABI_LINUX, .machine = EM_X86_64, .compat_3_brand = "Linux", @@ -900,7 +900,7 @@ static Elf64_Brandinfo linux_muslbrand = { LINUX_BI_FUTEX_REQUEUE }; -static Elf64_Brandinfo *linux_brandlist[] = { +static const Elf64_Brandinfo *linux_brandlist[] = { &linux_glibc2brand, &linux_glibc2brandshort, &linux_muslbrand, @@ -910,7 +910,7 @@ static Elf64_Brandinfo *linux_brandlist[] = { static int linux64_elf_modevent(module_t mod, int type, void *data) { - Elf64_Brandinfo **brandinfo; + const Elf64_Brandinfo **brandinfo; int error; struct linux_ioctl_handler **lihp; diff --git a/sys/amd64/linux32/linux32_sysvec.c b/sys/amd64/linux32/linux32_sysvec.c index 8fac626f9053..735ebb151017 100644 --- a/sys/amd64/linux32/linux32_sysvec.c +++ b/sys/amd64/linux32/linux32_sysvec.c @@ -954,7 +954,7 @@ linux_vdso_reloc(char *mapping, Elf_Addr offset) } } -static Elf_Brandnote linux32_brandnote = { +static const Elf_Brandnote linux32_brandnote = { .hdr.n_namesz = sizeof(GNU_ABI_VENDOR), .hdr.n_descsz = 16, /* XXX at least 16 */ .hdr.n_type = 1, @@ -963,7 +963,7 @@ static Elf_Brandnote linux32_brandnote = { .trans_osrel = linux_trans_osrel }; -static Elf32_Brandinfo linux_brand = { +static const Elf32_Brandinfo linux_brand = { .brand = ELFOSABI_LINUX, .machine = EM_386, .compat_3_brand = "Linux", @@ -974,7 +974,7 @@ static Elf32_Brandinfo linux_brand = { .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; -static Elf32_Brandinfo linux_glibc2brand = { +static const Elf32_Brandinfo linux_glibc2brand = { .brand = ELFOSABI_LINUX, .machine = EM_386, .compat_3_brand = "Linux", @@ -985,7 +985,7 @@ static Elf32_Brandinfo linux_glibc2brand = { .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; -static Elf32_Brandinfo linux_muslbrand = { +static 
const Elf32_Brandinfo linux_muslbrand = { .brand = ELFOSABI_LINUX, .machine = EM_386, .compat_3_brand = "Linux", @@ -997,7 +997,7 @@ static Elf32_Brandinfo linux_muslbrand = { LINUX_BI_FUTEX_REQUEUE }; -static Elf32_Brandinfo *linux_brandlist[] = { +static const Elf32_Brandinfo *linux_brandlist[] = { &linux_brand, &linux_glibc2brand, &linux_muslbrand, @@ -1007,7 +1007,7 @@ static Elf32_Brandinfo *linux_brandlist[] = { static int linux_elf_modevent(module_t mod, int type, void *data) { - Elf32_Brandinfo **brandinfo; + const Elf32_Brandinfo **brandinfo; int error; struct linux_ioctl_handler **lihp; diff --git a/sys/amd64/pt/pt.c b/sys/amd64/pt/pt.c index c7b75767680a..6b2296de049c 100644 --- a/sys/amd64/pt/pt.c +++ b/sys/amd64/pt/pt.c @@ -42,15 +42,15 @@ */ #include <sys/systm.h> +#include <sys/bus.h> #include <sys/hwt.h> +#include <sys/interrupt.h> #include <sys/kernel.h> #include <sys/lock.h> #include <sys/malloc.h> #include <sys/module.h> #include <sys/mutex.h> -#include <sys/sdt.h> #include <sys/smp.h> -#include <sys/taskqueue.h> #include <vm/vm.h> #include <vm/vm_page.h> @@ -94,12 +94,7 @@ MALLOC_DEFINE(M_PT, "pt", "Intel Processor Trace"); -SDT_PROVIDER_DEFINE(pt); -SDT_PROBE_DEFINE(pt, , , topa__intr); - -TASKQUEUE_FAST_DEFINE_THREAD(pt); - -static void pt_send_buffer_record(void *arg, int pending __unused); +static void pt_send_buffer_record(void *arg); static int pt_topa_intr(struct trapframe *tf); /* @@ -122,29 +117,24 @@ struct pt_buffer { size_t size; struct mtx lock; /* Lock for fields below. */ vm_offset_t offset; - uint64_t wrap_count; - int curpage; }; struct pt_ctx { int id; struct pt_buffer buf; /* ToPA buffer metadata */ - struct task task; /* ToPA buffer notification task */ struct hwt_context *hwt_ctx; uint8_t *save_area; /* PT XSAVE area */ }; /* PT tracing contexts used for CPU mode. */ static struct pt_ctx *pt_pcpu_ctx; -enum pt_cpu_state { - PT_DISABLED = 0, - PT_STOPPED, - PT_ACTIVE -}; +enum pt_cpu_state { PT_INACTIVE = 0, PT_ACTIVE }; static struct pt_cpu { struct pt_ctx *ctx; /* active PT tracing context */ enum pt_cpu_state state; /* used as part of trace stop protocol */ + void *swi_cookie; /* Software interrupt handler context */ + int in_pcint_handler; } *pt_pcpu; /* @@ -199,31 +189,28 @@ static __inline void pt_update_buffer(struct pt_buffer *buf) { uint64_t reg; - int curpage; + uint64_t offset; /* Update buffer offset. */ reg = rdmsr(MSR_IA32_RTIT_OUTPUT_MASK_PTRS); - curpage = (reg & PT_TOPA_PAGE_MASK) >> PT_TOPA_PAGE_SHIFT; - mtx_lock_spin(&buf->lock); - /* Check if the output wrapped. 
*/ - if (buf->curpage > curpage) - buf->wrap_count++; - buf->curpage = curpage; - buf->offset = reg >> 32; - mtx_unlock_spin(&buf->lock); - - dprintf("%s: wrap_cnt: %lu, curpage: %d, offset: %zu\n", __func__, - buf->wrap_count, buf->curpage, buf->offset); + offset = ((reg & PT_TOPA_PAGE_MASK) >> PT_TOPA_PAGE_SHIFT) * PAGE_SIZE; + offset += (reg >> 32); + + atomic_store_rel_64(&buf->offset, offset); } static __inline void pt_fill_buffer_record(int id, struct pt_buffer *buf, struct hwt_record_entry *rec) { + vm_offset_t offset; + + offset = atomic_load_acq_64(&buf->offset); + rec->record_type = HWT_RECORD_BUFFER; rec->buf_id = id; - rec->curpage = buf->curpage; - rec->offset = buf->offset + (buf->wrap_count * buf->size); + rec->curpage = offset / PAGE_SIZE; + rec->offset = offset & PAGE_MASK; } /* @@ -273,9 +260,9 @@ pt_cpu_start(void *dummy) MPASS(cpu->ctx != NULL); dprintf("%s: curcpu %d\n", __func__, curcpu); + pt_cpu_set_state(curcpu, PT_ACTIVE); load_cr4(rcr4() | CR4_XSAVE); wrmsr(MSR_IA32_RTIT_STATUS, 0); - pt_cpu_set_state(curcpu, PT_ACTIVE); pt_cpu_toggle_local(cpu->ctx->save_area, true); } @@ -291,16 +278,16 @@ pt_cpu_stop(void *dummy) struct pt_cpu *cpu; struct pt_ctx *ctx; - /* Shutdown may occur before PT gets properly configured. */ - if (pt_cpu_get_state(curcpu) == PT_DISABLED) - return; - cpu = &pt_pcpu[curcpu]; ctx = cpu->ctx; - MPASS(ctx != NULL); - dprintf("%s: curcpu %d\n", __func__, curcpu); - pt_cpu_set_state(curcpu, PT_STOPPED); + dprintf("%s: curcpu %d\n", __func__, curcpu); + /* Shutdown may occur before PT gets properly configured. */ + if (ctx == NULL) { + dprintf("%s: missing context on cpu %d; bailing\n", __func__, + curcpu); + return; + } pt_cpu_toggle_local(cpu->ctx->save_area, false); pt_update_buffer(&ctx->buf); } @@ -406,13 +393,11 @@ pt_init_ctx(struct pt_ctx *pt_ctx, struct hwt_vm *vm, int ctx_id) return (ENOMEM); dprintf("%s: preparing ToPA buffer\n", __func__); if (pt_topa_prepare(pt_ctx, vm) != 0) { - dprintf("%s: failed to prepare ToPA buffer\n", __func__); free(pt_ctx->save_area, M_PT); return (ENOMEM); } pt_ctx->id = ctx_id; - TASK_INIT(&pt_ctx->task, 0, pt_send_buffer_record, pt_ctx); return (0); } @@ -426,7 +411,6 @@ pt_deinit_ctx(struct pt_ctx *pt_ctx) if (pt_ctx->save_area != NULL) free(pt_ctx->save_area, M_PT); memset(pt_ctx, 0, sizeof(*pt_ctx)); - pt_ctx->buf.topa_hw = NULL; } /* @@ -519,7 +503,6 @@ pt_backend_configure(struct hwt_context *ctx, int cpu_id, int thread_id) XSTATE_XCOMP_BV_COMPACT; pt_ext->rtit_ctl |= RTIT_CTL_TRACEEN; pt_pcpu[cpu_id].ctx = pt_ctx; - pt_cpu_set_state(cpu_id, PT_STOPPED); return (0); } @@ -549,12 +532,19 @@ pt_backend_disable(struct hwt_context *ctx, int cpu_id) if (ctx->mode == HWT_MODE_CPU) return; - KASSERT(curcpu == cpu_id, ("%s: attempting to disable PT on another cpu", __func__)); + + cpu = &pt_pcpu[cpu_id]; + + dprintf("%s: waiting for cpu %d to exit interrupt handler\n", __func__, + cpu_id); + pt_cpu_set_state(cpu_id, PT_INACTIVE); + while (atomic_cmpset_int(&cpu->in_pcint_handler, 1, 0)) + ; + pt_cpu_stop(NULL); CPU_CLR(cpu_id, &ctx->cpu_map); - cpu = &pt_pcpu[cpu_id]; cpu->ctx = NULL; } @@ -564,14 +554,14 @@ pt_backend_disable(struct hwt_context *ctx, int cpu_id) static int pt_backend_enable_smp(struct hwt_context *ctx) { - dprintf("%s\n", __func__); + + KASSERT(ctx->mode == HWT_MODE_CPU, + ("%s: should only be used for CPU mode", __func__)); if (ctx->mode == HWT_MODE_CPU && atomic_swap_32(&cpu_mode_ctr, 1) != 0) return (-1); - KASSERT(ctx->mode == HWT_MODE_CPU, - ("%s: should only be used for CPU mode", 
__func__)); smp_rendezvous_cpus(ctx->cpu_map, NULL, pt_cpu_start, NULL, NULL); return (0); @@ -583,6 +573,7 @@ pt_backend_enable_smp(struct hwt_context *ctx) static int pt_backend_disable_smp(struct hwt_context *ctx) { + struct pt_cpu *cpu; dprintf("%s\n", __func__); if (ctx->mode == HWT_MODE_CPU && @@ -593,6 +584,14 @@ pt_backend_disable_smp(struct hwt_context *ctx) dprintf("%s: empty cpu map\n", __func__); return (-1); } + CPU_FOREACH_ISSET(cpu_id, &ctx->cpu_map) { + cpu = &pt_pcpu[cpu_id]; + dprintf("%s: waiting for cpu %d to exit interrupt handler\n", + __func__, cpu_id); + pt_cpu_set_state(cpu_id, PT_INACTIVE); + while (atomic_cmpset_int(&cpu->in_pcint_handler, 1, 0)) + ; + } smp_rendezvous_cpus(ctx->cpu_map, NULL, pt_cpu_stop, NULL, NULL); return (0); @@ -611,13 +610,13 @@ pt_backend_init(struct hwt_context *ctx) int error; dprintf("%s\n", __func__); - if (ctx->mode == HWT_MODE_CPU) { - TAILQ_FOREACH(hwt_cpu, &ctx->cpus, next) { - error = pt_init_ctx(&pt_pcpu_ctx[hwt_cpu->cpu_id], - hwt_cpu->vm, hwt_cpu->cpu_id); - if (error) - return (error); - } + if (ctx->mode != HWT_MODE_CPU) + return (0); + TAILQ_FOREACH(hwt_cpu, &ctx->cpus, next) { + error = pt_init_ctx(&pt_pcpu_ctx[hwt_cpu->cpu_id], hwt_cpu->vm, + hwt_cpu->cpu_id); + if (error) + return (error); } return (0); @@ -647,20 +646,16 @@ pt_backend_deinit(struct hwt_context *ctx) pt_deinit_ctx(pt_ctx); } } else { - CPU_FOREACH(cpu_id) { - if (!CPU_ISSET(cpu_id, &ctx->cpu_map)) + CPU_FOREACH_ISSET(cpu_id, &ctx->cpu_map) { + if (pt_pcpu[cpu_id].ctx == NULL) continue; - if (pt_pcpu[cpu_id].ctx != NULL) { - KASSERT(pt_pcpu[cpu_id].ctx == - &pt_pcpu_ctx[cpu_id], - ("%s: CPU mode tracing with non-cpu mode PT" - "context active", - __func__)); - pt_pcpu[cpu_id].ctx = NULL; - } - pt_ctx = &pt_pcpu_ctx[cpu_id]; - pt_deinit_ctx(pt_ctx); - memset(&pt_pcpu[cpu_id], 0, sizeof(struct pt_cpu)); + KASSERT(pt_pcpu[cpu_id].ctx == &pt_pcpu_ctx[cpu_id], + ("%s: CPU mode tracing with non-cpu mode PT" + "context active", + __func__)); + pt_deinit_ctx(pt_pcpu[cpu_id].ctx); + pt_pcpu[cpu_id].ctx = NULL; + atomic_set_int(&pt_pcpu[cpu_id].in_pcint_handler, 0); } } @@ -675,15 +670,15 @@ pt_backend_read(struct hwt_vm *vm, int *curpage, vm_offset_t *curpage_offset, uint64_t *data) { struct pt_buffer *buf; + uint64_t offset; if (vm->ctx->mode == HWT_MODE_THREAD) buf = &((struct pt_ctx *)vm->thr->private)->buf; else buf = &pt_pcpu[vm->cpu->cpu_id].ctx->buf; - mtx_lock_spin(&buf->lock); - *curpage = buf->curpage; - *curpage_offset = buf->offset + (buf->wrap_count * vm->ctx->bufsize); - mtx_unlock_spin(&buf->lock); + offset = atomic_load_acq_64(&buf->offset); + *curpage = offset / PAGE_SIZE; + *curpage_offset = offset & PAGE_MASK; return (0); } @@ -762,15 +757,13 @@ static struct hwt_backend backend = { * Used as a taskqueue routine from the ToPA interrupt handler. */ static void -pt_send_buffer_record(void *arg, int pending __unused) +pt_send_buffer_record(void *arg) { + struct pt_cpu *cpu = (struct pt_cpu *)arg; struct hwt_record_entry record; - struct pt_ctx *ctx = (struct pt_ctx *)arg; - /* Prepare buffer record. 
*/ - mtx_lock_spin(&ctx->buf.lock); + struct pt_ctx *ctx = cpu->ctx; pt_fill_buffer_record(ctx->id, &ctx->buf, &record); - mtx_unlock_spin(&ctx->buf.lock); hwt_record_ctx(ctx->hwt_ctx, &record, M_ZERO | M_NOWAIT); } static void @@ -795,36 +788,40 @@ static int pt_topa_intr(struct trapframe *tf) { struct pt_buffer *buf; + struct pt_cpu *cpu; struct pt_ctx *ctx; uint64_t reg; - SDT_PROBE0(pt, , , topa__intr); - - if (pt_cpu_get_state(curcpu) != PT_ACTIVE) { - return (0); - } + cpu = &pt_pcpu[curcpu]; reg = rdmsr(MSR_IA_GLOBAL_STATUS); if ((reg & GLOBAL_STATUS_FLAG_TRACETOPAPMI) == 0) { - /* ACK spurious or leftover interrupt. */ pt_topa_status_clear(); + return (0); + } + + if (pt_cpu_get_state(curcpu) != PT_ACTIVE) { return (1); } + atomic_set_int(&cpu->in_pcint_handler, 1); - ctx = pt_pcpu[curcpu].ctx; + ctx = cpu->ctx; + KASSERT(ctx != NULL, + ("%s: cpu %d: ToPA PMI interrupt without an active context", + __func__, curcpu)); buf = &ctx->buf; KASSERT(buf->topa_hw != NULL, - ("%s: ToPA PMI interrupt with invalid buffer", __func__)); - + ("%s: cpu %d: ToPA PMI interrupt with invalid buffer", __func__, + curcpu)); pt_cpu_toggle_local(ctx->save_area, false); pt_update_buffer(buf); pt_topa_status_clear(); - taskqueue_enqueue_flags(taskqueue_pt, &ctx->task, - TASKQUEUE_FAIL_IF_PENDING); if (pt_cpu_get_state(curcpu) == PT_ACTIVE) { + swi_sched(cpu->swi_cookie, SWI_FROMNMI); pt_cpu_toggle_local(ctx->save_area, true); lapic_reenable_pcint(); } + atomic_set_int(&cpu->in_pcint_handler, 0); return (1); } @@ -839,7 +836,7 @@ static int pt_init(void) { u_int cp[4]; - int error; + int error, i; dprintf("pt: Enumerating part 1\n"); cpuid_count(CPUID_PT_LEAF, 0, cp); @@ -869,20 +866,38 @@ pt_init(void) pt_pcpu_ctx = mallocarray(mp_ncpus, sizeof(struct pt_ctx), M_PT, M_ZERO | M_WAITOK); + for (i = 0; i < mp_ncpus; i++) { + error = swi_add(&clk_intr_event, "pt", pt_send_buffer_record, + &pt_pcpu[i], SWI_CLOCK, INTR_MPSAFE, + &pt_pcpu[i].swi_cookie); + if (error != 0) { + dprintf( + "%s: failed to add interrupt handler for cpu: %d\n", + __func__, error); + goto err; + } + } + nmi_register_handler(pt_topa_intr); - if (!lapic_enable_pcint()) { - nmi_remove_handler(pt_topa_intr); - hwt_backend_unregister(&backend); - free(pt_pcpu, M_PT); - free(pt_pcpu_ctx, M_PT); - pt_pcpu = NULL; - pt_pcpu_ctx = NULL; + if (lapic_enable_pcint()) { + initialized = true; + return (0); + } else printf("pt: failed to setup interrupt line\n"); - return (error); +err: + nmi_remove_handler(pt_topa_intr); + hwt_backend_unregister(&backend); + + for (i = 0; i < mp_ncpus; i++) { + if (pt_pcpu[i].swi_cookie != 0) + swi_remove(pt_pcpu[i].swi_cookie); } - initialized = true; + free(pt_pcpu, M_PT); + free(pt_pcpu_ctx, M_PT); + pt_pcpu = NULL; + pt_pcpu_ctx = NULL; - return (0); + return (error); } /* @@ -941,14 +956,24 @@ pt_supported(void) static void pt_deinit(void) { + int i; + struct pt_cpu *cpu; + if (!initialized) return; nmi_remove_handler(pt_topa_intr); lapic_disable_pcint(); hwt_backend_unregister(&backend); + + for (i = 0; i < mp_ncpus; i++) { + cpu = &pt_pcpu[i]; + swi_remove(cpu->swi_cookie); + } + free(pt_pcpu, M_PT); free(pt_pcpu_ctx, M_PT); pt_pcpu = NULL; + pt_pcpu_ctx = NULL; initialized = false; } diff --git a/sys/amd64/vmm/vmm.c b/sys/amd64/vmm/vmm.c index c42da02d0bf6..f7c59847140b 100644 --- a/sys/amd64/vmm/vmm.c +++ b/sys/amd64/vmm/vmm.c @@ -163,7 +163,6 @@ struct vm { void *rendezvous_arg; /* (x) [r] rendezvous func/arg */ vm_rendezvous_func_t rendezvous_func; struct mtx rendezvous_mtx; /* (o) rendezvous lock */ - 
struct vmspace *vmspace; /* (o) guest's address space */ struct vm_mem mem; /* (i) [m+v] guest memory */ char name[VM_MAX_NAMELEN+1]; /* (o) virtual machine name */ struct vcpu **vcpu; /* (o) guest vcpus */ @@ -201,7 +200,7 @@ vmmops_panic(void) } #define DEFINE_VMMOPS_IFUNC(ret_type, opname, args) \ - DEFINE_IFUNC(static, ret_type, vmmops_##opname, args) \ + DEFINE_IFUNC(, ret_type, vmmops_##opname, args) \ { \ if (vmm_is_intel()) \ return (vmm_ops_intel.opname); \ @@ -499,7 +498,7 @@ MODULE_VERSION(vmm, 1); static void vm_init(struct vm *vm, bool create) { - vm->cookie = vmmops_init(vm, vmspace_pmap(vm->vmspace)); + vm->cookie = vmmops_init(vm, vmspace_pmap(vm_vmspace(vm))); vm->iommu = NULL; vm->vioapic = vioapic_init(vm); vm->vhpet = vhpet_init(vm); @@ -563,9 +562,9 @@ vm_alloc_vcpu(struct vm *vm, int vcpuid) } void -vm_slock_vcpus(struct vm *vm) +vm_lock_vcpus(struct vm *vm) { - sx_slock(&vm->vcpus_init_lock); + sx_xlock(&vm->vcpus_init_lock); } void @@ -584,7 +583,7 @@ int vm_create(const char *name, struct vm **retvm) { struct vm *vm; - struct vmspace *vmspace; + int error; /* * If vmm.ko could not be successfully initialized then don't attempt @@ -597,14 +596,13 @@ vm_create(const char *name, struct vm **retvm) VM_MAX_NAMELEN + 1) return (EINVAL); - vmspace = vmmops_vmspace_alloc(0, VM_MAXUSER_ADDRESS_LA48); - if (vmspace == NULL) - return (ENOMEM); - vm = malloc(sizeof(struct vm), M_VM, M_WAITOK | M_ZERO); + error = vm_mem_init(&vm->mem, 0, VM_MAXUSER_ADDRESS_LA48); + if (error != 0) { + free(vm, M_VM); + return (error); + } strcpy(vm->name, name); - vm->vmspace = vmspace; - vm_mem_init(&vm->mem); mtx_init(&vm->rendezvous_mtx, "vm rendezvous lock", 0, MTX_DEF); sx_init(&vm->vcpus_init_lock, "vm vcpus"); vm->vcpu = malloc(sizeof(*vm->vcpu) * vm_maxcpu, M_VM, M_WAITOK | @@ -685,9 +683,6 @@ vm_cleanup(struct vm *vm, bool destroy) if (destroy) { vm_mem_destroy(vm); - vmmops_vmspace_free(vm->vmspace); - vm->vmspace = NULL; - free(vm->vcpu, M_VM); sx_destroy(&vm->vcpus_init_lock); mtx_destroy(&vm->rendezvous_mtx); @@ -731,7 +726,7 @@ vm_map_mmio(struct vm *vm, vm_paddr_t gpa, size_t len, vm_paddr_t hpa) { vm_object_t obj; - if ((obj = vmm_mmio_alloc(vm->vmspace, gpa, len, hpa)) == NULL) + if ((obj = vmm_mmio_alloc(vm_vmspace(vm), gpa, len, hpa)) == NULL) return (ENOMEM); else return (0); @@ -741,19 +736,21 @@ int vm_unmap_mmio(struct vm *vm, vm_paddr_t gpa, size_t len) { - vmm_mmio_free(vm->vmspace, gpa, len); + vmm_mmio_free(vm_vmspace(vm), gpa, len); return (0); } static int vm_iommu_map(struct vm *vm) { + pmap_t pmap; vm_paddr_t gpa, hpa; struct vm_mem_map *mm; int error, i; sx_assert(&vm->mem.mem_segs_lock, SX_LOCKED); + pmap = vmspace_pmap(vm_vmspace(vm)); for (i = 0; i < VM_MAX_MEMMAPS; i++) { if (!vm_memseg_sysmem(vm, i)) continue; @@ -767,7 +764,7 @@ vm_iommu_map(struct vm *vm) mm->flags |= VM_MEMMAP_F_IOMMU; for (gpa = mm->gpa; gpa < mm->gpa + mm->len; gpa += PAGE_SIZE) { - hpa = pmap_extract(vmspace_pmap(vm->vmspace), gpa); + hpa = pmap_extract(pmap, gpa); /* * All mappings in the vmm vmspace must be @@ -816,7 +813,7 @@ vm_iommu_unmap(struct vm *vm) for (gpa = mm->gpa; gpa < mm->gpa + mm->len; gpa += PAGE_SIZE) { KASSERT(vm_page_wired(PHYS_TO_VM_PAGE(pmap_extract( - vmspace_pmap(vm->vmspace), gpa))), + vmspace_pmap(vm_vmspace(vm)), gpa))), ("vm_iommu_unmap: vm %p gpa %jx not wired", vm, (uintmax_t)gpa)); iommu_remove_mapping(vm->iommu, gpa, PAGE_SIZE); @@ -993,6 +990,54 @@ save_guest_fpustate(struct vcpu *vcpu) static VMM_STAT(VCPU_IDLE_TICKS, "number of ticks vcpu was idle"); 
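The vmm.c hunk that follows factors the rendezvous loop out of vm_handle_rendezvous() into vm_rendezvous() and vcpu_wait_idle(), then builds vcpu_set_state_all() on top of them: it freezes every vCPU while executing any pending rendezvous callback on behalf of idle vCPUs, so a vCPU thread calling it cannot deadlock against vm_smp_rendezvous(). A minimal sketch of how a caller might drive the new interface; the caller shown here is hypothetical, not part of this change:

	static int
	vm_do_global_op(struct vm *vm)
	{
		struct vcpu *vcpu;
		int error, i;

		vm_lock_vcpus(vm);	/* exclusive; also blocks vCPU creation */
		error = vcpu_set_state_all(vm, VCPU_FROZEN);
		if (error == 0) {
			/* ... act on the VM while every vCPU is frozen ... */

			/* Release the vCPUs when done. */
			for (i = 0; i < vm->maxcpus; i++) {
				vcpu = vm_vcpu(vm, i);
				if (vcpu != NULL)
					(void)vcpu_set_state(vcpu, VCPU_IDLE,
					    false);
			}
		}
		vm_unlock_vcpus(vm);
		return (error);
	}

On failure, vcpu_set_state_all() rolls back the vCPUs it already froze, so the caller only has to release states on the success path.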
+/* + * Invoke the rendezvous function on the specified vcpu if applicable. Return + * true if the rendezvous is finished, false otherwise. + */ +static bool +vm_rendezvous(struct vcpu *vcpu) +{ + struct vm *vm = vcpu->vm; + int vcpuid; + + mtx_assert(&vcpu->vm->rendezvous_mtx, MA_OWNED); + KASSERT(vcpu->vm->rendezvous_func != NULL, + ("vm_rendezvous: no rendezvous pending")); + + /* 'rendezvous_req_cpus' must be a subset of 'active_cpus' */ + CPU_AND(&vm->rendezvous_req_cpus, &vm->rendezvous_req_cpus, + &vm->active_cpus); + + vcpuid = vcpu->vcpuid; + if (CPU_ISSET(vcpuid, &vm->rendezvous_req_cpus) && + !CPU_ISSET(vcpuid, &vm->rendezvous_done_cpus)) { + VMM_CTR0(vcpu, "Calling rendezvous func"); + (*vm->rendezvous_func)(vcpu, vm->rendezvous_arg); + CPU_SET(vcpuid, &vm->rendezvous_done_cpus); + } + if (CPU_CMP(&vm->rendezvous_req_cpus, + &vm->rendezvous_done_cpus) == 0) { + VMM_CTR0(vcpu, "Rendezvous completed"); + CPU_ZERO(&vm->rendezvous_req_cpus); + vm->rendezvous_func = NULL; + wakeup(&vm->rendezvous_func); + return (true); + } + return (false); +} + +static void +vcpu_wait_idle(struct vcpu *vcpu) +{ + KASSERT(vcpu->state != VCPU_IDLE, ("vcpu already idle")); + + vcpu->reqidle = 1; + vcpu_notify_event_locked(vcpu, false); + VMM_CTR1(vcpu, "vcpu state change from %s to " + "idle requested", vcpu_state2str(vcpu->state)); + msleep_spin(&vcpu->state, &vcpu->mtx, "vmstat", hz); +} + static int vcpu_set_state_locked(struct vcpu *vcpu, enum vcpu_state newstate, bool from_idle) @@ -1007,13 +1052,8 @@ vcpu_set_state_locked(struct vcpu *vcpu, enum vcpu_state newstate, * ioctl() operating on a vcpu at any point. */ if (from_idle) { - while (vcpu->state != VCPU_IDLE) { - vcpu->reqidle = 1; - vcpu_notify_event_locked(vcpu, false); - VMM_CTR1(vcpu, "vcpu state change from %s to " - "idle requested", vcpu_state2str(vcpu->state)); - msleep_spin(&vcpu->state, &vcpu->mtx, "vmstat", hz); - } + while (vcpu->state != VCPU_IDLE) + vcpu_wait_idle(vcpu); } else { KASSERT(vcpu->state != VCPU_IDLE, ("invalid transition from " "vcpu idle state")); @@ -1065,6 +1105,95 @@ vcpu_set_state_locked(struct vcpu *vcpu, enum vcpu_state newstate, return (0); } +/* + * Try to lock all of the vCPUs in the VM while taking care to avoid deadlocks + * with vm_smp_rendezvous(). + * + * The complexity here suggests that the rendezvous mechanism needs a rethink. + */ +int +vcpu_set_state_all(struct vm *vm, enum vcpu_state newstate) +{ + cpuset_t locked; + struct vcpu *vcpu; + int error, i; + uint16_t maxcpus; + + KASSERT(newstate != VCPU_IDLE, + ("vcpu_set_state_all: invalid target state %d", newstate)); + + error = 0; + CPU_ZERO(&locked); + maxcpus = vm->maxcpus; + + mtx_lock(&vm->rendezvous_mtx); +restart: + if (vm->rendezvous_func != NULL) { + /* + * If we have a pending rendezvous, then the initiator may be + * blocked waiting for other vCPUs to execute the callback. The + * current thread may be a vCPU thread so we must not block + * waiting for the initiator, otherwise we get a deadlock. + * Thus, execute the callback on behalf of any idle vCPUs. + */ + for (i = 0; i < maxcpus; i++) { + vcpu = vm_vcpu(vm, i); + if (vcpu == NULL) + continue; + vcpu_lock(vcpu); + if (vcpu->state == VCPU_IDLE) { + (void)vcpu_set_state_locked(vcpu, VCPU_FROZEN, + true); + CPU_SET(i, &locked); + } + if (CPU_ISSET(i, &locked)) { + /* + * We can safely execute the callback on this + * vCPU's behalf. 
+ */ + vcpu_unlock(vcpu); + (void)vm_rendezvous(vcpu); + vcpu_lock(vcpu); + } + vcpu_unlock(vcpu); + } + } + + /* + * Now wait for remaining vCPUs to become idle. This may include the + * initiator of a rendezvous that is currently blocked on the rendezvous + * mutex. + */ + CPU_FOREACH_ISCLR(i, &locked) { + if (i >= maxcpus) + break; + vcpu = vm_vcpu(vm, i); + if (vcpu == NULL) + continue; + vcpu_lock(vcpu); + while (vcpu->state != VCPU_IDLE) { + mtx_unlock(&vm->rendezvous_mtx); + vcpu_wait_idle(vcpu); + vcpu_unlock(vcpu); + mtx_lock(&vm->rendezvous_mtx); + if (vm->rendezvous_func != NULL) + goto restart; + vcpu_lock(vcpu); + } + error = vcpu_set_state_locked(vcpu, newstate, true); + vcpu_unlock(vcpu); + if (error != 0) { + /* Roll back state changes. */ + CPU_FOREACH_ISSET(i, &locked) + (void)vcpu_set_state(vcpu, VCPU_IDLE, false); + break; + } + CPU_SET(i, &locked); + } + mtx_unlock(&vm->rendezvous_mtx); + return (error); +} + static void vcpu_require_state(struct vcpu *vcpu, enum vcpu_state newstate) { @@ -1086,36 +1215,23 @@ vcpu_require_state_locked(struct vcpu *vcpu, enum vcpu_state newstate) static int vm_handle_rendezvous(struct vcpu *vcpu) { - struct vm *vm = vcpu->vm; + struct vm *vm; struct thread *td; - int error, vcpuid; - error = 0; - vcpuid = vcpu->vcpuid; td = curthread; + vm = vcpu->vm; + mtx_lock(&vm->rendezvous_mtx); while (vm->rendezvous_func != NULL) { - /* 'rendezvous_req_cpus' must be a subset of 'active_cpus' */ - CPU_AND(&vm->rendezvous_req_cpus, &vm->rendezvous_req_cpus, &vm->active_cpus); - - if (CPU_ISSET(vcpuid, &vm->rendezvous_req_cpus) && - !CPU_ISSET(vcpuid, &vm->rendezvous_done_cpus)) { - VMM_CTR0(vcpu, "Calling rendezvous func"); - (*vm->rendezvous_func)(vcpu, vm->rendezvous_arg); - CPU_SET(vcpuid, &vm->rendezvous_done_cpus); - } - if (CPU_CMP(&vm->rendezvous_req_cpus, - &vm->rendezvous_done_cpus) == 0) { - VMM_CTR0(vcpu, "Rendezvous completed"); - CPU_ZERO(&vm->rendezvous_req_cpus); - vm->rendezvous_func = NULL; - wakeup(&vm->rendezvous_func); + if (vm_rendezvous(vcpu)) break; - } + VMM_CTR0(vcpu, "Wait for rendezvous completion"); mtx_sleep(&vm->rendezvous_func, &vm->rendezvous_mtx, 0, "vmrndv", hz); if (td_ast_pending(td, TDA_SUSPEND)) { + int error; + mtx_unlock(&vm->rendezvous_mtx); error = thread_check_susp(td, true); if (error != 0) @@ -1249,7 +1365,7 @@ vm_handle_paging(struct vcpu *vcpu, bool *retu) ("vm_handle_paging: invalid fault_type %d", ftype)); if (ftype == VM_PROT_READ || ftype == VM_PROT_WRITE) { - rv = pmap_emulate_accessed_dirty(vmspace_pmap(vm->vmspace), + rv = pmap_emulate_accessed_dirty(vmspace_pmap(vm_vmspace(vm)), vme->u.paging.gpa, ftype); if (rv == 0) { VMM_CTR2(vcpu, "%s bit emulation for gpa %#lx", @@ -1259,7 +1375,7 @@ vm_handle_paging(struct vcpu *vcpu, bool *retu) } } - map = &vm->vmspace->vm_map; + map = &vm_vmspace(vm)->vm_map; rv = vm_fault(map, vme->u.paging.gpa, ftype, VM_FAULT_NORMAL, NULL); VMM_CTR3(vcpu, "vm_handle_paging rv = %d, gpa = %#lx, " @@ -1560,7 +1676,7 @@ vm_run(struct vcpu *vcpu) if (CPU_ISSET(vcpuid, &vm->suspended_cpus)) return (EINVAL); - pmap = vmspace_pmap(vm->vmspace); + pmap = vmspace_pmap(vm_vmspace(vm)); vme = &vcpu->exitinfo; evinfo.rptr = &vm->rendezvous_req_cpus; evinfo.sptr = &vm->suspend; @@ -2302,12 +2418,6 @@ vcpu_notify_event(struct vcpu *vcpu, bool lapic_intr) vcpu_unlock(vcpu); } -struct vmspace * -vm_vmspace(struct vm *vm) -{ - return (vm->vmspace); -} - struct vm_mem * vm_mem(struct vm *vm) { @@ -2519,7 +2629,7 @@ vm_get_rescnt(struct vcpu *vcpu, struct vmm_stat_type *stat) if 
(vcpu->vcpuid == 0) { vmm_stat_set(vcpu, VMM_MEM_RESIDENT, PAGE_SIZE * - vmspace_resident_count(vcpu->vm->vmspace)); + vmspace_resident_count(vm_vmspace(vcpu->vm))); } } @@ -2529,7 +2639,7 @@ vm_get_wiredcnt(struct vcpu *vcpu, struct vmm_stat_type *stat) if (vcpu->vcpuid == 0) { vmm_stat_set(vcpu, VMM_MEM_WIRED, PAGE_SIZE * - pmap_wired_count(vmspace_pmap(vcpu->vm->vmspace))); + pmap_wired_count(vmspace_pmap(vm_vmspace(vcpu->vm)))); } } diff --git a/sys/amd64/vmm/vmm_dev_machdep.c b/sys/amd64/vmm/vmm_dev_machdep.c index d8d2b460404c..dfebc9dcadbf 100644 --- a/sys/amd64/vmm/vmm_dev_machdep.c +++ b/sys/amd64/vmm/vmm_dev_machdep.c @@ -48,6 +48,7 @@ #include <x86/apicreg.h> #include <dev/vmm/vmm_dev.h> +#include <dev/vmm/vmm_mem.h> #include <dev/vmm/vmm_stat.h> #include "vmm_lapic.h" diff --git a/sys/arm/allwinner/aw_sid.c b/sys/arm/allwinner/aw_sid.c index ba5faca33c5e..932c2f189e51 100644 --- a/sys/arm/allwinner/aw_sid.c +++ b/sys/arm/allwinner/aw_sid.c @@ -297,7 +297,7 @@ aw_sid_attach(device_t dev) /* Register ourself so device can resolve who we are */ OF_device_register_xref(OF_xref_from_node(node), dev); - for (i = 0; i < sc->sid_conf->nfuses ;i++) {\ + for (i = 0; i < sc->sid_conf->nfuses; i++) { SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, sc->sid_conf->efuses[i].name, diff --git a/sys/arm/arm/pmap-v6.c b/sys/arm/arm/pmap-v6.c index 78883296c5b7..6a0ece1e4d98 100644 --- a/sys/arm/arm/pmap-v6.c +++ b/sys/arm/arm/pmap-v6.c @@ -1246,7 +1246,7 @@ pmap_bootstrap(vm_offset_t firstaddr) } static void -pmap_init_reserved_pages(void) +pmap_init_reserved_pages(void *dummy __unused) { struct pcpu *pc; vm_offset_t pages; diff --git a/sys/arm/arm/unwind.c b/sys/arm/arm/unwind.c index 7ad91a3e01a5..0d77074fae34 100644 --- a/sys/arm/arm/unwind.c +++ b/sys/arm/arm/unwind.c @@ -278,7 +278,7 @@ unwind_module_unloaded(struct linker_file *lf) * the unwind tables might be stripped, so instead we have to use the * _exidx_start/end symbols created by ldscript.arm. 
*/ -static int +static void module_info_init(void *arg __unused) { struct linker_file thekernel; @@ -291,8 +291,6 @@ module_info_init(void *arg __unused) thekernel.exidx_addr = CADDR(&_exidx_start); thekernel.exidx_size = UADDR(&_exidx_end) - UADDR(&_exidx_start); populate_module_info(create_module_info(), &thekernel); - - return (0); } SYSINIT(unwind_init, SI_SUB_KMEM, SI_ORDER_ANY, module_info_init, NULL); diff --git a/sys/arm64/arm64/cpu_errata.c b/sys/arm64/arm64/cpu_errata.c index 989924bc0567..b876703a2a15 100644 --- a/sys/arm64/arm64/cpu_errata.c +++ b/sys/arm64/arm64/cpu_errata.c @@ -52,56 +52,11 @@ struct cpu_quirks { u_int flags; }; -static enum { - SSBD_FORCE_ON, - SSBD_FORCE_OFF, - SSBD_KERNEL, -} ssbd_method = SSBD_KERNEL; - -static cpu_quirk_install install_psci_bp_hardening; -static cpu_quirk_install install_ssbd_workaround; static cpu_quirk_install install_thunderx_bcast_tlbi_workaround; static struct cpu_quirks cpu_quirks[] = { { .midr_mask = CPU_IMPL_MASK | CPU_PART_MASK, - .midr_value = CPU_ID_RAW(CPU_IMPL_ARM, CPU_PART_CORTEX_A57,0,0), - .quirk_install = install_psci_bp_hardening, - .flags = CPU_QUIRK_POST_DEVICE, - }, - { - .midr_mask = CPU_IMPL_MASK | CPU_PART_MASK, - .midr_value = CPU_ID_RAW(CPU_IMPL_ARM, CPU_PART_CORTEX_A72,0,0), - .quirk_install = install_psci_bp_hardening, - .flags = CPU_QUIRK_POST_DEVICE, - }, - { - .midr_mask = CPU_IMPL_MASK | CPU_PART_MASK, - .midr_value = CPU_ID_RAW(CPU_IMPL_ARM, CPU_PART_CORTEX_A73,0,0), - .quirk_install = install_psci_bp_hardening, - .flags = CPU_QUIRK_POST_DEVICE, - }, - { - .midr_mask = CPU_IMPL_MASK | CPU_PART_MASK, - .midr_value = CPU_ID_RAW(CPU_IMPL_ARM, CPU_PART_CORTEX_A75,0,0), - .quirk_install = install_psci_bp_hardening, - .flags = CPU_QUIRK_POST_DEVICE, - }, - { - .midr_mask = CPU_IMPL_MASK | CPU_PART_MASK, - .midr_value = - CPU_ID_RAW(CPU_IMPL_CAVIUM, CPU_PART_THUNDERX2, 0,0), - .quirk_install = install_psci_bp_hardening, - .flags = CPU_QUIRK_POST_DEVICE, - }, - { - .midr_mask = 0, - .midr_value = 0, - .quirk_install = install_ssbd_workaround, - .flags = CPU_QUIRK_POST_DEVICE, - }, - { - .midr_mask = CPU_IMPL_MASK | CPU_PART_MASK, .midr_value = CPU_ID_RAW(CPU_IMPL_CAVIUM, CPU_PART_THUNDERX, 0, 0), .quirk_install = install_thunderx_bcast_tlbi_workaround, @@ -114,57 +69,6 @@ static struct cpu_quirks cpu_quirks[] = { }, }; -static void -install_psci_bp_hardening(void) -{ - /* SMCCC depends on PSCI. If PSCI is missing so is SMCCC */ - if (!psci_present) - return; - - if (smccc_arch_features(SMCCC_ARCH_WORKAROUND_1) != SMCCC_RET_SUCCESS) - return; - - PCPU_SET(bp_harden, smccc_arch_workaround_1); -} - -static void -install_ssbd_workaround(void) -{ - char *env; - - if (PCPU_GET(cpuid) == 0) { - env = kern_getenv("kern.cfg.ssbd"); - if (env != NULL) { - if (strcmp(env, "force-on") == 0) { - ssbd_method = SSBD_FORCE_ON; - } else if (strcmp(env, "force-off") == 0) { - ssbd_method = SSBD_FORCE_OFF; - } - } - } - - /* SMCCC depends on PSCI. If PSCI is missing so is SMCCC */ - if (!psci_present) - return; - - /* Enable the workaround on this CPU if it's enabled in the firmware */ - if (smccc_arch_features(SMCCC_ARCH_WORKAROUND_2) != SMCCC_RET_SUCCESS) - return; - - switch(ssbd_method) { - case SSBD_FORCE_ON: - smccc_arch_workaround_2(1); - break; - case SSBD_FORCE_OFF: - smccc_arch_workaround_2(0); - break; - case SSBD_KERNEL: - default: - PCPU_SET(ssbd, smccc_arch_workaround_2); - break; - } -} - /* * Workaround Cavium erratum 27456. 
* diff --git a/sys/arm64/arm64/elf_machdep.c b/sys/arm64/arm64/elf_machdep.c index 13af5c5065d6..207b37180a26 100644 --- a/sys/arm64/arm64/elf_machdep.c +++ b/sys/arm64/arm64/elf_machdep.c @@ -121,7 +121,7 @@ static struct sysentvec elf64_freebsd_sysvec = { }; INIT_SYSENTVEC(elf64_sysvec, &elf64_freebsd_sysvec); -static Elf64_Brandinfo freebsd_brand_info = { +static const Elf64_Brandinfo freebsd_brand_info = { .brand = ELFOSABI_FREEBSD, .machine = EM_AARCH64, .compat_3_brand = "FreeBSD", @@ -131,8 +131,7 @@ static Elf64_Brandinfo freebsd_brand_info = { .brand_note = &elf64_freebsd_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; - -SYSINIT(elf64, SI_SUB_EXEC, SI_ORDER_FIRST, +C_SYSINIT(elf64, SI_SUB_EXEC, SI_ORDER_FIRST, (sysinit_cfunc_t)elf64_insert_brand_entry, &freebsd_brand_info); static bool @@ -336,7 +335,7 @@ elf_cpu_parse_dynamic(caddr_t loadbase __unused, Elf_Dyn *dynamic __unused) return (0); } -static Elf_Note gnu_property_note = { +static const Elf_Note gnu_property_note = { .n_namesz = sizeof(GNU_ABI_VENDOR), .n_descsz = 16, .n_type = NT_GNU_PROPERTY_TYPE_0, diff --git a/sys/arm64/arm64/spec_workaround.c b/sys/arm64/arm64/spec_workaround.c new file mode 100644 index 000000000000..7f4f86cdb48c --- /dev/null +++ b/sys/arm64/arm64/spec_workaround.c @@ -0,0 +1,166 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2025 Arm Ltd + * Copyright (c) 2018 Andrew Turner + * + * This software was developed by SRI International and the University of + * Cambridge Computer Laboratory under DARPA/AFRL contract FA8750-10-C-0237 + * ("CTSRD"), as part of the DARPA CRASH research programme. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include <sys/param.h> +#include <sys/kernel.h> +#include <sys/pcpu.h> +#include <sys/systm.h> + +#include <machine/cpu.h> +#include <machine/cpu_feat.h> + +#include <dev/psci/psci.h> +#include <dev/psci/smccc.h> + +static enum { + SSBD_FORCE_ON, + SSBD_FORCE_OFF, + SSBD_KERNEL, +} ssbd_method = SSBD_KERNEL; + +struct psci_bp_hardening_impl { + u_int midr_mask; + u_int midr_value; +}; + +static struct psci_bp_hardening_impl psci_bp_hardening_impl[] = { + { + .midr_mask = CPU_IMPL_MASK | CPU_PART_MASK, + .midr_value = CPU_ID_RAW(CPU_IMPL_ARM, CPU_PART_CORTEX_A57,0,0), + }, + { + .midr_mask = CPU_IMPL_MASK | CPU_PART_MASK, + .midr_value = CPU_ID_RAW(CPU_IMPL_ARM, CPU_PART_CORTEX_A72,0,0), + }, + { + .midr_mask = CPU_IMPL_MASK | CPU_PART_MASK, + .midr_value = CPU_ID_RAW(CPU_IMPL_ARM, CPU_PART_CORTEX_A73,0,0), + }, + { + .midr_mask = CPU_IMPL_MASK | CPU_PART_MASK, + .midr_value = CPU_ID_RAW(CPU_IMPL_ARM, CPU_PART_CORTEX_A75,0,0), + }, + { + .midr_mask = CPU_IMPL_MASK | CPU_PART_MASK, + .midr_value = + CPU_ID_RAW(CPU_IMPL_CAVIUM, CPU_PART_THUNDERX2, 0,0), + } +}; + +static cpu_feat_en +psci_bp_hardening_check(const struct cpu_feat *feat __unused, u_int midr) +{ + size_t i; + + for (i = 0; i < nitems(psci_bp_hardening_impl); i++) { + if ((midr & psci_bp_hardening_impl[i].midr_mask) == + psci_bp_hardening_impl[i].midr_value) { + /* SMCCC depends on PSCI. If PSCI is missing so is SMCCC */ + if (!psci_present) + return (FEAT_ALWAYS_DISABLE); + + if (smccc_arch_features(SMCCC_ARCH_WORKAROUND_1) != + SMCCC_RET_SUCCESS) + return (FEAT_ALWAYS_DISABLE); + + return (FEAT_DEFAULT_ENABLE); + } + } + + return (FEAT_ALWAYS_DISABLE); +} + +static bool +psci_bp_hardening_enable(const struct cpu_feat *feat __unused, + cpu_feat_errata errata_status __unused, u_int *errata_list __unused, + u_int errata_count __unused) +{ + PCPU_SET(bp_harden, smccc_arch_workaround_1); + + return (true); +} + +CPU_FEAT(feat_csv2_missing, "Branch Predictor Hardening", + psci_bp_hardening_check, NULL, psci_bp_hardening_enable, NULL, + CPU_FEAT_AFTER_DEV | CPU_FEAT_PER_CPU); + +static cpu_feat_en +ssbd_workaround_check(const struct cpu_feat *feat __unused, u_int midr __unused) +{ + char *env; + + if (PCPU_GET(cpuid) == 0) { + env = kern_getenv("kern.cfg.ssbd"); + if (env != NULL) { + if (strcmp(env, "force-on") == 0) { + ssbd_method = SSBD_FORCE_ON; + } else if (strcmp(env, "force-off") == 0) { + ssbd_method = SSBD_FORCE_OFF; + } + } + } + + /* SMCCC depends on PSCI. 
If PSCI is missing so is SMCCC */ + if (!psci_present) + return (FEAT_ALWAYS_DISABLE); + + /* Enable the workaround on this CPU if it's enabled in the firmware */ + if (smccc_arch_features(SMCCC_ARCH_WORKAROUND_2) != SMCCC_RET_SUCCESS) + return (FEAT_ALWAYS_DISABLE); + + return (FEAT_DEFAULT_ENABLE); +} + +static bool +ssbd_workaround_enable(const struct cpu_feat *feat __unused, + cpu_feat_errata errata_status __unused, u_int *errata_list __unused, + u_int errata_count __unused) +{ + switch(ssbd_method) { + case SSBD_FORCE_ON: + smccc_arch_workaround_2(1); + break; + case SSBD_FORCE_OFF: + smccc_arch_workaround_2(0); + break; + case SSBD_KERNEL: + default: + PCPU_SET(ssbd, smccc_arch_workaround_2); + break; + } + + return (true); +} + +CPU_FEAT(feat_ssbs_missing, "Speculator Store Bypass Disable Workaround", + ssbd_workaround_check, NULL, ssbd_workaround_enable, NULL, + CPU_FEAT_AFTER_DEV | CPU_FEAT_PER_CPU); diff --git a/sys/arm64/coresight/coresight.c b/sys/arm64/coresight/coresight.c index 5928c153f4ae..9b9d3c65ecc9 100644 --- a/sys/arm64/coresight/coresight.c +++ b/sys/arm64/coresight/coresight.c @@ -113,7 +113,7 @@ coresight_get_output_device(struct endpoint *endp, struct endpoint **out_endp) } static void -coresight_init(void) +coresight_init(void *dummy __unused) { mtx_init(&cs_mtx, "ARM Coresight", NULL, MTX_DEF); diff --git a/sys/arm64/include/vmm.h b/sys/arm64/include/vmm.h index e839b5dd92c9..696a69669a2a 100644 --- a/sys/arm64/include/vmm.h +++ b/sys/arm64/include/vmm.h @@ -143,10 +143,41 @@ struct vm_eventinfo { int *iptr; /* reqidle cookie */ }; +#define DECLARE_VMMOPS_FUNC(ret_type, opname, args) \ + ret_type vmmops_##opname args + +DECLARE_VMMOPS_FUNC(int, modinit, (int ipinum)); +DECLARE_VMMOPS_FUNC(int, modcleanup, (void)); +DECLARE_VMMOPS_FUNC(void *, init, (struct vm *vm, struct pmap *pmap)); +DECLARE_VMMOPS_FUNC(int, gla2gpa, (void *vcpui, struct vm_guest_paging *paging, + uint64_t gla, int prot, uint64_t *gpa, int *is_fault)); +DECLARE_VMMOPS_FUNC(int, run, (void *vcpui, register_t pc, struct pmap *pmap, + struct vm_eventinfo *info)); +DECLARE_VMMOPS_FUNC(void, cleanup, (void *vmi)); +DECLARE_VMMOPS_FUNC(void *, vcpu_init, (void *vmi, struct vcpu *vcpu, + int vcpu_id)); +DECLARE_VMMOPS_FUNC(void, vcpu_cleanup, (void *vcpui)); +DECLARE_VMMOPS_FUNC(int, exception, (void *vcpui, uint64_t esr, uint64_t far)); +DECLARE_VMMOPS_FUNC(int, getreg, (void *vcpui, int num, uint64_t *retval)); +DECLARE_VMMOPS_FUNC(int, setreg, (void *vcpui, int num, uint64_t val)); +DECLARE_VMMOPS_FUNC(int, getcap, (void *vcpui, int num, int *retval)); +DECLARE_VMMOPS_FUNC(int, setcap, (void *vcpui, int num, int val)); +DECLARE_VMMOPS_FUNC(struct vmspace *, vmspace_alloc, (vm_offset_t min, + vm_offset_t max)); +DECLARE_VMMOPS_FUNC(void, vmspace_free, (struct vmspace *vmspace)); +#ifdef notyet +#ifdef BHYVE_SNAPSHOT +DECLARE_VMMOPS_FUNC(int, snapshot, (void *vmi, struct vm_snapshot_meta *meta)); +DECLARE_VMMOPS_FUNC(int, vcpu_snapshot, (void *vcpui, + struct vm_snapshot_meta *meta)); +DECLARE_VMMOPS_FUNC(int, restore_tsc, (void *vcpui, uint64_t now)); +#endif +#endif + int vm_create(const char *name, struct vm **retvm); struct vcpu *vm_alloc_vcpu(struct vm *vm, int vcpuid); void vm_disable_vcpu_creation(struct vm *vm); -void vm_slock_vcpus(struct vm *vm); +void vm_lock_vcpus(struct vm *vm); void vm_unlock_vcpus(struct vm *vm); void vm_destroy(struct vm *vm); int vm_reinit(struct vm *vm); @@ -232,7 +263,6 @@ vcpu_should_yield(struct vcpu *vcpu) void *vcpu_stats(struct vcpu *vcpu); void 
vcpu_notify_event(struct vcpu *vcpu); -struct vmspace *vm_vmspace(struct vm *vm); struct vm_mem *vm_mem(struct vm *vm); enum vm_reg_name vm_segment_name(int seg_encoding); diff --git a/sys/arm64/linux/linux_sysvec.c b/sys/arm64/linux/linux_sysvec.c index 084b7a11b01f..ac05820f89bc 100644 --- a/sys/arm64/linux/linux_sysvec.c +++ b/sys/arm64/linux/linux_sysvec.c @@ -584,7 +584,7 @@ linux_vdso_reloc(char *mapping, Elf_Addr offset) } } -static Elf_Brandnote linux64_brandnote = { +static const Elf_Brandnote linux64_brandnote = { .hdr.n_namesz = sizeof(GNU_ABI_VENDOR), .hdr.n_descsz = 16, .hdr.n_type = 1, @@ -593,7 +593,7 @@ static Elf_Brandnote linux64_brandnote = { .trans_osrel = linux_trans_osrel }; -static Elf64_Brandinfo linux_glibc2brand = { +static const Elf64_Brandinfo linux_glibc2brand = { .brand = ELFOSABI_LINUX, .machine = EM_AARCH64, .compat_3_brand = "Linux", @@ -604,7 +604,7 @@ static Elf64_Brandinfo linux_glibc2brand = { .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; -Elf64_Brandinfo *linux_brandlist[] = { +const Elf64_Brandinfo *linux_brandlist[] = { &linux_glibc2brand, NULL }; @@ -612,8 +612,8 @@ Elf64_Brandinfo *linux_brandlist[] = { static int linux64_elf_modevent(module_t mod, int type, void *data) { - Elf64_Brandinfo **brandinfo; - struct linux_ioctl_handler**lihp; + const Elf64_Brandinfo **brandinfo; + struct linux_ioctl_handler **lihp; int error; error = 0; diff --git a/sys/arm64/vmm/arm64.h b/sys/arm64/vmm/arm64.h index f9b74aef7188..f530dab05331 100644 --- a/sys/arm64/vmm/arm64.h +++ b/sys/arm64/vmm/arm64.h @@ -136,37 +136,6 @@ struct hyp { struct hypctx *ctx[]; }; -#define DEFINE_VMMOPS_IFUNC(ret_type, opname, args) \ - ret_type vmmops_##opname args; - -DEFINE_VMMOPS_IFUNC(int, modinit, (int ipinum)) -DEFINE_VMMOPS_IFUNC(int, modcleanup, (void)) -DEFINE_VMMOPS_IFUNC(void *, init, (struct vm *vm, struct pmap *pmap)) -DEFINE_VMMOPS_IFUNC(int, gla2gpa, (void *vcpui, struct vm_guest_paging *paging, - uint64_t gla, int prot, uint64_t *gpa, int *is_fault)) -DEFINE_VMMOPS_IFUNC(int, run, (void *vcpui, register_t pc, struct pmap *pmap, - struct vm_eventinfo *info)) -DEFINE_VMMOPS_IFUNC(void, cleanup, (void *vmi)) -DEFINE_VMMOPS_IFUNC(void *, vcpu_init, (void *vmi, struct vcpu *vcpu, - int vcpu_id)) -DEFINE_VMMOPS_IFUNC(void, vcpu_cleanup, (void *vcpui)) -DEFINE_VMMOPS_IFUNC(int, exception, (void *vcpui, uint64_t esr, uint64_t far)) -DEFINE_VMMOPS_IFUNC(int, getreg, (void *vcpui, int num, uint64_t *retval)) -DEFINE_VMMOPS_IFUNC(int, setreg, (void *vcpui, int num, uint64_t val)) -DEFINE_VMMOPS_IFUNC(int, getcap, (void *vcpui, int num, int *retval)) -DEFINE_VMMOPS_IFUNC(int, setcap, (void *vcpui, int num, int val)) -DEFINE_VMMOPS_IFUNC(struct vmspace *, vmspace_alloc, (vm_offset_t min, - vm_offset_t max)) -DEFINE_VMMOPS_IFUNC(void, vmspace_free, (struct vmspace *vmspace)) -#ifdef notyet -#ifdef BHYVE_SNAPSHOT -DEFINE_VMMOPS_IFUNC(int, snapshot, (void *vmi, struct vm_snapshot_meta *meta)) -DEFINE_VMMOPS_IFUNC(int, vcpu_snapshot, (void *vcpui, - struct vm_snapshot_meta *meta)) -DEFINE_VMMOPS_IFUNC(int, restore_tsc, (void *vcpui, uint64_t now)) -#endif -#endif - uint64_t vmm_call_hyp(uint64_t, ...); #if 0 diff --git a/sys/arm64/vmm/vmm.c b/sys/arm64/vmm/vmm.c index a551a2807183..bf52dc0fe916 100644 --- a/sys/arm64/vmm/vmm.c +++ b/sys/arm64/vmm/vmm.c @@ -88,7 +88,6 @@ struct vcpu { struct vfpstate *guestfpu; /* (a,i) guest fpu state */ }; -#define vcpu_lock_initialized(v) mtx_initialized(&((v)->mtx)) #define vcpu_lock_init(v) mtx_init(&((v)->mtx), "vcpu lock", 0, MTX_SPIN) #define 
vcpu_lock_destroy(v) mtx_destroy(&((v)->mtx)) #define vcpu_lock(v) mtx_lock_spin(&((v)->mtx)) @@ -126,7 +125,6 @@ struct vm { bool dying; /* (o) is dying */ volatile cpuset_t suspended_cpus; /* (i) suspended vcpus */ volatile cpuset_t halted_cpus; /* (x) cpus in a hard halt */ - struct vmspace *vmspace; /* (o) guest's address space */ struct vm_mem mem; /* (i) guest memory */ char name[VM_MAX_NAMELEN]; /* (o) virtual machine name */ struct vcpu **vcpu; /* (i) guest vcpus */ @@ -274,6 +272,7 @@ vcpu_cleanup(struct vcpu *vcpu, bool destroy) vmm_stat_free(vcpu->stats); fpu_save_area_free(vcpu->guestfpu); vcpu_lock_destroy(vcpu); + free(vcpu, M_VMM); } } @@ -407,7 +406,7 @@ vm_init(struct vm *vm, bool create) { int i; - vm->cookie = vmmops_init(vm, vmspace_pmap(vm->vmspace)); + vm->cookie = vmmops_init(vm, vmspace_pmap(vm_vmspace(vm))); MPASS(vm->cookie != NULL); CPU_ZERO(&vm->active_cpus); @@ -470,9 +469,9 @@ vm_alloc_vcpu(struct vm *vm, int vcpuid) } void -vm_slock_vcpus(struct vm *vm) +vm_lock_vcpus(struct vm *vm) { - sx_slock(&vm->vcpus_init_lock); + sx_xlock(&vm->vcpus_init_lock); } void @@ -485,7 +484,7 @@ int vm_create(const char *name, struct vm **retvm) { struct vm *vm; - struct vmspace *vmspace; + int error; /* * If vmm.ko could not be successfully initialized then don't attempt @@ -497,14 +496,13 @@ vm_create(const char *name, struct vm **retvm) if (name == NULL || strlen(name) >= VM_MAX_NAMELEN) return (EINVAL); - vmspace = vmmops_vmspace_alloc(0, 1ul << 39); - if (vmspace == NULL) - return (ENOMEM); - vm = malloc(sizeof(struct vm), M_VMM, M_WAITOK | M_ZERO); + error = vm_mem_init(&vm->mem, 0, 1ul << 39); + if (error != 0) { + free(vm, M_VMM); + return (error); + } strcpy(vm->name, name); - vm->vmspace = vmspace; - vm_mem_init(&vm->mem); sx_init(&vm->vcpus_init_lock, "vm vcpus"); vm->sockets = 1; @@ -558,7 +556,7 @@ vm_cleanup(struct vm *vm, bool destroy) if (destroy) { vm_xlock_memsegs(vm); - pmap = vmspace_pmap(vm->vmspace); + pmap = vmspace_pmap(vm_vmspace(vm)); sched_pin(); PCPU_SET(curvmpmap, NULL); sched_unpin(); @@ -582,11 +580,6 @@ vm_cleanup(struct vm *vm, bool destroy) if (destroy) { vm_mem_destroy(vm); - vmmops_vmspace_free(vm->vmspace); - vm->vmspace = NULL; - - for (i = 0; i < vm->maxcpus; i++) - free(vm->vcpu[i], M_VMM); free(vm->vcpu, M_VMM); sx_destroy(&vm->vcpus_init_lock); } @@ -1090,12 +1083,6 @@ vcpu_notify_event(struct vcpu *vcpu) vcpu_unlock(vcpu); } -struct vmspace * -vm_vmspace(struct vm *vm) -{ - return (vm->vmspace); -} - struct vm_mem * vm_mem(struct vm *vm) { @@ -1416,7 +1403,7 @@ vm_handle_paging(struct vcpu *vcpu, bool *retu) vme = &vcpu->exitinfo; - pmap = vmspace_pmap(vcpu->vm->vmspace); + pmap = vmspace_pmap(vm_vmspace(vcpu->vm)); addr = vme->u.paging.gpa; esr = vme->u.paging.esr; @@ -1433,7 +1420,7 @@ vm_handle_paging(struct vcpu *vcpu, bool *retu) panic("%s: Invalid exception (esr = %lx)", __func__, esr); } - map = &vm->vmspace->vm_map; + map = &vm_vmspace(vm)->vm_map; rv = vm_fault(map, vme->u.paging.gpa, ftype, VM_FAULT_NORMAL, NULL); if (rv != KERN_SUCCESS) return (EFAULT); @@ -1507,7 +1494,7 @@ vm_run(struct vcpu *vcpu) if (CPU_ISSET(vcpuid, &vm->suspended_cpus)) return (EINVAL); - pmap = vmspace_pmap(vm->vmspace); + pmap = vmspace_pmap(vm_vmspace(vm)); vme = &vcpu->exitinfo; evinfo.rptr = NULL; evinfo.sptr = &vm->suspend; diff --git a/sys/cam/scsi/scsi_all.c b/sys/cam/scsi/scsi_all.c index b518f84454ad..fd128e69f1f1 100644 --- a/sys/cam/scsi/scsi_all.c +++ b/sys/cam/scsi/scsi_all.c @@ -112,7 +112,7 @@ static void fetchtableentries(int 
sense_key, int asc, int ascq, const struct asc_table_entry **); #ifdef _KERNEL -static void init_scsi_delay(void); +static void init_scsi_delay(void *); static int sysctl_scsi_delay(SYSCTL_HANDLER_ARGS); static int set_scsi_delay(int delay); #endif @@ -686,7 +686,7 @@ scsi_op_desc(uint16_t opcode, struct scsi_inquiry_data *inq_data) opmask = 1 << pd_type; for (j = 0; j < num_tables; j++) { - for (i = 0;i < num_ops[j] && table[j][i].opcode <= opcode; i++){ + for (i = 0; i < num_ops[j] && table[j][i].opcode <= opcode; i++) { if ((table[j][i].opcode == opcode) && ((table[j][i].opmask & opmask) != 0)) return(table[j][i].desc); @@ -9379,7 +9379,7 @@ scsi_vpd_supported_page(struct cam_periph *periph, uint8_t page_id) } static void -init_scsi_delay(void) +init_scsi_delay(void *dummy __unused) { int delay; diff --git a/sys/cam/scsi/scsi_enc_ses.c b/sys/cam/scsi/scsi_enc_ses.c index 435874a9874a..3a362eaf11a4 100644 --- a/sys/cam/scsi/scsi_enc_ses.c +++ b/sys/cam/scsi/scsi_enc_ses.c @@ -2302,7 +2302,7 @@ ses_print_addl_data_sas_type0(char *sesname, struct sbuf *sbp, sbuf_putc(sbp, '\n'); if (addl->proto_data.sasdev_phys == NULL) return; - for (i = 0;i < addl->proto_hdr.sas->base_hdr.num_phys;i++) { + for (i = 0; i < addl->proto_hdr.sas->base_hdr.num_phys; i++) { phy = &addl->proto_data.sasdev_phys[i]; sbuf_printf(sbp, "%s: phy %d:", sesname, i); if (ses_elm_sas_dev_phy_sata_dev(phy)) @@ -2349,7 +2349,7 @@ ses_print_addl_data_sas_type1(char *sesname, struct sbuf *sbp, sbuf_printf(sbp, "Expander: %d phys", num_phys); if (addl->proto_data.sasexp_phys == NULL) return; - for (i = 0;i < num_phys;i++) { + for (i = 0; i < num_phys; i++) { exp_phy = &addl->proto_data.sasexp_phys[i]; sbuf_printf(sbp, "%s: phy %d: connector %d other %d\n", sesname, i, exp_phy->connector_index, @@ -2360,7 +2360,7 @@ ses_print_addl_data_sas_type1(char *sesname, struct sbuf *sbp, sbuf_printf(sbp, "Port: %d phys", num_phys); if (addl->proto_data.sasport_phys == NULL) return; - for (i = 0;i < num_phys;i++) { + for (i = 0; i < num_phys; i++) { port_phy = &addl->proto_data.sasport_phys[i]; sbuf_printf(sbp, "%s: phy %d: id %d connector %d other %d\n", diff --git a/sys/cddl/compat/opensolaris/kern/opensolaris.c b/sys/cddl/compat/opensolaris/kern/opensolaris.c index 10924977c20d..898b2ea49f96 100644 --- a/sys/cddl/compat/opensolaris/kern/opensolaris.c +++ b/sys/cddl/compat/opensolaris/kern/opensolaris.c @@ -67,7 +67,7 @@ opensolaris_load(void *dummy) SYSINIT(opensolaris_register, SI_SUB_OPENSOLARIS, SI_ORDER_FIRST, opensolaris_load, NULL); static void -opensolaris_unload(void) +opensolaris_unload(void *dummy __unused) { mutex_destroy(&cpu_lock); } diff --git a/sys/compat/ia32/ia32_sysvec.c b/sys/compat/ia32/ia32_sysvec.c index 0ea7d072e911..b9dada4eee7b 100644 --- a/sys/compat/ia32/ia32_sysvec.c +++ b/sys/compat/ia32/ia32_sysvec.c @@ -145,7 +145,7 @@ struct sysentvec ia32_freebsd_sysvec = { }; INIT_SYSENTVEC(elf_ia32_sysvec, &ia32_freebsd_sysvec); -static Elf32_Brandinfo ia32_brand_info = { +static const Elf32_Brandinfo ia32_brand_info = { .brand = ELFOSABI_FREEBSD, .machine = EM_386, .compat_3_brand = "FreeBSD", @@ -155,12 +155,10 @@ static Elf32_Brandinfo ia32_brand_info = { .brand_note = &elf32_freebsd_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; +C_SYSINIT(ia32, SI_SUB_EXEC, SI_ORDER_MIDDLE, + (sysinit_cfunc_t)elf32_insert_brand_entry, &ia32_brand_info); -SYSINIT(ia32, SI_SUB_EXEC, SI_ORDER_MIDDLE, - (sysinit_cfunc_t) elf32_insert_brand_entry, - &ia32_brand_info); - -static Elf32_Brandinfo ia32_brand_oinfo = { +static 
const Elf32_Brandinfo ia32_brand_oinfo = { .brand = ELFOSABI_FREEBSD, .machine = EM_386, .compat_3_brand = "FreeBSD", @@ -170,12 +168,10 @@ static Elf32_Brandinfo ia32_brand_oinfo = { .brand_note = &elf32_freebsd_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; +C_SYSINIT(oia32, SI_SUB_EXEC, SI_ORDER_ANY, + (sysinit_cfunc_t)elf32_insert_brand_entry, &ia32_brand_oinfo); -SYSINIT(oia32, SI_SUB_EXEC, SI_ORDER_ANY, - (sysinit_cfunc_t) elf32_insert_brand_entry, - &ia32_brand_oinfo); - -static Elf32_Brandinfo kia32_brand_info = { +static const Elf32_Brandinfo kia32_brand_info = { .brand = ELFOSABI_FREEBSD, .machine = EM_386, .compat_3_brand = "FreeBSD", @@ -184,10 +180,8 @@ static Elf32_Brandinfo kia32_brand_info = { .brand_note = &elf32_kfreebsd_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE_MANDATORY }; - -SYSINIT(kia32, SI_SUB_EXEC, SI_ORDER_ANY, - (sysinit_cfunc_t) elf32_insert_brand_entry, - &kia32_brand_info); +C_SYSINIT(kia32, SI_SUB_EXEC, SI_ORDER_ANY, + (sysinit_cfunc_t)elf32_insert_brand_entry, &kia32_brand_info); void elf32_dump_thread(struct thread *td, void *dst, size_t *off) diff --git a/sys/compat/linprocfs/linprocfs.c b/sys/compat/linprocfs/linprocfs.c index 95b212be1306..7ac48786c77b 100644 --- a/sys/compat/linprocfs/linprocfs.c +++ b/sys/compat/linprocfs/linprocfs.c @@ -2216,6 +2216,67 @@ linprocfs_dosysvipc_shm(PFS_FILL_ARGS) return (0); } +static int +linprocfs_doinotify(const char *sysctl, PFS_FILL_ARGS) +{ + size_t size; + int error, val; + + if (uio->uio_rw == UIO_READ) { + size = sizeof(val); + error = kernel_sysctlbyname(curthread, + __DECONST(void *, sysctl), &val, &size, NULL, 0, 0, 0); + if (error == 0) + sbuf_printf(sb, "%d\n", val); + } else { + char *endp, *newval; + long vall; + + sbuf_trim(sb); + sbuf_finish(sb); + newval = sbuf_data(sb); + vall = strtol(newval, &endp, 10); + if (vall < 0 || vall > INT_MAX || endp == newval || + *endp != '\0') + return (EINVAL); + val = (int)vall; + error = kernel_sysctlbyname(curthread, + __DECONST(void *, sysctl), NULL, NULL, + &val, sizeof(val), 0, 0); + } + return (error); +} + +/* + * Filler function for proc/sys/fs/inotify/max_queued_events + */ +static int +linprocfs_doinotify_max_queued_events(PFS_FILL_ARGS) +{ + return (linprocfs_doinotify("vfs.inotify.max_queued_events", + PFS_FILL_ARGNAMES)); +} + +/* + * Filler function for proc/sys/fs/inotify/max_user_instances + */ +static int +linprocfs_doinotify_max_user_instances(PFS_FILL_ARGS) +{ + return (linprocfs_doinotify("vfs.inotify.max_user_instances", + PFS_FILL_ARGNAMES)); +} + +/* + * Filler function for proc/sys/fs/inotify/max_user_watches + */ +static int +linprocfs_doinotify_max_user_watches(PFS_FILL_ARGS) +{ + return (linprocfs_doinotify("vfs.inotify.max_user_watches", + PFS_FILL_ARGNAMES)); +} + /* * Filler function for proc/sys/fs/mqueue/msg_default */ @@ -2313,9 +2374,7 @@ linprocfs_domqueue_queues_max(PFS_FILL_ARGS) static int linprocfs_init(PFS_INIT_ARGS) { - struct pfs_node *root; - struct pfs_node *dir; - struct pfs_node *sys; + struct pfs_node *dir, *fs, *root, *sys; root = pi->pi_root; @@ -2466,10 +2525,18 @@ linprocfs_init(PFS_INIT_ARGS) NULL, PFS_RD); /* /proc/sys/fs/... 
*/ - pfs_create_dir(sys, &dir, "fs", NULL, NULL, NULL, 0); + pfs_create_dir(sys, &fs, "fs", NULL, NULL, NULL, 0); + + pfs_create_dir(fs, &dir, "inotify", NULL, NULL, NULL, 0); + pfs_create_file(dir, NULL, "max_queued_events", + &linprocfs_doinotify_max_queued_events, NULL, NULL, NULL, PFS_RDWR); + pfs_create_file(dir, NULL, "max_user_instances", + &linprocfs_doinotify_max_user_instances, NULL, NULL, NULL, PFS_RDWR); + pfs_create_file(dir, NULL, "max_user_watches", + &linprocfs_doinotify_max_user_watches, NULL, NULL, NULL, PFS_RDWR); /* /proc/sys/fs/mqueue/... */ - pfs_create_dir(dir, &dir, "mqueue", NULL, NULL, NULL, 0); + pfs_create_dir(fs, &dir, "mqueue", NULL, NULL, NULL, 0); pfs_create_file(dir, NULL, "msg_default", &linprocfs_domqueue_msg_default, NULL, NULL, NULL, PFS_RD); pfs_create_file(dir, NULL, "msgsize_default", &linprocfs_domqueue_msgsize_default, NULL, NULL, NULL, PFS_RD); diff --git a/sys/compat/linux/linux.c b/sys/compat/linux/linux.c index 61b207070963..a40f110634f7 100644 --- a/sys/compat/linux/linux.c +++ b/sys/compat/linux/linux.c @@ -578,8 +578,13 @@ bsd_to_linux_sockaddr(const struct sockaddr *sa, struct l_sockaddr **lsa, return (0); } +/* + * If sap is NULL, then osa points at an already copied-in Linux sockaddr + * that should be edited in place. Otherwise memory is allocated, the + * sockaddr is copied in, and the result is returned in *sap. + */ int -linux_to_bsd_sockaddr(const struct l_sockaddr *osa, struct sockaddr **sap, +linux_to_bsd_sockaddr(struct l_sockaddr *osa, struct sockaddr **sap, socklen_t *len) { struct sockaddr *sa; @@ -609,10 +614,12 @@ linux_to_bsd_sockaddr(const struct l_sockaddr *osa, struct sockaddr **sap, } #endif - kosa = malloc(salen, M_SONAME, M_WAITOK); - - if ((error = copyin(osa, kosa, *len))) - goto out; + if (sap != NULL) { + kosa = malloc(salen, M_SONAME, M_WAITOK); + if ((error = copyin(osa, kosa, *len))) + goto out; + } else + kosa = osa; bdom = linux_to_bsd_domain(kosa->sa_family); if (bdom == AF_UNKNOWN) { @@ -686,12 +693,15 @@ linux_to_bsd_sockaddr(const struct l_sockaddr *osa, struct sockaddr **sap, sa->sa_family = bdom; sa->sa_len = salen; - *sap = sa; - *len = salen; + if (sap != NULL) { + *sap = sa; + *len = salen; + } return (0); out: - free(kosa, M_SONAME); + if (sap != NULL) + free(kosa, M_SONAME); return (error); } diff --git a/sys/compat/linux/linux_common.h b/sys/compat/linux/linux_common.h index 97f5a259f300..814c183b338a 100644 --- a/sys/compat/linux/linux_common.h +++ b/sys/compat/linux/linux_common.h @@ -43,7 +43,7 @@ sa_family_t bsd_to_linux_domain(sa_family_t domain); #define AF_UNKNOWN UINT8_MAX int bsd_to_linux_sockaddr(const struct sockaddr *sa, struct l_sockaddr **lsa, socklen_t len); -int linux_to_bsd_sockaddr(const struct l_sockaddr *lsa, +int linux_to_bsd_sockaddr(struct l_sockaddr *lsa, struct sockaddr **sap, socklen_t *len); void linux_to_bsd_poll_events(struct thread *td, int fd, short lev, short *bev); diff --git a/sys/compat/linux/linux_futex.c b/sys/compat/linux/linux_futex.c index 37d0142bae8b..0586eb55a8f3 100644 --- a/sys/compat/linux/linux_futex.c +++ b/sys/compat/linux/linux_futex.c @@ -251,7 +251,7 @@ linux_futex(struct thread *td, struct linux_futex_args *args) * set LINUX_BI_FUTEX_REQUEUE bit of Brandinfo flags.
*/ p = td->td_proc; - Elf_Brandinfo *bi = p->p_elf_brandinfo; + const Elf_Brandinfo *bi = p->p_elf_brandinfo; if (bi == NULL || ((bi->flags & LINUX_BI_FUTEX_REQUEUE)) == 0) return (EINVAL); args->val3_compare = false; diff --git a/sys/compat/linux/linux_socket.c b/sys/compat/linux/linux_socket.c index 0e07b0a60ced..b1a483ce611c 100644 --- a/sys/compat/linux/linux_socket.c +++ b/sys/compat/linux/linux_socket.c @@ -2146,7 +2146,8 @@ linux_setsockopt(struct thread *td, struct linux_setsockopt_args *args) return (ENOPROTOOPT); } - if (name == IPV6_NEXTHOP) { + switch (name) { + case IPV6_NEXTHOP: { len = args->optlen; error = linux_to_bsd_sockaddr(PTRIN(args->optval), &sa, &len); if (error != 0) @@ -2155,7 +2156,34 @@ linux_setsockopt(struct thread *td, struct linux_setsockopt_args *args) error = kern_setsockopt(td, args->s, level, name, sa, UIO_SYSSPACE, len); free(sa, M_SONAME); - } else { + break; + } + case MCAST_JOIN_GROUP: + case MCAST_LEAVE_GROUP: + case MCAST_JOIN_SOURCE_GROUP: + case MCAST_LEAVE_SOURCE_GROUP: { + struct group_source_req req; + size_t size; + + size = (name == MCAST_JOIN_SOURCE_GROUP || + name == MCAST_LEAVE_SOURCE_GROUP) ? + sizeof(struct group_source_req) : sizeof(struct group_req); + + if ((error = copyin(PTRIN(args->optval), &req, size))) + return (error); + len = sizeof(struct sockaddr_storage); + if ((error = linux_to_bsd_sockaddr( + (struct l_sockaddr *)&req.gsr_group, NULL, &len))) + return (error); + if (size == sizeof(struct group_source_req) && + (error = linux_to_bsd_sockaddr( + (struct l_sockaddr *)&req.gsr_source, NULL, &len))) + return (error); + error = kern_setsockopt(td, args->s, level, name, &req, + UIO_SYSSPACE, size); + break; + } + default: error = kern_setsockopt(td, args->s, level, name, PTRIN(args->optval), UIO_USERSPACE, args->optlen); } diff --git a/sys/conf/NOTES b/sys/conf/NOTES index ea9b2667607e..9944375c3615 100644 --- a/sys/conf/NOTES +++ b/sys/conf/NOTES @@ -679,6 +679,7 @@ options TCP_OFFLOAD # TCP offload support. options TCP_RFC7413 # TCP Fast Open options TCPHPTS +#options TCP_HPTS_KTEST # Add KTEST support for HPTS # In order to enable IPSEC you MUST also add device crypto to # your kernel configuration @@ -2800,7 +2801,7 @@ options MAXFILES=999 # Random number generator # Alternative algorithm. -#options RANDOM_FENESTRASX +options RANDOM_FENESTRASX # Allow the CSPRNG algorithm to be loaded as a module. #options RANDOM_LOADABLE # Select this to allow high-rate but potentially expensive diff --git a/sys/conf/dtb.build.mk b/sys/conf/dtb.build.mk index 327d69106244..7eb0db5e8b80 100644 --- a/sys/conf/dtb.build.mk +++ b/sys/conf/dtb.build.mk @@ -1,7 +1,3 @@ - -.include <bsd.init.mk> -# Grab all the options for a kernel build. For backwards compat, we need to -# do this after bsd.own.mk. .include "kern.opts.mk" DTC?= dtc diff --git a/sys/conf/files.arm64 b/sys/conf/files.arm64 index 856ea3af1372..2f412fa3cb1b 100644 --- a/sys/conf/files.arm64 +++ b/sys/conf/files.arm64 @@ -73,6 +73,7 @@ arm64/arm64/pmap.c standard arm64/arm64/ptrace_machdep.c standard arm64/arm64/sdt_machdep.c optional kdtrace_hooks arm64/arm64/sigtramp.S standard +arm64/arm64/spec_workaround.c standard arm64/arm64/stack_machdep.c optional ddb | stack arm64/arm64/strcmp.S standard arm64/arm64/strncmp.S standard diff --git a/sys/conf/kern.opts.mk b/sys/conf/kern.opts.mk index 045e55d1b19a..cef4dd11ba58 100644 --- a/sys/conf/kern.opts.mk +++ b/sys/conf/kern.opts.mk @@ -4,6 +4,7 @@ # parts to omit (eg CDDL or SOURCELESS_HOST). 
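A note on the two calling modes of the reworked linux_to_bsd_sockaddr() above: with a non-NULL sap the routine allocates a buffer, copies the sockaddr in from user space, and hands it back; with a NULL sap it converts an already copied-in sockaddr in place, which is what the new MCAST_* cases in linux_setsockopt() rely on. A minimal sketch of both modes, assuming a kernel context with the sys/compat/linux headers; the example_* helpers are hypothetical:

static int
example_alloc_mode(struct l_sockaddr *lsa_user, socklen_t len)
{
	struct sockaddr *sa;
	int error;

	/* sap != NULL: malloc + copyin from user space, result in sa. */
	error = linux_to_bsd_sockaddr(lsa_user, &sa, &len);
	if (error != 0)
		return (error);
	/* ... use the converted sockaddr ... */
	free(sa, M_SONAME);
	return (0);
}

static int
example_inplace_mode(struct group_req *req)
{
	socklen_t len = sizeof(struct sockaddr_storage);

	/* sap == NULL: req->gr_group was already copied in; edit in place. */
	return (linux_to_bsd_sockaddr((struct l_sockaddr *)&req->gr_group,
	    NULL, &len));
}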
Some of these will cause # config.mk to define symbols in various opt_*.h files. + # # Define MK_* variables (which are either "yes" or "no") for users # to set via WITH_*/WITHOUT_* in /etc/src.conf and override in the @@ -13,17 +14,12 @@ # that haven't been converted over. # -# Note: bsd.own.mk must be included before the rest of kern.opts.mk to make -# building on 10.x and earlier work. This should be removed when that's no -# longer supported since it confounds the defaults (since it uses the host's -# notion of defaults rather than what's default in current when building -# within sys/modules). -.include <bsd.own.mk> - # These options are used by the kernel build process (kern.mk and kmod.mk) # They have to be listed here so we can build modules outside of the # src tree. +.include <bsd.init.mk> + KLDXREF_CMD?= kldxref __DEFAULT_YES_OPTIONS = \ diff --git a/sys/conf/options b/sys/conf/options index b48ad1cf42cf..0b795a8d28fb 100644 --- a/sys/conf/options +++ b/sys/conf/options @@ -231,6 +231,7 @@ SYSVSEM opt_sysvipc.h SYSVSHM opt_sysvipc.h SW_WATCHDOG opt_watchdog.h TCPHPTS +TCP_HPTS_KTEST opt_inet.h TCP_REQUEST_TRK opt_global.h TCP_ACCOUNTING opt_global.h TCP_BBR opt_inet.h diff --git a/sys/conf/std.debug b/sys/conf/std.debug index f5ed5582c78d..0149779b3e5c 100644 --- a/sys/conf/std.debug +++ b/sys/conf/std.debug @@ -16,3 +16,4 @@ options MALLOC_DEBUG_MAXZONES=8 # Separate malloc(9) zones options VERBOSE_SYSINIT=0 # Support debug.verbose_sysinit, off by default options ALT_BREAK_TO_DEBUGGER # Enter debugger on keyboard escape sequence options KDTRACE_MIB_SDT # Add SDT probes to network counters +options TCP_HPTS_KTEST # Add KTEST support for HPTS diff --git a/sys/conf/std.nodebug b/sys/conf/std.nodebug index 4035e28d2a62..79676a1d618f 100644 --- a/sys/conf/std.nodebug +++ b/sys/conf/std.nodebug @@ -16,6 +16,7 @@ nooptions KCOV nooptions MALLOC_DEBUG_MAXZONES nooptions QUEUE_MACRO_DEBUG_TRASH nooptions KDTRACE_MIB_SDT +nooptions TCP_HPTS_KTEST # Net80211 debugging nooptions IEEE80211_DEBUG diff --git a/sys/contrib/libnv/bsd_nvpair.c b/sys/contrib/libnv/bsd_nvpair.c index c73bc2189121..b884dd260b84 100644 --- a/sys/contrib/libnv/bsd_nvpair.c +++ b/sys/contrib/libnv/bsd_nvpair.c @@ -985,13 +985,13 @@ nvpair_unpack_string_array(bool isbe __unused, nvpair_t *nvp, size = nvp->nvp_datasize; tmp = (const char *)ptr; for (ii = 0; ii < nvp->nvp_nitems; ii++) { - len = strnlen(tmp, size - 1) + 1; - size -= len; - if (tmp[len - 1] != '\0') { + if (size <= 0) { ERRNO_SET(EINVAL); return (NULL); } - if (size < 0) { + len = strnlen(tmp, size - 1) + 1; + size -= len; + if (tmp[len - 1] != '\0') { ERRNO_SET(EINVAL); return (NULL); } diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/sysctl_os.c b/sys/contrib/openzfs/module/os/freebsd/zfs/sysctl_os.c index 393bfaa65ff5..ebc2c0eeb6d2 100644 --- a/sys/contrib/openzfs/module/os/freebsd/zfs/sysctl_os.c +++ b/sys/contrib/openzfs/module/os/freebsd/zfs/sysctl_os.c @@ -163,6 +163,13 @@ param_set_arc_int(SYSCTL_HANDLER_ARGS) return (0); } +static void +warn_deprecated_sysctl(const char *old, const char *new) +{ + printf("WARNING: sysctl vfs.zfs.%s is deprecated. 
Use vfs.zfs.%s instead.\n", + old, new); +} + int param_set_arc_max(SYSCTL_HANDLER_ARGS) { @@ -185,9 +192,17 @@ param_set_arc_max(SYSCTL_HANDLER_ARGS) if (val != 0) zfs_arc_max = arc_c_max; + if (arg2 != 0) + warn_deprecated_sysctl("arc_max", "arc.max"); + return (0); } +SYSCTL_PROC(_vfs_zfs, OID_AUTO, arc_max, + CTLTYPE_ULONG | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, + NULL, 1, param_set_arc_max, "LU", + "Maximum ARC size in bytes (LEGACY)"); + int param_set_arc_min(SYSCTL_HANDLER_ARGS) { @@ -209,9 +224,17 @@ param_set_arc_min(SYSCTL_HANDLER_ARGS) if (val != 0) zfs_arc_min = arc_c_min; + if (arg2 != 0) + warn_deprecated_sysctl("arc_min", "arc.min"); + return (0); } +SYSCTL_PROC(_vfs_zfs, OID_AUTO, arc_min, + CTLTYPE_ULONG | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, + NULL, 1, param_set_arc_min, "LU", + "Minimum ARC size in bytes (LEGACY)"); + extern uint_t zfs_arc_free_target; int @@ -232,9 +255,22 @@ param_set_arc_free_target(SYSCTL_HANDLER_ARGS) zfs_arc_free_target = val; + if (arg2 != 0) + warn_deprecated_sysctl("arc_free_target", "arc.free_target"); + return (0); } +/* + * NOTE: This sysctl is CTLFLAG_RW not CTLFLAG_RWTUN due to its dependency on + * pagedaemon initialization. + */ +SYSCTL_PROC(_vfs_zfs, OID_AUTO, arc_free_target, + CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE, + NULL, 1, param_set_arc_free_target, "IU", + "Desired number of free pages below which ARC triggers reclaim" + " (LEGACY)"); + int param_set_arc_no_grow_shift(SYSCTL_HANDLER_ARGS) { @@ -250,9 +286,193 @@ param_set_arc_no_grow_shift(SYSCTL_HANDLER_ARGS) arc_no_grow_shift = val; + if (arg2 != 0) + warn_deprecated_sysctl("arc_no_grow_shift", "arc.no_grow_shift"); + return (0); } +SYSCTL_PROC(_vfs_zfs, OID_AUTO, arc_no_grow_shift, + CTLTYPE_INT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, + NULL, 1, param_set_arc_no_grow_shift, "I", + "log2(fraction of ARC which must be free to allow growing) (LEGACY)"); + +extern uint64_t l2arc_write_max; + +SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, l2arc_write_max, + CTLFLAG_RWTUN, &l2arc_write_max, 0, + "Max write bytes per interval (LEGACY)"); + +extern uint64_t l2arc_write_boost; + +SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, l2arc_write_boost, + CTLFLAG_RWTUN, &l2arc_write_boost, 0, + "Extra write bytes during device warmup (LEGACY)"); + +extern uint64_t l2arc_headroom; + +SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, l2arc_headroom, + CTLFLAG_RWTUN, &l2arc_headroom, 0, + "Number of max device writes to precache (LEGACY)"); + +extern uint64_t l2arc_headroom_boost; + +SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, l2arc_headroom_boost, + CTLFLAG_RWTUN, &l2arc_headroom_boost, 0, + "Compressed l2arc_headroom multiplier (LEGACY)"); + +extern uint64_t l2arc_feed_secs; + +SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, l2arc_feed_secs, + CTLFLAG_RWTUN, &l2arc_feed_secs, 0, + "Seconds between L2ARC writing (LEGACY)"); + +extern uint64_t l2arc_feed_min_ms; + +SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, l2arc_feed_min_ms, + CTLFLAG_RWTUN, &l2arc_feed_min_ms, 0, + "Min feed interval in milliseconds (LEGACY)"); + +extern int l2arc_noprefetch; + +SYSCTL_INT(_vfs_zfs, OID_AUTO, l2arc_noprefetch, + CTLFLAG_RWTUN, &l2arc_noprefetch, 0, + "Skip caching prefetched buffers (LEGACY)"); + +extern int l2arc_feed_again; + +SYSCTL_INT(_vfs_zfs, OID_AUTO, l2arc_feed_again, + CTLFLAG_RWTUN, &l2arc_feed_again, 0, + "Turbo L2ARC warmup (LEGACY)"); + +extern int l2arc_norw; + +SYSCTL_INT(_vfs_zfs, OID_AUTO, l2arc_norw, + CTLFLAG_RWTUN, &l2arc_norw, 0, + "No reads during writes (LEGACY)"); + +static int +param_get_arc_state_size(SYSCTL_HANDLER_ARGS) +{ + arc_state_t *state = (arc_state_t *)arg1; + int64_t 
val; + + val = zfs_refcount_count(&state->arcs_size[ARC_BUFC_DATA]) + + zfs_refcount_count(&state->arcs_size[ARC_BUFC_METADATA]); + return (sysctl_handle_64(oidp, &val, 0, req)); +} + +extern arc_state_t ARC_anon; + +SYSCTL_PROC(_vfs_zfs, OID_AUTO, anon_size, + CTLTYPE_S64 | CTLFLAG_RD | CTLFLAG_MPSAFE, + &ARC_anon, 0, param_get_arc_state_size, "Q", + "size of anonymous state"); +SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, anon_metadata_esize, CTLFLAG_RD, + &ARC_anon.arcs_esize[ARC_BUFC_METADATA].rc_count, 0, + "size of evictable metadata in anonymous state"); +SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, anon_data_esize, CTLFLAG_RD, + &ARC_anon.arcs_esize[ARC_BUFC_DATA].rc_count, 0, + "size of evictable data in anonymous state"); + +extern arc_state_t ARC_mru; + +SYSCTL_PROC(_vfs_zfs, OID_AUTO, mru_size, + CTLTYPE_S64 | CTLFLAG_RD | CTLFLAG_MPSAFE, + &ARC_mru, 0, param_get_arc_state_size, "Q", + "size of mru state"); +SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mru_metadata_esize, CTLFLAG_RD, + &ARC_mru.arcs_esize[ARC_BUFC_METADATA].rc_count, 0, + "size of evictable metadata in mru state"); +SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mru_data_esize, CTLFLAG_RD, + &ARC_mru.arcs_esize[ARC_BUFC_DATA].rc_count, 0, + "size of evictable data in mru state"); + +extern arc_state_t ARC_mru_ghost; + +SYSCTL_PROC(_vfs_zfs, OID_AUTO, mru_ghost_size, + CTLTYPE_S64 | CTLFLAG_RD | CTLFLAG_MPSAFE, + &ARC_mru_ghost, 0, param_get_arc_state_size, "Q", + "size of mru ghost state"); +SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mru_ghost_metadata_esize, CTLFLAG_RD, + &ARC_mru_ghost.arcs_esize[ARC_BUFC_METADATA].rc_count, 0, + "size of evictable metadata in mru ghost state"); +SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mru_ghost_data_esize, CTLFLAG_RD, + &ARC_mru_ghost.arcs_esize[ARC_BUFC_DATA].rc_count, 0, + "size of evictable data in mru ghost state"); + +extern arc_state_t ARC_mfu; + +SYSCTL_PROC(_vfs_zfs, OID_AUTO, mfu_size, + CTLTYPE_S64 | CTLFLAG_RD | CTLFLAG_MPSAFE, + &ARC_mfu, 0, param_get_arc_state_size, "Q", + "size of mfu state"); +SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mfu_metadata_esize, CTLFLAG_RD, + &ARC_mfu.arcs_esize[ARC_BUFC_METADATA].rc_count, 0, + "size of evictable metadata in mfu state"); +SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mfu_data_esize, CTLFLAG_RD, + &ARC_mfu.arcs_esize[ARC_BUFC_DATA].rc_count, 0, + "size of evictable data in mfu state"); + +extern arc_state_t ARC_mfu_ghost; + +SYSCTL_PROC(_vfs_zfs, OID_AUTO, mfu_ghost_size, + CTLTYPE_S64 | CTLFLAG_RD | CTLFLAG_MPSAFE, + &ARC_mfu_ghost, 0, param_get_arc_state_size, "Q", + "size of mfu ghost state"); +SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mfu_ghost_metadata_esize, CTLFLAG_RD, + &ARC_mfu_ghost.arcs_esize[ARC_BUFC_METADATA].rc_count, 0, + "size of evictable metadata in mfu ghost state"); +SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mfu_ghost_data_esize, CTLFLAG_RD, + &ARC_mfu_ghost.arcs_esize[ARC_BUFC_DATA].rc_count, 0, + "size of evictable data in mfu ghost state"); + +extern arc_state_t ARC_uncached; + +SYSCTL_PROC(_vfs_zfs, OID_AUTO, uncached_size, + CTLTYPE_S64 | CTLFLAG_RD | CTLFLAG_MPSAFE, + &ARC_uncached, 0, param_get_arc_state_size, "Q", + "size of uncached state"); +SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, uncached_metadata_esize, CTLFLAG_RD, + &ARC_uncached.arcs_esize[ARC_BUFC_METADATA].rc_count, 0, + "size of evictable metadata in uncached state"); +SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, uncached_data_esize, CTLFLAG_RD, + &ARC_uncached.arcs_esize[ARC_BUFC_DATA].rc_count, 0, + "size of evictable data in uncached state"); + +extern arc_state_t ARC_l2c_only; + +SYSCTL_PROC(_vfs_zfs, OID_AUTO, l2c_only_size, + CTLTYPE_S64 | 
CTLFLAG_RD | CTLFLAG_MPSAFE, + &ARC_l2c_only, 0, param_get_arc_state_size, "Q", + "size of l2c_only state"); + +/* dbuf.c */ + +/* dmu.c */ + +/* dmu_zfetch.c */ + +SYSCTL_NODE(_vfs_zfs, OID_AUTO, zfetch, CTLFLAG_RW, 0, "ZFS ZFETCH (LEGACY)"); + +extern uint32_t zfetch_max_distance; + +SYSCTL_UINT(_vfs_zfs_zfetch, OID_AUTO, max_distance, + CTLFLAG_RWTUN, &zfetch_max_distance, 0, + "Max bytes to prefetch per stream (LEGACY)"); + +extern uint32_t zfetch_max_idistance; + +SYSCTL_UINT(_vfs_zfs_zfetch, OID_AUTO, max_idistance, + CTLFLAG_RWTUN, &zfetch_max_idistance, 0, + "Max bytes to prefetch indirects for per stream (LEGACY)"); + +/* dsl_pool.c */ + +/* dnode.c */ + +/* dsl_scan.c */ + /* metaslab.c */ int @@ -313,6 +533,19 @@ SYSCTL_UINT(_vfs_zfs, OID_AUTO, condense_pct, "Condense on-disk spacemap when it is more than this many percents" " of in-memory counterpart"); +extern uint_t zfs_remove_max_segment; + +SYSCTL_UINT(_vfs_zfs, OID_AUTO, remove_max_segment, + CTLFLAG_RWTUN, &zfs_remove_max_segment, 0, + "Largest contiguous segment ZFS will attempt to allocate when removing" + " a device"); + +extern int zfs_removal_suspend_progress; + +SYSCTL_INT(_vfs_zfs, OID_AUTO, removal_suspend_progress, + CTLFLAG_RWTUN, &zfs_removal_suspend_progress, 0, + "Ensures certain actions can happen while in the middle of a removal"); + /* * Minimum size which forces the dynamic allocator to change * it's allocation strategy. Once the space map cannot satisfy @@ -532,9 +765,18 @@ param_set_min_auto_ashift(SYSCTL_HANDLER_ARGS) zfs_vdev_min_auto_ashift = val; + if (arg2 != 0) + warn_deprecated_sysctl("min_auto_ashift", + "vdev.min_auto_ashift"); + return (0); } +SYSCTL_PROC(_vfs_zfs, OID_AUTO, min_auto_ashift, + CTLTYPE_UINT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, NULL, 1, + param_set_min_auto_ashift, "IU", + "Min ashift used when creating new top-level vdev. (LEGACY)"); + int param_set_max_auto_ashift(SYSCTL_HANDLER_ARGS) { @@ -551,9 +793,19 @@ param_set_max_auto_ashift(SYSCTL_HANDLER_ARGS) zfs_vdev_max_auto_ashift = val; + if (arg2 != 0) + warn_deprecated_sysctl("max_auto_ashift", + "vdev.max_auto_ashift"); + return (0); } +SYSCTL_PROC(_vfs_zfs, OID_AUTO, max_auto_ashift, + CTLTYPE_UINT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, NULL, 1, + param_set_max_auto_ashift, "IU", + "Max ashift used when optimizing for logical -> physical sector size on" + " new top-level vdevs. (LEGACY)"); + /* * Since the DTL space map of a vdev is not expected to have a lot of * entries, we default its block size to 4K. @@ -575,6 +827,23 @@ SYSCTL_INT(_vfs_zfs, OID_AUTO, standard_sm_blksz, CTLFLAG_RDTUN, &zfs_vdev_standard_sm_blksz, 0, "Block size for standard space map. Power of 2 greater than 4096."); +extern int vdev_validate_skip; + +SYSCTL_INT(_vfs_zfs, OID_AUTO, validate_skip, + CTLFLAG_RDTUN, &vdev_validate_skip, 0, + "Enable to bypass vdev_validate()."); + +/* vdev_mirror.c */ + +/* vdev_queue.c */ + +extern uint_t zfs_vdev_max_active; + +SYSCTL_UINT(_vfs_zfs, OID_AUTO, top_maxinflight, + CTLFLAG_RWTUN, &zfs_vdev_max_active, 0, + "The maximum number of I/Os of all types active for each device." 
+ " (LEGACY)"); + /* zio.c */ SYSCTL_INT(_vfs_zfs_zio, OID_AUTO, exclude_metadata, diff --git a/sys/contrib/openzfs/module/zfs/arc.c b/sys/contrib/openzfs/module/zfs/arc.c index 591e2dade59e..b677f90280d7 100644 --- a/sys/contrib/openzfs/module/zfs/arc.c +++ b/sys/contrib/openzfs/module/zfs/arc.c @@ -486,13 +486,13 @@ static taskq_t *arc_flush_taskq; static uint_t zfs_arc_evict_threads = 0; /* The 7 states: */ -static arc_state_t ARC_anon; -/* */ arc_state_t ARC_mru; -static arc_state_t ARC_mru_ghost; -/* */ arc_state_t ARC_mfu; -static arc_state_t ARC_mfu_ghost; -static arc_state_t ARC_l2c_only; -static arc_state_t ARC_uncached; +arc_state_t ARC_anon; +arc_state_t ARC_mru; +arc_state_t ARC_mru_ghost; +arc_state_t ARC_mfu; +arc_state_t ARC_mfu_ghost; +arc_state_t ARC_l2c_only; +arc_state_t ARC_uncached; arc_stats_t arc_stats = { { "hits", KSTAT_DATA_UINT64 }, @@ -832,15 +832,15 @@ typedef struct arc_async_flush { #define L2ARC_FEED_TYPES 4 /* L2ARC Performance Tunables */ -static uint64_t l2arc_write_max = L2ARC_WRITE_SIZE; /* def max write size */ -static uint64_t l2arc_write_boost = L2ARC_WRITE_SIZE; /* extra warmup write */ -static uint64_t l2arc_headroom = L2ARC_HEADROOM; /* # of dev writes */ -static uint64_t l2arc_headroom_boost = L2ARC_HEADROOM_BOOST; -static uint64_t l2arc_feed_secs = L2ARC_FEED_SECS; /* interval seconds */ -static uint64_t l2arc_feed_min_ms = L2ARC_FEED_MIN_MS; /* min interval msecs */ -static int l2arc_noprefetch = B_TRUE; /* don't cache prefetch bufs */ -static int l2arc_feed_again = B_TRUE; /* turbo warmup */ -static int l2arc_norw = B_FALSE; /* no reads during writes */ +uint64_t l2arc_write_max = L2ARC_WRITE_SIZE; /* def max write size */ +uint64_t l2arc_write_boost = L2ARC_WRITE_SIZE; /* extra warmup write */ +uint64_t l2arc_headroom = L2ARC_HEADROOM; /* # of dev writes */ +uint64_t l2arc_headroom_boost = L2ARC_HEADROOM_BOOST; +uint64_t l2arc_feed_secs = L2ARC_FEED_SECS; /* interval seconds */ +uint64_t l2arc_feed_min_ms = L2ARC_FEED_MIN_MS; /* min interval msecs */ +int l2arc_noprefetch = B_TRUE; /* don't cache prefetch bufs */ +int l2arc_feed_again = B_TRUE; /* turbo warmup */ +int l2arc_norw = B_FALSE; /* no reads during writes */ static uint_t l2arc_meta_percent = 33; /* limit on headers size */ /* diff --git a/sys/contrib/openzfs/module/zfs/dmu_zfetch.c b/sys/contrib/openzfs/module/zfs/dmu_zfetch.c index 3d3a9c713568..51165d0bf723 100644 --- a/sys/contrib/openzfs/module/zfs/dmu_zfetch.c +++ b/sys/contrib/openzfs/module/zfs/dmu_zfetch.c @@ -57,19 +57,19 @@ static unsigned int zfetch_max_sec_reap = 2; /* min bytes to prefetch per stream (default 2MB) */ static unsigned int zfetch_min_distance = 2 * 1024 * 1024; /* max bytes to prefetch per stream (default 8MB) */ -static unsigned int zfetch_max_distance = 8 * 1024 * 1024; +unsigned int zfetch_max_distance = 8 * 1024 * 1024; #else /* min bytes to prefetch per stream (default 4MB) */ static unsigned int zfetch_min_distance = 4 * 1024 * 1024; /* max bytes to prefetch per stream (default 64MB) */ -static unsigned int zfetch_max_distance = 64 * 1024 * 1024; +unsigned int zfetch_max_distance = 64 * 1024 * 1024; #endif /* max bytes to prefetch indirects for per stream (default 128MB) */ -static unsigned int zfetch_max_idistance = 128 * 1024 * 1024; +unsigned int zfetch_max_idistance = 128 * 1024 * 1024; /* max request reorder distance within a stream (default 16MB) */ -static unsigned int zfetch_max_reorder = 16 * 1024 * 1024; +unsigned int zfetch_max_reorder = 16 * 1024 * 1024; /* Max log2 fraction of holes in 
a stream */ -static unsigned int zfetch_hole_shift = 2; +unsigned int zfetch_hole_shift = 2; typedef struct zfetch_stats { kstat_named_t zfetchstat_hits; diff --git a/sys/contrib/openzfs/module/zfs/vdev.c b/sys/contrib/openzfs/module/zfs/vdev.c index 654e034de9e1..c8d7280387a2 100644 --- a/sys/contrib/openzfs/module/zfs/vdev.c +++ b/sys/contrib/openzfs/module/zfs/vdev.c @@ -100,7 +100,7 @@ static uint_t zfs_vdev_default_ms_shift = 29; /* upper limit for metaslab size (16G) */ static uint_t zfs_vdev_max_ms_shift = 34; -static int vdev_validate_skip = B_FALSE; +int vdev_validate_skip = B_FALSE; /* * Since the DTL space map of a vdev is not expected to have a lot of diff --git a/sys/contrib/openzfs/module/zfs/vdev_queue.c b/sys/contrib/openzfs/module/zfs/vdev_queue.c index e69e5598939e..c12713b107bf 100644 --- a/sys/contrib/openzfs/module/zfs/vdev_queue.c +++ b/sys/contrib/openzfs/module/zfs/vdev_queue.c @@ -122,7 +122,7 @@ * The maximum number of i/os active to each device. Ideally, this will be >= * the sum of each queue's max_active. */ -static uint_t zfs_vdev_max_active = 1000; +uint_t zfs_vdev_max_active = 1000; /* * Per-queue limits on the number of i/os active to each device. If the diff --git a/sys/contrib/openzfs/module/zfs/vdev_removal.c b/sys/contrib/openzfs/module/zfs/vdev_removal.c index 2ce0121324ad..2f7a739da241 100644 --- a/sys/contrib/openzfs/module/zfs/vdev_removal.c +++ b/sys/contrib/openzfs/module/zfs/vdev_removal.c @@ -105,7 +105,7 @@ static const uint_t zfs_remove_max_copy_bytes = 64 * 1024 * 1024; * * See also the accessor function spa_remove_max_segment(). */ -static uint_t zfs_remove_max_segment = SPA_MAXBLOCKSIZE; +uint_t zfs_remove_max_segment = SPA_MAXBLOCKSIZE; /* * Ignore hard IO errors during device removal. When set if a device @@ -137,7 +137,7 @@ uint_t vdev_removal_max_span = 32 * 1024; * This is used by the test suite so that it can ensure that certain * actions happen while in the middle of a removal. 
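The pattern behind the preceding OpenZFS hunks: a tunable loses its static qualifier in the common code so that the FreeBSD glue in sysctl_os.c can attach a legacy vfs.zfs.* name to it through an extern declaration, and handlers registered with arg2 = 1 warn only when reached via the old spelling. A condensed sketch under those assumptions; zfs_foo_limit and param_set_foo_limit are hypothetical stand-ins:

/* module/zfs/foo.c (common code): no longer static. */
uint64_t zfs_foo_limit = 1024;

/* module/os/freebsd/zfs/sysctl_os.c (platform glue): */
extern uint64_t zfs_foo_limit;

static int
param_set_foo_limit(SYSCTL_HANDLER_ARGS)
{
	uint64_t val = zfs_foo_limit;
	int err;

	err = sysctl_handle_64(oidp, &val, 0, req);
	if (err != 0 || req->newptr == NULL)
		return (err);
	zfs_foo_limit = val;
	if (arg2 != 0)		/* registered under the legacy name */
		warn_deprecated_sysctl("foo_limit", "foo.limit");
	return (0);
}

SYSCTL_PROC(_vfs_zfs, OID_AUTO, foo_limit,
    CTLTYPE_U64 | CTLFLAG_RWTUN | CTLFLAG_MPSAFE,
    NULL, 1, param_set_foo_limit, "QU",
    "Foo limit in bytes (LEGACY)");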
*/ -static int zfs_removal_suspend_progress = 0; +int zfs_removal_suspend_progress = 0; #define VDEV_REMOVAL_ZAP_OBJS "lzap" diff --git a/sys/contrib/openzfs/tests/zfs-tests/include/tunables.cfg b/sys/contrib/openzfs/tests/zfs-tests/include/tunables.cfg index 54b50c9dba77..127ea188f17f 100644 --- a/sys/contrib/openzfs/tests/zfs-tests/include/tunables.cfg +++ b/sys/contrib/openzfs/tests/zfs-tests/include/tunables.cfg @@ -76,8 +76,8 @@ READ_SIT_OUT_SECS vdev.read_sit_out_secs vdev_read_sit_out_secs SIT_OUT_CHECK_INTERVAL vdev.raidz_outlier_check_interval_ms vdev_raidz_outlier_check_interval_ms SIT_OUT_INSENSITIVITY vdev.raidz_outlier_insensitivity vdev_raidz_outlier_insensitivity REBUILD_SCRUB_ENABLED rebuild_scrub_enabled zfs_rebuild_scrub_enabled -REMOVAL_SUSPEND_PROGRESS vdev.removal_suspend_progress zfs_removal_suspend_progress -REMOVE_MAX_SEGMENT vdev.remove_max_segment zfs_remove_max_segment +REMOVAL_SUSPEND_PROGRESS removal_suspend_progress zfs_removal_suspend_progress +REMOVE_MAX_SEGMENT remove_max_segment zfs_remove_max_segment RESILVER_MIN_TIME_MS resilver_min_time_ms zfs_resilver_min_time_ms RESILVER_DEFER_PERCENT resilver_defer_percent zfs_resilver_defer_percent SCAN_LEGACY scan_legacy zfs_scan_legacy diff --git a/sys/crypto/chacha20/chacha.c b/sys/crypto/chacha20/chacha.c index 52f7e18c651c..cb06003b0ecf 100644 --- a/sys/crypto/chacha20/chacha.c +++ b/sys/crypto/chacha20/chacha.c @@ -138,7 +138,7 @@ chacha_encrypt_bytes(chacha_ctx *x,const u8 *m,u8 *c,u32 bytes) for (;;) { if (bytes < 64) { #ifndef KEYSTREAM_ONLY - for (i = 0;i < bytes;++i) tmp[i] = m[i]; + for (i = 0; i < bytes; ++i) tmp[i] = m[i]; m = tmp; #endif ctarget = c; @@ -160,7 +160,7 @@ chacha_encrypt_bytes(chacha_ctx *x,const u8 *m,u8 *c,u32 bytes) x13 = j13; x14 = j14; x15 = j15; - for (i = 20;i > 0;i -= 2) { + for (i = 20; i > 0; i -= 2) { QUARTERROUND( x0, x4, x8,x12) QUARTERROUND( x1, x5, x9,x13) QUARTERROUND( x2, x6,x10,x14) @@ -240,7 +240,7 @@ chacha_encrypt_bytes(chacha_ctx *x,const u8 *m,u8 *c,u32 bytes) if (bytes <= 64) { if (bytes < 64) { - for (i = 0;i < bytes;++i) ctarget[i] = c[i]; + for (i = 0; i < bytes; ++i) ctarget[i] = c[i]; } x->input[12] = j12; x->input[13] = j13; diff --git a/sys/crypto/openssl/ossl_sha256.c b/sys/crypto/openssl/ossl_sha256.c index 4613a9409b44..50cb9739d114 100644 --- a/sys/crypto/openssl/ossl_sha256.c +++ b/sys/crypto/openssl/ossl_sha256.c @@ -74,11 +74,11 @@ ossl_sha256_init(void *c_) unsigned int nn; \ switch ((c)->md_len) \ { case SHA224_DIGEST_LENGTH: \ - for (nn=0;nn<SHA224_DIGEST_LENGTH/4;nn++) \ + for (nn=0; nn < SHA224_DIGEST_LENGTH / 4; nn++) \ { ll=(c)->h[nn]; (void)HOST_l2c(ll,(s)); } \ break; \ case SHA256_DIGEST_LENGTH: \ - for (nn=0;nn<SHA256_DIGEST_LENGTH/4;nn++) \ + for (nn=0; nn < SHA256_DIGEST_LENGTH / 4; nn++) \ { ll=(c)->h[nn]; (void)HOST_l2c(ll,(s)); } \ break; \ default: \ diff --git a/sys/dev/aic7xxx/aic79xx.c b/sys/dev/aic7xxx/aic79xx.c index 2b5015b20e41..cee45fa5cc8a 100644 --- a/sys/dev/aic7xxx/aic79xx.c +++ b/sys/dev/aic7xxx/aic79xx.c @@ -7788,8 +7788,8 @@ ahd_abort_scbs(struct ahd_softc *ahd, int target, char channel, } if (role != ROLE_TARGET) { - for (;i < maxtarget; i++) { - for (j = minlun;j < maxlun; j++) { + for (; i < maxtarget; i++) { + for (j = minlun; j < maxlun; j++) { u_int scbid; u_int tcl; diff --git a/sys/dev/aic7xxx/aic7xxx.c b/sys/dev/aic7xxx/aic7xxx.c index c09876e9f589..18f68b806948 100644 --- a/sys/dev/aic7xxx/aic7xxx.c +++ b/sys/dev/aic7xxx/aic7xxx.c @@ -5903,8 +5903,8 @@ ahc_abort_scbs(struct ahc_softc *ahc, int target, char 
channel, } if (role != ROLE_TARGET) { - for (;i < maxtarget; i++) { - for (j = minlun;j < maxlun; j++) { + for (; i < maxtarget; i++) { + for (j = minlun; j < maxlun; j++) { u_int scbid; u_int tcl; diff --git a/sys/dev/dc/if_dc.c b/sys/dev/dc/if_dc.c index bed74c3b6181..5c1d7ff30976 100644 --- a/sys/dev/dc/if_dc.c +++ b/sys/dev/dc/if_dc.c @@ -999,7 +999,7 @@ dc_setfilt_21143(struct dc_softc *sc) else DC_CLRBIT(sc, DC_NETCFG, DC_NETCFG_RX_ALLMULTI); - if_foreach_llmaddr(ifp, dc_hash_maddr_21143, sp); + if_foreach_llmaddr(ifp, dc_hash_maddr_21143, sc); if (if_getflags(ifp) & IFF_BROADCAST) { h = dc_mchash_le(sc, if_getbroadcastaddr(ifp)); diff --git a/sys/dev/enetc/if_enetc.c b/sys/dev/enetc/if_enetc.c index 808397b229a7..53002f9d73ce 100644 --- a/sys/dev/enetc/if_enetc.c +++ b/sys/dev/enetc/if_enetc.c @@ -848,7 +848,7 @@ enetc_hash_vid(uint16_t vid) bool bit; int i; - for (i = 0;i < 6;i++) { + for (i = 0; i < 6; i++) { bit = vid & BIT(i); bit ^= !!(vid & BIT(i + 6)); hash |= bit << i; @@ -1020,7 +1020,7 @@ enetc_msix_intr_assign(if_ctx_t ctx, int msix) ENETC_RBICR0_ICEN | ENETC_RBICR0_SET_ICPT(ENETC_RX_INTR_PKT_THR)); } vector = 0; - for (i = 0;i < sc->tx_num_queues; i++, vector++) { + for (i = 0; i < sc->tx_num_queues; i++, vector++) { tx_queue = &sc->tx_queues[i]; snprintf(irq_name, sizeof(irq_name), "txq%d", i); iflib_softirq_alloc_generic(ctx, &tx_queue->irq, @@ -1130,7 +1130,7 @@ enetc_isc_txd_encap(void *data, if_pkt_info_t ipi) } /* Now add remaining descriptors. */ - for (;i < ipi->ipi_nsegs; i++) { + for (; i < ipi->ipi_nsegs; i++) { desc = &queue->ring[pidx]; bzero(desc, sizeof(*desc)); desc->addr = segs[i].ds_addr; diff --git a/sys/dev/fdt/fdt_slicer.c b/sys/dev/fdt/fdt_slicer.c index 3ba4eddf8b61..50112db5cfae 100644 --- a/sys/dev/fdt/fdt_slicer.c +++ b/sys/dev/fdt/fdt_slicer.c @@ -45,7 +45,7 @@ static int fill_slices(device_t dev, const char *provider, struct flash_slice *slices, int *slices_num); -static void fdt_slicer_init(void); +static void fdt_slicer_init(void *); static int fill_slices_from_node(phandle_t node, struct flash_slice *slices, int *count) @@ -138,7 +138,7 @@ fill_slices(device_t dev, const char *provider __unused, } static void -fdt_slicer_init(void) +fdt_slicer_init(void *dummy __unused) { flash_register_slicer(fill_slices, FLASH_SLICES_TYPE_NAND, false); @@ -147,7 +147,7 @@ fdt_slicer_init(void) } static void -fdt_slicer_cleanup(void) +fdt_slicer_cleanup(void *dummy __unused) { flash_register_slicer(NULL, FLASH_SLICES_TYPE_NAND, true); diff --git a/sys/dev/hptmv/entry.c b/sys/dev/hptmv/entry.c index 5c4718bf582f..f3d58f285b39 100644 --- a/sys/dev/hptmv/entry.c +++ b/sys/dev/hptmv/entry.c @@ -430,7 +430,7 @@ static void device_change(IAL_ADAPTER_T *pAdapter , MV_U8 channelIndex, int plug if(pVDev->pParent) { int iMember; - for(iMember = 0; iMember < pVDev->pParent->u.array.bArnMember; iMember++) + for (iMember = 0; iMember < pVDev->pParent->u.array.bArnMember; iMember++) if((PVDevice)pVDev->pParent->u.array.pMember[iMember] == pVDev) pVDev->pParent->u.array.pMember[iMember] = NULL; pVDev->pParent = NULL; @@ -984,7 +984,7 @@ fRegisterVdevice(IAL_ADAPTER_T *pAdapter) PVBus pVBus; int i,j; - for(i=0;i<MV_SATA_CHANNELS_NUM;i++) { + for (i = 0; i < MV_SATA_CHANNELS_NUM; i++) { pPhysical = &(pAdapter->VDevices[i]); pLogical = pPhysical; while (pLogical->pParent) pLogical = pLogical->pParent; @@ -1027,8 +1027,7 @@ GetSpareDisk(_VBUS_ARG PVDevice pArray) PVDevice pVDevice, pFind = NULL; int i; - for(i=0;i<MV_SATA_CHANNELS_NUM;i++) - { + for (i=0; i < 
MV_SATA_CHANNELS_NUM; i++) { pVDevice = &pAdapter->VDevices[i]; if(!pVDevice) continue; @@ -1356,7 +1355,7 @@ unregister: goto unregister; } - for (i=0; i<MAX_COMMAND_BLOCKS_FOR_EACH_VBUS; i++) { + for (i = 0; i < MAX_COMMAND_BLOCKS_FOR_EACH_VBUS; i++) { FreeCommand(_VBUS_P &(pAdapter->pCommandBlocks[i])); } @@ -1370,7 +1369,7 @@ unregister: memset((void *)pAdapter->pbus_dmamap, 0, sizeof(struct _BUS_DMAMAP) * MAX_QUEUE_COMM); pAdapter->pbus_dmamap_list = 0; - for (i=0; i < MAX_QUEUE_COMM; i++) { + for (i = 0; i < MAX_QUEUE_COMM; i++) { PBUS_DMAMAP pmap = &(pAdapter->pbus_dmamap[i]); pmap->pAdapter = pAdapter; dmamap_put(pmap); @@ -1398,7 +1397,7 @@ unregister: pAdapter->prdTableAlignedAddr = (PUCHAR)(((ULONG_PTR)pAdapter->prdTableAddr + 0x1f) & ~(ULONG_PTR)0x1fL); { PUCHAR PRDTable = pAdapter->prdTableAlignedAddr; - for (i=0; i<PRD_TABLES_FOR_VBUS; i++) + for (i = 0; i < PRD_TABLES_FOR_VBUS; i++) { /* KdPrint(("i=%d,pAdapter->pFreePRDLink=%p\n",i,pAdapter->pFreePRDLink)); */ FreePRDTable(pAdapter, PRDTable); @@ -1447,7 +1446,7 @@ unregister: } #ifdef SUPPORT_ARRAY - for(i = MAX_ARRAY_DEVICE - 1; i >= 0; i--) { + for (i = MAX_ARRAY_DEVICE - 1; i >= 0; i--) { pVDev = ArrayTables(i); mArFreeArrayTable(pVDev); } @@ -1467,7 +1466,7 @@ unregister: _vbus_p->nInstances = 1; fRegisterVdevice(pAdapter); - for (channel=0;channel<MV_SATA_CHANNELS_NUM;channel++) { + for (channel = 0; channel < MV_SATA_CHANNELS_NUM; channel++) { pVDev = _vbus_p->pVDevice[channel]; if (pVDev && pVDev->vf_online) fCheckBootable(pVDev); @@ -1567,7 +1566,7 @@ fResetActiveCommands(PVBus _vbus_p) { MV_SATA_ADAPTER *pMvSataAdapter = &((IAL_ADAPTER_T *)_vbus_p->OsExt)->mvSataAdapter; MV_U8 channel; - for (channel=0;channel< MV_SATA_CHANNELS_NUM;channel++) { + for (channel = 0; channel < MV_SATA_CHANNELS_NUM; channel++) { if (pMvSataAdapter->sataChannel[channel] && pMvSataAdapter->sataChannel[channel]->outstandingCommands) MvSataResetChannel(pMvSataAdapter,channel); } @@ -1590,7 +1589,7 @@ check_cmds: dataxfer_poll(); xor_poll(); #endif - for (channel=0;channel< MV_SATA_CHANNELS_NUM;channel++) { + for (channel = 0; channel < MV_SATA_CHANNELS_NUM; channel++) { pMvSataChannel = pMvSataAdapter->sataChannel[channel]; if (pMvSataChannel && pMvSataChannel->outstandingCommands) { @@ -1716,7 +1715,7 @@ fDeviceSendCommand(_VBUS_ARG PCommand pCmd) MV_BOOLEAN is48bit; MV_U8 channel; - int i=0; + int i = 0; DECLARE_BUFFER(FPSCAT_GATH, tmpSg); @@ -2141,7 +2140,7 @@ FlushAdapter(IAL_ADAPTER_T *pAdapter) hpt_printk(("flush all devices\n")); /* flush all devices */ - for (i=0; i<MAX_VDEVICE_PER_VBUS; i++) { + for (i = 0; i < MAX_VDEVICE_PER_VBUS; i++) { PVDevice pVDev = pAdapter->VBus.pVDevice[i]; if(pVDev) fFlushVDev(pVDev); } @@ -2174,7 +2173,7 @@ Check_Idle_Call(IAL_ADAPTER_T *pAdapter) { int i; PVDevice pArray; - for(i = 0; i < MAX_ARRAY_PER_VBUS; i++){ + for (i = 0; i < MAX_ARRAY_PER_VBUS; i++) { if ((pArray=ArrayTables(i))->u.array.dArStamp==0) continue; else if (pArray->u.array.rf_auto_rebuild) { @@ -2378,7 +2377,7 @@ hpt_free_ccb(union ccb **ccb_Q, union ccb *ccb) static void hpt_worker_thread(void) { - for(;;) { + for (;;) { mtx_lock(&DpcQueue_Lock); while (DpcQueue_First!=DpcQueue_Last) { ST_HPT_DPC p; @@ -2418,7 +2417,7 @@ static void hpt_worker_thread(void) mtx_lock(&pAdapter->lock); _vbus_p = &pAdapter->VBus; - for (i=0;i<MAX_ARRAY_PER_VBUS;i++) + for (i = 0; i < MAX_ARRAY_PER_VBUS; i++) { if ((pArray=ArrayTables(i))->u.array.dArStamp==0) continue; @@ -2472,7 +2471,7 @@ launch_worker_thread(void) int i; PVDevice pVDev; - for(i = 
0; i < MAX_ARRAY_PER_VBUS; i++) + for (i = 0; i < MAX_ARRAY_PER_VBUS; i++) if ((pVDev=ArrayTables(i))->u.array.dArStamp==0) continue; else{ diff --git a/sys/dev/hptmv/gui_lib.c b/sys/dev/hptmv/gui_lib.c index d78fdcca69d2..f11044db733a 100644 --- a/sys/dev/hptmv/gui_lib.c +++ b/sys/dev/hptmv/gui_lib.c @@ -86,8 +86,7 @@ check_VDevice_valid(PVDevice p) while(pAdapter != NULL) { _vbus_p = &pAdapter->VBus; - for (i=0;i<MAX_ARRAY_PER_VBUS;i++) - { + for (i = 0; i<MAX_ARRAY_PER_VBUS; i++) { pVDevice=ArrayTables(i); if ((pVDevice->u.array.dArStamp != 0) && (pVDevice == p)) return 0; @@ -244,9 +243,9 @@ static void get_array_info(PVDevice pVDevice, PHPT_ARRAY_INFO pArrayInfo) if(pVDevice->u.array.pMember[i] != NULL) pArrayInfo->Members[pArrayInfo->nDisk++] = VDEV_TO_ID(pVDevice->u.array.pMember[i]); - for(i=pArrayInfo->nDisk; i<MAX_ARRAY_MEMBERS; i++) + for (i = pArrayInfo->nDisk; i < MAX_ARRAY_MEMBERS; i++) pArrayInfo->Members[i] = INVALID_DEVICEID; - } +} static void get_array_info_v2(PVDevice pVDevice, PHPT_ARRAY_INFO_V2 pArrayInfo) { @@ -266,7 +265,7 @@ static void get_array_info_v2(PVDevice pVDevice, PHPT_ARRAY_INFO_V2 pArrayInfo) if(pVDevice->u.array.pMember[i] != NULL) pArrayInfo->Members[pArrayInfo->nDisk++] = VDEV_TO_ID(pVDevice->u.array.pMember[i]); - for(i=pArrayInfo->nDisk; i<MAX_ARRAY_MEMBERS_V2; i++) + for (i = pArrayInfo->nDisk; i < MAX_ARRAY_MEMBERS_V2; i++) pArrayInfo->Members[i] = INVALID_DEVICEID; } #endif @@ -461,8 +460,7 @@ found: pInfo->IoPort = 0; pInfo->ControlPort = 0; - for (i=0; i<2 ;i++) - { + for (i = 0; i < 2; i++) { pInfo->Devices[i] = (DEVICEID)INVALID_DEVICEID; } diff --git a/sys/dev/hptmv/hptproc.c b/sys/dev/hptmv/hptproc.c index 38fe61ee7e04..328750d9034c 100644 --- a/sys/dev/hptmv/hptproc.c +++ b/sys/dev/hptmv/hptproc.c @@ -107,7 +107,7 @@ hpt_set_asc_info(IAL_ADAPTER_T *pAdapter, char *buffer,int length) return -EINVAL; } - for (i=0;i<MV_SATA_CHANNELS_NUM;i++) + for (i = 0; i < MV_SATA_CHANNELS_NUM; i++) if(i == ichan) goto rebuild; diff --git a/sys/dev/ice/ice_common.c b/sys/dev/ice/ice_common.c index ad4ea4c8e7a1..b895f661bc46 100644 --- a/sys/dev/ice/ice_common.c +++ b/sys/dev/ice/ice_common.c @@ -213,6 +213,15 @@ int ice_set_mac_type(struct ice_hw *hw) case ICE_DEV_ID_E830_L_QSFP: case ICE_DEV_ID_E830C_SFP: case ICE_DEV_ID_E830_L_SFP: + case ICE_DEV_ID_E835CC_BACKPLANE: + case ICE_DEV_ID_E835CC_QSFP56: + case ICE_DEV_ID_E835CC_SFP: + case ICE_DEV_ID_E835C_BACKPLANE: + case ICE_DEV_ID_E835C_QSFP: + case ICE_DEV_ID_E835C_SFP: + case ICE_DEV_ID_E835_L_BACKPLANE: + case ICE_DEV_ID_E835_L_QSFP: + case ICE_DEV_ID_E835_L_SFP: hw->mac_type = ICE_MAC_E830; break; default: diff --git a/sys/dev/ice/ice_devids.h b/sys/dev/ice/ice_devids.h index 3f91e9dfbcaf..74712c61ae8e 100644 --- a/sys/dev/ice/ice_devids.h +++ b/sys/dev/ice/ice_devids.h @@ -62,6 +62,24 @@ #define ICE_DEV_ID_E830C_SFP 0x12DA /* Intel(R) Ethernet Controller E830-L for SFP */ #define ICE_DEV_ID_E830_L_SFP 0x12DE +/* Intel(R) Ethernet Controller E835-CC for backplane */ +#define ICE_DEV_ID_E835CC_BACKPLANE 0x1248 +/* Intel(R) Ethernet Controller E835-CC for QSFP */ +#define ICE_DEV_ID_E835CC_QSFP56 0x1249 +/* Intel(R) Ethernet Controller E835-CC for SFP */ +#define ICE_DEV_ID_E835CC_SFP 0x124A +/* Intel(R) Ethernet Controller E835-C for backplane */ +#define ICE_DEV_ID_E835C_BACKPLANE 0x1261 +/* Intel(R) Ethernet Controller E835-C for QSFP */ +#define ICE_DEV_ID_E835C_QSFP 0x1262 +/* Intel(R) Ethernet Controller E835-C for SFP */ +#define ICE_DEV_ID_E835C_SFP 0x1263 +/* Intel(R) Ethernet Controller 
E835-L for backplane */ +#define ICE_DEV_ID_E835_L_BACKPLANE 0x1265 +/* Intel(R) Ethernet Controller E835-L for QSFP */ +#define ICE_DEV_ID_E835_L_QSFP 0x1266 +/* Intel(R) Ethernet Controller E835-L for SFP */ +#define ICE_DEV_ID_E835_L_SFP 0x1267 /* Intel(R) Ethernet Controller E810-C for backplane */ #define ICE_DEV_ID_E810C_BACKPLANE 0x1591 /* Intel(R) Ethernet Controller E810-C for QSFP */ diff --git a/sys/dev/ice/ice_drv_info.h b/sys/dev/ice/ice_drv_info.h index 2a51a7394424..46965f4124bc 100644 --- a/sys/dev/ice/ice_drv_info.h +++ b/sys/dev/ice/ice_drv_info.h @@ -218,6 +218,45 @@ static const pci_vendor_info_t ice_vendor_info_array[] = { "Intel(R) Ethernet Network Adapter E830-XXV-2"), PVIDV(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E830_L_SFP, "Intel(R) Ethernet Connection E830-L for SFP"), + PVIDV(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E835CC_BACKPLANE, + "Intel(R) Ethernet Connection E835-CC for backplane"), + PVIDV_OEM(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E835CC_QSFP56, + ICE_INTEL_VENDOR_ID, 0x0001, 0, + "Intel(R) Ethernet Network Adapter E835-C-Q2"), + PVIDV_OEM(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E835CC_QSFP56, + ICE_INTEL_VENDOR_ID, 0x0002, 0, + "Intel(R) Ethernet Network Adapter E835-C-Q2 for OCP 3.0"), + PVIDV_OEM(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E835CC_QSFP56, + ICE_INTEL_VENDOR_ID, 0x0003, 0, + "Intel(R) Ethernet Network Adapter E835-CC-Q1"), + PVIDV_OEM(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E835CC_QSFP56, + ICE_INTEL_VENDOR_ID, 0x0004, 0, + "Intel(R) Ethernet Network Adapter E835-CC-Q1 for OCP 3.0"), + PVIDV(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E835CC_QSFP56, + "Intel(R) Ethernet Connection E835-CC for QSFP56"), + PVIDV_OEM(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E835CC_SFP, + ICE_INTEL_VENDOR_ID, 0x0001, 0, + "Intel(R) Ethernet Network Adapter E835-XXV-2 for OCP 3.0"), + PVIDV_OEM(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E835CC_SFP, + ICE_INTEL_VENDOR_ID, 0x0003, 0, + "Intel(R) Ethernet Network Adapter E835-XXV-2"), + PVIDV_OEM(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E835CC_SFP, + ICE_INTEL_VENDOR_ID, 0x0004, 0, + "Intel(R) Ethernet Network Adapter E835-XXV-4 for OCP 3.0"), + PVIDV(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E835CC_SFP, + "Intel(R) Ethernet Connection E835-CC for SFP"), + PVIDV(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E835C_BACKPLANE, + "Intel(R) Ethernet Connection E835-C for backplane"), + PVIDV(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E835C_QSFP, + "Intel(R) Ethernet Connection E835-C for QSFP"), + PVIDV(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E835C_SFP, + "Intel(R) Ethernet Connection E835-C for SFP"), + PVIDV(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E835_L_BACKPLANE, + "Intel(R) Ethernet Connection E835-L for backplane"), + PVIDV(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E835_L_QSFP, + "Intel(R) Ethernet Connection E835-L for QSFP"), + PVIDV(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E835_L_SFP, + "Intel(R) Ethernet Connection E835-L for SFP"), PVIDV(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E825C_BACKPLANE, "Intel(R) Ethernet Connection E825-C for backplane"), PVIDV(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E825C_QSFP, diff --git a/sys/dev/iommu/iommu_gas.c b/sys/dev/iommu/iommu_gas.c index ffa8dc096adc..80e37341b3dc 100644 --- a/sys/dev/iommu/iommu_gas.c +++ b/sys/dev/iommu/iommu_gas.c @@ -77,7 +77,7 @@ static int iommu_check_free; #endif static void -intel_gas_init(void) +intel_gas_init(void *dummy __unused) { iommu_map_entry_zone = uma_zcreate("IOMMU_MAP_ENTRY", diff --git a/sys/dev/ixgbe/if_ix.c b/sys/dev/ixgbe/if_ix.c index 6d08bd49bc04..1d36fd11f368 100644 --- a/sys/dev/ixgbe/if_ix.c +++ b/sys/dev/ixgbe/if_ix.c @@ -192,6 +192,8 @@ static int ixgbe_if_i2c_req(if_ctx_t, struct ifi2creq *); 
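The if_ix.c changes that follow wire up a driver-private ioctl handler. For orientation, a hedged userland sketch of how such requests reach it: SIOCSDRVSPEC carries a struct ifdrv whose ifd_cmd selects the driver command. The interface name ix0 is an example, s is any open socket descriptor (e.g. from socket(AF_INET, SOCK_DGRAM, 0)), and IXGBE_DEBUG_DUMP plus the buffer layout come from the ixgbe.h hunk further below:

#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/sockio.h>
#include <net/if.h>
#include <string.h>

static int
request_debug_dump(int s, void *buf, size_t len)
{
	struct ifdrv ifd;

	memset(&ifd, 0, sizeof(ifd));
	strlcpy(ifd.ifd_name, "ix0", sizeof(ifd.ifd_name));
	ifd.ifd_cmd = IXGBE_DEBUG_DUMP;	/* dispatched by ixgbe_if_priv_ioctl() */
	ifd.ifd_len = len;		/* ixgbe_debug_dump_cmd header + data area */
	ifd.ifd_data = buf;

	/* iflib forwards SIOCSDRVSPEC to the IFDI_PRIV_IOCTL method. */
	return (ioctl(s, SIOCSDRVSPEC, &ifd));
}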
static bool ixgbe_if_needs_restart(if_ctx_t, enum iflib_restart_event); int ixgbe_intr(void *); +static int ixgbe_if_priv_ioctl(if_ctx_t ctx, u_long command, caddr_t data); + /************************************************************************ * Function prototypes ************************************************************************/ @@ -239,6 +241,13 @@ static void ixgbe_setup_vlan_hw_support(if_ctx_t); static void ixgbe_config_gpie(struct ixgbe_softc *); static void ixgbe_config_delay_values(struct ixgbe_softc *); +static void ixgbe_add_debug_sysctls(struct ixgbe_softc *sc); +static void ixgbe_add_debug_dump_sysctls(struct ixgbe_softc *sc); +static int ixgbe_debug_dump_ioctl(struct ixgbe_softc *sc, struct ifdrv *ifd); +static u8 ixgbe_debug_dump_print_cluster(struct ixgbe_softc *sc, + struct sbuf *sbuf, u8 cluster_id); +static int ixgbe_nvm_access_ioctl(struct ixgbe_softc *sc, struct ifdrv *ifd); + /* Sysctl handlers */ static int ixgbe_sysctl_flowcntl(SYSCTL_HANDLER_ARGS); static int ixgbe_sysctl_advertise(SYSCTL_HANDLER_ARGS); @@ -260,6 +269,9 @@ static int ixgbe_sysctl_wol_enable(SYSCTL_HANDLER_ARGS); static int ixgbe_sysctl_wufc(SYSCTL_HANDLER_ARGS); static int ixgbe_sysctl_tso_tcp_flags_mask(SYSCTL_HANDLER_ARGS); +static int ixgbe_sysctl_debug_dump_set_clusters(SYSCTL_HANDLER_ARGS); +static int ixgbe_sysctl_dump_debug_dump(SYSCTL_HANDLER_ARGS); + /* Deferred interrupt tasklets */ static void ixgbe_handle_msf(void *); static void ixgbe_handle_mod(void *); @@ -330,6 +342,7 @@ static device_method_t ixgbe_if_methods[] = { DEVMETHOD(ifdi_get_counter, ixgbe_if_get_counter), DEVMETHOD(ifdi_i2c_req, ixgbe_if_i2c_req), DEVMETHOD(ifdi_needs_restart, ixgbe_if_needs_restart), + DEVMETHOD(ifdi_priv_ioctl, ixgbe_if_priv_ioctl), #ifdef PCI_IOV DEVMETHOD(ifdi_iov_init, ixgbe_if_iov_init), DEVMETHOD(ifdi_iov_uninit, ixgbe_if_iov_uninit), @@ -1015,6 +1028,8 @@ ixgbe_if_attach_pre(if_ctx_t ctx) if (hw->mac.type == ixgbe_mac_E610) ixgbe_init_aci(hw); + sc->do_debug_dump = false; + if (hw->mac.ops.fw_recovery_mode && hw->mac.ops.fw_recovery_mode(hw)) { device_printf(dev, @@ -1395,6 +1410,248 @@ ixgbe_if_needs_restart(if_ctx_t ctx __unused, enum iflib_restart_event event) } /************************************************************************ + * ixgbe_if_priv_ioctl - Ioctl handler for driver + * + * Handler for custom driver specific ioctls + * + * return 0 on success, positive on failure + ************************************************************************/ +static int +ixgbe_if_priv_ioctl(if_ctx_t ctx, u_long command, caddr_t data) +{ + struct ixgbe_softc *sc = iflib_get_softc(ctx); + struct ifdrv *ifd; + device_t dev = sc->dev; + + /* Make sure the command type is valid */ + switch (command) { + case SIOCSDRVSPEC: + case SIOCGDRVSPEC: + /* Accepted commands */ + break; + case SIOCGPRIVATE_0: + /* + * Although we do not support this ioctl command, it's expected + * that iflib will forward it to the IFDI_PRIV_IOCTL handler. + * Do not print a message in this case. + */ + return (ENOTSUP); + default: + /* + * If we get a different command for this function, it's + * definitely unexpected, so log a message indicating what + * command we got for debugging purposes. 
+ */ + device_printf(dev, + "%s: unexpected ioctl command %08lx\n", + __func__, command); + return (EINVAL); + } + + ifd = (struct ifdrv *)data; + + switch (ifd->ifd_cmd) { + case IXGBE_NVM_ACCESS: + IOCTL_DEBUGOUT("ioctl: NVM ACCESS"); + return (ixgbe_nvm_access_ioctl(sc, ifd)); + case IXGBE_DEBUG_DUMP: + IOCTL_DEBUGOUT("ioctl: DEBUG DUMP"); + return (ixgbe_debug_dump_ioctl(sc, ifd)); + default: + IOCTL_DEBUGOUT1( + "ioctl: UNKNOWN SIOC(S|G)DRVSPEC (0x%X) command\n", + (int)ifd->ifd_cmd); + return (EINVAL); + } + + return (0); +} + +/************************************************************************ + * ixgbe_nvm_access_ioctl + * + * Handles an NVM access ioctl request + ************************************************************************/ +static int +ixgbe_nvm_access_ioctl(struct ixgbe_softc *sc, struct ifdrv *ifd) +{ + struct ixgbe_nvm_access_data *data; + struct ixgbe_nvm_access_cmd *cmd; + struct ixgbe_hw *hw = &sc->hw; + size_t ifd_len = ifd->ifd_len; + size_t malloc_len; + device_t dev = sc->dev; + u8 *nvm_buffer; + s32 error = 0; + + /* + * ifioctl forwards SIOCxDRVSPEC to iflib without conducting + * a privilege check. Subsequently, iflib passes the ioctl to the driver + * without verifying privileges. To prevent non-privileged threads from + * accessing this interface, perform a privilege check at this point. + */ + error = priv_check(curthread, PRIV_DRIVER); + if (error) + return (error); + + if (ifd_len < sizeof(*cmd)) { + device_printf(dev, + "%s: ifdrv length is too small. Got %zu, " + "but expected %zu\n", + __func__, ifd_len, sizeof(*cmd)); + return (EINVAL); + } + + if (ifd->ifd_data == NULL) { + device_printf(dev, "%s: No ifd data buffer.\n", + __func__); + return (EINVAL); + } + + malloc_len = max(ifd_len, sizeof(*data) + sizeof(*cmd)); + + nvm_buffer = (u8 *)malloc(malloc_len, M_IXGBE, M_ZERO | M_NOWAIT); + if (!nvm_buffer) + return (ENOMEM); + + /* Copy the NVM access command and data in from user space */ + error = copyin(ifd->ifd_data, nvm_buffer, ifd_len); + if (error) { + device_printf(dev, "%s: Failed to copy data in, error: %d\n", + __func__, error); + goto cleanup_free_nvm_buffer; + } + + /* + * The NVM command structure is immediately followed by data which + * varies in size based on the command. + */ + cmd = (struct ixgbe_nvm_access_cmd *)nvm_buffer; + data = (struct ixgbe_nvm_access_data *) + (nvm_buffer + sizeof(struct ixgbe_nvm_access_cmd)); + + /* Handle the NVM access request */ + error = ixgbe_handle_nvm_access(hw, cmd, data); + if (error) { + device_printf(dev, "%s: NVM access request failed, error %d\n", + __func__, error); + } + + /* Copy the possibly modified contents of the handled request out */ + error = copyout(nvm_buffer, ifd->ifd_data, ifd_len); + if (error) { + device_printf(dev, "%s: Copying response back to " + "user space failed, error %d\n", + __func__, error); + goto cleanup_free_nvm_buffer; + } + +cleanup_free_nvm_buffer: + free(nvm_buffer, M_IXGBE); + return (error); +} + +/************************************************************************ + * ixgbe_debug_dump_ioctl + * + * Makes debug dump of internal FW/HW data. 
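Both ixgbe_nvm_access_ioctl() above and the debug dump handler below share one discipline: an explicit priv_check() (since neither ifioctl nor iflib verifies privileges for SIOCxDRVSPEC), strict length validation, then a copyin/act/copyout round trip on a kernel-side bounce buffer. A stripped-down sketch of that shape; the handler name and the M_TEMP malloc type are illustrative, not part of the patch:

static int
example_drvspec_handler(struct ifdrv *ifd)
{
	void *buf;
	int error;

	/* No upstream privilege check on SIOCxDRVSPEC; do it here. */
	error = priv_check(curthread, PRIV_DRIVER);
	if (error != 0)
		return (error);
	if (ifd->ifd_len == 0 || ifd->ifd_data == NULL)
		return (EINVAL);

	buf = malloc(ifd->ifd_len, M_TEMP, M_WAITOK | M_ZERO);
	error = copyin(ifd->ifd_data, buf, ifd->ifd_len);
	if (error == 0) {
		/* ... validate and act on the request, updating buf ... */
		error = copyout(buf, ifd->ifd_data, ifd->ifd_len);
	}
	free(buf, M_TEMP);
	return (error);
}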
+ ************************************************************************/ +static int +ixgbe_debug_dump_ioctl(struct ixgbe_softc *sc, struct ifdrv *ifd) +{ + struct ixgbe_debug_dump_cmd *dd_cmd; + struct ixgbe_hw *hw = &sc->hw; + size_t ifd_len = ifd->ifd_len; + device_t dev = sc->dev; + s32 error = 0; + + if (!(sc->feat_en & IXGBE_FEATURE_DBG_DUMP)) + return (ENODEV); + + /* Data returned from ACI command */ + u16 ret_buf_size = 0; + u16 ret_next_cluster = 0; + u16 ret_next_table = 0; + u32 ret_next_index = 0; + + /* + * ifioctl forwards SIOCxDRVSPEC to iflib without conducting + * a privilege check. Subsequently, iflib passes the ioctl to the driver + * without verifying privileges. To prevent non-privileged threads from + * accessing this interface, perform a privilege check at this point. + */ + error = priv_check(curthread, PRIV_DRIVER); + if (error) + return (error); + + if (ifd_len < sizeof(*dd_cmd)) { + device_printf(dev, + "%s: ifdrv length is too small. Got %zu, " + "but expected %zu\n", + __func__, ifd_len, sizeof(*dd_cmd)); + return (EINVAL); + } + + if (ifd->ifd_data == NULL) { + device_printf(dev, "%s: No ifd data buffer.\n", + __func__); + return (EINVAL); + } + + dd_cmd = (struct ixgbe_debug_dump_cmd *)malloc(ifd_len, M_IXGBE, + M_NOWAIT | M_ZERO); + if (!dd_cmd) { + error = ENOMEM; + goto out; + } + /* copy data from userspace */ + error = copyin(ifd->ifd_data, dd_cmd, ifd_len); + if (error) { + device_printf(dev, "%s: Failed to copy data in, error: %d\n", + __func__, error); + goto out; + } + + /* ACI command requires buf_size arg to be greater than 0 */ + if (dd_cmd->data_size == 0) { + device_printf(dev, "%s: data_size must be greater than 0\n", + __func__); + error = EINVAL; + goto out; + } + + /* Zero the data buffer memory space */ + memset(dd_cmd->data, 0, ifd_len - sizeof(*dd_cmd)); + + error = ixgbe_aci_get_internal_data(hw, dd_cmd->cluster_id, + dd_cmd->table_id, dd_cmd->offset, dd_cmd->data, dd_cmd->data_size, + &ret_buf_size, &ret_next_cluster, &ret_next_table, &ret_next_index); + if (error) { + device_printf(dev, + "%s: Failed to get internal FW/HW data, error: %d\n", + __func__, error); + goto out; + } + + dd_cmd->cluster_id = ret_next_cluster; + dd_cmd->table_id = ret_next_table; + dd_cmd->offset = ret_next_index; + dd_cmd->data_size = ret_buf_size; + + error = copyout(dd_cmd, ifd->ifd_data, ifd->ifd_len); + if (error) { + device_printf(dev, + "%s: Failed to copy data out, error: %d\n", + __func__, error); + } + +out: + free(dd_cmd, M_IXGBE); + + return (error); +} + +/************************************************************************ * ixgbe_add_media_types ************************************************************************/ static void @@ -2883,6 +3140,264 @@ ixgbe_sysctl_interrupt_rate_handler(SYSCTL_HANDLER_ARGS) } /* ixgbe_sysctl_interrupt_rate_handler */ /************************************************************************ + * ixgbe_debug_dump_print_cluster + ************************************************************************/ +static u8 +ixgbe_debug_dump_print_cluster(struct ixgbe_softc *sc, struct sbuf *sbuf, + u8 cluster_id) +{ + u16 data_buf_size = IXGBE_ACI_MAX_BUFFER_SIZE; + device_t dev = sc->dev; + struct ixgbe_hw *hw = &sc->hw; + const u8 reserved_buf[8] = {}; + int max_aci_calls = 1000; + int error, counter = 0; + u8 *data_buf; + + /* Input parameters / loop variables */ + u16 table_id = 0; + u32 offset = 0; + + /* Data returned from ACI command */ + u16 ret_buf_size = 0; + u16 ret_next_cluster = 0; + u16 ret_next_table
= 0; + u32 ret_next_index = 0; + + data_buf = (u8 *)malloc(data_buf_size, M_IXGBE, M_NOWAIT | M_ZERO); + if (!data_buf) + return (0); + + DEBUGOUT2("%s: dumping cluster id (relative) %d\n", + __func__, cluster_id); + + do { + DEBUGOUT3("table_id 0x%04x offset 0x%08x buf_size %d\n", + table_id, offset, data_buf_size); + + error = ixgbe_aci_get_internal_data(hw, cluster_id, table_id, + offset, data_buf, data_buf_size, &ret_buf_size, + &ret_next_cluster, &ret_next_table, &ret_next_index); + if (error) { + device_printf(dev, + "%s: Failed to get internal FW/HW data, error: %d, " + "last aci status: %d\n", + __func__, error, hw->aci.last_status); + break; + } + + DEBUGOUT3("ret_table_id 0x%04x ret_offset 0x%08x " + "ret_buf_size %d\n", + ret_next_table, ret_next_index, ret_buf_size); + + /* Print cluster id */ + u32 print_cluster_id = (u32)cluster_id; + sbuf_bcat(sbuf, &print_cluster_id, sizeof(print_cluster_id)); + /* Print table id */ + u32 print_table_id = (u32)table_id; + sbuf_bcat(sbuf, &print_table_id, sizeof(print_table_id)); + /* Print table length */ + u32 print_table_length = (u32)ret_buf_size; + sbuf_bcat(sbuf, &print_table_length, + sizeof(print_table_length)); + /* Print current offset */ + u32 print_curr_offset = offset; + sbuf_bcat(sbuf, &print_curr_offset, sizeof(print_curr_offset)); + /* Print reserved bytes */ + sbuf_bcat(sbuf, reserved_buf, sizeof(reserved_buf)); + /* Print data */ + sbuf_bcat(sbuf, data_buf, ret_buf_size); + + /* Prepare for the next loop iteration */ + memset(data_buf, 0, data_buf_size); + + bool last_index = (ret_next_index == 0xffffffff); + bool last_table = ((ret_next_table == 0xff || + ret_next_table == 0xffff) && + last_index); + + if (last_table) { + /* End of the cluster */ + DEBUGOUT1("End of the cluster ID %d\n", cluster_id); + break; + } else if (last_index) { + /* End of the table */ + table_id = ret_next_table; + offset = 0; + } else { + /* More data left in the table */ + offset = ret_next_index; + } + } while (++counter < max_aci_calls); + + if (counter >= max_aci_calls) + device_printf(dev, "Exceeded max number of ACI calls for cluster %d\n", + cluster_id); + + free(data_buf, M_IXGBE); + + return (++cluster_id); +} /* ixgbe_debug_dump_print_cluster */ + +/************************************************************************ + * ixgbe_sysctl_debug_dump_set_clusters + * + * Sets the clusters to dump from FW when a Debug Dump is requested.
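The firmware signals the end of a table with a next-index of 0xffffffff, and the end of a cluster with a next-table of 0xff or 0xffff combined with that index; the loop above decodes this inline. The same decision, factored into a hypothetical helper for clarity (u16/u32 are the driver's fixed-width typedefs):

static bool
dump_walk_advance(u16 *table_id, u32 *offset, u16 next_table, u32 next_index)
{
	bool last_index = (next_index == 0xffffffff);

	if (last_index && (next_table == 0xff || next_table == 0xffff))
		return (false);		/* end of the cluster */
	if (last_index) {
		*table_id = next_table;	/* step to the next table */
		*offset = 0;
	} else {
		*offset = next_index;	/* more data in this table */
	}
	return (true);			/* keep iterating */
}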
+
+/************************************************************************
+ * ixgbe_sysctl_debug_dump_set_clusters
+ *
+ * Sets the clusters to dump from FW when a debug dump is requested.
+ ************************************************************************/
+static int
+ixgbe_sysctl_debug_dump_set_clusters(SYSCTL_HANDLER_ARGS)
+{
+	struct ixgbe_softc *sc = (struct ixgbe_softc *)arg1;
+	u32 clusters = sc->debug_dump_cluster_mask;
+	device_t dev = sc->dev;
+	int error;
+
+	error = sysctl_handle_32(oidp, &clusters, 0, req);
+	if ((error) || !req->newptr)
+		return (error);
+
+	if (clusters & ~(IXGBE_DBG_DUMP_VALID_CLUSTERS_MASK)) {
+		device_printf(dev,
+		    "%s: Unrecognized parameter: %u\n",
+		    __func__, clusters);
+		sc->debug_dump_cluster_mask =
+		    IXGBE_ACI_DBG_DUMP_CLUSTER_ID_INVALID;
+		return (EINVAL);
+	}
+
+	sc->debug_dump_cluster_mask = clusters;
+
+	return (0);
+} /* ixgbe_sysctl_debug_dump_set_clusters */
+
+/************************************************************************
+ * ixgbe_sysctl_dump_debug_dump
+ ************************************************************************/
+static int
+ixgbe_sysctl_dump_debug_dump(SYSCTL_HANDLER_ARGS)
+{
+	struct ixgbe_softc *sc = (struct ixgbe_softc *)arg1;
+	device_t dev = sc->dev;
+	struct sbuf *sbuf;
+	int error = 0;
+
+	UNREFERENCED_PARAMETER(arg2);
+
+	if (!sc->do_debug_dump) {
+		if (req->oldptr == NULL && req->newptr == NULL) {
+			error = SYSCTL_OUT(req, 0, 0);
+			return (error);
+		}
+
+		char input_buf[2] = "";
+		error = sysctl_handle_string(oidp, input_buf,
+		    sizeof(input_buf), req);
+		if ((error) || (req->newptr == NULL))
+			return (error);
+
+		if (input_buf[0] == '1') {
+			if (sc->debug_dump_cluster_mask ==
+			    IXGBE_ACI_DBG_DUMP_CLUSTER_ID_INVALID) {
+				device_printf(dev,
+				    "Debug Dump failed because an invalid "
+				    "cluster was specified.\n");
+				return (EINVAL);
+			}
+
+			sc->do_debug_dump = true;
+			return (0);
+		}
+
+		return (EINVAL);
+	}
+
+	/* Caller just wants the upper bound for size */
+	if (req->oldptr == NULL && req->newptr == NULL) {
+		size_t est_output_len = IXGBE_DBG_DUMP_BASE_SIZE;
+		if (sc->debug_dump_cluster_mask & 0x2)
+			est_output_len += IXGBE_DBG_DUMP_BASE_SIZE;
+		error = SYSCTL_OUT(req, 0, est_output_len);
+		return (error);
+	}
+
+	sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req);
+	sbuf_clear_flags(sbuf, SBUF_INCLUDENUL);
+
+	DEBUGOUT("FW Debug Dump running...\n");
+
+	if (sc->debug_dump_cluster_mask) {
+		for (u8 id = 0; id <= IXGBE_ACI_DBG_DUMP_CLUSTER_ID_MAX; id++) {
+			if (sc->debug_dump_cluster_mask & BIT(id)) {
+				DEBUGOUT1("Dumping cluster ID %u...\n", id);
+				ixgbe_debug_dump_print_cluster(sc, sbuf, id);
+			}
+		}
+	} else {
+		u8 next_cluster_id = 0;
+		do {
+			DEBUGOUT1("Dumping cluster ID %u...\n",
+			    next_cluster_id);
+			next_cluster_id = ixgbe_debug_dump_print_cluster(sc,
+			    sbuf, next_cluster_id);
+		} while (next_cluster_id != 0 &&
+		    next_cluster_id <= IXGBE_ACI_DBG_DUMP_CLUSTER_ID_MAX);
+	}
+
+	sbuf_finish(sbuf);
+	sbuf_delete(sbuf);
+
+	sc->do_debug_dump = false;
+
+	return (error);
+} /* ixgbe_sysctl_dump_debug_dump */
+
+/************************************************************************
+ * ixgbe_add_debug_dump_sysctls
+ ************************************************************************/
+static void
+ixgbe_add_debug_dump_sysctls(struct ixgbe_softc *sc)
+{
+	struct sysctl_oid_list *debug_list, *dump_list;
+	struct sysctl_oid *dump_node;
+	struct sysctl_ctx_list *ctx;
+	device_t dev = sc->dev;
+
+	ctx = device_get_sysctl_ctx(dev);
+	debug_list = SYSCTL_CHILDREN(sc->debug_sysctls);
+
+	dump_node = SYSCTL_ADD_NODE(ctx, debug_list, OID_AUTO, "dump",
+	    CTLFLAG_RD, NULL, "Internal FW/HW Dump");
+	dump_list = SYSCTL_CHILDREN(dump_node);
+
+	SYSCTL_ADD_PROC(ctx, dump_list, OID_AUTO,
"clusters", + CTLTYPE_U32 | CTLFLAG_RW, sc, 0, + ixgbe_sysctl_debug_dump_set_clusters, "SU", + IXGBE_SYSCTL_DESC_DEBUG_DUMP_SET_CLUSTER); + + SYSCTL_ADD_PROC(ctx, dump_list, OID_AUTO, "dump", + CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, + ixgbe_sysctl_dump_debug_dump, "", + IXGBE_SYSCTL_DESC_DUMP_DEBUG_DUMP); +} /* ixgbe_add_debug_dump_sysctls */ + +static void +ixgbe_add_debug_sysctls(struct ixgbe_softc *sc) +{ + struct sysctl_oid_list *ctx_list; + struct sysctl_ctx_list *ctx; + device_t dev = sc->dev; + + ctx = device_get_sysctl_ctx(dev); + ctx_list = SYSCTL_CHILDREN(device_get_sysctl_tree(dev)); + + sc->debug_sysctls = SYSCTL_ADD_NODE(ctx, ctx_list, OID_AUTO, "debug", + CTLFLAG_RD, NULL, "Debug Sysctls"); + + if (sc->feat_en & IXGBE_FEATURE_DBG_DUMP) + ixgbe_add_debug_dump_sysctls(sc); +} /* ixgbe_add_debug_sysctls */ + +/************************************************************************ * ixgbe_add_device_sysctls ************************************************************************/ static void @@ -2992,6 +3507,8 @@ ixgbe_add_device_sysctls(if_ctx_t ctx) CTLTYPE_INT | CTLFLAG_RW, sc, 0, ixgbe_sysctl_eee_state, "I", "EEE Power Save State"); } + + ixgbe_add_debug_sysctls(sc); } /* ixgbe_add_device_sysctls */ /************************************************************************ @@ -5182,6 +5699,7 @@ ixgbe_init_device_features(struct ixgbe_softc *sc) break; case ixgbe_mac_E610: sc->feat_cap |= IXGBE_FEATURE_RECOVERY_MODE; + sc->feat_cap |= IXGBE_FEATURE_DBG_DUMP; break; default: break; @@ -5203,6 +5721,9 @@ ixgbe_init_device_features(struct ixgbe_softc *sc) /* Recovery mode */ if (sc->feat_cap & IXGBE_FEATURE_RECOVERY_MODE) sc->feat_en |= IXGBE_FEATURE_RECOVERY_MODE; + /* FW Debug Dump */ + if (sc->feat_cap & IXGBE_FEATURE_DBG_DUMP) + sc->feat_en |= IXGBE_FEATURE_DBG_DUMP; /* Enabled via global sysctl... */ /* Flow Director */ diff --git a/sys/dev/ixgbe/ixgbe.h b/sys/dev/ixgbe/ixgbe.h index 844064bf8543..624b71acabea 100644 --- a/sys/dev/ixgbe/ixgbe.h +++ b/sys/dev/ixgbe/ixgbe.h @@ -46,6 +46,7 @@ #include <sys/module.h> #include <sys/sockio.h> #include <sys/eventhandler.h> +#include <sys/priv.h> #include <net/if.h> #include <net/if_var.h> @@ -475,6 +476,20 @@ struct ixgbe_softc { u32 feat_cap; u32 feat_en; u16 lse_mask; + + struct sysctl_oid *debug_sysctls; + u32 debug_dump_cluster_mask; + bool do_debug_dump; +}; + +struct ixgbe_debug_dump_cmd { + u32 offset; /* offset to read/write from table, in bytes */ + u8 cluster_id; /* also used to get next cluster id */ + u16 table_id; + u16 data_size; /* size of data field, in bytes */ + u16 reserved1; + u32 reserved2; + u8 data[]; }; /* Precision Time Sync (IEEE 1588) defines */ @@ -499,6 +514,43 @@ struct ixgbe_softc { #define IXGBE_PHY_CURRENT_TEMP 0xC820 #define IXGBE_PHY_OVERTEMP_STATUS 0xC830 +/** + * The ioctl command number used by NVM update for accessing the driver for + * NVM access commands. + */ +#define IXGBE_NVM_ACCESS \ + (((((((('E' << 4) + '1') << 4) + 'K') << 4) + 'G') << 4) | 5) + +/* + * The ioctl command number used by a userspace tool for accessing the driver + * for getting debug dump data from the firmware. 
+ */ +#define IXGBE_DEBUG_DUMP \ + (((((((('E' << 4) + '1') << 4) + 'K') << 4) + 'G') << 4) | 6) + +/* Debug Dump related definitions */ +#define IXGBE_ACI_DBG_DUMP_CLUSTER_ID_INVALID 0xFFFFFF +#define IXGBE_ACI_DBG_DUMP_CLUSTER_ID_BASE 50 +#define IXGBE_ACI_DBG_DUMP_CLUSTER_ID_MAX 1 + +#define IXGBE_DBG_DUMP_VALID_CLUSTERS_MASK 0x3 +#define IXGBE_DBG_DUMP_BASE_SIZE (2 * 1024 * 1024) + +#define IXGBE_SYSCTL_DESC_DEBUG_DUMP_SET_CLUSTER \ +"\nSelect clusters to dump with \"dump\" sysctl" \ +"\nFlags:" \ +"\n\t 0x1 - Link" \ +"\n\t 0x2 - Full CSR Space, excluding RCW registers" \ +"\n\t" \ +"\nUse \"sysctl -x\" to view flags properly." + +#define IXGBE_SYSCTL_DESC_DUMP_DEBUG_DUMP \ +"\nWrite 1 to output a FW debug dump containing the clusters " \ +"specified by the \"clusters\" sysctl" \ +"\nThe \"-b\" flag must be used in order to dump this data " \ +"as binary data because" \ +"\nthis data is opaque and not a string." + /* Sysctl help messages; displayed with sysctl -d */ #define IXGBE_SYSCTL_DESC_ADV_SPEED \ "\nControl advertised link speed using these flags:\n" \ diff --git a/sys/dev/ixgbe/ixgbe_features.h b/sys/dev/ixgbe/ixgbe_features.h index 0cef334a185f..bee9040319d8 100644 --- a/sys/dev/ixgbe/ixgbe_features.h +++ b/sys/dev/ixgbe/ixgbe_features.h @@ -57,6 +57,7 @@ #define IXGBE_FEATURE_LEGACY_IRQ (u32)(1 << 12) #define IXGBE_FEATURE_NEEDS_CTXD (u32)(1 << 13) #define IXGBE_FEATURE_RECOVERY_MODE (u32)(1 << 15) +#define IXGBE_FEATURE_DBG_DUMP (u32)(1 << 16) /* Check for OS support. Undefine features if not included in the OS */ #ifndef PCI_IOV diff --git a/sys/dev/ixl/if_ixl.c b/sys/dev/ixl/if_ixl.c index 261f76055901..bfaf6cd69e58 100644 --- a/sys/dev/ixl/if_ixl.c +++ b/sys/dev/ixl/if_ixl.c @@ -1480,17 +1480,33 @@ ixl_if_multi_set(if_ctx_t ctx) struct ixl_pf *pf = iflib_get_softc(ctx); struct ixl_vsi *vsi = &pf->vsi; struct i40e_hw *hw = vsi->hw; + enum i40e_status_code status; int mcnt; + if_t ifp = iflib_get_ifp(ctx); IOCTL_DEBUGOUT("ixl_if_multi_set: begin"); /* Delete filters for removed multicast addresses */ ixl_del_multi(vsi, false); - mcnt = min(if_llmaddr_count(iflib_get_ifp(ctx)), MAX_MULTICAST_ADDR); + mcnt = min(if_llmaddr_count(ifp), MAX_MULTICAST_ADDR); if (__predict_false(mcnt == MAX_MULTICAST_ADDR)) { - i40e_aq_set_vsi_multicast_promiscuous(hw, + /* Check if promisc mode is already enabled, if yes return */ + if (vsi->flags & IXL_FLAGS_MC_PROMISC) + return; + + status = i40e_aq_set_vsi_multicast_promiscuous(hw, vsi->seid, TRUE, NULL); + if (status != I40E_SUCCESS) + if_printf(ifp, "Failed to enable multicast promiscuous " + "mode, status: %s\n", i40e_stat_str(hw, status)); + else { + if_printf(ifp, "Enabled multicast promiscuous mode\n"); + + /* Set the flag to track promiscuous mode */ + vsi->flags |= IXL_FLAGS_MC_PROMISC; + } + /* Delete all existing MC filters */ ixl_del_multi(vsi, true); return; } @@ -1693,6 +1709,13 @@ ixl_if_promisc_set(if_ctx_t ctx, int flags) return (err); err = i40e_aq_set_vsi_multicast_promiscuous(hw, vsi->seid, multi, NULL); + + /* Update the multicast promiscuous flag based on the new state */ + if (multi) + vsi->flags |= IXL_FLAGS_MC_PROMISC; + else + vsi->flags &= ~IXL_FLAGS_MC_PROMISC; + return (err); } diff --git a/sys/dev/ixl/ixl.h b/sys/dev/ixl/ixl.h index 95379448b570..ab0f38307d90 100644 --- a/sys/dev/ixl/ixl.h +++ b/sys/dev/ixl/ixl.h @@ -202,6 +202,7 @@ #define IXL_FLAGS_KEEP_TSO6 (1 << 1) #define IXL_FLAGS_USES_MSIX (1 << 2) #define IXL_FLAGS_IS_VF (1 << 3) +#define IXL_FLAGS_MC_PROMISC (1 << 4) #define IXL_VSI_IS_PF(v) ((v->flags & 
IXL_FLAGS_IS_VF) == 0)
 #define IXL_VSI_IS_VF(v) ((v->flags & IXL_FLAGS_IS_VF) != 0)
diff --git a/sys/dev/ixl/ixl_pf_main.c b/sys/dev/ixl/ixl_pf_main.c
index 1752efc02fff..b62619ced5cb 100644
--- a/sys/dev/ixl/ixl_pf_main.c
+++ b/sys/dev/ixl/ixl_pf_main.c
@@ -593,24 +593,29 @@ ixl_add_maddr(void *arg, struct sockaddr_dl *sdl, u_int cnt)
  * Routines for multicast and vlan filter management.
  *
  *********************************************************************/
+
+/**
+ * ixl_add_multi - Add multicast filters to the hardware
+ * @vsi: The VSI structure
+ *
+ * If the number of multicast filters in the IFP exceeds 127 entries,
+ * multicast promiscuous mode will be enabled and the filters will be
+ * removed from the hardware.
+ */
 void
 ixl_add_multi(struct ixl_vsi *vsi)
 {
 	if_t ifp = vsi->ifp;
-	struct i40e_hw *hw = vsi->hw;
 	int mcnt = 0;
 	struct ixl_add_maddr_arg cb_arg;
 
 	IOCTL_DEBUGOUT("ixl_add_multi: begin");
 
-	mcnt = if_llmaddr_count(ifp);
-	if (__predict_false(mcnt >= MAX_MULTICAST_ADDR)) {
-		i40e_aq_set_vsi_multicast_promiscuous(hw,
-		    vsi->seid, TRUE, NULL);
-		/* delete all existing MC filters */
-		ixl_del_multi(vsi, true);
-		return;
-	}
+	/*
+	 * There is no need to check if the number of multicast addresses
+	 * exceeds the MAX_MULTICAST_ADDR threshold and set promiscuous mode
+	 * here, as all callers already handle this case.
+	 */
 
 	cb_arg.vsi = vsi;
 	LIST_INIT(&cb_arg.to_add);
@@ -633,30 +638,103 @@ ixl_match_maddr(void *arg, struct sockaddr_dl *sdl, u_int cnt)
 	return (0);
 }
 
+/**
+ * ixl_dis_multi_promisc - Disable multicast promiscuous mode
+ * @vsi: The VSI structure
+ * @vsi_mcnt: Number of multicast filters in the VSI
+ *
+ * Disable multicast promiscuous mode based on the number of entries in the
+ * IFP and the VSI, then re-add multicast filters.
+ */
+static void
+ixl_dis_multi_promisc(struct ixl_vsi *vsi, int vsi_mcnt)
+{
+	struct ifnet *ifp = vsi->ifp;
+	struct i40e_hw *hw = vsi->hw;
+	int ifp_mcnt = 0;
+	enum i40e_status_code status;
+
+	/*
+	 * Check if multicast promiscuous mode was actually enabled.
+	 * If promiscuous mode was not enabled, don't attempt to disable it.
+	 * Also, don't disable if IFF_PROMISC or IFF_ALLMULTI is set.
+	 */
+	if (!(vsi->flags & IXL_FLAGS_MC_PROMISC) ||
+	    (if_getflags(ifp) & (IFF_PROMISC | IFF_ALLMULTI)))
+		return;
+
+	ifp_mcnt = if_llmaddr_count(ifp);
+	/*
+	 * An equal count or an empty ifp list means the list has not changed;
+	 * in that case avoid disabling multicast promiscuous mode, as it was
+	 * not previously enabled. The case where multicast promiscuous mode
+	 * has been enabled is vsi_mcnt == 0 && ifp_mcnt > 0.
+	 */
+	if (ifp_mcnt == vsi_mcnt || ifp_mcnt == 0 ||
+	    ifp_mcnt >= MAX_MULTICAST_ADDR)
+		return;
+
+	status = i40e_aq_set_vsi_multicast_promiscuous(hw, vsi->seid,
+	    FALSE, NULL);
+	if (status != I40E_SUCCESS) {
+		if_printf(ifp, "Failed to disable multicast promiscuous "
+		    "mode, status: %s\n", i40e_stat_str(hw, status));
+
+		return;
+	}
+
+	/* Clear the flag since promiscuous mode is now disabled */
+	vsi->flags &= ~IXL_FLAGS_MC_PROMISC;
+	if_printf(ifp, "Disabled multicast promiscuous mode\n");
+
+	ixl_add_multi(vsi);
+}
+
+/**
+ * ixl_del_multi - Delete multicast filters from the hardware
+ * @vsi: The VSI structure
+ * @all: Bool to determine if all the multicast filters should be removed
+ *
+ * If the number of multicast filters in the IFP drops to 127 entries,
+ * multicast promiscuous mode will be disabled and the filters will be
+ * reapplied to the hardware.
+ */ void ixl_del_multi(struct ixl_vsi *vsi, bool all) { - struct ixl_ftl_head to_del; + int to_del_cnt = 0, vsi_mcnt = 0; if_t ifp = vsi->ifp; struct ixl_mac_filter *f, *fn; - int mcnt = 0; + struct ixl_ftl_head to_del; IOCTL_DEBUGOUT("ixl_del_multi: begin"); LIST_INIT(&to_del); /* Search for removed multicast addresses */ LIST_FOREACH_SAFE(f, &vsi->ftl, ftle, fn) { - if ((f->flags & IXL_FILTER_MC) == 0 || - (!all && (if_foreach_llmaddr(ifp, ixl_match_maddr, f) == 0))) + if ((f->flags & IXL_FILTER_MC) == 0) + continue; + + /* Count all the multicast filters in the VSI for comparison */ + vsi_mcnt++; + + if (!all && if_foreach_llmaddr(ifp, ixl_match_maddr, f) != 0) continue; LIST_REMOVE(f, ftle); LIST_INSERT_HEAD(&to_del, f, ftle); - mcnt++; + to_del_cnt++; } - if (mcnt > 0) - ixl_del_hw_filters(vsi, &to_del, mcnt); + if (to_del_cnt > 0) { + ixl_del_hw_filters(vsi, &to_del, to_del_cnt); + return; + } + + ixl_dis_multi_promisc(vsi, vsi_mcnt); + + IOCTL_DEBUGOUT("ixl_del_multi: end"); } void diff --git a/sys/dev/mii/mv88e151x.c b/sys/dev/mii/mv88e151x.c index 618ad81471c9..fb03b2a7a917 100644 --- a/sys/dev/mii/mv88e151x.c +++ b/sys/dev/mii/mv88e151x.c @@ -97,7 +97,7 @@ mv88e151x_attach(device_t dev) { const struct mii_attach_args *ma; struct mii_softc *sc; - uint32_t cop_cap, cop_extcap; + uint32_t cop_cap = 0, cop_extcap = 0; sc = device_get_softc(dev); ma = device_get_ivars(dev); @@ -224,10 +224,12 @@ mv88e151x_fiber_status(struct mii_softc *phy) else if (reg & MV88E151X_STATUS_LINK && reg & MV88E151X_STATUS_SYNC && (reg & MV88E151X_STATUS_ENERGY) == 0) { - if ((reg & MV88E151X_STATUS_SPEED_MASK) == + if (((reg & MV88E151X_STATUS_SPEED_MASK) >> + MV88E151X_STATUS_SPEED_SHIFT) == MV88E151X_STATUS_SPEED_1000) mii->mii_media_active |= IFM_1000_SX; - else if ((reg & MV88E151X_STATUS_SPEED_MASK) == + else if (((reg & MV88E151X_STATUS_SPEED_MASK) >> + MV88E151X_STATUS_SPEED_SHIFT) == MV88E151X_STATUS_SPEED_100) mii->mii_media_active |= IFM_100_FX; else diff --git a/sys/dev/mps/mps_sas.c b/sys/dev/mps/mps_sas.c index d69c8ea5fded..fa0f817ed67b 100644 --- a/sys/dev/mps/mps_sas.c +++ b/sys/dev/mps/mps_sas.c @@ -858,7 +858,7 @@ mps_detach_sas(struct mps_softc *sc) if (sassc->devq != NULL) cam_simq_free(sassc->devq); - for(i=0; i< sassc->maxtargets ;i++) { + for (i = 0; i < sassc->maxtargets; i++) { targ = &sassc->targets[i]; SLIST_FOREACH_SAFE(lun, &targ->luns, lun_link, lun_tmp) { free(lun, M_MPT2); @@ -3396,7 +3396,7 @@ mpssas_realloc_targets(struct mps_softc *sc, int maxtargets) * the allocated LUNs for each target and then the target buffer * itself. 
*/ - for (i=0; i< maxtargets; i++) { + for (i = 0; i < maxtargets; i++) { targ = &sassc->targets[i]; SLIST_FOREACH_SAFE(lun, &targ->luns, lun_link, lun_tmp) { free(lun, M_MPT2); diff --git a/sys/dev/mpt/mpt_raid.c b/sys/dev/mpt/mpt_raid.c index 5ff08ffcf2b3..2b868f6ef070 100644 --- a/sys/dev/mpt/mpt_raid.c +++ b/sys/dev/mpt/mpt_raid.c @@ -830,7 +830,7 @@ mpt_is_raid_volume(struct mpt_softc *mpt, target_id_t tgt) } ioc_vol = mpt->ioc_page2->RaidVolume; ioc_last_vol = ioc_vol + mpt->ioc_page2->NumActiveVolumes; - for (;ioc_vol != ioc_last_vol; ioc_vol++) { + for (; ioc_vol != ioc_last_vol; ioc_vol++) { if (ioc_vol->VolumeID == tgt) { return (1); } @@ -1406,7 +1406,7 @@ mpt_refresh_raid_data(struct mpt_softc *mpt) ioc_vol = mpt->ioc_page2->RaidVolume; ioc_last_vol = ioc_vol + mpt->ioc_page2->NumActiveVolumes; - for (;ioc_vol != ioc_last_vol; ioc_vol++) { + for (; ioc_vol != ioc_last_vol; ioc_vol++) { struct mpt_raid_volume *mpt_vol; mpt_vol = mpt->raid_volumes + ioc_vol->VolumePageNumber; diff --git a/sys/dev/netmap/netmap_freebsd.c b/sys/dev/netmap/netmap_freebsd.c index 8cc543d54c2e..ac267a66d669 100644 --- a/sys/dev/netmap/netmap_freebsd.c +++ b/sys/dev/netmap/netmap_freebsd.c @@ -738,6 +738,7 @@ nm_os_extmem_create(unsigned long p, struct nmreq_pools_info *pi, int *perror) out_rem: vm_map_remove(kernel_map, e->kva, e->kva + e->size); + e->obj = NULL; /* reference consumed by vm_map_remove() */ out_rel: vm_object_deallocate(e->obj); e->obj = NULL; diff --git a/sys/dev/nfe/if_nfe.c b/sys/dev/nfe/if_nfe.c index 4625c2616562..265181ef7ad0 100644 --- a/sys/dev/nfe/if_nfe.c +++ b/sys/dev/nfe/if_nfe.c @@ -2078,7 +2078,7 @@ nfe_rxeof(struct nfe_softc *sc, int count, int *rx_npktsp) bus_dmamap_sync(sc->rxq.rx_desc_tag, sc->rxq.rx_desc_map, BUS_DMASYNC_POSTREAD); - for (prog = 0;;NFE_INC(sc->rxq.cur, NFE_RX_RING_COUNT), vtag = 0) { + for (prog = 0; ; NFE_INC(sc->rxq.cur, NFE_RX_RING_COUNT), vtag = 0) { if (count <= 0) break; count--; @@ -2192,7 +2192,7 @@ nfe_jrxeof(struct nfe_softc *sc, int count, int *rx_npktsp) bus_dmamap_sync(sc->jrxq.jrx_desc_tag, sc->jrxq.jrx_desc_map, BUS_DMASYNC_POSTREAD); - for (prog = 0;;NFE_INC(sc->jrxq.jcur, NFE_JUMBO_RX_RING_COUNT), + for (prog = 0; ; NFE_INC(sc->jrxq.jcur, NFE_JUMBO_RX_RING_COUNT), vtag = 0) { if (count <= 0) break; diff --git a/sys/dev/nvme/nvme.c b/sys/dev/nvme/nvme.c index ead91f0d01fe..d119f9877aaa 100644 --- a/sys/dev/nvme/nvme.c +++ b/sys/dev/nvme/nvme.c @@ -51,7 +51,7 @@ int32_t nvme_retry_count; MALLOC_DEFINE(M_NVME, "nvme", "nvme(4) memory allocations"); static void -nvme_init(void) +nvme_init(void *dummy __unused) { uint32_t i; @@ -62,7 +62,7 @@ nvme_init(void) SYSINIT(nvme_register, SI_SUB_DRIVERS, SI_ORDER_SECOND, nvme_init, NULL); static void -nvme_uninit(void) +nvme_uninit(void *dummy __unused) { } diff --git a/sys/dev/nvme/nvme.h b/sys/dev/nvme/nvme.h index 17c5cdb4db87..f4ea08f129c0 100644 --- a/sys/dev/nvme/nvme.h +++ b/sys/dev/nvme/nvme.h @@ -1507,9 +1507,7 @@ struct nvme_namespace_data { uint8_t eui64[8]; /** lba format support */ - uint32_t lbaf[16]; - - uint8_t reserved7[192]; + uint32_t lbaf[64]; uint8_t vendor_specific[3712]; } __packed __aligned(4); @@ -2155,8 +2153,6 @@ static inline void nvme_namespace_data_swapbytes(struct nvme_namespace_data *s __unused) { #if _BYTE_ORDER != _LITTLE_ENDIAN - int i; - s->nsze = le64toh(s->nsze); s->ncap = le64toh(s->ncap); s->nuse = le64toh(s->nuse); @@ -2175,7 +2171,7 @@ void nvme_namespace_data_swapbytes(struct nvme_namespace_data *s __unused) s->anagrpid = le32toh(s->anagrpid); 
s->nvmsetid = le16toh(s->nvmsetid); s->endgid = le16toh(s->endgid); - for (i = 0; i < 16; i++) + for (unsigned i = 0; i < nitems(s->lbaf); i++) s->lbaf[i] = le32toh(s->lbaf[i]); #endif } diff --git a/sys/dev/nvme/nvme_private.h b/sys/dev/nvme/nvme_private.h index 52f9e12f8f9a..52e9fcbbebcd 100644 --- a/sys/dev/nvme/nvme_private.h +++ b/sys/dev/nvme/nvme_private.h @@ -463,13 +463,13 @@ static __inline void nvme_completion_poll(struct nvme_completion_poll_status *status) { int timeout = ticks + 10 * hz; - sbintime_t delta_t = SBT_1US; + sbintime_t delta = SBT_1US; while (!atomic_load_acq_int(&status->done)) { if (timeout - ticks < 0) panic("NVME polled command failed to complete within 10s."); - pause_sbt("nvme", delta_t, 0, C_PREL(1)); - delta_t = min(SBT_1MS, delta_t * 3 / 2); + pause_sbt("nvme", delta, 0, C_PREL(1)); + delta = min(SBT_1MS, delta + delta / 2); } } diff --git a/sys/dev/nvme/nvme_sim.c b/sys/dev/nvme/nvme_sim.c index a06774a64761..7693aa6d54d3 100644 --- a/sys/dev/nvme/nvme_sim.c +++ b/sys/dev/nvme/nvme_sim.c @@ -391,7 +391,7 @@ nvme_sim_controller_fail(void *ctrlr_arg) struct nvme_consumer *consumer_cookie; static void -nvme_sim_init(void) +nvme_sim_init(void *dummy __unused) { if (nvme_use_nvd) return; @@ -404,7 +404,7 @@ SYSINIT(nvme_sim_register, SI_SUB_DRIVERS, SI_ORDER_ANY, nvme_sim_init, NULL); static void -nvme_sim_uninit(void) +nvme_sim_uninit(void *dummy __unused) { if (nvme_use_nvd) return; diff --git a/sys/dev/ocs_fc/ocs_mgmt.c b/sys/dev/ocs_fc/ocs_mgmt.c index 726b499f28ba..5b7f6557c017 100644 --- a/sys/dev/ocs_fc/ocs_mgmt.c +++ b/sys/dev/ocs_fc/ocs_mgmt.c @@ -226,7 +226,7 @@ ocs_mgmt_get_list(ocs_t *ocs, ocs_textbuf_t *textbuf) ocs_mgmt_start_unnumbered_section(textbuf, "ocs"); - for (i=0;i<ARRAY_SIZE(mgmt_table);i++) { + for (i = 0; i < ARRAY_SIZE(mgmt_table); i++) { access = 0; if (mgmt_table[i].get_handler) { access |= MGMT_MODE_RD; @@ -305,7 +305,7 @@ ocs_mgmt_get(ocs_t *ocs, char *name, ocs_textbuf_t *textbuf) if (ocs_strncmp(name, qualifier, strlen(qualifier)) == 0) { char *unqualified_name = name + strlen(qualifier) + 1; - for (i=0;i<ARRAY_SIZE(mgmt_table);i++) { + for (i = 0; i < ARRAY_SIZE(mgmt_table); i++) { if (ocs_strcmp(unqualified_name, mgmt_table[i].name) == 0) { if (mgmt_table[i].get_handler) { mgmt_table[i].get_handler(ocs, name, textbuf); @@ -387,7 +387,7 @@ ocs_mgmt_set(ocs_t *ocs, char *name, char *value) char *unqualified_name = name + strlen(qualifier) +1; /* See if it's a value I can set */ - for (i=0;i<ARRAY_SIZE(mgmt_table);i++) { + for (i = 0; i < ARRAY_SIZE(mgmt_table); i++) { if (ocs_strcmp(unqualified_name, mgmt_table[i].name) == 0) { if (mgmt_table[i].set_handler) { return mgmt_table[i].set_handler(ocs, name, value); @@ -469,7 +469,7 @@ ocs_mgmt_exec(ocs_t *ocs, char *action, void *arg_in, char *unqualified_name = action + strlen(qualifier) +1; /* See if it's an action I can perform */ - for (i=0;i<ARRAY_SIZE(mgmt_table); i++) { + for (i = 0; i < ARRAY_SIZE(mgmt_table); i++) { if (ocs_strcmp(unqualified_name, mgmt_table[i].name) == 0) { if (mgmt_table[i].action_handler) { return mgmt_table[i].action_handler(ocs, action, arg_in, arg_in_length, @@ -527,7 +527,7 @@ ocs_mgmt_get_all(ocs_t *ocs, ocs_textbuf_t *textbuf) ocs_mgmt_start_unnumbered_section(textbuf, "ocs"); - for (i=0;i<ARRAY_SIZE(mgmt_table);i++) { + for (i = 0; i < ARRAY_SIZE(mgmt_table); i++) { if (mgmt_table[i].get_handler) { mgmt_table[i].get_handler(ocs, mgmt_table[i].name, textbuf); } else if (mgmt_table[i].action_handler) { @@ -1212,7 +1212,7 @@ get_sfp_a2(ocs_t 
*ocs, char *name, ocs_textbuf_t *textbuf) int buffer_remaining = (SFP_PAGE_SIZE * 3) + 1; int bytes_added; - for (i=0; i < bytes_read; i++) { + for (i = 0; i < bytes_read; i++) { bytes_added = ocs_snprintf(d, buffer_remaining, "%02x ", *s); ++s; d += bytes_added; @@ -2040,7 +2040,7 @@ get_profile_list(ocs_t *ocs, char *name, ocs_textbuf_t *textbuf) result_buf = ocs_malloc(ocs, BUFFER_SIZE, OCS_M_ZERO); bytes_left = BUFFER_SIZE; - for (i=0; i<result.list->num_descriptors; i++) { + for (i = 0; i < result.list->num_descriptors; i++) { sprintf(result_line, "0x%02x:%s\n", result.list->descriptors[i].profile_id, result.list->descriptors[i].profile_description); if (strlen(result_line) < bytes_left) { diff --git a/sys/dev/pci/controller/pci_n1sdp.c b/sys/dev/pci/controller/pci_n1sdp.c index 487041bc78e4..22f0ea27d45b 100644 --- a/sys/dev/pci/controller/pci_n1sdp.c +++ b/sys/dev/pci/controller/pci_n1sdp.c @@ -345,6 +345,17 @@ n1sdp_pcie_write_config(device_t dev, u_int bus, u_int slot, bus_space_write_4(t, h, offset & ~3, data); } +static int +n1sdp_pcie_acpi_request_feature(device_t pcib __unused, device_t dev __unused, + enum pci_feature feature __unused) +{ + /* + * HotPlug isn't supported on the N1SDP as it causes an interrupt storm + */ + return (EINVAL); +} + + static device_method_t n1sdp_pcie_acpi_methods[] = { DEVMETHOD(device_probe, n1sdp_pcie_acpi_probe), DEVMETHOD(device_attach, n1sdp_pcie_acpi_attach), @@ -352,6 +363,7 @@ static device_method_t n1sdp_pcie_acpi_methods[] = { /* pcib interface */ DEVMETHOD(pcib_read_config, n1sdp_pcie_read_config), DEVMETHOD(pcib_write_config, n1sdp_pcie_write_config), + DEVMETHOD(pcib_request_feature, n1sdp_pcie_acpi_request_feature), DEVMETHOD_END }; diff --git a/sys/dev/ppc/ppc.c b/sys/dev/ppc/ppc.c index 9870379e2eba..de75f4747709 100644 --- a/sys/dev/ppc/ppc.c +++ b/sys/dev/ppc/ppc.c @@ -1389,7 +1389,7 @@ ppc_exec_microseq(device_t dev, struct ppb_microseq **p_msq) /* let's suppose the next instr. 
is the same */ prefetch: - for (;mi->opcode == MS_OP_RASSERT; INCR_PC) + for (; mi->opcode == MS_OP_RASSERT; INCR_PC) w_reg(mi->arg[0].i, ppc, (char)mi->arg[1].i); if (mi->opcode == MS_OP_DELAY) { diff --git a/sys/dev/smartpqi/smartpqi_event.c b/sys/dev/smartpqi/smartpqi_event.c index f000d9ce9db3..88dcf45dd08a 100644 --- a/sys/dev/smartpqi/smartpqi_event.c +++ b/sys/dev/smartpqi/smartpqi_event.c @@ -115,7 +115,7 @@ pqisrc_ack_all_events(void *arg1) pending_event = &softs->pending_events[0]; - for (i=0; i < PQI_NUM_SUPPORTED_EVENTS; i++) { + for (i = 0; i < PQI_NUM_SUPPORTED_EVENTS; i++) { if (pending_event->pending == true) { pending_event->pending = false; pqisrc_acknowledge_event(softs, pending_event); @@ -417,7 +417,7 @@ pqisrc_report_event_config(pqisrc_softstate_t *softs) softs->event_config.num_event_descriptors = MIN(event_config_p->num_event_descriptors, PQI_MAX_EVENT_DESCRIPTORS) ; - for (i=0; i < softs->event_config.num_event_descriptors ;i++){ + for (i = 0; i < softs->event_config.num_event_descriptors; i++) { softs->event_config.descriptors[i].event_type = event_config_p->descriptors[i].event_type; } @@ -477,7 +477,7 @@ pqisrc_set_event_config(pqisrc_softstate_t *softs) event_config_p->num_event_descriptors = softs->event_config.num_event_descriptors; - for (i=0; i < softs->event_config.num_event_descriptors ; i++){ + for (i = 0; i < softs->event_config.num_event_descriptors; i++) { event_config_p->descriptors[i].event_type = softs->event_config.descriptors[i].event_type; if( pqisrc_event_type_to_event_index(event_config_p->descriptors[i].event_type) != -1) diff --git a/sys/dev/smartpqi/smartpqi_queue.c b/sys/dev/smartpqi/smartpqi_queue.c index 2e80b01b5436..f05c951cd4f9 100644 --- a/sys/dev/smartpqi/smartpqi_queue.c +++ b/sys/dev/smartpqi/smartpqi_queue.c @@ -700,7 +700,7 @@ pqisrc_create_op_obq(pqisrc_softstate_t *softs, } else { int i = 0; DBG_WARN("Error Status Descriptors\n"); - for(i = 0; i < 4;i++) + for (i = 0; i < 4; i++) DBG_WARN(" %x ",admin_resp.resp_type.create_op_oq.status_desc[i]); } @@ -743,7 +743,7 @@ pqisrc_create_op_ibq(pqisrc_softstate_t *softs, } else { int i = 0; DBG_WARN("Error Status Decsriptors\n"); - for(i = 0; i < 4;i++) + for (i = 0; i < 4; i++) DBG_WARN(" %x ",admin_resp.resp_type.create_op_iq.status_desc[i]); } diff --git a/sys/dev/sym/sym_hipd.c b/sys/dev/sym/sym_hipd.c index fa65d544e17d..b4e5c1075fb4 100644 --- a/sys/dev/sym/sym_hipd.c +++ b/sys/dev/sym/sym_hipd.c @@ -3266,7 +3266,7 @@ static void sym_init (hcb_p np, int reason) * Reinitialize usrwide. * Prepare sync negotiation according to actual SCSI bus mode. 
*/ - for (i=0;i<SYM_CONF_MAX_TARGET;i++) { + for (i = 0; i < SYM_CONF_MAX_TARGET; i++) { tcb_p tp = &np->target[i]; tp->to_reset = 0; @@ -3715,7 +3715,7 @@ static void sym_log_hard_error(hcb_p np, u_short sist, u_char dstat) } printf ("%s: regdump:", sym_name(np)); - for (i=0; i<24;i++) + for (i = 0; i < 24; i++) printf (" %02x", (unsigned)INB_OFF(i)); printf (".\n"); @@ -5527,8 +5527,8 @@ static int sym_show_msg (u_char * msg) u_char i; printf ("%x",*msg); if (*msg==M_EXTENDED) { - for (i=1;i<8;i++) { - if (i-1>msg[1]) break; + for (i = 1; i < 8; i++) { + if (i - 1 > msg[1]) break; printf ("-%x",msg[i]); } return (i+1); @@ -6744,10 +6744,10 @@ restart_test: /* * Wait 'til done (with timeout) */ - for (i=0; i<SYM_SNOOP_TIMEOUT; i++) + for (i = 0; i < SYM_SNOOP_TIMEOUT; i++) if (INB(nc_istat) & (INTF|SIP|DIP)) break; - if (i>=SYM_SNOOP_TIMEOUT) { + if (i >= SYM_SNOOP_TIMEOUT) { printf ("CACHE TEST FAILED: timeout.\n"); return (0x20); } diff --git a/sys/dev/tws/tws.c b/sys/dev/tws/tws.c index af151c8c4f06..fccd6689a6aa 100644 --- a/sys/dev/tws/tws.c +++ b/sys/dev/tws/tws.c @@ -311,7 +311,7 @@ attach_fail_4: if (sc->cmd_tag) bus_dma_tag_destroy(sc->cmd_tag); attach_fail_3: - for(i=0;i<sc->irqs;i++) { + for (i = 0; i < sc->irqs; i++) { if ( sc->irq_res[i] ){ if (bus_release_resource(sc->tws_dev, SYS_RES_IRQ, sc->irq_res_id[i], sc->irq_res[i])) @@ -369,7 +369,7 @@ tws_detach(device_t dev) tws_teardown_intr(sc); /* Release irq resource */ - for(i=0;i<sc->irqs;i++) { + for (i = 0; i < sc->irqs; i++) { if ( sc->irq_res[i] ){ if (bus_release_resource(sc->tws_dev, SYS_RES_IRQ, sc->irq_res_id[i], sc->irq_res[i])) @@ -402,7 +402,7 @@ tws_detach(device_t dev) TWS_TRACE(sc, "bus release mem resource", 0, sc->reg_res_id); } - for ( i=0; i< tws_queue_depth; i++) { + for (i = 0; i < tws_queue_depth; i++) { if (sc->reqs[i].dma_map) bus_dmamap_destroy(sc->data_tag, sc->reqs[i].dma_map); callout_drain(&sc->reqs[i].timeout); @@ -432,7 +432,7 @@ tws_setup_intr(struct tws_softc *sc, int irqs) { int i, error; - for(i=0;i<irqs;i++) { + for (i = 0; i < irqs; i++) { if (!(sc->intr_handle[i])) { if ((error = bus_setup_intr(sc->tws_dev, sc->irq_res[i], INTR_TYPE_CAM | INTR_MPSAFE, @@ -452,7 +452,7 @@ tws_teardown_intr(struct tws_softc *sc) { int i; - for(i=0;i<sc->irqs;i++) { + for (i = 0; i < sc->irqs; i++) { if (sc->intr_handle[i]) { bus_teardown_intr(sc->tws_dev, sc->irq_res[i], sc->intr_handle[i]); @@ -669,8 +669,7 @@ tws_init_reqs(struct tws_softc *sc, u_int32_t dma_mem_size) bzero(cmd_buf, dma_mem_size); TWS_TRACE_DEBUG(sc, "phy cmd", sc->dma_mem_phys, 0); mtx_lock(&sc->q_lock); - for ( i=0; i< tws_queue_depth; i++) - { + for (i = 0; i < tws_queue_depth; i++) { if (bus_dmamap_create(sc->data_tag, 0, &sc->reqs[i].dma_map)) { /* log a ENOMEM failure msg here */ mtx_unlock(&sc->q_lock); diff --git a/sys/dev/tws/tws_services.c b/sys/dev/tws/tws_services.c index da8bbacc39f7..e5c3d45c533f 100644 --- a/sys/dev/tws/tws_services.c +++ b/sys/dev/tws/tws_services.c @@ -200,7 +200,7 @@ tws_init_qs(struct tws_softc *sc) { mtx_lock(&sc->q_lock); - for(int i=0;i<TWS_MAX_QS;i++) { + for (int i = 0; i < TWS_MAX_QS; i++) { sc->q_head[i] = NULL; sc->q_tail[i] = NULL; } diff --git a/sys/dev/vmm/vmm_dev.c b/sys/dev/vmm/vmm_dev.c index 460a508a60dc..4961b21180e1 100644 --- a/sys/dev/vmm/vmm_dev.c +++ b/sys/dev/vmm/vmm_dev.c @@ -120,18 +120,18 @@ vcpu_unlock_one(struct vcpu *vcpu) vcpu_set_state(vcpu, VCPU_IDLE, false); } +#ifndef __amd64__ static int -vcpu_lock_all(struct vmmdev_softc *sc) +vcpu_set_state_all(struct vm *vm, enum 
vcpu_state newstate) { struct vcpu *vcpu; int error; uint16_t i, j, maxcpus; error = 0; - vm_slock_vcpus(sc->vm); - maxcpus = vm_get_maxcpus(sc->vm); + maxcpus = vm_get_maxcpus(vm); for (i = 0; i < maxcpus; i++) { - vcpu = vm_vcpu(sc->vm, i); + vcpu = vm_vcpu(vm, i); if (vcpu == NULL) continue; error = vcpu_lock_one(vcpu); @@ -141,16 +141,32 @@ vcpu_lock_all(struct vmmdev_softc *sc) if (error) { for (j = 0; j < i; j++) { - vcpu = vm_vcpu(sc->vm, j); + vcpu = vm_vcpu(vm, j); if (vcpu == NULL) continue; vcpu_unlock_one(vcpu); } - vm_unlock_vcpus(sc->vm); } return (error); } +#endif + +static int +vcpu_lock_all(struct vmmdev_softc *sc) +{ + int error; + + /* + * Serialize vcpu_lock_all() callers. Individual vCPUs are not locked + * in a consistent order so we need to serialize to avoid deadlocks. + */ + vm_lock_vcpus(sc->vm); + error = vcpu_set_state_all(sc->vm, VCPU_FROZEN); + if (error != 0) + vm_unlock_vcpus(sc->vm); + return (error); +} static void vcpu_unlock_all(struct vmmdev_softc *sc) diff --git a/sys/dev/vmm/vmm_mem.c b/sys/dev/vmm/vmm_mem.c index be59e37de33d..9df31c9ba133 100644 --- a/sys/dev/vmm/vmm_mem.c +++ b/sys/dev/vmm/vmm_mem.c @@ -26,10 +26,14 @@ static void vm_free_memmap(struct vm *vm, int ident); -void -vm_mem_init(struct vm_mem *mem) +int +vm_mem_init(struct vm_mem *mem, vm_offset_t lo, vm_offset_t hi) { + mem->mem_vmspace = vmmops_vmspace_alloc(lo, hi); + if (mem->mem_vmspace == NULL) + return (ENOMEM); sx_init(&mem->mem_segs_lock, "vm_mem_segs"); + return (0); } static bool @@ -93,10 +97,21 @@ vm_mem_destroy(struct vm *vm) for (int i = 0; i < VM_MAX_MEMSEGS; i++) vm_free_memseg(vm, i); + vmmops_vmspace_free(mem->mem_vmspace); + sx_xunlock(&mem->mem_segs_lock); sx_destroy(&mem->mem_segs_lock); } +struct vmspace * +vm_vmspace(struct vm *vm) +{ + struct vm_mem *mem; + + mem = vm_mem(vm); + return (mem->mem_vmspace); +} + void vm_slock_memsegs(struct vm *vm) { @@ -246,7 +261,7 @@ vm_mmap_memseg(struct vm *vm, vm_paddr_t gpa, int segid, vm_ooffset_t first, struct vm_mem *mem; struct vm_mem_seg *seg; struct vm_mem_map *m, *map; - struct vmspace *vmspace; + struct vm_map *vmmap; vm_ooffset_t last; int i, error; @@ -282,19 +297,19 @@ vm_mmap_memseg(struct vm *vm, vm_paddr_t gpa, int segid, vm_ooffset_t first, if (map == NULL) return (ENOSPC); - vmspace = vm_vmspace(vm); - error = vm_map_find(&vmspace->vm_map, seg->object, first, &gpa, - len, 0, VMFS_NO_SPACE, prot, prot, 0); + vmmap = &mem->mem_vmspace->vm_map; + error = vm_map_find(vmmap, seg->object, first, &gpa, len, 0, + VMFS_NO_SPACE, prot, prot, 0); if (error != KERN_SUCCESS) return (EFAULT); vm_object_reference(seg->object); if (flags & VM_MEMMAP_F_WIRED) { - error = vm_map_wire(&vmspace->vm_map, gpa, gpa + len, + error = vm_map_wire(vmmap, gpa, gpa + len, VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES); if (error != KERN_SUCCESS) { - vm_map_remove(&vmspace->vm_map, gpa, gpa + len); + vm_map_remove(vmmap, gpa, gpa + len); return (error == KERN_RESOURCE_SHORTAGE ? 
ENOMEM : EFAULT); } diff --git a/sys/dev/vmm/vmm_mem.h b/sys/dev/vmm/vmm_mem.h index 856470cf2590..f3d22058c7b8 100644 --- a/sys/dev/vmm/vmm_mem.h +++ b/sys/dev/vmm/vmm_mem.h @@ -36,6 +36,7 @@ enum { struct vm; struct vm_object; +struct vmspace; struct vm_mem_seg { size_t len; @@ -56,12 +57,15 @@ struct vm_mem { struct vm_mem_map mem_maps[VM_MAX_MEMMAPS]; struct vm_mem_seg mem_segs[VM_MAX_MEMSEGS]; struct sx mem_segs_lock; + struct vmspace *mem_vmspace; }; -void vm_mem_init(struct vm_mem *mem); +int vm_mem_init(struct vm_mem *mem, vm_offset_t lo, vm_offset_t hi); void vm_mem_cleanup(struct vm *vm); void vm_mem_destroy(struct vm *vm); +struct vmspace *vm_vmspace(struct vm *vm); + /* * APIs that modify the guest memory map require all vcpus to be frozen. */ diff --git a/sys/dev/xdma/xdma.c b/sys/dev/xdma/xdma.c index 62b781159d03..cdd9ad0b8f39 100644 --- a/sys/dev/xdma/xdma.c +++ b/sys/dev/xdma/xdma.c @@ -555,7 +555,7 @@ xdma_put(xdma_controller_t *xdma) } static void -xdma_init(void) +xdma_init(void *dummy __unused) { mtx_init(&xdma_mtx, "xDMA", NULL, MTX_DEF); diff --git a/sys/dev/xen/bus/xen_intr.c b/sys/dev/xen/bus/xen_intr.c index cb30b6efa484..2b5fa8fb7cd1 100644 --- a/sys/dev/xen/bus/xen_intr.c +++ b/sys/dev/xen/bus/xen_intr.c @@ -460,7 +460,7 @@ xen_intr_handle_upcall(void *unused __unused) return (FILTER_HANDLED); } -static int +static void xen_intr_init(void *dummy __unused) { shared_info_t *s = HYPERVISOR_shared_info; @@ -468,7 +468,7 @@ xen_intr_init(void *dummy __unused) int i; if (!xen_domain()) - return (0); + return; _Static_assert(is_valid_evtchn(0), "is_valid_evtchn(0) fails (unused by Xen, but valid by interface"); @@ -502,8 +502,6 @@ xen_intr_init(void *dummy __unused) if (bootverbose) printf("Xen interrupt system initialized\n"); - - return (0); } SYSINIT(xen_intr_init, SI_SUB_INTR, SI_ORDER_SECOND, xen_intr_init, NULL); diff --git a/sys/fs/devfs/devfs_dir.c b/sys/fs/devfs/devfs_dir.c index 3dc87538017d..aad87606e738 100644 --- a/sys/fs/devfs/devfs_dir.c +++ b/sys/fs/devfs/devfs_dir.c @@ -162,7 +162,7 @@ int devfs_pathpath(const char *p1, const char *p2) { - for (;;p1++, p2++) { + for (;; p1++, p2++) { if (*p1 != *p2) { if (*p1 == '/' && *p2 == '\0') return (1); diff --git a/sys/fs/fuse/fuse_vnops.c b/sys/fs/fuse/fuse_vnops.c index 5c28db29fc63..683ee2f7ad56 100644 --- a/sys/fs/fuse/fuse_vnops.c +++ b/sys/fs/fuse/fuse_vnops.c @@ -284,7 +284,7 @@ fuse_flush(struct vnode *vp, struct ucred *cred, pid_t pid, int fflag) struct mount *mp = vnode_mount(vp); int err; - if (fsess_not_impl(vnode_mount(vp), FUSE_FLUSH)) + if (fsess_not_impl(mp, FUSE_FLUSH)) return 0; err = fuse_filehandle_getrw(vp, fflag, &fufh, cred, pid); @@ -292,7 +292,7 @@ fuse_flush(struct vnode *vp, struct ucred *cred, pid_t pid, int fflag) return err; if (fufh->fuse_open_flags & FOPEN_NOFLUSH && - (!fsess_opt_writeback(vnode_mount(vp)))) + (!fsess_opt_writeback(mp))) return (0); fdisp_init(&fdi, sizeof(*ffi)); diff --git a/sys/fs/nullfs/null.h b/sys/fs/nullfs/null.h index ad3f7779e108..7bfdc20a3f67 100644 --- a/sys/fs/nullfs/null.h +++ b/sys/fs/nullfs/null.h @@ -35,11 +35,12 @@ #ifndef FS_NULL_H #define FS_NULL_H -#define NULLM_CACHE 0x0001 - #include <sys/ck.h> #include <vm/uma.h> +#define NULLM_CACHE 0x0001 +#define NULLM_NOUNPBYPASS 0x0002 + struct null_mount { struct mount *nullm_vfs; struct vnode *nullm_lowerrootvp; /* Ref to lower root vnode */ @@ -82,6 +83,16 @@ struct vnode *null_checkvp(struct vnode *vp, char *fil, int lno); #endif extern struct vop_vector null_vnodeops; +extern struct vop_vector 
null_vnodeops_no_unp_bypass; + +static inline bool +null_is_nullfs_vnode(struct vnode *vp) +{ + const struct vop_vector *op; + + op = vp->v_op; + return (op == &null_vnodeops || op == &null_vnodeops_no_unp_bypass); +} extern uma_zone_t null_node_zone; diff --git a/sys/fs/nullfs/null_subr.c b/sys/fs/nullfs/null_subr.c index d7f847d449d0..a843ae44f121 100644 --- a/sys/fs/nullfs/null_subr.c +++ b/sys/fs/nullfs/null_subr.c @@ -240,7 +240,9 @@ null_nodeget(struct mount *mp, struct vnode *lowervp, struct vnode **vpp) */ xp = uma_zalloc_smr(null_node_zone, M_WAITOK); - error = getnewvnode("nullfs", mp, &null_vnodeops, &vp); + error = getnewvnode("nullfs", mp, (MOUNTTONULLMOUNT(mp)->nullm_flags & + NULLM_NOUNPBYPASS) != 0 ? &null_vnodeops_no_unp_bypass : + &null_vnodeops, &vp); if (error) { vput(lowervp); uma_zfree_smr(null_node_zone, xp); diff --git a/sys/fs/nullfs/null_vfsops.c b/sys/fs/nullfs/null_vfsops.c index 4cddf24a5745..170a3dd51cd8 100644 --- a/sys/fs/nullfs/null_vfsops.c +++ b/sys/fs/nullfs/null_vfsops.c @@ -85,6 +85,10 @@ nullfs_mount(struct mount *mp) char *target; int error, len; bool isvnunlocked; + static const char cache_opt_name[] = "cache"; + static const char nocache_opt_name[] = "nocache"; + static const char unixbypass_opt_name[] = "unixbypass"; + static const char nounixbypass_opt_name[] = "nounixbypass"; NULLFSDEBUG("nullfs_mount(mp = %p)\n", (void *)mp); @@ -116,7 +120,7 @@ nullfs_mount(struct mount *mp) /* * Unlock lower node to avoid possible deadlock. */ - if (mp->mnt_vnodecovered->v_op == &null_vnodeops && + if (null_is_nullfs_vnode(mp->mnt_vnodecovered) && VOP_ISLOCKED(mp->mnt_vnodecovered) == LK_EXCLUSIVE) { VOP_UNLOCK(mp->mnt_vnodecovered); isvnunlocked = true; @@ -150,7 +154,7 @@ nullfs_mount(struct mount *mp) /* * Check multi null mount to avoid `lock against myself' panic. */ - if (mp->mnt_vnodecovered->v_op == &null_vnodeops) { + if (null_is_nullfs_vnode(mp->mnt_vnodecovered)) { nn = VTONULL(mp->mnt_vnodecovered); if (nn == NULL || lowerrootvp == nn->null_lowervp) { NULLFSDEBUG("nullfs_mount: multi null mount?\n"); @@ -205,9 +209,10 @@ nullfs_mount(struct mount *mp) MNT_IUNLOCK(mp); } - if (vfs_getopt(mp->mnt_optnew, "cache", NULL, NULL) == 0) { + if (vfs_getopt(mp->mnt_optnew, cache_opt_name, NULL, NULL) == 0) { xmp->nullm_flags |= NULLM_CACHE; - } else if (vfs_getopt(mp->mnt_optnew, "nocache", NULL, NULL) == 0) { + } else if (vfs_getopt(mp->mnt_optnew, nocache_opt_name, NULL, + NULL) == 0) { ; } else if (null_cache_vnodes && (xmp->nullm_vfs->mnt_kern_flag & MNTK_NULL_NOCACHE) == 0) { @@ -219,6 +224,13 @@ nullfs_mount(struct mount *mp) &xmp->notify_node); } + if (vfs_getopt(mp->mnt_optnew, unixbypass_opt_name, NULL, NULL) == 0) { + ; + } else if (vfs_getopt(mp->mnt_optnew, nounixbypass_opt_name, NULL, + NULL) == 0) { + xmp->nullm_flags |= NULLM_NOUNPBYPASS; + } + if (lowerrootvp == mp->mnt_vnodecovered) { vn_lock(lowerrootvp, LK_EXCLUSIVE | LK_RETRY | LK_CANRECURSE); lowerrootvp->v_vflag |= VV_CROSSLOCK; diff --git a/sys/fs/nullfs/null_vnops.c b/sys/fs/nullfs/null_vnops.c index ec8a6b10b13f..d4baabeb40ab 100644 --- a/sys/fs/nullfs/null_vnops.c +++ b/sys/fs/nullfs/null_vnops.c @@ -278,7 +278,7 @@ null_bypass(struct vop_generic_args *ap) * that aren't. (We must always map first vp or vclean fails.) 
*/ if (i != 0 && (*this_vp_p == NULL || - (*this_vp_p)->v_op != &null_vnodeops)) { + !null_is_nullfs_vnode(*this_vp_p))) { old_vps[i] = NULL; } else { old_vps[i] = *this_vp_p; @@ -1256,3 +1256,11 @@ struct vop_vector null_vnodeops = { .vop_copy_file_range = VOP_PANIC, }; VFS_VOP_VECTOR_REGISTER(null_vnodeops); + +struct vop_vector null_vnodeops_no_unp_bypass = { + .vop_default = &null_vnodeops, + .vop_unp_bind = vop_stdunp_bind, + .vop_unp_connect = vop_stdunp_connect, + .vop_unp_detach = vop_stdunp_detach, +}; +VFS_VOP_VECTOR_REGISTER(null_vnodeops_no_unp_bypass); diff --git a/sys/fs/p9fs/p9_transport.c b/sys/fs/p9fs/p9_transport.c index c82d81fedcd7..25eee984265c 100644 --- a/sys/fs/p9fs/p9_transport.c +++ b/sys/fs/p9fs/p9_transport.c @@ -34,9 +34,8 @@ TAILQ_HEAD(, p9_trans_module) transports; static void -p9_transport_init(void) +p9_transport_init(void *dummy __unused) { - TAILQ_INIT(&transports); } diff --git a/sys/fs/udf/osta.c b/sys/fs/udf/osta.c index f79b86993367..1a083d8c26b1 100644 --- a/sys/fs/udf/osta.c +++ b/sys/fs/udf/osta.c @@ -383,7 +383,7 @@ int UDFTransName( int maxFilenameLen; /* Translate extension, and store it in ext. */ for(index = 0; index<EXT_SIZE && - extIndex + index +1 < udfLen; index++ ) { + extIndex + index +1 < udfLen; index++) { current = udfName[extIndex + index + 1]; if (IsIllegal(current) || !UnicodeIsPrint(current)) { @@ -432,7 +432,7 @@ int UDFTransName( /* Place a translated extension at end, if found. */ if (hasExt) { newName[newIndex++] = PERIOD; - for (index = 0;index < localExtIndex ;index++ ) { + for (index = 0; index < localExtIndex; index++) { newName[newIndex++] = ext[index]; } } diff --git a/sys/fs/unionfs/union_subr.c b/sys/fs/unionfs/union_subr.c index a14f9ca74305..b6d6db60ca3d 100644 --- a/sys/fs/unionfs/union_subr.c +++ b/sys/fs/unionfs/union_subr.c @@ -587,6 +587,7 @@ unionfs_find_node_status(struct unionfs_node *unp, struct thread *td) struct unionfs_node_status *unsp; pid_t pid; + MPASS(td != NULL); pid = td->td_proc->p_pid; ASSERT_VOP_ELOCKED(UNIONFSTOV(unp), __func__); @@ -612,6 +613,7 @@ unionfs_get_node_status(struct unionfs_node *unp, struct thread *td, struct unionfs_node_status *unsp; pid_t pid; + MPASS(td != NULL); pid = td->td_proc->p_pid; KASSERT(NULL != unspp, ("%s: NULL status", __func__)); diff --git a/sys/fs/unionfs/union_vnops.c b/sys/fs/unionfs/union_vnops.c index 627b2f6e9a1d..66fee97a07d5 100644 --- a/sys/fs/unionfs/union_vnops.c +++ b/sys/fs/unionfs/union_vnops.c @@ -814,7 +814,7 @@ unionfs_close(struct vop_close_args *ap) unp = VTOUNIONFS(vp); lvp = unp->un_lowervp; uvp = unp->un_uppervp; - unsp = unionfs_find_node_status(unp, td); + unsp = (td != NULL) ? 
unionfs_find_node_status(unp, td) : NULL; if (unsp == NULL || (unsp->uns_lower_opencnt <= 0 && unsp->uns_upper_opencnt <= 0)) { @@ -2208,7 +2208,6 @@ unionfs_lock_restart: vholdnz(tvp); VI_UNLOCK(vp); error = VOP_LOCK(tvp, flags); - vdrop(tvp); if (error == 0 && (lvp_locked || VTOUNIONFS(vp) == NULL)) { /* * After dropping the interlock above, there exists a window @@ -2234,6 +2233,7 @@ unionfs_lock_restart: unp = VTOUNIONFS(vp); if (unp == NULL || unp->un_uppervp != NULL) { VOP_UNLOCK(tvp); + vdrop(tvp); /* * If we previously held the lock, the upgrade may * have temporarily dropped the lock, in which case @@ -2249,6 +2249,7 @@ unionfs_lock_restart: goto unionfs_lock_restart; } } + vdrop(tvp); return (error); } @@ -2259,7 +2260,6 @@ unionfs_unlock(struct vop_unlock_args *ap) struct vnode *vp; struct vnode *tvp; struct unionfs_node *unp; - int error; KASSERT_UNIONFS_VNODE(ap->a_vp); @@ -2271,11 +2271,7 @@ unionfs_unlock(struct vop_unlock_args *ap) tvp = (unp->un_uppervp != NULL ? unp->un_uppervp : unp->un_lowervp); - vholdnz(tvp); - error = VOP_UNLOCK(tvp); - vdrop(tvp); - - return (error); + return (VOP_UNLOCK(tvp)); } static int diff --git a/sys/geom/part/g_part.c b/sys/geom/part/g_part.c index 4c0d0c3aa902..1e4236507fa4 100644 --- a/sys/geom/part/g_part.c +++ b/sys/geom/part/g_part.c @@ -122,13 +122,13 @@ struct g_part_alias_list { { "ntfs", G_PART_ALIAS_MS_NTFS }, { "openbsd-data", G_PART_ALIAS_OPENBSD_DATA }, { "prep-boot", G_PART_ALIAS_PREP_BOOT }, - { "solaris-boot", G_PART_ALIAS_SOLARIS_BOOT }, - { "solaris-root", G_PART_ALIAS_SOLARIS_ROOT }, - { "solaris-swap", G_PART_ALIAS_SOLARIS_SWAP }, - { "solaris-backup", G_PART_ALIAS_SOLARIS_BACKUP }, - { "solaris-var", G_PART_ALIAS_SOLARIS_VAR }, - { "solaris-home", G_PART_ALIAS_SOLARIS_HOME }, - { "solaris-altsec", G_PART_ALIAS_SOLARIS_ALTSEC }, + { "solaris-boot", G_PART_ALIAS_SOLARIS_BOOT }, + { "solaris-root", G_PART_ALIAS_SOLARIS_ROOT }, + { "solaris-swap", G_PART_ALIAS_SOLARIS_SWAP }, + { "solaris-backup", G_PART_ALIAS_SOLARIS_BACKUP }, + { "solaris-var", G_PART_ALIAS_SOLARIS_VAR }, + { "solaris-home", G_PART_ALIAS_SOLARIS_HOME }, + { "solaris-altsec", G_PART_ALIAS_SOLARIS_ALTSEC }, { "solaris-reserved", G_PART_ALIAS_SOLARIS_RESERVED }, { "u-boot-env", G_PART_ALIAS_U_BOOT_ENV }, { "vmware-reserved", G_PART_ALIAS_VMRESERVED }, diff --git a/sys/i386/i386/in_cksum_machdep.c b/sys/i386/i386/in_cksum_machdep.c index 27ab09d82da0..b658d85bc892 100644 --- a/sys/i386/i386/in_cksum_machdep.c +++ b/sys/i386/i386/in_cksum_machdep.c @@ -84,7 +84,7 @@ in_cksum_skip(struct mbuf *m, int len, int skip) } } - for (;m && len; m = m->m_next) { + for (; m && len; m = m->m_next) { if (m->m_len == 0) continue; w = mtod(m, u_short *); diff --git a/sys/i386/i386/machdep.c b/sys/i386/i386/machdep.c index 6aac0e968362..3f659432552c 100644 --- a/sys/i386/i386/machdep.c +++ b/sys/i386/i386/machdep.c @@ -1605,7 +1605,7 @@ init386(int first) } static void -machdep_init_trampoline(void) +machdep_init_trampoline(void *dummy __unused) { struct region_descriptor r_gdt, r_idt; struct i386tss *tss; diff --git a/sys/i386/i386/pmap.c b/sys/i386/i386/pmap.c index b44f5e08bbcf..1cf0867d57c3 100644 --- a/sys/i386/i386/pmap.c +++ b/sys/i386/i386/pmap.c @@ -720,7 +720,7 @@ __CONCAT(PMTYPE, bootstrap)(vm_paddr_t firstaddr) } static void -pmap_init_reserved_pages(void) +pmap_init_reserved_pages(void *dummy __unused) { struct pcpu *pc; vm_offset_t pages; diff --git a/sys/kern/imgact_elf.c b/sys/kern/imgact_elf.c index 1bc2491a1a12..a1fabbc86f27 100644 --- a/sys/kern/imgact_elf.c 
+++ b/sys/kern/imgact_elf.c @@ -92,7 +92,7 @@ #define ELF_ABI_ID __CONCAT(elf, __ELF_WORD_SIZE) static int __elfN(check_header)(const Elf_Ehdr *hdr); -static Elf_Brandinfo *__elfN(get_brandinfo)(struct image_params *imgp, +static const Elf_Brandinfo *__elfN(get_brandinfo)(struct image_params *imgp, const char *interp, int32_t *osrel, uint32_t *fctl0); static int __elfN(load_file)(struct proc *p, const char *file, u_long *addr, u_long *entry); @@ -104,7 +104,7 @@ static bool __elfN(freebsd_trans_osrel)(const Elf_Note *note, int32_t *osrel); static bool kfreebsd_trans_osrel(const Elf_Note *note, int32_t *osrel); static bool __elfN(check_note)(struct image_params *imgp, - Elf_Brandnote *checknote, int32_t *osrel, bool *has_fctl0, + const Elf_Brandnote *checknote, int32_t *osrel, bool *has_fctl0, uint32_t *fctl0); static vm_prot_t __elfN(trans_prot)(Elf_Word); static Elf_Word __elfN(untrans_prot)(vm_prot_t); @@ -227,7 +227,7 @@ SYSCTL_BOOL(ELF_NODE_OID, OID_AUTO, allow_wx, CTLFLAG_RWTUN, &__elfN(allow_wx), 0, "Allow pages to be mapped simultaneously writable and executable"); -static Elf_Brandinfo *elf_brand_list[MAX_BRANDS]; +static const Elf_Brandinfo *elf_brand_list[MAX_BRANDS]; #define aligned(a, t) (rounddown2((u_long)(a), sizeof(t)) == (u_long)(a)) @@ -286,7 +286,7 @@ kfreebsd_trans_osrel(const Elf_Note *note, int32_t *osrel) } int -__elfN(insert_brand_entry)(Elf_Brandinfo *entry) +__elfN(insert_brand_entry)(const Elf_Brandinfo *entry) { int i; @@ -305,7 +305,7 @@ __elfN(insert_brand_entry)(Elf_Brandinfo *entry) } int -__elfN(remove_brand_entry)(Elf_Brandinfo *entry) +__elfN(remove_brand_entry)(const Elf_Brandinfo *entry) { int i; @@ -321,7 +321,7 @@ __elfN(remove_brand_entry)(Elf_Brandinfo *entry) } bool -__elfN(brand_inuse)(Elf_Brandinfo *entry) +__elfN(brand_inuse)(const Elf_Brandinfo *entry) { struct proc *p; bool rval = false; @@ -338,12 +338,12 @@ __elfN(brand_inuse)(Elf_Brandinfo *entry) return (rval); } -static Elf_Brandinfo * +static const Elf_Brandinfo * __elfN(get_brandinfo)(struct image_params *imgp, const char *interp, int32_t *osrel, uint32_t *fctl0) { const Elf_Ehdr *hdr = (const Elf_Ehdr *)imgp->image_header; - Elf_Brandinfo *bi, *bi_m; + const Elf_Brandinfo *bi, *bi_m; bool ret, has_fctl0; int i, interp_name_len; @@ -492,7 +492,7 @@ __elfN(phdr_in_zero_page)(const Elf_Ehdr *hdr) static int __elfN(check_header)(const Elf_Ehdr *hdr) { - Elf_Brandinfo *bi; + const Elf_Brandinfo *bi; int i; if (!IS_ELF(*hdr) || @@ -1109,7 +1109,7 @@ __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp) struct vmspace *vmspace; vm_map_t map; char *interp; - Elf_Brandinfo *brand_info; + const Elf_Brandinfo *brand_info; struct sysentvec *sv; u_long addr, baddr, entry, proghdr; u_long maxalign, maxsalign, mapsz, maxv, maxv1, anon_loc; @@ -1925,7 +1925,7 @@ __elfN(puthdr)(struct thread *td, void *hdr, size_t hdrsize, int numsegs, Elf_Phdr *phdr; Elf_Shdr *shdr; struct phdr_closure phc; - Elf_Brandinfo *bi; + const Elf_Brandinfo *bi; ehdr = (Elf_Ehdr *)hdr; bi = td->td_proc->p_elf_brandinfo; @@ -2831,7 +2831,7 @@ __elfN(parse_notes)(const struct image_params *imgp, const Elf_Note *checknote, } if ((const char *)note_end - (const char *)note < sizeof(Elf_Note)) { - uprintf("ELF note to short\n"); + uprintf("ELF note too short\n"); goto retf; } if (note->n_namesz != checknote->n_namesz || @@ -2839,9 +2839,9 @@ __elfN(parse_notes)(const struct image_params *imgp, const Elf_Note *checknote, note->n_type != checknote->n_type) goto nextnote; note_name = (const char *)(note + 1); - if (note_name + 
checknote->n_namesz >= - (const char *)note_end || strncmp(note_vendor, - note_name, checknote->n_namesz) != 0) + if (note_name + roundup2(note->n_namesz, ELF_NOTE_ROUNDSIZE) + + note->n_descsz >= (const char *)note_end || + strncmp(note_vendor, note_name, checknote->n_namesz) != 0) goto nextnote; if (cb(note, cb_arg, &res)) @@ -2861,7 +2861,7 @@ ret: } struct brandnote_cb_arg { - Elf_Brandnote *brandnote; + const Elf_Brandnote *brandnote; int32_t *osrel; }; @@ -2883,7 +2883,7 @@ brandnote_cb(const Elf_Note *note, void *arg0, bool *res) return (true); } -static Elf_Note fctl_note = { +static const Elf_Note fctl_note = { .n_namesz = sizeof(FREEBSD_ABI_VENDOR), .n_descsz = sizeof(uint32_t), .n_type = NT_FREEBSD_FEATURE_CTL, @@ -2918,7 +2918,7 @@ note_fctl_cb(const Elf_Note *note, void *arg0, bool *res) * as for headers. */ static bool -__elfN(check_note)(struct image_params *imgp, Elf_Brandnote *brandnote, +__elfN(check_note)(struct image_params *imgp, const Elf_Brandnote *brandnote, int32_t *osrel, bool *has_fctl0, uint32_t *fctl0) { const Elf_Phdr *phdr; diff --git a/sys/kern/kern_boottrace.c b/sys/kern/kern_boottrace.c index 1fa87955a299..c83255bc74ee 100644 --- a/sys/kern/kern_boottrace.c +++ b/sys/kern/kern_boottrace.c @@ -579,7 +579,7 @@ sysctl_boottrace_reset(SYSCTL_HANDLER_ARGS) } static void -boottrace_init(void) +boottrace_init(void *dummy __unused) { if (!boottrace_enabled) diff --git a/sys/kern/kern_devctl.c b/sys/kern/kern_devctl.c index 7a2818c29b1a..a1696225df32 100644 --- a/sys/kern/kern_devctl.c +++ b/sys/kern/kern_devctl.c @@ -140,7 +140,7 @@ static struct devctlbridge { } devctl_notify_hook = { .send_f = NULL }; static void -devctl_init(void) +devctl_init(void *dummy __unused) { int reserve; uma_zone_t z; diff --git a/sys/kern/kern_event.c b/sys/kern/kern_event.c index 23d8dc9cf54a..a6333d8011b1 100644 --- a/sys/kern/kern_event.c +++ b/sys/kern/kern_event.c @@ -156,7 +156,7 @@ static void knote_drop(struct knote *kn, struct thread *td); static void knote_drop_detached(struct knote *kn, struct thread *td); static void knote_enqueue(struct knote *kn); static void knote_dequeue(struct knote *kn); -static void knote_init(void); +static void knote_init(void *); static struct knote *knote_alloc(int mflag); static void knote_free(struct knote *kn); @@ -2887,7 +2887,7 @@ knote_dequeue(struct knote *kn) } static void -knote_init(void) +knote_init(void *dummy __unused) { knote_zone = uma_zcreate("KNOTE", sizeof(struct knote), NULL, NULL, diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c index 0fc2d0e7f1bc..2bdd6faa025a 100644 --- a/sys/kern/kern_exec.c +++ b/sys/kern/kern_exec.c @@ -418,7 +418,7 @@ do_execve(struct thread *td, struct image_args *args, struct mac *mac_p, #endif int error, i, orig_osrel; uint32_t orig_fctl0; - Elf_Brandinfo *orig_brandinfo; + const Elf_Brandinfo *orig_brandinfo; size_t freepath_size; static const char fexecv_proc_title[] = "(fexecv)"; @@ -1314,7 +1314,7 @@ exec_map_stack(struct image_params *imgp) MAP_INHERIT_SHARE | MAP_ACC_NO_CHARGE); } else { sharedpage_addr = sv->sv_shared_page_base; - vm_map_fixed(map, obj, 0, + error = vm_map_fixed(map, obj, 0, sharedpage_addr, sv->sv_shared_page_len, VM_PROT_READ | VM_PROT_EXECUTE, VM_PROT_READ | VM_PROT_EXECUTE, diff --git a/sys/kern/kern_exit.c b/sys/kern/kern_exit.c index ab8ed32ad189..c4b1c8201ff2 100644 --- a/sys/kern/kern_exit.c +++ b/sys/kern/kern_exit.c @@ -807,7 +807,7 @@ kern_abort2(struct thread *td, const char *why, int nargs, void **uargs) } if (nargs > 0) { sbuf_putc(sb, '('); - for (i = 
0;i < nargs; i++) + for (i = 0; i < nargs; i++) sbuf_printf(sb, "%s%p", i == 0 ? "" : ", ", uargs[i]); sbuf_putc(sb, ')'); } diff --git a/sys/kern/kern_jailmeta.c b/sys/kern/kern_jailmeta.c index 4e37eccad03a..91bb7155820d 100644 --- a/sys/kern/kern_jailmeta.c +++ b/sys/kern/kern_jailmeta.c @@ -599,22 +599,18 @@ SYSCTL_PROC(_security_jail, OID_AUTO, env, /* Setup and tear down. */ -static int +static void jm_sysinit(void *arg __unused) { meta.osd_slot = osd_jail_register(jm_osd_destructor, meta.methods); env.osd_slot = osd_jail_register(jm_osd_destructor, env.methods); - - return (0); } -static int +static void jm_sysuninit(void *arg __unused) { osd_jail_deregister(meta.osd_slot); osd_jail_deregister(env.osd_slot); - - return (0); } SYSINIT(jailmeta, SI_SUB_DRIVERS, SI_ORDER_ANY, jm_sysinit, NULL); diff --git a/sys/kern/kern_linker.c b/sys/kern/kern_linker.c index d566bc01bc5e..e2f63cbc0c5a 100644 --- a/sys/kern/kern_linker.c +++ b/sys/kern/kern_linker.c @@ -435,7 +435,7 @@ linker_file_register_modules(linker_file_t lf) } static void -linker_init_kernel_modules(void) +linker_init_kernel_modules(void *dummy __unused) { sx_xlock(&kld_sx); diff --git a/sys/kern/kern_malloc.c b/sys/kern/kern_malloc.c index 653ce1ee556b..fcbfbe64f854 100644 --- a/sys/kern/kern_malloc.c +++ b/sys/kern/kern_malloc.c @@ -303,7 +303,7 @@ sysctl_vm_malloc_zone_sizes(SYSCTL_HANDLER_ARGS) */ #if MALLOC_DEBUG_MAXZONES > 1 static void -tunable_set_numzones(void) +tunable_set_numzones(void *dummy __unused) { TUNABLE_INT_FETCH("debug.malloc.numzones", @@ -1302,7 +1302,7 @@ mallocinit(void *dummy) #endif align, UMA_ZONE_MALLOC); } - for (;i <= size; i+= KMEM_ZBASE) + for (; i <= size; i+= KMEM_ZBASE) kmemsize[i >> KMEM_ZSHIFT] = indx; } } diff --git a/sys/kern/kern_racct.c b/sys/kern/kern_racct.c index 7351e9cb6313..2aab151aba08 100644 --- a/sys/kern/kern_racct.c +++ b/sys/kern/kern_racct.c @@ -1312,7 +1312,7 @@ static struct kproc_desc racctd_kp = { }; static void -racctd_init(void) +racctd_init(void *dummy __unused) { if (!racct_enable) return; @@ -1322,7 +1322,7 @@ racctd_init(void) SYSINIT(racctd, SI_SUB_RACCTD, SI_ORDER_FIRST, racctd_init, NULL); static void -racct_init(void) +racct_init(void *dummy __unused) { if (!racct_enable) return; diff --git a/sys/kern/kern_rangelock.c b/sys/kern/kern_rangelock.c index 3854ffbeec29..cd66bff62608 100644 --- a/sys/kern/kern_rangelock.c +++ b/sys/kern/kern_rangelock.c @@ -300,7 +300,7 @@ static void rangelock_free_free(struct rl_q_entry *free); static void rangelock_noncheating_destroy(struct rangelock *lock); static void -rangelock_sys_init(void) +rangelock_sys_init(void *dummy __unused) { rl_entry_zone = uma_zcreate("rl_entry", sizeof(struct rl_q_entry), NULL, NULL, NULL, NULL, UMA_ALIGNOF(struct rl_q_entry), diff --git a/sys/kern/kern_rctl.c b/sys/kern/kern_rctl.c index 4232c71f86fb..682ba86d23ff 100644 --- a/sys/kern/kern_rctl.c +++ b/sys/kern/kern_rctl.c @@ -209,7 +209,7 @@ static struct dict actionnames[] = { { "throttle", RCTL_ACTION_THROTTLE }, { NULL, -1 }}; -static void rctl_init(void); +static void rctl_init(void *); SYSINIT(rctl, SI_SUB_RACCT, SI_ORDER_FIRST, rctl_init, NULL); static uma_zone_t rctl_rule_zone; @@ -2175,7 +2175,7 @@ rctl_racct_release(struct racct *racct) } static void -rctl_init(void) +rctl_init(void *dummy __unused) { if (!racct_enable) diff --git a/sys/kern/kern_sharedpage.c b/sys/kern/kern_sharedpage.c index 5b8398caaca9..f48d0e3d616b 100644 --- a/sys/kern/kern_sharedpage.c +++ b/sys/kern/kern_sharedpage.c @@ -130,8 +130,7 @@ shared_page_init(void 
*dummy __unused) shared_page_mapping = (char *)addr; } -SYSINIT(shp, SI_SUB_EXEC, SI_ORDER_FIRST, (sysinit_cfunc_t)shared_page_init, - NULL); +SYSINIT(shp, SI_SUB_EXEC, SI_ORDER_FIRST, shared_page_init, NULL); /* * Push the timehands update to the shared page. diff --git a/sys/kern/kern_sig.c b/sys/kern/kern_sig.c index 8efc0886988b..21f765b17f62 100644 --- a/sys/kern/kern_sig.c +++ b/sys/kern/kern_sig.c @@ -113,7 +113,7 @@ static int filt_sigattach(struct knote *kn); static void filt_sigdetach(struct knote *kn); static int filt_signal(struct knote *kn, long hint); static struct thread *sigtd(struct proc *p, int sig, bool fast_sigblock); -static void sigqueue_start(void); +static void sigqueue_start(void *); static void sigfastblock_setpend(struct thread *td, bool resched); static void sig_handle_first_stop(struct thread *td, struct proc *p, int sig); @@ -344,7 +344,7 @@ ast_sigsuspend(struct thread *td, int tda __unused) } static void -sigqueue_start(void) +sigqueue_start(void *dummy __unused) { ksiginfo_zone = uma_zcreate("ksiginfo", sizeof(ksiginfo_t), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); diff --git a/sys/kern/kern_time.c b/sys/kern/kern_time.c index 2a6f0989f6aa..5b7485c25cd7 100644 --- a/sys/kern/kern_time.c +++ b/sys/kern/kern_time.c @@ -90,7 +90,7 @@ static int user_clock_nanosleep(struct thread *td, clockid_t clock_id, int flags, const struct timespec *ua_rqtp, struct timespec *ua_rmtp); -static void itimer_start(void); +static void itimer_start(void *); static int itimer_init(void *, int, int); static void itimer_fini(void *, int); static void itimer_enter(struct itimer *); @@ -1170,7 +1170,7 @@ eventratecheck(struct timeval *lasttime, int *cureps, int maxeps) } static void -itimer_start(void) +itimer_start(void *dummy __unused) { static const struct kclock rt_clock = { .timer_create = realtimer_create, diff --git a/sys/kern/subr_devstat.c b/sys/kern/subr_devstat.c index 07a9cc0f57be..c4d0223d484f 100644 --- a/sys/kern/subr_devstat.c +++ b/sys/kern/subr_devstat.c @@ -415,7 +415,7 @@ sysctl_devstat(SYSCTL_HANDLER_ARGS) if (error != 0) return (error); - for (;nds != NULL;) { + while (nds != NULL) { error = SYSCTL_OUT(req, nds, sizeof(struct devstat)); if (error != 0) return (error); diff --git a/sys/kern/subr_pcpu.c b/sys/kern/subr_pcpu.c index 5c14e15830f4..c9a387a5e87b 100644 --- a/sys/kern/subr_pcpu.c +++ b/sys/kern/subr_pcpu.c @@ -140,7 +140,7 @@ uma_zone_t pcpu_zone_32; uma_zone_t pcpu_zone_64; static void -pcpu_zones_startup(void) +pcpu_zones_startup(void *dummy __unused) { pcpu_zone_4 = uma_zcreate("pcpu-4", 4, diff --git a/sys/kern/subr_prf.c b/sys/kern/subr_prf.c index db0ceb17b9f0..e2070ae3f865 100644 --- a/sys/kern/subr_prf.c +++ b/sys/kern/subr_prf.c @@ -766,7 +766,7 @@ reswitch: switch (ch = (u_char)*fmt++) { PCHAR(hex2ascii(*up & 0x0f)); up++; if (width) - for (q=p;*q;q++) + for (q = p; *q; q++) PCHAR(*q); } break; diff --git a/sys/kern/sys_socket.c b/sys/kern/sys_socket.c index c221106ae067..bc0725230cca 100644 --- a/sys/kern/sys_socket.c +++ b/sys/kern/sys_socket.c @@ -586,7 +586,7 @@ soaio_enqueue(struct task *task) } static void -soaio_init(void) +soaio_init(void *dummy __unused) { soaio_lifetime = AIOD_LIFETIME_DEFAULT; diff --git a/sys/kern/uipc_usrreq.c b/sys/kern/uipc_usrreq.c index 340d84666459..90489e99491a 100644 --- a/sys/kern/uipc_usrreq.c +++ b/sys/kern/uipc_usrreq.c @@ -1069,6 +1069,21 @@ uipc_stream_sbspace(struct sockbuf *sb) return (min(space, mbspace)); } +/* + * UNIX version of generic sbwait() for writes. 
We wait on peer's receive + buffer, using our timeout. + */ +static int +uipc_stream_sbwait(struct socket *so, sbintime_t timeo) +{ + struct sockbuf *sb = &so->so_rcv; + + SOCK_RECVBUF_LOCK_ASSERT(so); + sb->sb_flags |= SB_WAIT; + return (msleep_sbt(&sb->sb_acc, SOCK_RECVBUF_MTX(so), PSOCK | PCATCH, + "sbwait", timeo, 0, 0)); +} + static int uipc_sosend_stream_or_seqpacket(struct socket *so, struct sockaddr *addr, struct uio *uio0, struct mbuf *m, struct mbuf *c, int flags, @@ -1203,7 +1218,8 @@ restart: error = EWOULDBLOCK; goto out4; } - if ((error = sbwait(so2, SO_RCV)) != 0) { + if ((error = uipc_stream_sbwait(so2, + so->so_snd.sb_timeo)) != 0) { SOCK_RECVBUF_UNLOCK(so2); goto out4; } else @@ -1543,15 +1559,19 @@ restart: mc_init_m(&cmc, control); SOCK_RECVBUF_LOCK(so); - MPASS(!(sb->sb_state & SBS_CANTRCVMORE)); - - if (__predict_false(cmc.mc_len + sb->sb_ccc + - sb->sb_ctl > sb->sb_hiwat)) { + if (__predict_false( + (sb->sb_state & SBS_CANTRCVMORE) || + cmc.mc_len + sb->sb_ccc + sb->sb_ctl > + sb->sb_hiwat)) { /* - * Too bad, while unp_externalize() was - * failing, the other side had filled - * the buffer and we can't prepend data - * back. Losing data! + * While the lock was dropped and we + * were failing in unp_externalize(), + * the peer could have a) disconnected, + * b) filled the buffer so that we + * can't prepend data back. + * These are two edge conditions that + * we just can't handle, so lose the + * data and return the error. */ SOCK_RECVBUF_UNLOCK(so); SOCK_IO_RECV_UNLOCK(so); @@ -2397,7 +2417,7 @@ uipc_sendfile_wait(struct socket *so, off_t need, int *space) } if (!sockref) soref(so2); - error = sbwait(so2, SO_RCV); + error = uipc_stream_sbwait(so2, so->so_snd.sb_timeo); if (error == 0 && __predict_false(sb->sb_state & SBS_CANTRCVMORE)) error = EPIPE; diff --git a/sys/libkern/arc4random.c b/sys/libkern/arc4random.c index 016822e9f03c..6fca7c3c4e9d 100644 --- a/sys/libkern/arc4random.c +++ b/sys/libkern/arc4random.c @@ -156,7 +156,7 @@ chacha20_randomstir(struct chacha20_s *chacha20) * Initialize the contexts.
*/ static void -chacha20_init(void) +chacha20_init(void *dummy __unused) { struct chacha20_s *chacha20; @@ -176,7 +176,7 @@ SYSINIT(chacha20, SI_SUB_LOCK, SI_ORDER_ANY, chacha20_init, NULL); static void -chacha20_uninit(void) +chacha20_uninit(void *dummy __unused) { struct chacha20_s *chacha20; diff --git a/sys/libkern/x86/crc32_sse42.c b/sys/libkern/x86/crc32_sse42.c index b79c7afbeeb1..94ffdc178910 100644 --- a/sys/libkern/x86/crc32_sse42.c +++ b/sys/libkern/x86/crc32_sse42.c @@ -199,8 +199,10 @@ crc32c_shift(uint32_t zeros[][256], uint32_t crc) static void #ifndef _KERNEL __attribute__((__constructor__)) -#endif crc32c_init_hw(void) +#else +crc32c_init_hw(void *dummy __unused) +#endif { crc32c_zeros(crc32c_long, LONG); crc32c_zeros(crc32c_2long, 2 * LONG); diff --git a/sys/modules/Makefile b/sys/modules/Makefile index feb9778c23da..63a0b3260e6d 100644 --- a/sys/modules/Makefile +++ b/sys/modules/Makefile @@ -577,6 +577,7 @@ _mlx5ib= mlx5ib ${MACHINE_CPUARCH} == "i386" _ena= ena _gve= gve +_igc= igc # gcc13 and earlier lack __builtin_bitcountg used by linux emulation .if !(${COMPILER_TYPE} == "gcc" && ${COMPILER_VERSION} < 140000) _iwlwifi= iwlwifi @@ -747,7 +748,6 @@ _et= et _ftgpio= ftgpio _ftwd= ftwd _exca= exca -_igc= igc _io= io _itwd= itwd _ix= ix diff --git a/sys/modules/aic7xxx/ahc/Makefile b/sys/modules/aic7xxx/ahc/Makefile index 3741d4fb666f..6f9bdcb1d8bd 100644 --- a/sys/modules/aic7xxx/ahc/Makefile +++ b/sys/modules/aic7xxx/ahc/Makefile @@ -1,6 +1,4 @@ SYSDIR?=${SRCTOP}/sys -.include "${SYSDIR}/conf/kern.opts.mk" - .PATH: ${SYSDIR}/dev/aic7xxx KMOD= ahc SUBDIR+= ahc_isa ahc_pci diff --git a/sys/modules/cxgb/Makefile b/sys/modules/cxgb/Makefile index 2989ad580b97..7ebdc1d51945 100644 --- a/sys/modules/cxgb/Makefile +++ b/sys/modules/cxgb/Makefile @@ -1,6 +1,3 @@ -SYSDIR?=${SRCTOP}/sys -.include "${SYSDIR}/conf/kern.opts.mk" - SUBDIR= cxgb SUBDIR+= cxgb_t3fw diff --git a/sys/modules/dpdk_lpm4/Makefile b/sys/modules/dpdk_lpm4/Makefile index ff68fac78915..9bc2693aeffb 100644 --- a/sys/modules/dpdk_lpm4/Makefile +++ b/sys/modules/dpdk_lpm4/Makefile @@ -1,6 +1,3 @@ -SYSDIR?=${SRCTOP}/sys -.include "${SYSDIR}/conf/kern.opts.mk" - .PATH: ${SYSDIR}/contrib/dpdk_rte_lpm KMOD= dpdk_lpm4 diff --git a/sys/modules/dpdk_lpm6/Makefile b/sys/modules/dpdk_lpm6/Makefile index f2248e5d1c1c..9de2c6650422 100644 --- a/sys/modules/dpdk_lpm6/Makefile +++ b/sys/modules/dpdk_lpm6/Makefile @@ -1,6 +1,3 @@ -SYSDIR?=${SRCTOP}/sys -.include "${SYSDIR}/conf/kern.opts.mk" - .PATH: ${SYSDIR}/contrib/dpdk_rte_lpm KMOD= dpdk_lpm6 diff --git a/sys/modules/fib_dxr/Makefile b/sys/modules/fib_dxr/Makefile index 7d1996ba510f..f8a28abe957a 100644 --- a/sys/modules/fib_dxr/Makefile +++ b/sys/modules/fib_dxr/Makefile @@ -1,6 +1,3 @@ -SYSDIR?=${SRCTOP}/sys -.include "${SYSDIR}/conf/kern.opts.mk" - .PATH: ${SYSDIR}/netinet KMOD= fib_dxr diff --git a/sys/modules/if_enc/Makefile b/sys/modules/if_enc/Makefile index 449d869d6a21..bd865a0216a4 100644 --- a/sys/modules/if_enc/Makefile +++ b/sys/modules/if_enc/Makefile @@ -1,6 +1,4 @@ SYSDIR?=${SRCTOP}/sys -.include "${SYSDIR}/conf/kern.opts.mk" - .PATH: ${SYSDIR}/net KMOD= if_enc diff --git a/sys/modules/if_gif/Makefile b/sys/modules/if_gif/Makefile index efcd6952a8ac..5e3fda3a51c6 100644 --- a/sys/modules/if_gif/Makefile +++ b/sys/modules/if_gif/Makefile @@ -1,6 +1,4 @@ SYSDIR?=${SRCTOP}/sys -.include "${SYSDIR}/conf/kern.opts.mk" - .PATH: ${SYSDIR}/net ${SYSDIR}/netinet ${SYSDIR}/netinet6 KMOD= if_gif diff --git a/sys/modules/if_gre/Makefile b/sys/modules/if_gre/Makefile 
index 9f50708a14d7..58bd03c23785 100644 --- a/sys/modules/if_gre/Makefile +++ b/sys/modules/if_gre/Makefile @@ -1,6 +1,5 @@ SYSDIR?=${SRCTOP}/sys .PATH: ${SYSDIR}/net ${SYSDIR}/netinet ${SYSDIR}/netinet6 -.include "${SYSDIR}/conf/kern.opts.mk" KMOD= if_gre SRCS= if_gre.c opt_inet.h opt_inet6.h opt_rss.h diff --git a/sys/modules/iser/Makefile b/sys/modules/iser/Makefile index 615199ec97a3..ff08ae6f346a 100644 --- a/sys/modules/iser/Makefile +++ b/sys/modules/iser/Makefile @@ -1,6 +1,4 @@ SYSDIR?=${SRCTOP}/sys -.include "${SYSDIR}/conf/kern.opts.mk" - .PATH: ${SYSDIR}/dev/iser/ KMOD= iser diff --git a/sys/modules/ktest/Makefile b/sys/modules/ktest/Makefile index 151db53417df..d5f15576f38b 100644 --- a/sys/modules/ktest/Makefile +++ b/sys/modules/ktest/Makefile @@ -1,8 +1,6 @@ -SYSDIR?=${SRCTOP}/sys -.include "${SYSDIR}/conf/kern.opts.mk" - SUBDIR= ktest \ ktest_example \ - ktest_netlink_message_writer + ktest_netlink_message_writer \ + ktest_tcphpts .include <bsd.subdir.mk> diff --git a/sys/modules/ktest/ktest/Makefile b/sys/modules/ktest/ktest/Makefile index 3d4f1a8c2cc0..9741662ef709 100644 --- a/sys/modules/ktest/ktest/Makefile +++ b/sys/modules/ktest/ktest/Makefile @@ -1,9 +1,5 @@ PACKAGE= tests - -SYSDIR?=${SRCTOP}/sys -.include "${SYSDIR}/conf/kern.opts.mk" - -.PATH: ${SYSDIR}/tests +.PATH: ${SRCTOP}/sys/tests KMOD= ktest SRCS= ktest.c diff --git a/sys/modules/ktest/ktest_example/Makefile b/sys/modules/ktest/ktest_example/Makefile index 2b572d867aa5..aacc8f0e4ca5 100644 --- a/sys/modules/ktest/ktest_example/Makefile +++ b/sys/modules/ktest/ktest_example/Makefile @@ -1,9 +1,8 @@ PACKAGE= tests -SYSDIR?=${SRCTOP}/sys -.include "${SYSDIR}/conf/kern.opts.mk" +.include "${SRCTOP}/sys/conf/kern.opts.mk" -.PATH: ${SYSDIR}/tests +.PATH: ${SRCTOP}/sys/tests KMOD= ktest_example SRCS= ktest_example.c diff --git a/sys/modules/ktest/ktest_netlink_message_writer/Makefile b/sys/modules/ktest/ktest_netlink_message_writer/Makefile index a91c45755d0d..3f05f9b26785 100644 --- a/sys/modules/ktest/ktest_netlink_message_writer/Makefile +++ b/sys/modules/ktest/ktest_netlink_message_writer/Makefile @@ -1,8 +1,6 @@ PACKAGE= tests SYSDIR?=${SRCTOP}/sys -.include "${SYSDIR}/conf/kern.opts.mk" - .PATH: ${SYSDIR}/netlink KMOD= ktest_netlink_message_writer diff --git a/sys/modules/ktest/ktest_tcphpts/Makefile b/sys/modules/ktest/ktest_tcphpts/Makefile new file mode 100644 index 000000000000..b642c0cb4209 --- /dev/null +++ b/sys/modules/ktest/ktest_tcphpts/Makefile @@ -0,0 +1,13 @@ +PACKAGE= tests +WARNS?= 6 + +SYSDIR?=${SRCTOP}/sys +.include "${SYSDIR}/conf/kern.opts.mk" + +.PATH: ${SYSDIR}/netinet + +KMOD= ktest_tcphpts +SRCS= tcp_hpts_test.c + +.include <bsd.kmod.mk> + diff --git a/sys/modules/miiproxy/Makefile b/sys/modules/miiproxy/Makefile index 730bef4220cd..ab92ebe71b43 100644 --- a/sys/modules/miiproxy/Makefile +++ b/sys/modules/miiproxy/Makefile @@ -3,7 +3,7 @@ KMOD = miiproxy SRCS= miiproxy.c -SRCS+= bus_if.h mdio_if.h miibus_if.h opt_platform.h +SRCS+= bus_if.h device_if.h mdio_if.h miibus_if.h opt_platform.h CFLAGS+= -I${SRCTOP}/sys/dev/etherswitch .include <bsd.kmod.mk> diff --git a/sys/modules/netgraph/Makefile b/sys/modules/netgraph/Makefile index 94560d5c51d7..b2d65af16e7f 100644 --- a/sys/modules/netgraph/Makefile +++ b/sys/modules/netgraph/Makefile @@ -1,5 +1,3 @@ -# $Whistle: Makefile,v 1.5 1999/01/24 06:48:37 archie Exp $ - SYSDIR?=${SRCTOP}/sys .include "${SYSDIR}/conf/kern.opts.mk" diff --git a/sys/modules/netgraph/checksum/Makefile b/sys/modules/netgraph/checksum/Makefile index 
4e2b1f547a40..bbbc7363d045 100644 --- a/sys/modules/netgraph/checksum/Makefile +++ b/sys/modules/netgraph/checksum/Makefile @@ -1,6 +1,3 @@ -SYSDIR?=${SRCTOP}/sys -.include "${SYSDIR}/conf/kern.opts.mk" - KMOD= ng_checksum SRCS= ng_checksum.c opt_inet.h opt_inet6.h diff --git a/sys/modules/netmap/Makefile b/sys/modules/netmap/Makefile index 17b52aec1893..8c114ac51538 100644 --- a/sys/modules/netmap/Makefile +++ b/sys/modules/netmap/Makefile @@ -2,9 +2,6 @@ # Compile netmap as a module, useful if you want a netmap bridge # or loadable drivers. -.include <bsd.own.mk> # FreeBSD 10 and earlier -# .include "${SYSDIR}/conf/kern.opts.mk" - .PATH: ${.CURDIR}/../../dev/netmap .PATH.h: ${.CURDIR}/../../net CFLAGS += -I${.CURDIR}/../../ -D INET -D VIMAGE diff --git a/sys/modules/opensolaris/Makefile b/sys/modules/opensolaris/Makefile index 98f52057e45e..7e2d5f9101ad 100644 --- a/sys/modules/opensolaris/Makefile +++ b/sys/modules/opensolaris/Makefile @@ -1,4 +1,4 @@ -SYSDIR?= ${SRCTOP}/sys +SYSDIR?=${SRCTOP}/sys .PATH: ${SYSDIR}/cddl/compat/opensolaris/kern .PATH: ${SYSDIR}/contrib/openzfs/module/os/freebsd/spl diff --git a/sys/modules/ow/Makefile b/sys/modules/ow/Makefile index 76fefe3e63be..7aa9d2de8183 100644 --- a/sys/modules/ow/Makefile +++ b/sys/modules/ow/Makefile @@ -1,6 +1,3 @@ -SYSDIR?=${SRCTOP}/sys -.include "${SYSDIR}/conf/kern.opts.mk" - SUBDIR = ow owc ow_temp .include <bsd.subdir.mk> diff --git a/sys/modules/qlnx/Makefile b/sys/modules/qlnx/Makefile index 2121f9d586a6..291b681c809e 100644 --- a/sys/modules/qlnx/Makefile +++ b/sys/modules/qlnx/Makefile @@ -31,9 +31,6 @@ # # -SYSDIR?=${SRCTOP}/sys -.include "${SYSDIR}/conf/kern.opts.mk" - SUBDIR=qlnxe SUBDIR+=qlnxev SUBDIR+=qlnxr diff --git a/sys/modules/rtwn/Makefile b/sys/modules/rtwn/Makefile index 9afdd2084ecb..f15cbbe8236b 100644 --- a/sys/modules/rtwn/Makefile +++ b/sys/modules/rtwn/Makefile @@ -1,7 +1,5 @@ .PATH: ${SRCTOP}/sys/dev/rtwn - -SYSDIR?=${SRCTOP}/sys -.include "${SYSDIR}/conf/kern.opts.mk" +.include "${SRCTOP}/sys/conf/kern.opts.mk" KMOD = rtwn SRCS = if_rtwn.c if_rtwn_tx.c if_rtwn_rx.c if_rtwn_beacon.c \ diff --git a/sys/modules/rtwn_pci/Makefile b/sys/modules/rtwn_pci/Makefile index ce2144121e88..3fea80d7d256 100644 --- a/sys/modules/rtwn_pci/Makefile +++ b/sys/modules/rtwn_pci/Makefile @@ -1,7 +1,5 @@ .PATH: ${SRCTOP}/sys/dev/rtwn/pci - -SYSDIR?=${SRCTOP}/sys -.include "${SYSDIR}/conf/kern.opts.mk" +.include "${SRCTOP}/sys/conf/kern.opts.mk" KMOD = if_rtwn_pci SRCS = rtwn_pci_attach.c rtwn_pci_reg.c rtwn_pci_rx.c rtwn_pci_tx.c \ diff --git a/sys/modules/rtwn_usb/Makefile b/sys/modules/rtwn_usb/Makefile index 16899b8a8c49..6a73276d088c 100644 --- a/sys/modules/rtwn_usb/Makefile +++ b/sys/modules/rtwn_usb/Makefile @@ -1,7 +1,5 @@ .PATH: ${SRCTOP}/sys/dev/rtwn/usb - -SYSDIR?=${SRCTOP}/sys -.include "${SYSDIR}/conf/kern.opts.mk" +.include "${SRCTOP}/sys/conf/kern.opts.mk" KMOD = if_rtwn_usb SRCS = rtwn_usb_attach.c rtwn_usb_ep.c rtwn_usb_reg.c rtwn_usb_rx.c \ diff --git a/sys/modules/sound/driver/Makefile b/sys/modules/sound/driver/Makefile index ff9499fdf841..02703d4b591a 100644 --- a/sys/modules/sound/driver/Makefile +++ b/sys/modules/sound/driver/Makefile @@ -1,5 +1,4 @@ -SYSDIR?=${SRCTOP}/sys -.include "${SYSDIR}/conf/kern.opts.mk" +.include "${SRCTOP}/sys/conf/kern.opts.mk" # Modules that include binary-only blobs of microcode should be selectable by # MK_SOURCELESS_UCODE option (see below). 
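The module Makefile hunks above and below all follow one convention: kern.opts.mk is included only where a Makefile actually tests an MK_* option (as the sound/driver comment notes for MK_SOURCELESS_UCODE), and then via ${SRCTOP} directly rather than through a SYSDIR?= definition that exists only for the include. A minimal sketch of the resulting pattern, using a hypothetical if_example module (illustrative only, not part of this change):

.include "${SRCTOP}/sys/conf/kern.opts.mk"

KMOD=	if_example
SRCS=	if_example.c

# Pull in the binary-only firmware blob only when sourceless
# microcode is permitted by the build options.
.if ${MK_SOURCELESS_UCODE} != "no"
SRCS+=	example_fw.c
.endif

.include <bsd.kmod.mk>

Makefiles that test no MK_* knob simply drop the include altogether, as the hunks here do.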
diff --git a/sys/modules/sound/sound/Makefile b/sys/modules/sound/sound/Makefile index f3978e9bd9cc..169b1a2730ec 100644 --- a/sys/modules/sound/sound/Makefile +++ b/sys/modules/sound/sound/Makefile @@ -1,5 +1,4 @@ SYSDIR?=${SRCTOP}/sys - .PATH: ${SYSDIR}/dev/sound .PATH: ${SYSDIR}/dev/sound/pcm .PATH: ${SYSDIR}/dev/sound/midi diff --git a/sys/modules/tests/fib_lookup/Makefile b/sys/modules/tests/fib_lookup/Makefile index 7d6198396911..b78d4309f145 100644 --- a/sys/modules/tests/fib_lookup/Makefile +++ b/sys/modules/tests/fib_lookup/Makefile @@ -1,6 +1,3 @@ -SYSDIR?=${SRCTOP}/sys -.include "${SYSDIR}/conf/kern.opts.mk" - .PATH: ${SYSDIR}/tests/fib_lookup KMOD= test_lookup diff --git a/sys/modules/vnic/Makefile b/sys/modules/vnic/Makefile index 7b975bfebe81..53e208328159 100644 --- a/sys/modules/vnic/Makefile +++ b/sys/modules/vnic/Makefile @@ -1,6 +1,3 @@ -SYSDIR?=${SRCTOP}/sys -.include "${SYSDIR}/conf/kern.opts.mk" - CFLAGS+= -DFDT SUBDIR = mrmlbus thunder_mdio thunder_bgx vnicpf vnicvf diff --git a/sys/modules/vnic/mrmlbus/Makefile b/sys/modules/vnic/mrmlbus/Makefile index a3581b7a79a5..a8fe9e5474e1 100644 --- a/sys/modules/vnic/mrmlbus/Makefile +++ b/sys/modules/vnic/mrmlbus/Makefile @@ -1,6 +1,3 @@ -SYSDIR?=${SRCTOP}/sys -.include "${SYSDIR}/conf/kern.opts.mk" - S= ${SRCTOP}/sys .PATH: $S/dev/vnic diff --git a/sys/modules/vnic/thunder_bgx/Makefile b/sys/modules/vnic/thunder_bgx/Makefile index 90df4b25df90..bf46c3194493 100644 --- a/sys/modules/vnic/thunder_bgx/Makefile +++ b/sys/modules/vnic/thunder_bgx/Makefile @@ -1,6 +1,3 @@ -SYSDIR?=${SRCTOP}/sys -.include "${SYSDIR}/conf/kern.opts.mk" - S= ${SRCTOP}/sys .PATH: $S/dev/vnic diff --git a/sys/modules/vnic/thunder_mdio/Makefile b/sys/modules/vnic/thunder_mdio/Makefile index 37032516f3ca..07cc583bfaf8 100644 --- a/sys/modules/vnic/thunder_mdio/Makefile +++ b/sys/modules/vnic/thunder_mdio/Makefile @@ -1,6 +1,3 @@ -SYSDIR?=${SRCTOP}/sys -.include "${SYSDIR}/conf/kern.opts.mk" - S= ${SRCTOP}/sys .PATH: $S/dev/vnic diff --git a/sys/modules/vnic/vnicpf/Makefile b/sys/modules/vnic/vnicpf/Makefile index 37cd29e6fdd8..3cd64d08a788 100644 --- a/sys/modules/vnic/vnicpf/Makefile +++ b/sys/modules/vnic/vnicpf/Makefile @@ -1,6 +1,3 @@ -SYSDIR?=${SRCTOP}/sys -.include "${SYSDIR}/conf/kern.opts.mk" - S= ${SRCTOP}/sys .PATH: $S/dev/vnic diff --git a/sys/modules/vnic/vnicvf/Makefile b/sys/modules/vnic/vnicvf/Makefile index c6ffaaa2c302..da938b7fd073 100644 --- a/sys/modules/vnic/vnicvf/Makefile +++ b/sys/modules/vnic/vnicvf/Makefile @@ -1,6 +1,3 @@ -SYSDIR?=${SRCTOP}/sys -.include "${SYSDIR}/conf/kern.opts.mk" - S= ${SRCTOP}/sys .PATH: $S/dev/vnic diff --git a/sys/net/route.c b/sys/net/route.c index 7a50bcc43e06..d2c9f3e39c17 100644 --- a/sys/net/route.c +++ b/sys/net/route.c @@ -89,7 +89,7 @@ static int rt_ifdelroute(const struct rtentry *rt, const struct nhop_object *, * SI_ORDER_MIDDLE. 
*/ static void -route_init(void) +route_init(void *dummy __unused) { nhops_init(); diff --git a/sys/net/route/route_tables.c b/sys/net/route/route_tables.c index 176ca43fa1c5..3b7bb1385d0e 100644 --- a/sys/net/route/route_tables.c +++ b/sys/net/route/route_tables.c @@ -186,7 +186,7 @@ rtables_prison_destructor(void *data) } static void -rtables_init(void) +rtables_init(void *dummy __unused) { osd_method_t methods[PR_MAXMETHOD] = { [PR_METHOD_ATTACH] = rtables_check_proc_fib, diff --git a/sys/net/rtsock.c b/sys/net/rtsock.c index f0dcc973ca7c..be858428bb3e 100644 --- a/sys/net/rtsock.c +++ b/sys/net/rtsock.c @@ -309,7 +309,7 @@ rtsock_notify_event(uint32_t fibnum, const struct rib_cmd_info *rc) } static void -rtsock_init(void) +rtsock_init(void *dummy __unused) { rtsbridge_orig_p = rtsock_callback_p; rtsock_callback_p = &rtsbridge; diff --git a/sys/net80211/ieee80211_ht.c b/sys/net80211/ieee80211_ht.c index 3af56a228295..a8a767785fce 100644 --- a/sys/net80211/ieee80211_ht.c +++ b/sys/net80211/ieee80211_ht.c @@ -167,7 +167,7 @@ static ieee80211_send_action_func ht_send_action_ba_delba; static ieee80211_send_action_func ht_send_action_ht_txchwidth; static void -ieee80211_ht_init(void) +ieee80211_ht_init(void *dummy __unused) { /* * Setup HT parameters that depends on the clock frequency. diff --git a/sys/net80211/ieee80211_hwmp.c b/sys/net80211/ieee80211_hwmp.c index b69210768c54..084e67da13db 100644 --- a/sys/net80211/ieee80211_hwmp.c +++ b/sys/net80211/ieee80211_hwmp.c @@ -212,7 +212,7 @@ SYSCTL_PROC(_net_wlan_hwmp, OID_AUTO, inact, "mesh route inactivity timeout (ms)"); static void -ieee80211_hwmp_init(void) +ieee80211_hwmp_init(void *dummy __unused) { /* Default values as per amendment */ ieee80211_hwmp_pathtimeout = msecs_to_ticks(5*1000); diff --git a/sys/net80211/ieee80211_mesh.c b/sys/net80211/ieee80211_mesh.c index 3f0410a69e3c..7f2e8bdcb963 100644 --- a/sys/net80211/ieee80211_mesh.c +++ b/sys/net80211/ieee80211_mesh.c @@ -548,7 +548,7 @@ mesh_gatemode_cb(void *arg) } static void -ieee80211_mesh_init(void) +ieee80211_mesh_init(void *dummy __unused) { memset(mesh_proto_paths, 0, sizeof(mesh_proto_paths)); diff --git a/sys/net80211/ieee80211_phy.c b/sys/net80211/ieee80211_phy.c index 7f53c717152b..b4d9b16907d2 100644 --- a/sys/net80211/ieee80211_phy.c +++ b/sys/net80211/ieee80211_phy.c @@ -348,7 +348,7 @@ ieee80211_setup_ratetable(struct ieee80211_rate_table *rt) /* Setup all rate tables */ static void -ieee80211_phy_init(void) +ieee80211_phy_init(void *dummy __unused) { static struct ieee80211_rate_table * const ratetables[] = { &ieee80211_half_table, diff --git a/sys/net80211/ieee80211_proto.c b/sys/net80211/ieee80211_proto.c index 0c161d98a55a..4918bf7d025f 100644 --- a/sys/net80211/ieee80211_proto.c +++ b/sys/net80211/ieee80211_proto.c @@ -459,7 +459,7 @@ static const struct ieee80211_authenticator auth_internal = { * Setup internal authenticators once; they are never unregistered. 
*/ static void -ieee80211_auth_setup(void) +ieee80211_auth_setup(void *dummy __unused) { ieee80211_authenticator_register(IEEE80211_AUTH_OPEN, &auth_internal); ieee80211_authenticator_register(IEEE80211_AUTH_SHARED, &auth_internal); diff --git a/sys/net80211/ieee80211_vht.c b/sys/net80211/ieee80211_vht.c index 10a5fc7f08ab..095c4108c768 100644 --- a/sys/net80211/ieee80211_vht.c +++ b/sys/net80211/ieee80211_vht.c @@ -102,7 +102,7 @@ vht_send_action_placeholder(struct ieee80211_node *ni, } static void -ieee80211_vht_init(void) +ieee80211_vht_init(void *dummy __unused) { ieee80211_recv_action_register(IEEE80211_ACTION_CAT_VHT, diff --git a/sys/netgraph/bluetooth/drivers/ubt/ng_ubt_rtl.c b/sys/netgraph/bluetooth/drivers/ubt/ng_ubt_rtl.c index 0181a67ac604..f35712cc8f69 100644 --- a/sys/netgraph/bluetooth/drivers/ubt/ng_ubt_rtl.c +++ b/sys/netgraph/bluetooth/drivers/ubt/ng_ubt_rtl.c @@ -81,9 +81,6 @@ const STRUCT_USB_HOST_ID ubt_rtl_devs[] = { USB_VPI(0x0bda, 0xb00c, 0) }, { USB_VPI(0x0bda, 0xc822, 0) }, - /* Realtek 8822CU Bluetooth devices */ - { USB_VPI(0x13d3, 0x3549, 0) }, - /* Realtek 8851BE Bluetooth devices */ { USB_VPI(0x13d3, 0x3600, 0) }, diff --git a/sys/netinet/cc/cc.c b/sys/netinet/cc/cc.c index c20a20cd983d..bc06616dbf93 100644 --- a/sys/netinet/cc/cc.c +++ b/sys/netinet/cc/cc.c @@ -271,7 +271,7 @@ cc_check_default(struct cc_algo *remove_cc) * Initialise CC subsystem on system boot. */ static void -cc_init(void) +cc_init(void *dummy __unused) { CC_LIST_LOCK_INIT(); STAILQ_INIT(&cc_list); diff --git a/sys/netinet/in_fib_algo.c b/sys/netinet/in_fib_algo.c index 123dacb409e7..95621c300064 100644 --- a/sys/netinet/in_fib_algo.c +++ b/sys/netinet/in_fib_algo.c @@ -767,7 +767,7 @@ struct fib_lookup_module flm_radix4 = { }; static void -fib4_algo_init(void) +fib4_algo_init(void *dummy __unused) { fib_module_register(&flm_bsearch4); diff --git a/sys/netinet/in_mcast.c b/sys/netinet/in_mcast.c index f5b20c49ffd2..ba112afbf002 100644 --- a/sys/netinet/in_mcast.c +++ b/sys/netinet/in_mcast.c @@ -159,9 +159,6 @@ static struct ip_moptions * static int inp_get_source_filters(struct inpcb *, struct sockopt *); static int inp_join_group(struct inpcb *, struct sockopt *); static int inp_leave_group(struct inpcb *, struct sockopt *); -static struct ifnet * - inp_lookup_mcast_ifp(const struct inpcb *, - const struct sockaddr_in *, const struct in_addr); static int inp_block_unblock_source(struct inpcb *, struct sockopt *); static int inp_set_multicast_if(struct inpcb *, struct sockopt *); static int inp_set_source_filters(struct inpcb *, struct sockopt *); @@ -1832,69 +1829,55 @@ inp_getmoptions(struct inpcb *inp, struct sockopt *sopt) } /* - * Look up the ifnet to use for a multicast group membership, - * given the IPv4 address of an interface, and the IPv4 group address. - * - * This routine exists to support legacy multicast applications - * which do not understand that multicast memberships are scoped to - * specific physical links in the networking stack, or which need - * to join link-scope groups before IPv4 addresses are configured. - * - * Use this socket's current FIB number for any required FIB lookup. - * If ina is INADDR_ANY, look up the group address in the unicast FIB, - * and use its ifp; usually, this points to the default next-hop. - * - * If the FIB lookup fails, attempt to use the first non-loopback - * interface with multicast capability in the system as a - * last resort. 
The legacy IPv4 ASM API requires that we do - * this in order to allow groups to be joined when the routing - * table has not yet been populated during boot. - * - * Returns NULL if no ifp could be found, otherwise return referenced ifp. + * Look up the ifnet to join a multicast group membership via legacy + * IP_ADD_MEMBERSHIP or via more modern MCAST_JOIN_GROUP. * - * FUTURE: Implement IPv4 source-address selection. + * If the interface index was specified explicitly, just use it. If the + * address was specified (legacy), try to find matching interface. Else + * (index == 0 && no address) do a route lookup. If that fails for a modern + * MCAST_JOIN_GROUP return failure, for legacy IP_ADD_MEMBERSHIP find first + * multicast capable interface. */ static struct ifnet * -inp_lookup_mcast_ifp(const struct inpcb *inp, - const struct sockaddr_in *gsin, const struct in_addr ina) +inp_lookup_mcast_ifp(const struct inpcb *inp, const struct in_addr maddr, +const struct in_addr *ina, const u_int index) { struct ifnet *ifp; struct nhop_object *nh; NET_EPOCH_ASSERT(); - KASSERT(inp != NULL, ("%s: inp must not be NULL", __func__)); - KASSERT(gsin->sin_family == AF_INET, ("%s: not AF_INET", __func__)); - KASSERT(IN_MULTICAST(ntohl(gsin->sin_addr.s_addr)), - ("%s: not multicast", __func__)); - ifp = NULL; - if (!in_nullhost(ina)) { - INADDR_TO_IFP(ina, ifp); + if (index != 0) + return (ifnet_byindex_ref(index)); + + if (ina != NULL && !in_nullhost(*ina)) { + INADDR_TO_IFP(*ina, ifp); if (ifp != NULL) if_ref(ifp); - } else { - nh = fib4_lookup(inp->inp_inc.inc_fibnum, gsin->sin_addr, 0, NHR_NONE, 0); - if (nh != NULL) { - ifp = nh->nh_ifp; - if_ref(ifp); - } else { - struct in_ifaddr *ia; - struct ifnet *mifp; - - mifp = NULL; - CK_STAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) { - mifp = ia->ia_ifp; - if (!(mifp->if_flags & IFF_LOOPBACK) && - (mifp->if_flags & IFF_MULTICAST)) { - ifp = mifp; - if_ref(ifp); - break; - } + return (ifp); + } + + nh = fib4_lookup(inp->inp_inc.inc_fibnum, maddr, 0, NHR_NONE, 0); + if (nh != NULL) { + ifp = nh->nh_ifp; + if_ref(ifp); + return (ifp); + } + + if (ina != NULL) { + struct in_ifaddr *ia; + + CK_STAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) { + if (!(ia->ia_ifp->if_flags & IFF_LOOPBACK) && + (ia->ia_ifp->if_flags & IFF_MULTICAST)) { + ifp = ia->ia_ifp; + if_ref(ifp); + return (ifp); } } } - return (ifp); + return (NULL); } /* @@ -1926,13 +1909,13 @@ inp_join_group(struct inpcb *inp, struct sockopt *sopt) switch (sopt->sopt_name) { case IP_ADD_MEMBERSHIP: { struct ip_mreqn mreqn; + bool mreq; - if (sopt->sopt_valsize == sizeof(struct ip_mreqn)) - error = sooptcopyin(sopt, &mreqn, - sizeof(struct ip_mreqn), sizeof(struct ip_mreqn)); - else - error = sooptcopyin(sopt, &mreqn, - sizeof(struct ip_mreq), sizeof(struct ip_mreq)); + mreq = (sopt->sopt_valsize != sizeof(struct ip_mreqn)); + + error = sooptcopyin(sopt, &mreqn, + mreq ? sizeof(struct ip_mreq) : sizeof(struct ip_mreqn), + mreq ? sizeof(struct ip_mreq) : sizeof(struct ip_mreqn)); if (error) return (error); @@ -1943,12 +1926,9 @@ inp_join_group(struct inpcb *inp, struct sockopt *sopt) return (EINVAL); NET_EPOCH_ENTER(et); - if (sopt->sopt_valsize == sizeof(struct ip_mreqn) && - mreqn.imr_ifindex != 0) - ifp = ifnet_byindex_ref(mreqn.imr_ifindex); - else - ifp = inp_lookup_mcast_ifp(inp, &gsa->sin, - mreqn.imr_address); + ifp = inp_lookup_mcast_ifp(inp, mreqn.imr_multiaddr, + mreq ? &mreqn.imr_address : NULL, + mreq ? 
0 : mreqn.imr_ifindex); NET_EPOCH_EXIT(et); break; } @@ -1971,8 +1951,8 @@ inp_join_group(struct inpcb *inp, struct sockopt *sopt) ssa->sin.sin_addr = mreqs.imr_sourceaddr; NET_EPOCH_ENTER(et); - ifp = inp_lookup_mcast_ifp(inp, &gsa->sin, - mreqs.imr_interface); + ifp = inp_lookup_mcast_ifp(inp, mreqs.imr_multiaddr, + &mreqs.imr_interface, 0); NET_EPOCH_EXIT(et); CTR3(KTR_IGMPV3, "%s: imr_interface = 0x%08x, ifp = %p", __func__, ntohl(mreqs.imr_interface.s_addr), ifp); @@ -2013,7 +1993,8 @@ inp_join_group(struct inpcb *inp, struct sockopt *sopt) return (EINVAL); NET_EPOCH_ENTER(et); - ifp = ifnet_byindex_ref(gsr.gsr_interface); + ifp = inp_lookup_mcast_ifp(inp, gsa->sin.sin_addr, NULL, + gsr.gsr_interface); NET_EPOCH_EXIT(et); if (ifp == NULL) return (EADDRNOTAVAIL); diff --git a/sys/netinet/siftr.c b/sys/netinet/siftr.c index 374b5595fcbc..5b89ca026e85 100644 --- a/sys/netinet/siftr.c +++ b/sys/netinet/siftr.c @@ -519,7 +519,7 @@ siftr_pkt_manager_thread(void *arg) if (log_buf != NULL) { alq_post_flags(siftr_alq, log_buf, 0); } - for (;cnt > 0; cnt--) { + for (; cnt > 0; cnt--) { pkt_node = STAILQ_FIRST(&tmp_pkt_queue); STAILQ_REMOVE_HEAD(&tmp_pkt_queue, nodes); free(pkt_node, M_SIFTR_PKTNODE); diff --git a/sys/netinet/tcp_hpts.c b/sys/netinet/tcp_hpts.c index 63bbe4bba11b..c54459bb5f01 100644 --- a/sys/netinet/tcp_hpts.c +++ b/sys/netinet/tcp_hpts.c @@ -39,15 +39,14 @@ * First, and probably the main thing its used by Rack and BBR, it can * be used to call tcp_output() of a transport stack at some time in the future. * The normal way this is done is that tcp_output() of the stack schedules - * itself to be called again by calling tcp_hpts_insert(tcpcb, slot). The - * slot is the time from now that the stack wants to be called but it - * must be converted to tcp_hpts's notion of slot. This is done with - * one of the macros HPTS_MS_TO_SLOTS or HPTS_USEC_TO_SLOTS. So a typical + * itself to be called again by calling tcp_hpts_insert(tcpcb, usecs). The + * usecs is the time from now that the stack wants to be called and is + * passing time directly in microseconds. So a typical * call from the tcp_output() routine might look like: * - * tcp_hpts_insert(tp, HPTS_USEC_TO_SLOTS(550)); + * tcp_hpts_insert(tp, 550, NULL); * - * The above would schedule tcp_output() to be called in 550 useconds. + * The above would schedule tcp_output() to be called in 550 microseconds. * Note that if using this mechanism the stack will want to add near * its top a check to prevent unwanted calls (from user land or the * arrival of incoming ack's). So it would add something like: @@ -149,27 +148,44 @@ #include <netinet/tcpip.h> #include <netinet/cc/cc.h> #include <netinet/tcp_hpts.h> +#include <netinet/tcp_hpts_internal.h> #include <netinet/tcp_log_buf.h> #ifdef tcp_offload #include <netinet/tcp_offload.h> #endif -/* - * The hpts uses a 102400 wheel. The wheel - * defines the time in 10 usec increments (102400 x 10). - * This gives a range of 10usec - 1024ms to place - * an entry within. If the user requests more than - * 1.024 second, a remaineder is attached and the hpts - * when seeing the remainder will re-insert the - * inpcb forward in time from where it is until - * the remainder is zero. - */ +/* Global instance for TCP HPTS */ +struct tcp_hptsi *tcp_hptsi_pace; + +/* Default function table for production use. 
*/ +const struct tcp_hptsi_funcs tcp_hptsi_default_funcs = { + .microuptime = microuptime, + .swi_add = swi_add, + .swi_remove = swi_remove, + .swi_sched = swi_sched, + .intr_event_bind = intr_event_bind, + .intr_event_bind_ithread_cpuset = intr_event_bind_ithread_cpuset, + .callout_init = callout_init, + .callout_reset_sbt_on = callout_reset_sbt_on, + ._callout_stop_safe = _callout_stop_safe, +}; -#define NUM_OF_HPTSI_SLOTS 102400 +#ifdef TCP_HPTS_KTEST +#define microuptime pace->funcs->microuptime +#define swi_add pace->funcs->swi_add +#define swi_remove pace->funcs->swi_remove +#define swi_sched pace->funcs->swi_sched +#define intr_event_bind pace->funcs->intr_event_bind +#define intr_event_bind_ithread_cpuset pace->funcs->intr_event_bind_ithread_cpuset +#define callout_init pace->funcs->callout_init +#define callout_reset_sbt_on pace->funcs->callout_reset_sbt_on +#define _callout_stop_safe pace->funcs->_callout_stop_safe +#endif -/* The number of connections after which the dynamic sleep logic kicks in. */ -#define DEFAULT_CONNECTION_THRESHOLD 100 +static MALLOC_DEFINE(M_TCPHPTS, "tcp_hpts", "TCP hpts"); + +static void tcp_hpts_thread(void *ctx); /* * When using the hpts, a TCP stack must make sure @@ -204,87 +220,22 @@ * * When we are in the "new" mode i.e. conn_cnt > conn_cnt_thresh * then we do a dynamic adjustment on the time we sleep. - * Our threshold is if the lateness of the first client served (in ticks) is + * Our threshold is if the lateness of the first client served (in slots) is * greater than or equal too slots_indicate_more_sleep (10ms - * or 10000 ticks). If we were that late, the actual sleep time - * is adjusted down by 50%. If the ticks_ran is less than - * slots_indicate_more_sleep (100 ticks or 1000usecs). + * or 10000 slots). If we were that late, the actual sleep time + * is adjusted down by 50%. If the slots_ran is less than + * slots_indicate_more_sleep (100 slots or 1000usecs). * */ -/* Each hpts has its own p_mtx which is used for locking */ -#define HPTS_MTX_ASSERT(hpts) mtx_assert(&(hpts)->p_mtx, MA_OWNED) -#define HPTS_LOCK(hpts) mtx_lock(&(hpts)->p_mtx) -#define HPTS_TRYLOCK(hpts) mtx_trylock(&(hpts)->p_mtx) -#define HPTS_UNLOCK(hpts) mtx_unlock(&(hpts)->p_mtx) -struct tcp_hpts_entry { - /* Cache line 0x00 */ - struct mtx p_mtx; /* Mutex for hpts */ - struct timeval p_mysleep; /* Our min sleep time */ - uint64_t syscall_cnt; - uint64_t sleeping; /* What the actual sleep was (if sleeping) */ - uint16_t p_hpts_active; /* Flag that says hpts is awake */ - uint8_t p_wheel_complete; /* have we completed the wheel arc walk? */ - uint32_t p_curtick; /* Tick in 10 us the hpts is going to */ - uint32_t p_runningslot; /* Current tick we are at if we are running */ - uint32_t p_prev_slot; /* Previous slot we were on */ - uint32_t p_cur_slot; /* Current slot in wheel hpts is draining */ - uint32_t p_nxt_slot; /* The next slot outside the current range of - * slots that the hpts is running on. 
*/ - int32_t p_on_queue_cnt; /* Count on queue in this hpts */ - uint32_t p_lasttick; /* Last tick before the current one */ - uint8_t p_direct_wake :1, /* boolean */ - p_on_min_sleep:1, /* boolean */ - p_hpts_wake_scheduled:1, /* boolean */ - hit_callout_thresh:1, - p_avail:4; - uint8_t p_fill[3]; /* Fill to 32 bits */ - /* Cache line 0x40 */ - struct hptsh { - TAILQ_HEAD(, tcpcb) head; - uint32_t count; - uint32_t gencnt; - } *p_hptss; /* Hptsi wheel */ - uint32_t p_hpts_sleep_time; /* Current sleep interval having a max - * of 255ms */ - uint32_t overidden_sleep; /* what was overrided by min-sleep for logging */ - uint32_t saved_lasttick; /* for logging */ - uint32_t saved_curtick; /* for logging */ - uint32_t saved_curslot; /* for logging */ - uint32_t saved_prev_slot; /* for logging */ - uint32_t p_delayed_by; /* How much were we delayed by */ - /* Cache line 0x80 */ - struct sysctl_ctx_list hpts_ctx; - struct sysctl_oid *hpts_root; - struct intr_event *ie; - void *ie_cookie; - uint16_t p_num; /* The hpts number one per cpu */ - uint16_t p_cpu; /* The hpts CPU */ - /* There is extra space in here */ - /* Cache line 0x100 */ - struct callout co __aligned(CACHE_LINE_SIZE); -} __aligned(CACHE_LINE_SIZE); - -static struct tcp_hptsi { - struct cpu_group **grps; - struct tcp_hpts_entry **rp_ent; /* Array of hptss */ - uint32_t *cts_last_ran; - uint32_t grp_cnt; - uint32_t rp_num_hptss; /* Number of hpts threads */ -} tcp_pace; - -static MALLOC_DEFINE(M_TCPHPTS, "tcp_hpts", "TCP hpts"); #ifdef RSS -static int tcp_bind_threads = 1; +int tcp_bind_threads = 1; #else -static int tcp_bind_threads = 2; +int tcp_bind_threads = 2; #endif static int tcp_use_irq_cpu = 0; static int hpts_does_tp_logging = 0; - -static int32_t tcp_hptsi(struct tcp_hpts_entry *hpts, bool from_callout); -static void tcp_hpts_thread(void *ctx); - +static int32_t tcp_hpts_precision = 120; int32_t tcp_min_hptsi_time = DEFAULT_MIN_SLEEP; static int conn_cnt_thresh = DEFAULT_CONNECTION_THRESHOLD; static int32_t dynamic_min_sleep = DYNAMIC_MIN_SLEEP; @@ -295,23 +246,6 @@ SYSCTL_NODE(_net_inet_tcp, OID_AUTO, hpts, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, SYSCTL_NODE(_net_inet_tcp_hpts, OID_AUTO, stats, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "TCP Hpts statistics"); -#define timersub(tvp, uvp, vvp) \ - do { \ - (vvp)->tv_sec = (tvp)->tv_sec - (uvp)->tv_sec; \ - (vvp)->tv_usec = (tvp)->tv_usec - (uvp)->tv_usec; \ - if ((vvp)->tv_usec < 0) { \ - (vvp)->tv_sec--; \ - (vvp)->tv_usec += 1000000; \ - } \ - } while (0) - -static int32_t tcp_hpts_precision = 120; - -static struct hpts_domain_info { - int count; - int cpu[MAXCPU]; -} hpts_domains[MAXMEMDOM]; - counter_u64_t hpts_hopelessly_behind; SYSCTL_COUNTER_U64(_net_inet_tcp_hpts_stats, OID_AUTO, hopeless, CTLFLAG_RD, @@ -459,14 +393,14 @@ SYSCTL_INT(_net_inet_tcp_hpts, OID_AUTO, nowake_over_thresh, CTLFLAG_RW, &tcp_hpts_no_wake_over_thresh, 0, "When we are over the threshold on the pacer do we prohibit wakeups?"); -static uint16_t -hpts_random_cpu(void) +uint16_t +tcp_hptsi_random_cpu(struct tcp_hptsi *pace) { uint16_t cpuid; uint32_t ran; ran = arc4random(); - cpuid = (((ran & 0xffff) % mp_ncpus) % tcp_pace.rp_num_hptss); + cpuid = (((ran & 0xffff) % mp_ncpus) % pace->rp_num_hptss); return (cpuid); } @@ -487,13 +421,11 @@ tcp_hpts_log(struct tcp_hpts_entry *hpts, struct tcpcb *tp, struct timeval *tv, log.u_bbr.flex2 = hpts->p_cur_slot; log.u_bbr.flex3 = hpts->p_prev_slot; log.u_bbr.flex4 = idx; - log.u_bbr.flex5 = hpts->p_curtick; log.u_bbr.flex6 = hpts->p_on_queue_cnt; log.u_bbr.flex7 = 
hpts->p_cpu; log.u_bbr.flex8 = (uint8_t)from_callout; log.u_bbr.inflight = slots_to_run; log.u_bbr.applimited = hpts->overidden_sleep; - log.u_bbr.delivered = hpts->saved_curtick; log.u_bbr.timeStamp = tcp_tv_to_usec(tv); log.u_bbr.epoch = hpts->saved_curslot; log.u_bbr.lt_epoch = hpts->saved_prev_slot; @@ -510,11 +442,67 @@ tcp_hpts_log(struct tcp_hpts_entry *hpts, struct tcpcb *tp, struct timeval *tv, } } +/* + * Timeout handler for the HPTS sleep callout. It immediately schedules the SWI + * for the HPTS entry to run. + */ static void -tcp_wakehpts(struct tcp_hpts_entry *hpts) +tcp_hpts_sleep_timeout(void *arg) { +#ifdef TCP_HPTS_KTEST + struct tcp_hptsi *pace; +#endif + struct tcp_hpts_entry *hpts; + + hpts = (struct tcp_hpts_entry *)arg; +#ifdef TCP_HPTS_KTEST + pace = hpts->p_hptsi; +#endif + swi_sched(hpts->ie_cookie, 0); +} + +/* + * Reset the HPTS callout timer with the provided timeval. Returns the result + * of the callout_reset_sbt_on() function. + */ +static int +tcp_hpts_sleep(struct tcp_hpts_entry *hpts, struct timeval *tv) +{ +#ifdef TCP_HPTS_KTEST + struct tcp_hptsi *pace; +#endif + sbintime_t sb; + +#ifdef TCP_HPTS_KTEST + pace = hpts->p_hptsi; +#endif + + /* Store off to make visible the actual sleep time */ + hpts->sleeping = tv->tv_usec; + + sb = tvtosbt(*tv); + return (callout_reset_sbt_on( + &hpts->co, sb, 0, tcp_hpts_sleep_timeout, hpts, hpts->p_cpu, + (C_DIRECT_EXEC | C_PREL(tcp_hpts_precision)))); +} + +/* + * Schedules the SWI for the HPTS entry to run, if not already scheduled or + * running. + */ +void +tcp_hpts_wake(struct tcp_hpts_entry *hpts) +{ +#ifdef TCP_HPTS_KTEST + struct tcp_hptsi *pace; +#endif + HPTS_MTX_ASSERT(hpts); +#ifdef TCP_HPTS_KTEST + pace = hpts->p_hptsi; +#endif + if (tcp_hpts_no_wake_over_thresh && (hpts->p_on_queue_cnt >= conn_cnt_thresh)) { hpts->p_direct_wake = 0; return; @@ -526,15 +514,6 @@ tcp_wakehpts(struct tcp_hpts_entry *hpts) } static void -hpts_timeout_swi(void *arg) -{ - struct tcp_hpts_entry *hpts; - - hpts = (struct tcp_hpts_entry *)arg; - swi_sched(hpts->ie_cookie, 0); -} - -static void tcp_hpts_insert_internal(struct tcpcb *tp, struct tcp_hpts_entry *hpts) { struct inpcb *inp = tptoinpcb(tp); @@ -562,13 +541,13 @@ tcp_hpts_insert_internal(struct tcpcb *tp, struct tcp_hpts_entry *hpts) } static struct tcp_hpts_entry * -tcp_hpts_lock(struct tcpcb *tp) +tcp_hpts_lock(struct tcp_hptsi *pace, struct tcpcb *tp) { struct tcp_hpts_entry *hpts; INP_LOCK_ASSERT(tptoinpcb(tp)); - hpts = tcp_pace.rp_ent[tp->t_hpts_cpu]; + hpts = pace->rp_ent[tp->t_hpts_cpu]; HPTS_LOCK(hpts); return (hpts); @@ -595,11 +574,10 @@ tcp_hpts_release(struct tcpcb *tp) * and has never received a first packet. */ void -tcp_hpts_init(struct tcpcb *tp) +__tcp_hpts_init(struct tcp_hptsi *pace, struct tcpcb *tp) { - if (__predict_true(tp->t_hpts_cpu == HPTS_CPU_NONE)) { - tp->t_hpts_cpu = hpts_random_cpu(); + tp->t_hpts_cpu = tcp_hptsi_random_cpu(pace); MPASS(!(tp->t_flags2 & TF2_HPTS_CPU_SET)); } } @@ -611,14 +589,14 @@ tcp_hpts_init(struct tcpcb *tp) * INP lock and then get the hpts lock.
*/ void -tcp_hpts_remove(struct tcpcb *tp) +__tcp_hpts_remove(struct tcp_hptsi *pace, struct tcpcb *tp) { struct tcp_hpts_entry *hpts; struct hptsh *hptsh; INP_WLOCK_ASSERT(tptoinpcb(tp)); - hpts = tcp_hpts_lock(tp); + hpts = tcp_hpts_lock(pace, tp); if (tp->t_in_hpts == IHPTS_ONQUEUE) { hptsh = &hpts->p_hptss[tp->t_hpts_slot]; tp->t_hpts_request = 0; @@ -662,23 +640,19 @@ hpts_slot(uint32_t wheel_slot, uint32_t plus) { /* * Given a slot on the wheel, what slot - * is that plus ticks out? + * is that plus slots out? */ - KASSERT(wheel_slot < NUM_OF_HPTSI_SLOTS, ("Invalid tick %u not on wheel", wheel_slot)); + KASSERT(wheel_slot < NUM_OF_HPTSI_SLOTS, ("Invalid slot %u not on wheel", wheel_slot)); return ((wheel_slot + plus) % NUM_OF_HPTSI_SLOTS); } static inline int -tick_to_wheel(uint32_t cts_in_wticks) +cts_to_wheel(uint32_t cts) { /* - * Given a timestamp in ticks (so by - * default to get it to a real time one - * would multiply by 10.. i.e the number - * of ticks in a slot) map it to our limited - * space wheel. + * Given a timestamp in useconds map it to our limited space wheel. */ - return (cts_in_wticks % NUM_OF_HPTSI_SLOTS); + return ((cts / HPTS_USECS_PER_SLOT) % NUM_OF_HPTSI_SLOTS); } static inline int @@ -721,7 +695,7 @@ max_slots_available(struct tcp_hpts_entry *hpts, uint32_t wheel_slot, uint32_t * if ((hpts->p_hpts_active == 1) && (hpts->p_wheel_complete == 0)) { end_slot = hpts->p_runningslot; - /* Back up one tick */ + /* Back up one slot */ if (end_slot == 0) end_slot = NUM_OF_HPTSI_SLOTS - 1; else @@ -734,7 +708,7 @@ max_slots_available(struct tcp_hpts_entry *hpts, uint32_t wheel_slot, uint32_t * * not active, or we have * completed the pass over * the wheel, we can use the - * prev tick and subtract one from it. This puts us + * prev slot and subtract one from it. This puts us * as far out as possible on the wheel. */ end_slot = hpts->p_prev_slot; @@ -747,7 +721,7 @@ max_slots_available(struct tcp_hpts_entry *hpts, uint32_t wheel_slot, uint32_t * /* * Now we have close to the full wheel left minus the * time it has been since the pacer went to sleep. Note - * that wheel_tick, passed in, should be the current time + * that wheel_slot, passed in, should be the current time * from the perspective of the caller, mapped to the wheel. */ if (hpts->p_prev_slot != wheel_slot) @@ -824,7 +798,7 @@ max_slots_available(struct tcp_hpts_entry *hpts, uint32_t wheel_slot, uint32_t * #ifdef INVARIANTS static void check_if_slot_would_be_wrong(struct tcp_hpts_entry *hpts, struct tcpcb *tp, - uint32_t hptsslot, int line) + uint32_t hptsslot) { /* * Sanity checks for the pacer with invariants @@ -855,12 +829,13 @@ check_if_slot_would_be_wrong(struct tcp_hpts_entry *hpts, struct tcpcb *tp, } #endif -uint32_t -tcp_hpts_insert_diag(struct tcpcb *tp, uint32_t slot, int32_t line, struct hpts_diag *diag) +void +__tcp_hpts_insert(struct tcp_hptsi *pace, struct tcpcb *tp, uint32_t usecs, + struct hpts_diag *diag) { struct tcp_hpts_entry *hpts; struct timeval tv; - uint32_t slot_on, wheel_cts, last_slot, need_new_to = 0; + uint32_t slot, wheel_cts, last_slot, need_new_to = 0; int32_t wheel_slot, maxslots; bool need_wakeup = false; @@ -869,11 +844,13 @@ tcp_hpts_insert_diag(struct tcpcb *tp, uint32_t slot, int32_t line, struct hpts_ MPASS(!(tp->t_in_hpts == IHPTS_ONQUEUE)); /* + * Convert microseconds to slots for internal use. * We now return the next-slot the hpts will be on, beyond its * current run (if up) or where it was when it stopped if it is * sleeping. 
*/ - hpts = tcp_hpts_lock(tp); + slot = HPTS_USEC_TO_SLOTS(usecs); + hpts = tcp_hpts_lock(pace, tp); microuptime(&tv); if (diag) { memset(diag, 0, sizeof(struct hpts_diag)); @@ -882,8 +859,6 @@ tcp_hpts_insert_diag(struct tcpcb *tp, uint32_t slot, int32_t line, struct hpts_ diag->p_runningslot = hpts->p_runningslot; diag->p_nxt_slot = hpts->p_nxt_slot; diag->p_cur_slot = hpts->p_cur_slot; - diag->p_curtick = hpts->p_curtick; - diag->p_lasttick = hpts->p_lasttick; diag->slot_req = slot; diag->p_on_min_sleep = hpts->p_on_min_sleep; diag->hpts_sleep_time = hpts->p_hpts_sleep_time; @@ -910,17 +885,15 @@ tcp_hpts_insert_diag(struct tcpcb *tp, uint32_t slot, int32_t line, struct hpts_ * timeout is not 1. */ hpts->p_direct_wake = 1; - tcp_wakehpts(hpts); + tcp_hpts_wake(hpts); } - slot_on = hpts->p_nxt_slot; HPTS_UNLOCK(hpts); - return (slot_on); + return; } - /* Get the current time relative to the wheel */ - wheel_cts = tcp_tv_to_hpts_slot(&tv); - /* Map it onto the wheel */ - wheel_slot = tick_to_wheel(wheel_cts); + /* Get the current time stamp and map it onto the wheel */ + wheel_cts = tcp_tv_to_usec(&tv); + wheel_slot = cts_to_wheel(wheel_cts); /* Now what's the max we can place it at? */ maxslots = max_slots_available(hpts, wheel_slot, &last_slot); if (diag) { @@ -952,11 +925,11 @@ tcp_hpts_insert_diag(struct tcpcb *tp, uint32_t slot, int32_t line, struct hpts_ tp->t_hpts_slot = last_slot; } if (diag) { - diag->slot_remaining = tp->t_hpts_request; + diag->time_remaining = tp->t_hpts_request; diag->inp_hptsslot = tp->t_hpts_slot; } #ifdef INVARIANTS - check_if_slot_would_be_wrong(hpts, tp, tp->t_hpts_slot, line); + check_if_slot_would_be_wrong(hpts, tp, tp->t_hpts_slot); #endif if (__predict_true(tp->t_in_hpts != IHPTS_MOVING)) tcp_hpts_insert_internal(tp, hpts); @@ -995,12 +968,12 @@ tcp_hpts_insert_diag(struct tcpcb *tp, uint32_t slot, int32_t line, struct hpts_ } /* * Now how far is the hpts sleeping to? if active is 1, its - * up and ticking we do nothing, otherwise we may need to + * up and running we do nothing, otherwise we may need to * reschedule its callout if need_new_to is set from above. */ if (need_wakeup) { hpts->p_direct_wake = 1; - tcp_wakehpts(hpts); + tcp_hpts_wake(hpts); if (diag) { diag->need_new_to = 0; diag->co_ret = 0xffff0000; @@ -1008,7 +981,6 @@ tcp_hpts_insert_diag(struct tcpcb *tp, uint32_t slot, int32_t line, struct hpts_ } else if (need_new_to) { int32_t co_ret; struct timeval tv; - sbintime_t sb; tv.tv_sec = 0; tv.tv_usec = 0; @@ -1016,24 +988,18 @@ tcp_hpts_insert_diag(struct tcpcb *tp, uint32_t slot, int32_t line, struct hpts_ tv.tv_sec++; need_new_to -= HPTS_USEC_IN_SEC; } - tv.tv_usec = need_new_to; - sb = tvtosbt(tv); - co_ret = callout_reset_sbt_on(&hpts->co, sb, 0, - hpts_timeout_swi, hpts, hpts->p_cpu, - (C_DIRECT_EXEC | C_PREL(tcp_hpts_precision))); + tv.tv_usec = need_new_to; /* XXX: Why is this sleeping over the max? 
*/ + co_ret = tcp_hpts_sleep(hpts, &tv); if (diag) { diag->need_new_to = need_new_to; diag->co_ret = co_ret; } } - slot_on = hpts->p_nxt_slot; HPTS_UNLOCK(hpts); - - return (slot_on); } static uint16_t -hpts_cpuid(struct tcpcb *tp, int *failed) +hpts_cpuid(struct tcp_hptsi *pace, struct tcpcb *tp, int *failed) { struct inpcb *inp = tptoinpcb(tp); u_int cpuid; @@ -1060,7 +1026,7 @@ hpts_cpuid(struct tcpcb *tp, int *failed) #ifdef RSS cpuid = rss_hash2cpuid(inp->inp_flowid, inp->inp_flowtype); if (cpuid == NETISR_CPUID_NONE) - return (hpts_random_cpu()); + return (tcp_hptsi_random_cpu(pace)); else return (cpuid); #endif @@ -1071,7 +1037,7 @@ hpts_cpuid(struct tcpcb *tp, int *failed) */ if (inp->inp_flowtype == M_HASHTYPE_NONE) { counter_u64_add(cpu_uses_random, 1); - return (hpts_random_cpu()); + return (tcp_hptsi_random_cpu(pace)); } /* * Hash to a thread based on the flowid. If we are using numa, @@ -1086,7 +1052,7 @@ hpts_cpuid(struct tcpcb *tp, int *failed) #ifdef NUMA } else { /* Hash into the cpu's that use that domain */ - di = &hpts_domains[inp->inp_numa_domain]; + di = &pace->domains[inp->inp_numa_domain]; cpuid = di->cpu[inp->inp_flowid % di->count]; } #endif @@ -1118,9 +1084,16 @@ tcp_hpts_set_max_sleep(struct tcp_hpts_entry *hpts, int wrap_loop_cnt) } } -static int32_t +static bool +tcp_hpts_different_slots(uint32_t cts, uint32_t cts_last_run) +{ + return ((cts / HPTS_USECS_PER_SLOT) != (cts_last_run / HPTS_USECS_PER_SLOT)); +} + +int32_t tcp_hptsi(struct tcp_hpts_entry *hpts, bool from_callout) { + struct tcp_hptsi *pace; struct tcpcb *tp; struct timeval tv; int32_t slots_to_run, i, error; @@ -1130,6 +1103,7 @@ tcp_hptsi(struct tcp_hpts_entry *hpts, bool from_callout) int32_t wrap_loop_cnt = 0; int32_t slot_pos_of_endpoint = 0; int32_t orig_exit_slot; + uint32_t cts, cts_last_run; bool completed_measure, seen_endpoint; completed_measure = false; @@ -1137,32 +1111,34 @@ tcp_hptsi(struct tcp_hpts_entry *hpts, bool from_callout) HPTS_MTX_ASSERT(hpts); NET_EPOCH_ASSERT(); + + pace = hpts->p_hptsi; + MPASS(pace != NULL); + /* record previous info for any logging */ - hpts->saved_lasttick = hpts->p_lasttick; - hpts->saved_curtick = hpts->p_curtick; hpts->saved_curslot = hpts->p_cur_slot; hpts->saved_prev_slot = hpts->p_prev_slot; - hpts->p_lasttick = hpts->p_curtick; - hpts->p_curtick = tcp_gethptstick(&tv); - tcp_pace.cts_last_ran[hpts->p_num] = tcp_tv_to_usec(&tv); - orig_exit_slot = hpts->p_cur_slot = tick_to_wheel(hpts->p_curtick); + microuptime(&tv); + cts_last_run = pace->cts_last_ran[hpts->p_cpu]; + pace->cts_last_ran[hpts->p_cpu] = cts = tcp_tv_to_usec(&tv); + + orig_exit_slot = hpts->p_cur_slot = cts_to_wheel(cts); if ((hpts->p_on_queue_cnt == 0) || - (hpts->p_lasttick == hpts->p_curtick)) { + !tcp_hpts_different_slots(cts, cts_last_run)) { /* - * No time has yet passed, - * or nothing to do. + * Not enough time has yet passed or nothing to do. 
*/ hpts->p_prev_slot = hpts->p_cur_slot; - hpts->p_lasttick = hpts->p_curtick; goto no_run; } again: hpts->p_wheel_complete = 0; HPTS_MTX_ASSERT(hpts); slots_to_run = hpts_slots_diff(hpts->p_prev_slot, hpts->p_cur_slot); - if (((hpts->p_curtick - hpts->p_lasttick) > (NUM_OF_HPTSI_SLOTS - 1)) && - (hpts->p_on_queue_cnt != 0)) { + if ((hpts->p_on_queue_cnt != 0) && + ((cts - cts_last_run) > + ((NUM_OF_HPTSI_SLOTS-1) * HPTS_USECS_PER_SLOT))) { /* * Wheel wrap is occuring, basically we * are behind and the distance between @@ -1238,7 +1214,7 @@ again: uint32_t runningslot; /* - * Calculate our delay, if there are no extra ticks there + * Calculate our delay, if there are no extra slots there * was not any (i.e. if slots_to_run == 1, no delay). */ hpts->p_delayed_by = (slots_to_run - (i + 1)) * @@ -1391,7 +1367,7 @@ again: * gets added to the hpts (not this one) * :-) */ - tcp_set_hpts(tp); + __tcp_set_hpts(pace, tp); } CURVNET_SET(inp->inp_vnet); /* Lets do any logging that we might want to */ @@ -1450,10 +1426,12 @@ no_one: hpts->p_delayed_by = 0; /* * Check to see if we took an excess amount of time and need to run - * more ticks (if we did not hit eno-bufs). + * more slots (if we did not hit eno-bufs). */ hpts->p_prev_slot = hpts->p_cur_slot; - hpts->p_lasttick = hpts->p_curtick; + microuptime(&tv); + cts_last_run = cts; + cts = tcp_tv_to_usec(&tv); if (!from_callout || (loop_cnt > max_pacer_loops)) { /* * Something is serious slow we have @@ -1465,7 +1443,7 @@ no_one: * can never catch up :( * * We will just lie to this thread - * and let it thing p_curtick is + * and let it think p_curslot is * correct. When it next awakens * it will find itself further behind. */ @@ -1473,20 +1451,19 @@ no_one: counter_u64_add(hpts_hopelessly_behind, 1); goto no_run; } - hpts->p_curtick = tcp_gethptstick(&tv); - hpts->p_cur_slot = tick_to_wheel(hpts->p_curtick); + + hpts->p_cur_slot = cts_to_wheel(cts); if (!seen_endpoint) { /* We saw no endpoint but we may be looping */ orig_exit_slot = hpts->p_cur_slot; } - if ((wrap_loop_cnt < 2) && - (hpts->p_lasttick != hpts->p_curtick)) { + if ((wrap_loop_cnt < 2) && tcp_hpts_different_slots(cts, cts_last_run)) { counter_u64_add(hpts_loops, 1); loop_cnt++; goto again; } no_run: - tcp_pace.cts_last_ran[hpts->p_num] = tcp_tv_to_usec(&tv); + pace->cts_last_ran[hpts->p_cpu] = cts; /* * Set flag to tell that we are done for * any slot input that happens during @@ -1494,25 +1471,36 @@ no_run: */ hpts->p_wheel_complete = 1; /* - * Now did we spend too long running input and need to run more ticks? - * Note that if wrap_loop_cnt < 2 then we should have the conditions - * in the KASSERT's true. But if the wheel is behind i.e. wrap_loop_cnt - * is greater than 2, then the condtion most likely are *not* true. - * Also if we are called not from the callout, we don't run the wheel - * multiple times so the slots may not align either. - */ - KASSERT(((hpts->p_prev_slot == hpts->p_cur_slot) || - (wrap_loop_cnt >= 2) || !from_callout), - ("H:%p p_prev_slot:%u not equal to p_cur_slot:%u", hpts, - hpts->p_prev_slot, hpts->p_cur_slot)); - KASSERT(((hpts->p_lasttick == hpts->p_curtick) - || (wrap_loop_cnt >= 2) || !from_callout), - ("H:%p p_lasttick:%u not equal to p_curtick:%u", hpts, - hpts->p_lasttick, hpts->p_curtick)); - if (from_callout && (hpts->p_lasttick != hpts->p_curtick)) { - hpts->p_curtick = tcp_gethptstick(&tv); + * If enough time has elapsed that we should be processing the next + * slot(s), then we should have kept running and not marked the wheel as + * complete. 
+ * + * But there are several other conditions where we would have stopped + * processing, so the prev/cur slots and cts variables won't match. + * These conditions are: + * + * - Calls not from callouts don't run multiple times + * - The wheel is empty + * - We've processed more than max_pacer_loops times + * - We've wrapped more than 2 times + * + * This assert catches when the logic above has violated this design. + * + */ + KASSERT((!from_callout || (hpts->p_on_queue_cnt == 0) || + (loop_cnt > max_pacer_loops) || (wrap_loop_cnt >= 2) || + ((hpts->p_prev_slot == hpts->p_cur_slot) && + !tcp_hpts_different_slots(cts, cts_last_run))), + ("H:%p Shouldn't be done! prev_slot:%u, cur_slot:%u, " + "cts_last_run:%u, cts:%u, loop_cnt:%d, wrap_loop_cnt:%d", + hpts, hpts->p_prev_slot, hpts->p_cur_slot, + cts_last_run, cts, loop_cnt, wrap_loop_cnt)); + + if (from_callout && tcp_hpts_different_slots(cts, cts_last_run)) { + microuptime(&tv); + cts = tcp_tv_to_usec(&tv); + hpts->p_cur_slot = cts_to_wheel(cts); counter_u64_add(hpts_loops, 1); - hpts->p_cur_slot = tick_to_wheel(hpts->p_curtick); goto again; } @@ -1526,16 +1514,16 @@ no_run: } void -tcp_set_hpts(struct tcpcb *tp) +__tcp_set_hpts(struct tcp_hptsi *pace, struct tcpcb *tp) { struct tcp_hpts_entry *hpts; int failed; INP_WLOCK_ASSERT(tptoinpcb(tp)); - hpts = tcp_hpts_lock(tp); + hpts = tcp_hpts_lock(pace, tp); if (tp->t_in_hpts == IHPTS_NONE && !(tp->t_flags2 & TF2_HPTS_CPU_SET)) { - tp->t_hpts_cpu = hpts_cpuid(tp, &failed); + tp->t_hpts_cpu = hpts_cpuid(pace, tp, &failed); if (failed == 0) tp->t_flags2 |= TF2_HPTS_CPU_SET; } @@ -1543,33 +1531,35 @@ tcp_set_hpts(struct tcpcb *tp) } static struct tcp_hpts_entry * -tcp_choose_hpts_to_run(void) +tcp_choose_hpts_to_run(struct tcp_hptsi *pace) { + struct timeval tv; int i, oldest_idx, start, end; uint32_t cts, time_since_ran, calc; - cts = tcp_get_usecs(NULL); + microuptime(&tv); + cts = tcp_tv_to_usec(&tv); time_since_ran = 0; /* Default is all one group */ start = 0; - end = tcp_pace.rp_num_hptss; + end = pace->rp_num_hptss; /* * If we have more than one L3 group figure out which one * this CPU is in. 
*/ - if (tcp_pace.grp_cnt > 1) { - for (i = 0; i < tcp_pace.grp_cnt; i++) { - if (CPU_ISSET(curcpu, &tcp_pace.grps[i]->cg_mask)) { - start = tcp_pace.grps[i]->cg_first; - end = (tcp_pace.grps[i]->cg_last + 1); + if (pace->grp_cnt > 1) { + for (i = 0; i < pace->grp_cnt; i++) { + if (CPU_ISSET(curcpu, &pace->grps[i]->cg_mask)) { + start = pace->grps[i]->cg_first; + end = (pace->grps[i]->cg_last + 1); break; } } } oldest_idx = -1; for (i = start; i < end; i++) { - if (TSTMP_GT(cts, tcp_pace.cts_last_ran[i])) - calc = cts - tcp_pace.cts_last_ran[i]; + if (TSTMP_GT(cts, pace->cts_last_ran[i])) + calc = cts - pace->cts_last_ran[i]; else calc = 0; if (calc > time_since_ran) { @@ -1578,9 +1568,9 @@ tcp_choose_hpts_to_run(void) } } if (oldest_idx >= 0) - return(tcp_pace.rp_ent[oldest_idx]); + return(pace->rp_ent[oldest_idx]); else - return(tcp_pace.rp_ent[(curcpu % tcp_pace.rp_num_hptss)]); + return(pace->rp_ent[(curcpu % pace->rp_num_hptss)]); } static void @@ -1588,9 +1578,9 @@ __tcp_run_hpts(void) { struct epoch_tracker et; struct tcp_hpts_entry *hpts; - int ticks_ran; + int slots_ran; - hpts = tcp_choose_hpts_to_run(); + hpts = tcp_choose_hpts_to_run(tcp_hptsi_pace); if (hpts->p_hpts_active) { /* Already active */ @@ -1606,12 +1596,11 @@ __tcp_run_hpts(void) hpts->syscall_cnt++; counter_u64_add(hpts_direct_call, 1); hpts->p_hpts_active = 1; - ticks_ran = tcp_hptsi(hpts, false); + slots_ran = tcp_hptsi(hpts, false); /* We may want to adjust the sleep values here */ if (hpts->p_on_queue_cnt >= conn_cnt_thresh) { - if (ticks_ran > slots_indicate_less_sleep) { + if (slots_ran > slots_indicate_less_sleep) { struct timeval tv; - sbintime_t sb; hpts->p_mysleep.tv_usec /= 2; if (hpts->p_mysleep.tv_usec < dynamic_min_sleep) @@ -1635,13 +1624,8 @@ __tcp_run_hpts(void) * the dynamic value and set the on_min_sleep * flag so we will not be awoken. */ - sb = tvtosbt(tv); - /* Store off to make visible the actual sleep time */ - hpts->sleeping = tv.tv_usec; - callout_reset_sbt_on(&hpts->co, sb, 0, - hpts_timeout_swi, hpts, hpts->p_cpu, - (C_DIRECT_EXEC | C_PREL(tcp_hpts_precision))); - } else if (ticks_ran < slots_indicate_more_sleep) { + (void)tcp_hpts_sleep(hpts, &tv); + } else if (slots_ran < slots_indicate_more_sleep) { /* For the further sleep, don't reschedule hpts */ hpts->p_mysleep.tv_usec *= 2; if (hpts->p_mysleep.tv_usec > dynamic_max_sleep) @@ -1658,17 +1642,22 @@ out_with_mtx: static void tcp_hpts_thread(void *ctx) { +#ifdef TCP_HPTS_KTEST + struct tcp_hptsi *pace; +#endif struct tcp_hpts_entry *hpts; struct epoch_tracker et; struct timeval tv; - sbintime_t sb; - int ticks_ran; + int slots_ran; hpts = (struct tcp_hpts_entry *)ctx; +#ifdef TCP_HPTS_KTEST + pace = hpts->p_hptsi; +#endif HPTS_LOCK(hpts); if (hpts->p_direct_wake) { /* Signaled by input or output with low occupancy count. */ - callout_stop(&hpts->co); + _callout_stop_safe(&hpts->co, 0); counter_u64_add(hpts_direct_awakening, 1); } else { /* Timed out, the normal case. */ @@ -1721,7 +1710,7 @@ tcp_hpts_thread(void *ctx) } hpts->sleeping = 0; hpts->p_hpts_active = 1; - ticks_ran = tcp_hptsi(hpts, true); + slots_ran = tcp_hptsi(hpts, true); tv.tv_sec = 0; tv.tv_usec = hpts->p_hpts_sleep_time * HPTS_USECS_PER_SLOT; if ((hpts->p_on_queue_cnt > conn_cnt_thresh) && (hpts->hit_callout_thresh == 0)) { @@ -1737,11 +1726,11 @@ tcp_hpts_thread(void *ctx) * Only adjust sleep time if we were * called from the callout i.e. direct_wake == 0. 
*/ - if (ticks_ran < slots_indicate_more_sleep) { + if (slots_ran < slots_indicate_more_sleep) { hpts->p_mysleep.tv_usec *= 2; if (hpts->p_mysleep.tv_usec > dynamic_max_sleep) hpts->p_mysleep.tv_usec = dynamic_max_sleep; - } else if (ticks_ran > slots_indicate_less_sleep) { + } else if (slots_ran > slots_indicate_less_sleep) { hpts->p_mysleep.tv_usec /= 2; if (hpts->p_mysleep.tv_usec < dynamic_min_sleep) hpts->p_mysleep.tv_usec = dynamic_min_sleep; @@ -1797,18 +1786,11 @@ tcp_hpts_thread(void *ctx) hpts->p_hpts_active = 0; back_to_sleep: hpts->p_direct_wake = 0; - sb = tvtosbt(tv); - /* Store off to make visible the actual sleep time */ - hpts->sleeping = tv.tv_usec; - callout_reset_sbt_on(&hpts->co, sb, 0, - hpts_timeout_swi, hpts, hpts->p_cpu, - (C_DIRECT_EXEC | C_PREL(tcp_hpts_precision))); + (void)tcp_hpts_sleep(hpts, &tv); NET_EPOCH_EXIT(et); HPTS_UNLOCK(hpts); } -#undef timersub - static int32_t hpts_count_level(struct cpu_group *cg) { @@ -1845,57 +1827,63 @@ hpts_gather_grps(struct cpu_group **grps, int32_t *at, int32_t max, struct cpu_g } } -static void -tcp_hpts_mod_load(void) +/* + * Initialize a tcp_hptsi structure. This performs the core initialization + * without starting threads. + */ +struct tcp_hptsi* +tcp_hptsi_create(const struct tcp_hptsi_funcs *funcs, bool enable_sysctl) { + struct tcp_hptsi *pace; struct cpu_group *cpu_top; - int32_t error __diagused; - int32_t i, j, bound = 0, created = 0; + uint32_t i, j, cts; + int32_t count; size_t sz, asz; struct timeval tv; - sbintime_t sb; struct tcp_hpts_entry *hpts; - struct pcpu *pc; char unit[16]; uint32_t ncpus = mp_ncpus ? mp_ncpus : MAXCPU; - int count, domain; + KASSERT(funcs != NULL, ("funcs is NULL")); + + /* Allocate the main structure */ + pace = malloc(sizeof(struct tcp_hptsi), M_TCPHPTS, M_WAITOK | M_ZERO); + if (pace == NULL) + return (NULL); + + memset(pace, 0, sizeof(*pace)); + pace->funcs = funcs; + + /* Setup CPU topology information */ #ifdef SMP cpu_top = smp_topo(); #else cpu_top = NULL; #endif - tcp_pace.rp_num_hptss = ncpus; - hpts_hopelessly_behind = counter_u64_alloc(M_WAITOK); - hpts_loops = counter_u64_alloc(M_WAITOK); - back_tosleep = counter_u64_alloc(M_WAITOK); - combined_wheel_wrap = counter_u64_alloc(M_WAITOK); - wheel_wrap = counter_u64_alloc(M_WAITOK); - hpts_wake_timeout = counter_u64_alloc(M_WAITOK); - hpts_direct_awakening = counter_u64_alloc(M_WAITOK); - hpts_back_tosleep = counter_u64_alloc(M_WAITOK); - hpts_direct_call = counter_u64_alloc(M_WAITOK); - cpu_uses_flowid = counter_u64_alloc(M_WAITOK); - cpu_uses_random = counter_u64_alloc(M_WAITOK); + pace->rp_num_hptss = ncpus; - sz = (tcp_pace.rp_num_hptss * sizeof(struct tcp_hpts_entry *)); - tcp_pace.rp_ent = malloc(sz, M_TCPHPTS, M_WAITOK | M_ZERO); - sz = (sizeof(uint32_t) * tcp_pace.rp_num_hptss); - tcp_pace.cts_last_ran = malloc(sz, M_TCPHPTS, M_WAITOK); - tcp_pace.grp_cnt = 0; + /* Allocate hpts entry array */ + sz = (pace->rp_num_hptss * sizeof(struct tcp_hpts_entry *)); + pace->rp_ent = malloc(sz, M_TCPHPTS, M_WAITOK | M_ZERO); + + /* Allocate timestamp tracking array */ + sz = (sizeof(uint32_t) * pace->rp_num_hptss); + pace->cts_last_ran = malloc(sz, M_TCPHPTS, M_WAITOK); + + /* Setup CPU groups */ if (cpu_top == NULL) { - tcp_pace.grp_cnt = 1; + pace->grp_cnt = 1; } else { /* Find out how many cache level 3 domains we have */ count = 0; - tcp_pace.grp_cnt = hpts_count_level(cpu_top); - if (tcp_pace.grp_cnt == 0) { - tcp_pace.grp_cnt = 1; + pace->grp_cnt = hpts_count_level(cpu_top); + if (pace->grp_cnt == 0) { + pace->grp_cnt = 1; 
} - sz = (tcp_pace.grp_cnt * sizeof(struct cpu_group *)); - tcp_pace.grps = malloc(sz, M_TCPHPTS, M_WAITOK); + sz = (pace->grp_cnt * sizeof(struct cpu_group *)); + pace->grps = malloc(sz, M_TCPHPTS, M_WAITOK); /* Now populate the groups */ - if (tcp_pace.grp_cnt == 1) { + if (pace->grp_cnt == 1) { /* * All we need is the top level all cpu's are in * the same cache so when we use grp[0]->cg_mask @@ -1903,193 +1891,290 @@ tcp_hpts_mod_load(void) * all cpu's in it. The level here is probably * zero which is ok. */ - tcp_pace.grps[0] = cpu_top; + pace->grps[0] = cpu_top; } else { /* * Here we must find all the level three cache domains * and setup our pointers to them. */ count = 0; - hpts_gather_grps(tcp_pace.grps, &count, tcp_pace.grp_cnt, cpu_top); + hpts_gather_grps(pace->grps, &count, pace->grp_cnt, cpu_top); } } + + /* Cache the current time for initializing the hpts entries */ + microuptime(&tv); + cts = tcp_tv_to_usec(&tv); + + /* Initialize each hpts entry */ asz = sizeof(struct hptsh) * NUM_OF_HPTSI_SLOTS; - for (i = 0; i < tcp_pace.rp_num_hptss; i++) { - tcp_pace.rp_ent[i] = malloc(sizeof(struct tcp_hpts_entry), + for (i = 0; i < pace->rp_num_hptss; i++) { + pace->rp_ent[i] = malloc(sizeof(struct tcp_hpts_entry), M_TCPHPTS, M_WAITOK | M_ZERO); - tcp_pace.rp_ent[i]->p_hptss = malloc(asz, M_TCPHPTS, M_WAITOK); - hpts = tcp_pace.rp_ent[i]; - /* - * Init all the hpts structures that are not specifically - * zero'd by the allocations. Also lets attach them to the - * appropriate sysctl block as well. - */ - mtx_init(&hpts->p_mtx, "tcp_hpts_lck", - "hpts", MTX_DEF | MTX_DUPOK); - for (j = 0; j < NUM_OF_HPTSI_SLOTS; j++) { - TAILQ_INIT(&hpts->p_hptss[j].head); - hpts->p_hptss[j].count = 0; - hpts->p_hptss[j].gencnt = 0; - } - sysctl_ctx_init(&hpts->hpts_ctx); - sprintf(unit, "%d", i); - hpts->hpts_root = SYSCTL_ADD_NODE(&hpts->hpts_ctx, - SYSCTL_STATIC_CHILDREN(_net_inet_tcp_hpts), - OID_AUTO, - unit, - CTLFLAG_RW | CTLFLAG_MPSAFE, 0, - ""); - SYSCTL_ADD_INT(&hpts->hpts_ctx, - SYSCTL_CHILDREN(hpts->hpts_root), - OID_AUTO, "out_qcnt", CTLFLAG_RD, - &hpts->p_on_queue_cnt, 0, - "Count TCB's awaiting output processing"); - SYSCTL_ADD_U16(&hpts->hpts_ctx, - SYSCTL_CHILDREN(hpts->hpts_root), - OID_AUTO, "active", CTLFLAG_RD, - &hpts->p_hpts_active, 0, - "Is the hpts active"); - SYSCTL_ADD_UINT(&hpts->hpts_ctx, - SYSCTL_CHILDREN(hpts->hpts_root), - OID_AUTO, "curslot", CTLFLAG_RD, - &hpts->p_cur_slot, 0, - "What the current running pacers goal"); - SYSCTL_ADD_UINT(&hpts->hpts_ctx, - SYSCTL_CHILDREN(hpts->hpts_root), - OID_AUTO, "runtick", CTLFLAG_RD, - &hpts->p_runningslot, 0, - "What the running pacers current slot is"); - SYSCTL_ADD_UINT(&hpts->hpts_ctx, - SYSCTL_CHILDREN(hpts->hpts_root), - OID_AUTO, "curtick", CTLFLAG_RD, - &hpts->p_curtick, 0, - "What the running pacers last tick mapped to the wheel was"); - SYSCTL_ADD_UINT(&hpts->hpts_ctx, - SYSCTL_CHILDREN(hpts->hpts_root), - OID_AUTO, "lastran", CTLFLAG_RD, - &tcp_pace.cts_last_ran[i], 0, - "The last usec tick that this hpts ran"); - SYSCTL_ADD_LONG(&hpts->hpts_ctx, - SYSCTL_CHILDREN(hpts->hpts_root), - OID_AUTO, "cur_min_sleep", CTLFLAG_RD, - &hpts->p_mysleep.tv_usec, - "What the running pacers is using for p_mysleep.tv_usec"); - SYSCTL_ADD_U64(&hpts->hpts_ctx, - SYSCTL_CHILDREN(hpts->hpts_root), - OID_AUTO, "now_sleeping", CTLFLAG_RD, - &hpts->sleeping, 0, - "What the running pacers is actually sleeping for"); - SYSCTL_ADD_U64(&hpts->hpts_ctx, - SYSCTL_CHILDREN(hpts->hpts_root), - OID_AUTO, "syscall_cnt", CTLFLAG_RD, - 
&hpts->syscall_cnt, 0,
-		    "How many times we had syscalls on this hpts");
+		pace->rp_ent[i]->p_hptss = malloc(asz, M_TCPHPTS,
+		    M_WAITOK | M_ZERO);
+		hpts = pace->rp_ent[i];
+
+		/* Basic initialization */
 		hpts->p_hpts_sleep_time = hpts_sleep_max;
-		hpts->p_num = i;
-		hpts->p_curtick = tcp_gethptstick(&tv);
-		tcp_pace.cts_last_ran[i] = tcp_tv_to_usec(&tv);
-		hpts->p_prev_slot = hpts->p_cur_slot = tick_to_wheel(hpts->p_curtick);
-		hpts->p_cpu = 0xffff;
+		hpts->p_cpu = i;
+		pace->cts_last_ran[i] = cts;
+		hpts->p_cur_slot = cts_to_wheel(cts);
+		hpts->p_prev_slot = hpts->p_cur_slot;
 		hpts->p_nxt_slot = hpts_slot(hpts->p_cur_slot, 1);
 		callout_init(&hpts->co, 1);
+		hpts->p_hptsi = pace;
+		mtx_init(&hpts->p_mtx, "tcp_hpts_lck", "hpts",
+		    MTX_DEF | MTX_DUPOK);
+		for (j = 0; j < NUM_OF_HPTSI_SLOTS; j++) {
+			TAILQ_INIT(&hpts->p_hptss[j].head);
+		}
+
+		/* Setup SYSCTL if requested */
+		if (enable_sysctl) {
+			sysctl_ctx_init(&hpts->hpts_ctx);
+			sprintf(unit, "%d", i);
+			hpts->hpts_root = SYSCTL_ADD_NODE(&hpts->hpts_ctx,
+			    SYSCTL_STATIC_CHILDREN(_net_inet_tcp_hpts),
+			    OID_AUTO,
+			    unit,
+			    CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
+			    "");
+			SYSCTL_ADD_INT(&hpts->hpts_ctx,
+			    SYSCTL_CHILDREN(hpts->hpts_root),
+			    OID_AUTO, "out_qcnt", CTLFLAG_RD,
+			    &hpts->p_on_queue_cnt, 0,
+			    "Count TCB's awaiting output processing");
+			SYSCTL_ADD_U16(&hpts->hpts_ctx,
+			    SYSCTL_CHILDREN(hpts->hpts_root),
+			    OID_AUTO, "active", CTLFLAG_RD,
+			    &hpts->p_hpts_active, 0,
+			    "Is the hpts active");
+			SYSCTL_ADD_UINT(&hpts->hpts_ctx,
+			    SYSCTL_CHILDREN(hpts->hpts_root),
+			    OID_AUTO, "curslot", CTLFLAG_RD,
+			    &hpts->p_cur_slot, 0,
+			    "What the current running pacers goal");
+			SYSCTL_ADD_UINT(&hpts->hpts_ctx,
+			    SYSCTL_CHILDREN(hpts->hpts_root),
+			    OID_AUTO, "runslot", CTLFLAG_RD,
+			    &hpts->p_runningslot, 0,
+			    "What the running pacers current slot is");
+			SYSCTL_ADD_UINT(&hpts->hpts_ctx,
+			    SYSCTL_CHILDREN(hpts->hpts_root),
+			    OID_AUTO, "lastran", CTLFLAG_RD,
+			    &pace->cts_last_ran[i], 0,
+			    "The last usec timestamp that this hpts ran");
+			SYSCTL_ADD_LONG(&hpts->hpts_ctx,
+			    SYSCTL_CHILDREN(hpts->hpts_root),
+			    OID_AUTO, "cur_min_sleep", CTLFLAG_RD,
+			    &hpts->p_mysleep.tv_usec,
+			    "What the running pacers is using for p_mysleep.tv_usec");
+			SYSCTL_ADD_U64(&hpts->hpts_ctx,
+			    SYSCTL_CHILDREN(hpts->hpts_root),
+			    OID_AUTO, "now_sleeping", CTLFLAG_RD,
+			    &hpts->sleeping, 0,
+			    "What the running pacers is actually sleeping for");
+			SYSCTL_ADD_U64(&hpts->hpts_ctx,
+			    SYSCTL_CHILDREN(hpts->hpts_root),
+			    OID_AUTO, "syscall_cnt", CTLFLAG_RD,
+			    &hpts->syscall_cnt, 0,
+			    "How many times we had syscalls on this hpts");
+		}
 	}
-	/* Don't try to bind to NUMA domains if we don't have any */
-	if (vm_ndomains == 1 && tcp_bind_threads == 2)
-		tcp_bind_threads = 0;
-	/*
-	 * Now lets start ithreads to handle the hptss.
-	 */
-	for (i = 0; i < tcp_pace.rp_num_hptss; i++) {
-		hpts = tcp_pace.rp_ent[i];
-		hpts->p_cpu = i;
+	return (pace);
+}
+
+/*
+ * Create threads for a tcp_hptsi structure and start timers for the current
+ * (minimum) sleep interval.
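Taken together with tcp_hptsi_create() above, the lifecycle these functions form (mirrored by tcp_hpts_mod_load()/tcp_hpts_mod_unload() and by the ktests below) is, as a sketch with error handling elided:

	struct tcp_hptsi *pace;

	pace = tcp_hptsi_create(&tcp_hptsi_default_funcs, false);
	tcp_hptsi_start(pace);		/* spin up SWI threads, arm callouts */
	/* ... connections are paced ... */
	tcp_hptsi_stop(pace);		/* drain callouts, remove SWI handlers */
	tcp_hptsi_destroy(pace);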
+ */ +void +tcp_hptsi_start(struct tcp_hptsi *pace) +{ + struct tcp_hpts_entry *hpts; + struct pcpu *pc; + struct timeval tv; + uint32_t i, j; + int count, domain; + int error __diagused; + + KASSERT(pace != NULL, ("tcp_hptsi_start: pace is NULL")); + + /* Start threads for each hpts entry */ + for (i = 0; i < pace->rp_num_hptss; i++) { + hpts = pace->rp_ent[i]; + + KASSERT(hpts->ie_cookie == NULL, + ("tcp_hptsi_start: hpts[%d]->ie_cookie is not NULL", i)); error = swi_add(&hpts->ie, "hpts", tcp_hpts_thread, (void *)hpts, SWI_NET, INTR_MPSAFE, &hpts->ie_cookie); KASSERT(error == 0, - ("Can't add hpts:%p i:%d err:%d", - hpts, i, error)); - created++; - hpts->p_mysleep.tv_sec = 0; - hpts->p_mysleep.tv_usec = tcp_min_hptsi_time; + ("Can't add hpts:%p i:%d err:%d", hpts, i, error)); + if (tcp_bind_threads == 1) { - if (intr_event_bind(hpts->ie, i) == 0) - bound++; + (void)intr_event_bind(hpts->ie, i); } else if (tcp_bind_threads == 2) { /* Find the group for this CPU (i) and bind into it */ - for (j = 0; j < tcp_pace.grp_cnt; j++) { - if (CPU_ISSET(i, &tcp_pace.grps[j]->cg_mask)) { + for (j = 0; j < pace->grp_cnt; j++) { + if (CPU_ISSET(i, &pace->grps[j]->cg_mask)) { if (intr_event_bind_ithread_cpuset(hpts->ie, - &tcp_pace.grps[j]->cg_mask) == 0) { - bound++; + &pace->grps[j]->cg_mask) == 0) { pc = pcpu_find(i); domain = pc->pc_domain; - count = hpts_domains[domain].count; - hpts_domains[domain].cpu[count] = i; - hpts_domains[domain].count++; + count = pace->domains[domain].count; + pace->domains[domain].cpu[count] = i; + pace->domains[domain].count++; break; } } } } + + hpts->p_mysleep.tv_sec = 0; + hpts->p_mysleep.tv_usec = tcp_min_hptsi_time; tv.tv_sec = 0; tv.tv_usec = hpts->p_hpts_sleep_time * HPTS_USECS_PER_SLOT; - hpts->sleeping = tv.tv_usec; - sb = tvtosbt(tv); - callout_reset_sbt_on(&hpts->co, sb, 0, - hpts_timeout_swi, hpts, hpts->p_cpu, - (C_DIRECT_EXEC | C_PREL(tcp_hpts_precision))); - } - /* - * If we somehow have an empty domain, fall back to choosing - * among all htps threads. - */ - for (i = 0; i < vm_ndomains; i++) { - if (hpts_domains[i].count == 0) { - tcp_bind_threads = 0; - break; - } + (void)tcp_hpts_sleep(hpts, &tv); } - tcp_hpts_softclock = __tcp_run_hpts; - tcp_lro_hpts_init(); - printf("TCP Hpts created %d swi interrupt threads and bound %d to %s\n", - created, bound, - tcp_bind_threads == 2 ? "NUMA domains" : "cpus"); } -static void -tcp_hpts_mod_unload(void) +/* + * Stop all callouts/threads for a tcp_hptsi structure. + */ +void +tcp_hptsi_stop(struct tcp_hptsi *pace) { + struct tcp_hpts_entry *hpts; int rv __diagused; + uint32_t i; - tcp_lro_hpts_uninit(); - atomic_store_ptr(&tcp_hpts_softclock, NULL); + KASSERT(pace != NULL, ("tcp_hptsi_stop: pace is NULL")); - for (int i = 0; i < tcp_pace.rp_num_hptss; i++) { - struct tcp_hpts_entry *hpts = tcp_pace.rp_ent[i]; + for (i = 0; i < pace->rp_num_hptss; i++) { + hpts = pace->rp_ent[i]; + KASSERT(hpts != NULL, ("tcp_hptsi_stop: hpts[%d] is NULL", i)); + KASSERT(hpts->ie_cookie != NULL, + ("tcp_hptsi_stop: hpts[%d]->ie_cookie is NULL", i)); - rv = callout_drain(&hpts->co); + rv = _callout_stop_safe(&hpts->co, CS_DRAIN); MPASS(rv != 0); rv = swi_remove(hpts->ie_cookie); MPASS(rv == 0); + hpts->ie_cookie = NULL; + } +} - rv = sysctl_ctx_free(&hpts->hpts_ctx); - MPASS(rv == 0); +/* + * Destroy a tcp_hptsi structure initialized by tcp_hptsi_create. 
+ */
+void
+tcp_hptsi_destroy(struct tcp_hptsi *pace)
+{
+	struct tcp_hpts_entry *hpts;
+	uint32_t i;
+
+	KASSERT(pace != NULL, ("tcp_hptsi_destroy: pace is NULL"));
+	KASSERT(pace->rp_ent != NULL, ("tcp_hptsi_destroy: pace->rp_ent is NULL"));
+
+	/* Cleanup each hpts entry */
+	for (i = 0; i < pace->rp_num_hptss; i++) {
+		hpts = pace->rp_ent[i];
+		if (hpts != NULL) {
+			/* Cleanup SYSCTL if it was initialized */
+			if (hpts->hpts_root != NULL) {
+				sysctl_ctx_free(&hpts->hpts_ctx);
+			}
-
-		mtx_destroy(&hpts->p_mtx);
-		free(hpts->p_hptss, M_TCPHPTS);
-		free(hpts, M_TCPHPTS);
+			mtx_destroy(&hpts->p_mtx);
+			free(hpts->p_hptss, M_TCPHPTS);
+			free(hpts, M_TCPHPTS);
+		}
 	}
-	free(tcp_pace.rp_ent, M_TCPHPTS);
-	free(tcp_pace.cts_last_ran, M_TCPHPTS);
+	/* Cleanup main arrays */
+	free(pace->rp_ent, M_TCPHPTS);
+	free(pace->cts_last_ran, M_TCPHPTS);
 #ifdef SMP
-	free(tcp_pace.grps, M_TCPHPTS);
+	free(pace->grps, M_TCPHPTS);
 #endif
+	/* Free the main structure */
+	free(pace, M_TCPHPTS);
+}
+
+static int
+tcp_hpts_mod_load(void)
+{
+	int i;
+
+	/* Don't try to bind to NUMA domains if we don't have any */
+	if (vm_ndomains == 1 && tcp_bind_threads == 2)
+		tcp_bind_threads = 0;
+
+	/* Create the tcp_hptsi structure */
+	tcp_hptsi_pace = tcp_hptsi_create(&tcp_hptsi_default_funcs, true);
+	if (tcp_hptsi_pace == NULL)
+		return (ENOMEM);
+
+	/* Initialize global counters */
+	hpts_hopelessly_behind = counter_u64_alloc(M_WAITOK);
+	hpts_loops = counter_u64_alloc(M_WAITOK);
+	back_tosleep = counter_u64_alloc(M_WAITOK);
+	combined_wheel_wrap = counter_u64_alloc(M_WAITOK);
+	wheel_wrap = counter_u64_alloc(M_WAITOK);
+	hpts_wake_timeout = counter_u64_alloc(M_WAITOK);
+	hpts_direct_awakening = counter_u64_alloc(M_WAITOK);
+	hpts_back_tosleep = counter_u64_alloc(M_WAITOK);
+	hpts_direct_call = counter_u64_alloc(M_WAITOK);
+	cpu_uses_flowid = counter_u64_alloc(M_WAITOK);
+	cpu_uses_random = counter_u64_alloc(M_WAITOK);
+
+	/* Start the threads */
+	tcp_hptsi_start(tcp_hptsi_pace);
+
+	/* Enable the global HPTS softclock function */
+	tcp_hpts_softclock = __tcp_run_hpts;
+
+	/* Initialize LRO HPTS */
+	tcp_lro_hpts_init();
+
+	/*
+	 * If we somehow have an empty domain, fall back to choosing among all
+	 * HPTS threads.
+	 */
+	for (i = 0; i < vm_ndomains; i++) {
+		if (tcp_hptsi_pace->domains[i].count == 0) {
+			tcp_bind_threads = 0;
+			break;
+		}
+	}
+
+	printf("TCP HPTS started %u (%s) swi interrupt threads\n",
+	    tcp_hptsi_pace->rp_num_hptss, (tcp_bind_threads == 0) ?
+	    "unbound" :
+	    (tcp_bind_threads == 1 ? 
"per-cpu" : "per-NUMA-domain")); + + return (0); +} + +static void +tcp_hpts_mod_unload(void) +{ + tcp_lro_hpts_uninit(); + + /* Disable the global HPTS softclock function */ + atomic_store_ptr(&tcp_hpts_softclock, NULL); + + tcp_hptsi_stop(tcp_hptsi_pace); + tcp_hptsi_destroy(tcp_hptsi_pace); + tcp_hptsi_pace = NULL; + + /* Cleanup global counters */ counter_u64_free(hpts_hopelessly_behind); counter_u64_free(hpts_loops); counter_u64_free(back_tosleep); @@ -2104,13 +2189,11 @@ tcp_hpts_mod_unload(void) } static int -tcp_hpts_modevent(module_t mod, int what, void *arg) +tcp_hpts_mod_event(module_t mod, int what, void *arg) { - switch (what) { case MOD_LOAD: - tcp_hpts_mod_load(); - return (0); + return (tcp_hpts_mod_load()); case MOD_QUIESCE: /* * Since we are a dependency of TCP stack modules, they should @@ -2130,7 +2213,7 @@ tcp_hpts_modevent(module_t mod, int what, void *arg) static moduledata_t tcp_hpts_module = { .name = "tcphpts", - .evhand = tcp_hpts_modevent, + .evhand = tcp_hpts_mod_event, }; DECLARE_MODULE(tcphpts, tcp_hpts_module, SI_SUB_SOFTINTR, SI_ORDER_ANY); diff --git a/sys/netinet/tcp_hpts.h b/sys/netinet/tcp_hpts.h index 6172baf2a062..6b05f9701ac2 100644 --- a/sys/netinet/tcp_hpts.h +++ b/sys/netinet/tcp_hpts.h @@ -28,19 +28,11 @@ /* Number of useconds represented by an hpts slot */ #define HPTS_USECS_PER_SLOT 10 -#define HPTS_MS_TO_SLOTS(x) ((x * 100) + 1) -#define HPTS_USEC_TO_SLOTS(x) ((x+9) /10) #define HPTS_USEC_IN_SEC 1000000 #define HPTS_MSEC_IN_SEC 1000 #define HPTS_USEC_IN_MSEC 1000 static inline uint32_t -tcp_tv_to_hpts_slot(const struct timeval *sv) -{ - return ((sv->tv_sec * 100000) + (sv->tv_usec / HPTS_USECS_PER_SLOT)); -} - -static inline uint32_t tcp_tv_to_usec(const struct timeval *sv) { return ((uint32_t) ((sv->tv_sec * HPTS_USEC_IN_SEC) + sv->tv_usec)); @@ -66,7 +58,7 @@ struct hpts_diag { uint32_t p_runningslot; /* bbr->inflight */ uint32_t slot_req; /* bbr->flex3 x */ uint32_t inp_hptsslot; /* bbr->flex4 x */ - uint32_t slot_remaining; /* bbr->flex5 x */ + uint32_t time_remaining; /* bbr->flex5 x */ uint32_t have_slept; /* bbr->epoch x */ uint32_t hpts_sleep_time; /* bbr->applimited x */ uint32_t yet_to_sleep; /* bbr->lt_epoch x */ @@ -75,8 +67,6 @@ struct hpts_diag { uint32_t maxslots; /* bbr->delRate x */ uint32_t wheel_cts; /* bbr->rttProp x */ int32_t co_ret; /* bbr->pkts_out x */ - uint32_t p_curtick; /* upper bbr->cur_del_rate */ - uint32_t p_lasttick; /* lower bbr->cur_del_rate */ uint8_t p_on_min_sleep; /* bbr->flex8 x */ }; @@ -92,13 +82,18 @@ struct hpts_diag { #ifdef _KERNEL +extern struct tcp_hptsi *tcp_hptsi_pace; + /* * The following are the definitions for the kernel HPTS interface for managing * the HPTS ring and the TCBs on it. */ -void tcp_hpts_init(struct tcpcb *); -void tcp_hpts_remove(struct tcpcb *); +void __tcp_hpts_init(struct tcp_hptsi *pace, struct tcpcb *); +#define tcp_hpts_init(tp) __tcp_hpts_init(tcp_hptsi_pace, tp) + +void __tcp_hpts_remove(struct tcp_hptsi *pace, struct tcpcb *); +#define tcp_hpts_remove(tp) __tcp_hpts_remove(tcp_hptsi_pace, tp) static inline bool tcp_in_hpts(struct tcpcb *tp) @@ -132,12 +127,13 @@ tcp_in_hpts(struct tcpcb *tp) * that INP_WLOCK() or from destroying your TCB where again * you should already have the INP_WLOCK(). 
*/ -uint32_t tcp_hpts_insert_diag(struct tcpcb *tp, uint32_t slot, int32_t line, - struct hpts_diag *diag); -#define tcp_hpts_insert(inp, slot) \ - tcp_hpts_insert_diag((inp), (slot), __LINE__, NULL) +void __tcp_hpts_insert(struct tcp_hptsi *pace, struct tcpcb *tp, uint32_t usecs, + struct hpts_diag *diag); +#define tcp_hpts_insert(tp, usecs, diag) \ + __tcp_hpts_insert(tcp_hptsi_pace, (tp), (usecs), (diag)) -void tcp_set_hpts(struct tcpcb *tp); +void __tcp_set_hpts(struct tcp_hptsi *pace, struct tcpcb *tp); +#define tcp_set_hpts(tp) __tcp_set_hpts(tcp_hptsi_pace, tp) extern int32_t tcp_min_hptsi_time; @@ -147,17 +143,6 @@ get_hpts_min_sleep_time(void) return (tcp_min_hptsi_time + HPTS_USECS_PER_SLOT); } -static inline uint32_t -tcp_gethptstick(struct timeval *sv) -{ - struct timeval tv; - - if (sv == NULL) - sv = &tv; - microuptime(sv); - return (tcp_tv_to_hpts_slot(sv)); -} - static inline uint64_t tcp_get_u64_usecs(struct timeval *tv) { @@ -180,12 +165,5 @@ tcp_get_usecs(struct timeval *tv) return (tcp_tv_to_usec(tv)); } -/* - * LRO HPTS initialization and uninitialization, only for internal use by the - * HPTS code. - */ -void tcp_lro_hpts_init(void); -void tcp_lro_hpts_uninit(void); - #endif /* _KERNEL */ #endif /* __tcp_hpts_h__ */ diff --git a/sys/netinet/tcp_hpts_internal.h b/sys/netinet/tcp_hpts_internal.h new file mode 100644 index 000000000000..8b33e03a6981 --- /dev/null +++ b/sys/netinet/tcp_hpts_internal.h @@ -0,0 +1,184 @@ +/*- + * Copyright (c) 2025 Netflix, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef __tcp_hpts_internal_h__ +#define __tcp_hpts_internal_h__ + +/* + * TCP High Precision Timer System (HPTS) - Internal Definitions + * + * This header contains internal structures, constants, and interfaces that are + * implemented in tcp_hpts.c but exposed to enable comprehensive unit testing of + * the HPTS subsystem. + */ + +#if defined(_KERNEL) + +/* + * The hpts uses a 102400 wheel. The wheel + * defines the time in 10 usec increments (102400 x 10). + * This gives a range of 10usec - 1024ms to place + * an entry within. 
If the user requests more than
+ * 1.024 seconds, a remainder is attached and the hpts,
+ * when seeing the remainder, will re-insert the
+ * inpcb forward in time from where it is until
+ * the remainder is zero.
+ */
+
+#define NUM_OF_HPTSI_SLOTS 102400
+
+/* Convert microseconds to HPTS slots */
+#define HPTS_USEC_TO_SLOTS(x) (((x) + 9) / 10)
+
+/* The number of connections after which the dynamic sleep logic kicks in. */
+#define DEFAULT_CONNECTION_THRESHOLD 100
+
+extern int tcp_bind_threads;	/* Thread binding configuration
+				 * (0=none, 1=cpu, 2=numa) */
+
+/*
+ * Abstraction layer controlling time, interrupts and callouts.
+ */
+struct tcp_hptsi_funcs {
+	void (*microuptime)(struct timeval *tv);
+	int (*swi_add)(struct intr_event **eventp, const char *name,
+	    driver_intr_t handler, void *arg, int pri, enum intr_type flags,
+	    void **cookiep);
+	int (*swi_remove)(void *cookie);
+	void (*swi_sched)(void *cookie, int flags);
+	int (*intr_event_bind)(struct intr_event *ie, int cpu);
+	int (*intr_event_bind_ithread_cpuset)(struct intr_event *ie,
+	    struct _cpuset *mask);
+	void (*callout_init)(struct callout *c, int mpsafe);
+	int (*callout_reset_sbt_on)(struct callout *c, sbintime_t sbt,
+	    sbintime_t precision, void (*func)(void *), void *arg, int cpu,
+	    int flags);
+	int (*_callout_stop_safe)(struct callout *c, int flags);
+};
+
+/* Default function table for system operation */
+extern const struct tcp_hptsi_funcs tcp_hptsi_default_funcs;
+
+/* Each hpts has its own p_mtx which is used for locking */
+#define HPTS_MTX_ASSERT(hpts)	mtx_assert(&(hpts)->p_mtx, MA_OWNED)
+#define HPTS_LOCK(hpts)		mtx_lock(&(hpts)->p_mtx)
+#define HPTS_TRYLOCK(hpts)	mtx_trylock(&(hpts)->p_mtx)
+#define HPTS_UNLOCK(hpts)	mtx_unlock(&(hpts)->p_mtx)
+
+struct tcp_hpts_entry {
+	/* Cache line 0x00 */
+	struct mtx p_mtx;	/* Mutex for hpts */
+	struct timeval p_mysleep;	/* Our min sleep time */
+	uint64_t syscall_cnt;
+	uint64_t sleeping;	/* What the actual sleep was (if sleeping) */
+	uint16_t p_hpts_active;	/* Flag that says hpts is awake */
+	uint8_t p_wheel_complete; /* have we completed the wheel arc walk? */
+	uint32_t p_runningslot;	/* Current slot we are at if we are running */
+	uint32_t p_prev_slot;	/* Previous slot we were on */
+	uint32_t p_cur_slot;	/* Current slot in wheel hpts is draining */
+	uint32_t p_nxt_slot;	/* The next slot outside the current range
+				 * of slots that the hpts is running on.
 */
+	int32_t p_on_queue_cnt;	/* Count on queue in this hpts */
+	uint8_t p_direct_wake :1, /* boolean */
+		p_on_min_sleep:1, /* boolean */
+		p_hpts_wake_scheduled:1,/* boolean */
+		hit_callout_thresh:1,
+		p_avail:4;
+	uint8_t p_fill[3];	/* Fill to 32 bits */
+	/* Cache line 0x40 */
+	struct hptsh {
+		TAILQ_HEAD(, tcpcb) head;
+		uint32_t count;
+		uint32_t gencnt;
+	} *p_hptss;	/* Hptsi wheel */
+	uint32_t p_hpts_sleep_time;	/* Current sleep interval having a max
+					 * of 255ms */
+	uint32_t overidden_sleep;	/* what was overridden by min-sleep for logging */
+	uint32_t saved_curslot;		/* for logging */
+	uint32_t saved_prev_slot;	/* for logging */
+	uint32_t p_delayed_by;	/* How much were we delayed by */
+	/* Cache line 0x80 */
+	struct sysctl_ctx_list hpts_ctx;
+	struct sysctl_oid *hpts_root;
+	struct intr_event *ie;
+	void *ie_cookie;
+	uint16_t p_cpu;		/* The hpts CPU */
+	struct tcp_hptsi *p_hptsi;	/* Back pointer to parent hptsi structure */
+	/* There is extra space in here */
+	/* Cache line 0x100 */
+	struct callout co __aligned(CACHE_LINE_SIZE);
+} __aligned(CACHE_LINE_SIZE);
+
+struct tcp_hptsi {
+	struct cpu_group **grps;
+	struct tcp_hpts_entry **rp_ent;	/* Array of hptss */
+	uint32_t *cts_last_ran;
+	uint32_t grp_cnt;
+	uint32_t rp_num_hptss;	/* Number of hpts threads */
+	struct hpts_domain_info {
+		int count;
+		int cpu[MAXCPU];
+	} domains[MAXMEMDOM];	/* Per-NUMA domain CPU assignments */
+	const struct tcp_hptsi_funcs *funcs;	/* Function table for testability */
+};
+
+/*
+ * Core tcp_hptsi structure manipulation functions.
+ */
+struct tcp_hptsi* tcp_hptsi_create(const struct tcp_hptsi_funcs *funcs,
+    bool enable_sysctl);
+void tcp_hptsi_destroy(struct tcp_hptsi *pace);
+void tcp_hptsi_start(struct tcp_hptsi *pace);
+void tcp_hptsi_stop(struct tcp_hptsi *pace);
+uint16_t tcp_hptsi_random_cpu(struct tcp_hptsi *pace);
+int32_t tcp_hptsi(struct tcp_hpts_entry *hpts, bool from_callout);
+
+void tcp_hpts_wake(struct tcp_hpts_entry *hpts);
+
+/*
+ * LRO HPTS initialization and uninitialization, only for internal use by the
+ * HPTS code.
+ */
+void tcp_lro_hpts_init(void);
+void tcp_lro_hpts_uninit(void);
+
+#endif /* defined(_KERNEL) */
+#endif /* __tcp_hpts_internal_h__ */
diff --git a/sys/netinet/tcp_hpts_test.c b/sys/netinet/tcp_hpts_test.c
new file mode 100644
index 000000000000..c5dc9cb5b03b
--- /dev/null
+++ b/sys/netinet/tcp_hpts_test.c
@@ -0,0 +1,1682 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2025 Netflix, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <tests/ktest.h> +#include <sys/cdefs.h> +#include "opt_inet.h" +#include <sys/param.h> +#include <sys/bus.h> +#include <sys/interrupt.h> +#include <sys/errno.h> +#include <sys/malloc.h> +#include <sys/mutex.h> +#include <sys/refcount.h> +#include <sys/socket.h> +#include <sys/sysctl.h> +#include <sys/systm.h> + +#include <netinet/in.h> +#include <netinet/tcp.h> +#include <netinet/in_pcb.h> +#include <netinet/tcp_seq.h> +#include <netinet/tcp_var.h> +#include <netinet/tcp_hpts.h> +#include <netinet/tcp_hpts_internal.h> +#include <dev/tcp_log/tcp_log_dev.h> +#include <netinet/tcp_log_buf.h> + +#undef tcp_hpts_init +#undef tcp_hpts_remove +#undef tcp_hpts_insert +#undef tcp_set_hpts + +/* Custom definitions that take the tcp_hptsi */ +#define tcp_hpts_init(pace, tp) __tcp_hpts_init((pace), (tp)) +#define tcp_hpts_remove(pace, tp) __tcp_hpts_remove((pace), (tp)) +#define tcp_hpts_insert(pace, tp, usecs, diag) \ + __tcp_hpts_insert((pace), (tp), (usecs), (diag)) +#define tcp_set_hpts(pace, tp) __tcp_set_hpts((pace), (tp)) + +static MALLOC_DEFINE(M_TCPHPTS, "tcp_hpts_test", "TCP hpts test"); + +static int test_exit_on_failure = true; +SYSCTL_NODE(_net_inet_tcp, OID_AUTO, hpts_test, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, + "TCP HPTS test controls"); +SYSCTL_INT(_net_inet_tcp_hpts_test, OID_AUTO, exit_on_failure, CTLFLAG_RW, + &test_exit_on_failure, 0, + "Exit HPTS test immediately on first failure (1) or continue running all tests (0)"); + +#define KTEST_VERIFY(x) do { \ + if (!(x)) { \ + KTEST_ERR(ctx, "FAIL: %s", #x); \ + if (test_exit_on_failure) \ + return (EINVAL); \ + } else { \ + KTEST_LOG(ctx, "PASS: %s", #x); \ + } \ +} while (0) + +#define KTEST_EQUAL(x, y) do { \ + if ((x) != (y)) { \ + KTEST_ERR(ctx, "FAIL: %s != %s (%d != %d)", #x, #y, (x), (y)); \ + if (test_exit_on_failure) \ + return (EINVAL); \ + } else { \ + KTEST_LOG(ctx, "PASS: %s == %s", #x, #y); \ + } \ +} while (0) + +#define KTEST_NEQUAL(x, y) do { \ + if ((x) == (y)) { \ + KTEST_ERR(ctx, "FAIL: %s == %s (%d == %d)", #x, #y, (x), (y)); \ + if (test_exit_on_failure) \ + return (EINVAL); \ + } else { \ + KTEST_LOG(ctx, "PASS: %s != %s", #x, #y); \ + } \ +} while (0) + +#define KTEST_GREATER_THAN(x, y) do { \ + if ((x) <= (y)) { \ + KTEST_ERR(ctx, "FAIL: %s <= %s (%d <= %d)", #x, #y, (x), (y)); \ + if (test_exit_on_failure) \ + return (EINVAL); \ + } else { \ + KTEST_LOG(ctx, "PASS: %s > %s", #x, #y); \ + } \ +} while (0) + +#define KTEST_VERIFY_RET(x, y) do { \ + if (!(x)) { \ + KTEST_ERR(ctx, "FAIL: %s", #x); \ + if (test_exit_on_failure) \ + return (y); \ + } else { \ + KTEST_LOG(ctx, "PASS: %s", #x); \ + } \ +} while (0) + +#ifdef TCP_HPTS_KTEST + +static void +dump_hpts_entry(struct ktest_test_context *ctx, struct tcp_hpts_entry *hpts) +{ + KTEST_LOG(ctx, "tcp_hpts_entry(%p)", hpts); + KTEST_LOG(ctx, " p_cur_slot: %u", hpts->p_cur_slot); + KTEST_LOG(ctx, " p_prev_slot: %u", hpts->p_prev_slot); + KTEST_LOG(ctx, " p_nxt_slot: %u", hpts->p_nxt_slot); + KTEST_LOG(ctx, " p_runningslot: %u", 
hpts->p_runningslot); + KTEST_LOG(ctx, " p_on_queue_cnt: %d", hpts->p_on_queue_cnt); + KTEST_LOG(ctx, " p_hpts_active: %u", hpts->p_hpts_active); + KTEST_LOG(ctx, " p_wheel_complete: %u", hpts->p_wheel_complete); + KTEST_LOG(ctx, " p_direct_wake: %u", hpts->p_direct_wake); + KTEST_LOG(ctx, " p_on_min_sleep: %u", hpts->p_on_min_sleep); + KTEST_LOG(ctx, " p_hpts_wake_scheduled: %u", hpts->p_hpts_wake_scheduled); + KTEST_LOG(ctx, " hit_callout_thresh: %u", hpts->hit_callout_thresh); + KTEST_LOG(ctx, " p_hpts_sleep_time: %u", hpts->p_hpts_sleep_time); + KTEST_LOG(ctx, " p_delayed_by: %u", hpts->p_delayed_by); + KTEST_LOG(ctx, " overidden_sleep: %u", hpts->overidden_sleep); + KTEST_LOG(ctx, " saved_curslot: %u", hpts->saved_curslot); + KTEST_LOG(ctx, " saved_prev_slot: %u", hpts->saved_prev_slot); + KTEST_LOG(ctx, " syscall_cnt: %lu", hpts->syscall_cnt); + KTEST_LOG(ctx, " sleeping: %lu", hpts->sleeping); + KTEST_LOG(ctx, " p_cpu: %u", hpts->p_cpu); + KTEST_LOG(ctx, " ie_cookie: %p", hpts->ie_cookie); + KTEST_LOG(ctx, " p_hptsi: %p", hpts->p_hptsi); + KTEST_LOG(ctx, " p_mysleep: %ld.%06ld", hpts->p_mysleep.tv_sec, hpts->p_mysleep.tv_usec); +} + +static void +dump_tcpcb(struct tcpcb *tp) +{ + struct ktest_test_context *ctx = tp->t_fb_ptr; + struct inpcb *inp = &tp->t_inpcb; + + KTEST_LOG(ctx, "tcp_control_block(%p)", tp); + + /* HPTS-specific fields */ + KTEST_LOG(ctx, " t_in_hpts: %d", tp->t_in_hpts); + KTEST_LOG(ctx, " t_hpts_cpu: %u", tp->t_hpts_cpu); + KTEST_LOG(ctx, " t_hpts_slot: %d", tp->t_hpts_slot); + KTEST_LOG(ctx, " t_hpts_gencnt: %u", tp->t_hpts_gencnt); + KTEST_LOG(ctx, " t_hpts_request: %u", tp->t_hpts_request); + + /* LRO CPU field */ + KTEST_LOG(ctx, " t_lro_cpu: %u", tp->t_lro_cpu); + + /* TCP flags that affect HPTS */ + KTEST_LOG(ctx, " t_flags2: 0x%x", tp->t_flags2); + KTEST_LOG(ctx, " TF2_HPTS_CPU_SET: %s", (tp->t_flags2 & TF2_HPTS_CPU_SET) ? "YES" : "NO"); + KTEST_LOG(ctx, " TF2_HPTS_CALLS: %s", (tp->t_flags2 & TF2_HPTS_CALLS) ? "YES" : "NO"); + KTEST_LOG(ctx, " TF2_SUPPORTS_MBUFQ: %s", (tp->t_flags2 & TF2_SUPPORTS_MBUFQ) ? "YES" : "NO"); + + /* Input PCB fields that HPTS uses */ + KTEST_LOG(ctx, " inp_flags: 0x%x", inp->inp_flags); + KTEST_LOG(ctx, " INP_DROPPED: %s", (inp->inp_flags & INP_DROPPED) ? "YES" : "NO"); + KTEST_LOG(ctx, " inp_flowid: 0x%x", inp->inp_flowid); + KTEST_LOG(ctx, " inp_flowtype: %u", inp->inp_flowtype); + KTEST_LOG(ctx, " inp_numa_domain: %d", inp->inp_numa_domain); +} + +/* Enum for call counting indices */ +enum test_call_counts { + CCNT_MICROUPTIME = 0, + CCNT_SWI_ADD, + CCNT_SWI_REMOVE, + CCNT_SWI_SCHED, + CCNT_INTR_EVENT_BIND, + CCNT_INTR_EVENT_BIND_CPUSET, + CCNT_CALLOUT_INIT, + CCNT_CALLOUT_RESET_SBT_ON, + CCNT_CALLOUT_STOP_SAFE, + CCNT_TCP_OUTPUT, + CCNT_TCP_TFB_DO_QUEUED_SEGMENTS, + CCNT_MAX +}; + +static uint32_t call_counts[CCNT_MAX]; + +static uint64_t test_time_usec = 0; + +/* + * Reset all test global variables to a clean state. 
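Because the mocked microuptime() below derives its answer from test_time_usec, the tests drive the clock explicitly; the usual pattern is:

	test_hpts_init();	/* zero the call counters and the fake clock */
	test_time_usec += 500;	/* advance virtual time by 500 usec */
	/* subsequent test_microuptime() calls now report the new time */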
+ */ +static void +test_hpts_init(void) +{ + memset(call_counts, 0, sizeof(call_counts)); + test_time_usec = 0; +} + +static void +test_microuptime(struct timeval *tv) +{ + call_counts[CCNT_MICROUPTIME]++; + tv->tv_sec = test_time_usec / 1000000; + tv->tv_usec = test_time_usec % 1000000; +} + +static int +test_swi_add(struct intr_event **eventp, const char *name, + driver_intr_t handler, void *arg, int pri, enum intr_type flags, + void **cookiep) +{ + call_counts[CCNT_SWI_ADD]++; + /* Simulate successful SWI creation */ + *eventp = (struct intr_event *)0xfeedface; /* Mock event */ + *cookiep = (void *)0xdeadbeef; /* Mock cookie */ + return (0); +} + +static int +test_swi_remove(void *cookie) +{ + call_counts[CCNT_SWI_REMOVE]++; + /* Simulate successful removal */ + return (0); +} + +static void +test_swi_sched(void *cookie, int flags) +{ + call_counts[CCNT_SWI_SCHED]++; + /* Simulate successful SWI scheduling */ +} + +static int +test_intr_event_bind(struct intr_event *ie, int cpu) +{ + call_counts[CCNT_INTR_EVENT_BIND]++; + /* Simulate successful binding */ + return (0); +} + +static int +test_intr_event_bind_ithread_cpuset(struct intr_event *ie, struct _cpuset *mask) +{ + call_counts[CCNT_INTR_EVENT_BIND_CPUSET]++; + /* Simulate successful cpuset binding */ + return (0); +} + +static void +test_callout_init(struct callout *c, int mpsafe) +{ + call_counts[CCNT_CALLOUT_INIT]++; + memset(c, 0, sizeof(*c)); +} + +static int +test_callout_reset_sbt_on(struct callout *c, sbintime_t sbt, sbintime_t precision, + void (*func)(void *), void *arg, int cpu, int flags) +{ + call_counts[CCNT_CALLOUT_RESET_SBT_ON]++; + /* Return 1 to simulate successful timer scheduling */ + return (1); +} + +static int +test_callout_stop_safe(struct callout *c, int flags) +{ + call_counts[CCNT_CALLOUT_STOP_SAFE]++; + /* Return 1 to simulate successful timer stopping */ + return (1); +} + +static const struct tcp_hptsi_funcs test_funcs = { + .microuptime = test_microuptime, + .swi_add = test_swi_add, + .swi_remove = test_swi_remove, + .swi_sched = test_swi_sched, + .intr_event_bind = test_intr_event_bind, + .intr_event_bind_ithread_cpuset = test_intr_event_bind_ithread_cpuset, + .callout_init = test_callout_init, + .callout_reset_sbt_on = test_callout_reset_sbt_on, + ._callout_stop_safe = test_callout_stop_safe, +}; + +#define TP_REMOVE_FROM_HPTS(tp) tp->bits_spare +#define TP_LOG_TEST(tp) tp->t_log_state_set + +static int +test_tcp_output(struct tcpcb *tp) +{ + struct ktest_test_context *ctx = tp->t_fb_ptr; + struct tcp_hptsi *pace = (struct tcp_hptsi*)tp->t_tfo_pending; + struct tcp_hpts_entry *hpts = pace->rp_ent[tp->t_hpts_cpu]; + + call_counts[CCNT_TCP_OUTPUT]++; + if (TP_LOG_TEST(tp)) { + KTEST_LOG(ctx, "=> tcp_output(%p)", tp); + dump_tcpcb(tp); + dump_hpts_entry(ctx, hpts); + } + + if ((TP_REMOVE_FROM_HPTS(tp) & 1) != 0) { + if (TP_LOG_TEST(tp)) + KTEST_LOG(ctx, "=> tcp_hpts_remove(%p)", tp); + tcp_hpts_remove(pace, tp); + } + + if ((TP_REMOVE_FROM_HPTS(tp) & 2) != 0) { + INP_WUNLOCK(&tp->t_inpcb); /* tcp_output unlocks on error */ + return (-1); /* Simulate tcp_output error */ + } + + return (0); +} + +static int +test_tfb_do_queued_segments(struct tcpcb *tp, int flag) +{ + struct ktest_test_context *ctx = tp->t_fb_ptr; + struct tcp_hptsi *pace = (struct tcp_hptsi*)tp->t_tfo_pending; + struct tcp_hpts_entry *hpts = pace->rp_ent[tp->t_hpts_cpu]; + + call_counts[CCNT_TCP_TFB_DO_QUEUED_SEGMENTS]++; + KTEST_LOG(ctx, "=> tfb_do_queued_segments(%p, %d)", tp, flag); + dump_tcpcb(tp); + dump_hpts_entry(ctx, hpts); + + 
if ((TP_REMOVE_FROM_HPTS(tp) & 1) != 0) { + if (TP_LOG_TEST(tp)) + KTEST_LOG(ctx, "=> tcp_hpts_remove(%p)", tp); + tcp_hpts_remove(pace, tp); + } + + if ((TP_REMOVE_FROM_HPTS(tp) & 2) != 0) { + INP_WUNLOCK(&tp->t_inpcb); /* do_queued_segments unlocks on error */ + return (-1); /* Simulate do_queued_segments error */ + } + + return (0); +} + +static struct tcp_function_block test_tcp_fb = { + .tfb_tcp_block_name = "hpts_test_tcp", + .tfb_tcp_output = test_tcp_output, + .tfb_do_queued_segments = test_tfb_do_queued_segments, +}; + +/* + * Create a minimally initialized tcpcb that can be safely inserted into HPTS. + * This function allocates and initializes all the fields that HPTS code + * reads or writes. + */ +static struct tcpcb * +test_hpts_create_tcpcb(struct ktest_test_context *ctx, struct tcp_hptsi *pace) +{ + struct tcpcb *tp; + + tp = malloc(sizeof(struct tcpcb), M_TCPHPTS, M_WAITOK | M_ZERO); + if (tp) { + rw_init_flags(&tp->t_inpcb.inp_lock, "test-inp", + RW_RECURSE | RW_DUPOK); + refcount_init(&tp->t_inpcb.inp_refcount, 1); + tp->t_inpcb.inp_pcbinfo = &V_tcbinfo; + tp->t_fb = &test_tcp_fb; + tp->t_hpts_cpu = HPTS_CPU_NONE; + STAILQ_INIT(&tp->t_inqueue); + tcp_hpts_init(pace, tp); + + /* Stuff some pointers in the tcb for test purposes. */ + tp->t_fb_ptr = ctx; + tp->t_tfo_pending = (unsigned int*)pace; + } + + return (tp); +} + +/* + * Free a test tcpcb created by test_hpts_create_tcpcb() + */ +static void +test_hpts_free_tcpcb(struct tcpcb *tp) +{ + if (tp == NULL) + return; + + INP_LOCK_DESTROY(&tp->t_inpcb); + free(tp, M_TCPHPTS); +} + +/* + * *********************************************** + * * KTEST functions for testing the HPTS module * + * *********************************************** + */ + +/* + * Validates that the HPTS module is properly loaded and initialized by checking + * that the minimum HPTS time is configured. + */ +KTEST_FUNC(module_load) +{ + test_hpts_init(); + KTEST_NEQUAL(tcp_min_hptsi_time, 0); + KTEST_VERIFY(tcp_bind_threads >= 0 && tcp_bind_threads <= 2); + KTEST_NEQUAL(tcp_hptsi_pace, NULL); + return (0); +} + +/* + * Validates the creation and destruction of tcp_hptsi structures, ensuring + * proper initialization of internal fields and clean destruction. + */ +KTEST_FUNC(hptsi_create_destroy) +{ + struct tcp_hptsi *pace; + + test_hpts_init(); + + pace = tcp_hptsi_create(&test_funcs, false); + KTEST_NEQUAL(pace, NULL); + KTEST_NEQUAL(pace->rp_ent, NULL); + KTEST_NEQUAL(pace->cts_last_ran, NULL); + KTEST_VERIFY(pace->rp_num_hptss > 0); + KTEST_VERIFY(pace->rp_num_hptss <= MAXCPU); /* Reasonable upper bound */ + KTEST_VERIFY(pace->grp_cnt >= 1); /* At least one group */ + KTEST_EQUAL(pace->funcs, &test_funcs); /* Verify function pointer was set */ + + /* Verify individual HPTS entries are properly initialized */ + for (uint32_t i = 0; i < pace->rp_num_hptss; i++) { + KTEST_NEQUAL(pace->rp_ent[i], NULL); + KTEST_EQUAL(pace->rp_ent[i]->p_cpu, i); + KTEST_EQUAL(pace->rp_ent[i]->p_hptsi, pace); + KTEST_EQUAL(pace->rp_ent[i]->p_on_queue_cnt, 0); + } + + tcp_hptsi_destroy(pace); + + return (0); +} + +/* + * Validates that tcp_hptsi structures can be started and stopped properly, + * including verification that threads are created during start and cleaned up + * during stop operations. 
+ */ +KTEST_FUNC(hptsi_start_stop) +{ + struct tcp_hptsi *pace; + + test_hpts_init(); + + pace = tcp_hptsi_create(&test_funcs, false); + KTEST_NEQUAL(pace, NULL); + + tcp_hptsi_start(pace); + + /* Verify that entries have threads started */ + struct tcp_hpts_entry *hpts = pace->rp_ent[0]; + KTEST_NEQUAL(hpts->ie_cookie, NULL); /* Should have SWI handler */ + KTEST_EQUAL(hpts->p_hptsi, pace); /* Should point to our pace */ + + tcp_hptsi_stop(pace); + tcp_hptsi_destroy(pace); + + return (0); +} + +/* + * Validates that multiple tcp_hptsi instances can coexist independently, with + * different configurations and CPU assignments without interfering with each + * other. + */ +KTEST_FUNC(hptsi_independence) +{ + struct tcp_hptsi *pace1, *pace2; + uint16_t cpu1, cpu2; + + test_hpts_init(); + + pace1 = tcp_hptsi_create(&test_funcs, false); + pace2 = tcp_hptsi_create(&test_funcs, false); + KTEST_NEQUAL(pace1, NULL); + KTEST_NEQUAL(pace2, NULL); + KTEST_NEQUAL(pace2->rp_ent, NULL); + + cpu1 = tcp_hptsi_random_cpu(pace1); + cpu2 = tcp_hptsi_random_cpu(pace2); + KTEST_VERIFY(cpu1 < pace1->rp_num_hptss); + KTEST_VERIFY(cpu2 < pace2->rp_num_hptss); + + /* Verify both instances have independent entry arrays */ + KTEST_NEQUAL(pace1->rp_ent, pace2->rp_ent); + /* Verify they may have different CPU counts but both reasonable */ + KTEST_VERIFY(pace1->rp_num_hptss > 0 && pace1->rp_num_hptss <= MAXCPU); + KTEST_VERIFY(pace2->rp_num_hptss > 0 && pace2->rp_num_hptss <= MAXCPU); + + tcp_hptsi_destroy(pace1); + tcp_hptsi_destroy(pace2); + + return (0); +} + +/* + * Validates that custom function injection works correctly, ensuring that + * test-specific implementations of microuptime and others are properly + * called by the HPTS system. + */ +KTEST_FUNC(function_injection) +{ + struct tcp_hptsi *pace; + + test_hpts_init(); + + pace = tcp_hptsi_create(&test_funcs, false); + KTEST_NEQUAL(pace, NULL); + KTEST_EQUAL(pace->funcs, &test_funcs); + KTEST_VERIFY(call_counts[CCNT_MICROUPTIME] > 0); + KTEST_VERIFY(call_counts[CCNT_CALLOUT_INIT] > 0); + + tcp_hptsi_start(pace); + KTEST_VERIFY(call_counts[CCNT_SWI_ADD] > 0); + KTEST_VERIFY(tcp_bind_threads == 0 || + call_counts[CCNT_INTR_EVENT_BIND] > 0 || + call_counts[CCNT_INTR_EVENT_BIND_CPUSET] > 0); + KTEST_VERIFY(call_counts[CCNT_CALLOUT_RESET_SBT_ON] > 0); + + tcp_hptsi_stop(pace); + KTEST_VERIFY(call_counts[CCNT_CALLOUT_STOP_SAFE] > 0); + KTEST_VERIFY(call_counts[CCNT_SWI_REMOVE] > 0); + + tcp_hptsi_destroy(pace); + + /* Verify we have a reasonable balance of create/destroy calls */ + KTEST_EQUAL(call_counts[CCNT_SWI_ADD], call_counts[CCNT_SWI_REMOVE]); + KTEST_VERIFY(call_counts[CCNT_CALLOUT_RESET_SBT_ON] <= call_counts[CCNT_CALLOUT_STOP_SAFE]); + + return (0); +} + +/* + * Validates that a tcpcb can be properly initialized for HPTS compatibility, + * ensuring all required fields are set correctly and function pointers are + * valid for safe HPTS operations. 
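The slot assertions in the next tests rest on the rounding behavior of HPTS_USEC_TO_SLOTS(); worked values:

	/*
	 * HPTS_USEC_TO_SLOTS(x) = ((x) + 9) / 10 rounds up to whole slots:
	 *   HPTS_USEC_TO_SLOTS(1)   == 1
	 *   HPTS_USEC_TO_SLOTS(10)  == 1
	 *   HPTS_USEC_TO_SLOTS(500) == 50
	 */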
+ */ +KTEST_FUNC(tcpcb_initialization) +{ + struct tcp_hptsi *pace; + struct tcpcb *tp; + + test_hpts_init(); + + pace = tcp_hptsi_create(&test_funcs, false); + KTEST_NEQUAL(pace, NULL); + tcp_hptsi_start(pace); + + /* Verify the tcpcb is properly initialized for HPTS */ + tp = test_hpts_create_tcpcb(ctx, pace); + KTEST_NEQUAL(tp, NULL); + KTEST_NEQUAL(tp->t_fb, NULL); + KTEST_NEQUAL(tp->t_fb->tfb_tcp_output, NULL); + KTEST_NEQUAL(tp->t_fb->tfb_do_queued_segments, NULL); + KTEST_EQUAL(tp->t_in_hpts, IHPTS_NONE); + KTEST_EQUAL((tp->t_flags2 & (TF2_HPTS_CPU_SET | TF2_HPTS_CALLS)), 0); + + /* Verify that HPTS-specific fields are initialized */ + KTEST_EQUAL(tp->t_hpts_gencnt, 0); + KTEST_EQUAL(tp->t_hpts_slot, 0); + KTEST_EQUAL(tp->t_hpts_request, 0); + KTEST_EQUAL(tp->t_lro_cpu, 0); + KTEST_VERIFY(tp->t_hpts_cpu < pace->rp_num_hptss); + KTEST_EQUAL(tp->t_inpcb.inp_refcount, 1); + KTEST_VERIFY(!(tp->t_inpcb.inp_flags & INP_DROPPED)); + + test_hpts_free_tcpcb(tp); + tcp_hptsi_stop(pace); + tcp_hptsi_destroy(pace); + + return (0); +} + +/* + * Validates that tcpcb structures can be successfully inserted into and removed + * from the HPTS wheel, with proper state tracking and slot assignment during + * the process. + */ +KTEST_FUNC(tcpcb_insertion) +{ + struct tcp_hptsi *pace; + struct tcpcb *tp; + struct tcp_hpts_entry *hpts; + uint32_t timeout_usecs = 10; + + test_hpts_init(); + + pace = tcp_hptsi_create(&test_funcs, false); + KTEST_NEQUAL(pace, NULL); + tcp_hptsi_start(pace); + + tp = test_hpts_create_tcpcb(ctx, pace); + KTEST_NEQUAL(tp, NULL); + KTEST_EQUAL(tp->t_in_hpts, IHPTS_NONE); + KTEST_EQUAL((tp->t_flags2 & TF2_HPTS_CALLS), 0); + + INP_WLOCK(&tp->t_inpcb); + tp->t_flags2 |= TF2_HPTS_CALLS; + KTEST_EQUAL(call_counts[CCNT_SWI_SCHED], 0); + tcp_hpts_insert(pace, tp, timeout_usecs, NULL); + KTEST_EQUAL(tp->t_in_hpts, IHPTS_ONQUEUE); + INP_WUNLOCK(&tp->t_inpcb); + KTEST_EQUAL(call_counts[CCNT_TCP_OUTPUT], 0); + KTEST_EQUAL(call_counts[CCNT_SWI_SCHED], 1); + KTEST_VERIFY(tcp_in_hpts(tp)); + KTEST_VERIFY(tp->t_hpts_slot >= 0); + KTEST_VERIFY(tp->t_hpts_slot < NUM_OF_HPTSI_SLOTS); + + hpts = pace->rp_ent[tp->t_hpts_cpu]; + KTEST_EQUAL(hpts->p_on_queue_cnt, 1); + KTEST_EQUAL(tp->t_hpts_request, 0); + KTEST_EQUAL(tp->t_hpts_slot, HPTS_USEC_TO_SLOTS(timeout_usecs)); + //KTEST_EQUAL(tp->t_hpts_gencnt, 1); + + INP_WLOCK(&tp->t_inpcb); + tcp_hpts_remove(pace, tp); + KTEST_EQUAL(tp->t_in_hpts, IHPTS_NONE); + INP_WUNLOCK(&tp->t_inpcb); + KTEST_EQUAL(call_counts[CCNT_TCP_OUTPUT], 0); + KTEST_VERIFY(!tcp_in_hpts(tp)); + + KTEST_EQUAL(hpts->p_on_queue_cnt, 0); + + test_hpts_free_tcpcb(tp); + tcp_hptsi_stop(pace); + tcp_hptsi_destroy(pace); + + return (0); +} + +/* + * Validates the core HPTS timer functionality by verifying that scheduled + * tcpcb entries trigger tcp_output calls at appropriate times, simulating + * real-world timer-driven TCP processing. 
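The expected wheel positions in this test follow directly from the 10 usec slot width:

	/*
	 * insert at 500 usec -> t_hpts_slot = HPTS_USEC_TO_SLOTS(500) = 50
	 * clock at 499 usec  -> wheel slot 499 / 10 = 49; slot 50 has not
	 *                       been reached, so tcp_output() must not run
	 * clock at 500 usec  -> wheel slot 50; the entry fires exactly once
	 */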
+ */ +KTEST_FUNC(timer_functionality) +{ + struct epoch_tracker et; + struct tcp_hptsi *pace; + struct tcp_hpts_entry *hpts; + struct tcpcb *tp; + int32_t slots_ran; + uint32_t i; + + test_hpts_init(); + + pace = tcp_hptsi_create(&test_funcs, false); + KTEST_NEQUAL(pace, NULL); + tcp_hptsi_start(pace); + + for (i = 0; i < pace->rp_num_hptss; i++) + dump_hpts_entry(ctx, pace->rp_ent[i]); + + /* Create and insert the tcpcb into the HPTS wheel to wait for 500 usec */ + tp = test_hpts_create_tcpcb(ctx, pace); + KTEST_NEQUAL(tp, NULL); + dump_tcpcb(tp); + TP_LOG_TEST(tp) = 1; /* Enable logging for this tcpcb */ + + KTEST_LOG(ctx, "=> tcp_hpts_insert(%p)", tp); + INP_WLOCK(&tp->t_inpcb); + tp->t_flags2 |= TF2_HPTS_CALLS; /* Mark as needing HPTS processing */ + tcp_hpts_insert(pace, tp, 500, NULL); + INP_WUNLOCK(&tp->t_inpcb); + + dump_tcpcb(tp); + for (i = 0; i < pace->rp_num_hptss; i++) + dump_hpts_entry(ctx, pace->rp_ent[i]); + + hpts = pace->rp_ent[tp->t_hpts_cpu]; + KTEST_EQUAL(hpts->p_on_queue_cnt, 1); + KTEST_EQUAL(hpts->p_prev_slot, 0); + KTEST_EQUAL(hpts->p_cur_slot, 0); + KTEST_EQUAL(hpts->p_runningslot, 0); + KTEST_EQUAL(hpts->p_nxt_slot, 1); + KTEST_EQUAL(hpts->p_hpts_active, 0); + + KTEST_EQUAL(tp->t_in_hpts, IHPTS_ONQUEUE); + KTEST_EQUAL(tp->t_hpts_request, 0); + KTEST_EQUAL(tp->t_hpts_slot, HPTS_USEC_TO_SLOTS(500)); + + /* Set our test flag to indicate the tcpcb should be removed from the + * wheel when tcp_output is called. */ + TP_REMOVE_FROM_HPTS(tp) = 1; + + /* Test early exit condition: advance time by insufficient amount */ + KTEST_LOG(ctx, "Testing early exit with insufficient time advancement"); + test_time_usec += 1; /* Very small advancement - should cause early exit */ + HPTS_LOCK(hpts); + NET_EPOCH_ENTER(et); + slots_ran = tcp_hptsi(hpts, true); + HPTS_UNLOCK(hpts); + NET_EPOCH_EXIT(et); + + /* Should return 0 slots due to insufficient time advancement */ + KTEST_EQUAL(slots_ran, 0); + KTEST_EQUAL(call_counts[CCNT_TCP_OUTPUT], 0); /* No processing should occur */ + KTEST_EQUAL(tp->t_in_hpts, IHPTS_ONQUEUE); /* Connection still queued */ + + /* Wait for 498 more usecs and trigger the HPTS workers and verify + * nothing happens yet (total 499 usec) */ + KTEST_EQUAL(call_counts[CCNT_TCP_OUTPUT], 0); + test_time_usec += 498; + for (i = 0; i < pace->rp_num_hptss; i++) { + KTEST_LOG(ctx, "=> tcp_hptsi(%p)", pace->rp_ent[i]); + HPTS_LOCK(pace->rp_ent[i]); + NET_EPOCH_ENTER(et); + slots_ran = tcp_hptsi(pace->rp_ent[i], true); + HPTS_UNLOCK(pace->rp_ent[i]); + NET_EPOCH_EXIT(et); + + dump_hpts_entry(ctx, pace->rp_ent[i]); + KTEST_VERIFY(slots_ran >= 0); + KTEST_EQUAL(pace->rp_ent[i]->p_prev_slot, 49); + KTEST_EQUAL(pace->rp_ent[i]->p_cur_slot, 49); + } + + dump_tcpcb(tp); + KTEST_EQUAL(call_counts[CCNT_TCP_OUTPUT], 0); + KTEST_EQUAL(tp->t_in_hpts, IHPTS_ONQUEUE); + KTEST_EQUAL(tp->t_hpts_request, 0); + KTEST_EQUAL(tp->t_hpts_slot, HPTS_USEC_TO_SLOTS(500)); + KTEST_EQUAL(hpts->p_on_queue_cnt, 1); + + /* Wait for 1 more usec and trigger the HPTS workers and verify it + * triggers tcp_output this time */ + KTEST_EQUAL(call_counts[CCNT_TCP_OUTPUT], 0); + test_time_usec += 1; + for (i = 0; i < pace->rp_num_hptss; i++) { + KTEST_LOG(ctx, "=> tcp_hptsi(%p)", pace->rp_ent[i]); + HPTS_LOCK(pace->rp_ent[i]); + NET_EPOCH_ENTER(et); + slots_ran = tcp_hptsi(pace->rp_ent[i], true); + HPTS_UNLOCK(pace->rp_ent[i]); + NET_EPOCH_EXIT(et); + + dump_hpts_entry(ctx, pace->rp_ent[i]); + KTEST_VERIFY(slots_ran >= 0); + KTEST_EQUAL(pace->rp_ent[i]->p_prev_slot, 50); + 
KTEST_EQUAL(pace->rp_ent[i]->p_cur_slot, 50); + } + + dump_tcpcb(tp); + KTEST_EQUAL(call_counts[CCNT_TCP_OUTPUT], 1); + KTEST_EQUAL(tp->t_in_hpts, IHPTS_NONE); + KTEST_EQUAL(hpts->p_on_queue_cnt, 0); + + test_hpts_free_tcpcb(tp); + tcp_hptsi_stop(pace); + tcp_hptsi_destroy(pace); + + return (0); +} + +/* + * Validates HPTS scalability by creating and inserting a LOT of tcpcbs into + * the HPTS wheel, testing performance under high load conditions. + */ +KTEST_FUNC(scalability_tcpcbs) +{ + struct tcp_hptsi *pace; + struct tcpcb **tcpcbs; + uint32_t i, num_tcpcbs = 100000, total_queued = 0; + + test_hpts_init(); + + pace = tcp_hptsi_create(&test_funcs, false); + KTEST_NEQUAL(pace, NULL); + tcp_hptsi_start(pace); + + /* Allocate array to hold pointers to all tcpcbs */ + tcpcbs = malloc(num_tcpcbs * sizeof(struct tcpcb *), M_TCPHPTS, M_WAITOK | M_ZERO); + KTEST_VERIFY_RET(tcpcbs != NULL, ENOMEM); + + /* Create a LOT of tcpcbs */ + KTEST_LOG(ctx, "Creating %u tcpcbs...", num_tcpcbs); + for (i = 0; i < num_tcpcbs; i++) { + tcpcbs[i] = test_hpts_create_tcpcb(ctx, pace); + if (tcpcbs[i] == NULL) { + KTEST_ERR(ctx, "FAIL: tcpcbs[i] == NULL"); + return (EINVAL); + } + } + + /* Insert all created tcpcbs into HPTS */ + KTEST_LOG(ctx, "Inserting all tcpcbs into HPTS..."); + for (i = 0; i < num_tcpcbs; i++) { + INP_WLOCK(&tcpcbs[i]->t_inpcb); + tcpcbs[i]->t_flags2 |= TF2_HPTS_CALLS; + /* Insert with varying future timeouts to distribute across slots */ + tcp_hpts_insert(pace, tcpcbs[i], 100 + (i % 1000), NULL); + INP_WUNLOCK(&tcpcbs[i]->t_inpcb); + } + + /* Verify total queue counts across all CPUs */ + for (i = 0; i < pace->rp_num_hptss; i++) { + total_queued += pace->rp_ent[i]->p_on_queue_cnt; + } + KTEST_EQUAL(total_queued, num_tcpcbs); + + for (i = 0; i < pace->rp_num_hptss; i++) + dump_hpts_entry(ctx, pace->rp_ent[i]); + + /* Remove all tcpcbs from HPTS */ + KTEST_LOG(ctx, "Removing all tcpcbs from HPTS..."); + for (i = 0; i < num_tcpcbs; i++) { + INP_WLOCK(&tcpcbs[i]->t_inpcb); + if (tcpcbs[i]->t_in_hpts != IHPTS_NONE) { + tcp_hpts_remove(pace, tcpcbs[i]); + } + INP_WUNLOCK(&tcpcbs[i]->t_inpcb); + } + + /* Verify all queues are now empty */ + for (i = 0; i < pace->rp_num_hptss; i++) { + if (pace->rp_ent[i]->p_on_queue_cnt != 0) { + KTEST_ERR(ctx, "FAIL: pace->rp_ent[i]->p_on_queue_cnt != 0"); + return (EINVAL); + } + } + + for (i = 0; i < num_tcpcbs; i++) { + test_hpts_free_tcpcb(tcpcbs[i]); + } + free(tcpcbs, M_TCPHPTS); + tcp_hptsi_stop(pace); + tcp_hptsi_destroy(pace); + + return (0); +} + +/* + * Validates wheel wrap scenarios where the timer falls significantly behind + * and needs to process more than one full wheel revolution worth of slots. 
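In wheel terms, the advance used below exceeds a full revolution:

	/*
	 * One revolution = NUM_OF_HPTSI_SLOTS * HPTS_USECS_PER_SLOT
	 *                = 102400 * 10 = 1024000 usec (1.024 sec).
	 * The test advances (102400 + 5000) * 10 = 1074000 usec, so the
	 * wheel wraps and a single tcp_hptsi() pass can walk at most all
	 * slots once: slots_ran == NUM_OF_HPTSI_SLOTS - 1, as asserted.
	 */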
+ */ +KTEST_FUNC(wheel_wrap_recovery) +{ + struct epoch_tracker et; + struct tcp_hptsi *pace; + struct tcpcb **tcpcbs; + uint32_t i, timeout_usecs, num_tcpcbs = 500; + int32_t slots_ran; + + test_hpts_init(); + + pace = tcp_hptsi_create(&test_funcs, false); + KTEST_NEQUAL(pace, NULL); + tcp_hptsi_start(pace); + + /* Allocate array to hold pointers to tcpcbs */ + tcpcbs = malloc(num_tcpcbs * sizeof(struct tcpcb *), M_TCPHPTS, M_WAITOK | M_ZERO); + KTEST_VERIFY_RET(tcpcbs != NULL, ENOMEM); + + /* Create tcpcbs and insert them across many slots */ + for (i = 0; i < num_tcpcbs; i++) { + tcpcbs[i] = test_hpts_create_tcpcb(ctx, pace); + KTEST_NEQUAL(tcpcbs[i], NULL); + TP_REMOVE_FROM_HPTS(tcpcbs[i]) = 1; + + timeout_usecs = ((i * NUM_OF_HPTSI_SLOTS) / num_tcpcbs) * HPTS_USECS_PER_SLOT; /* Spread across slots */ + + INP_WLOCK(&tcpcbs[i]->t_inpcb); + tcpcbs[i]->t_flags2 |= TF2_HPTS_CALLS; + tcp_hpts_insert(pace, tcpcbs[i], timeout_usecs, NULL); + INP_WUNLOCK(&tcpcbs[i]->t_inpcb); + } + + /* Fast forward time significantly to trigger wheel wrap */ + test_time_usec += (NUM_OF_HPTSI_SLOTS + 5000) * HPTS_USECS_PER_SLOT; + + for (i = 0; i < pace->rp_num_hptss; i++) { + KTEST_LOG(ctx, "=> tcp_hptsi(%u)", i); + KTEST_NEQUAL(pace->rp_ent[i]->p_on_queue_cnt, 0); + + HPTS_LOCK(pace->rp_ent[i]); + NET_EPOCH_ENTER(et); + slots_ran = tcp_hptsi(pace->rp_ent[i], true); + HPTS_UNLOCK(pace->rp_ent[i]); + NET_EPOCH_EXIT(et); + + KTEST_EQUAL(slots_ran, NUM_OF_HPTSI_SLOTS-1); /* Should process all slots */ + KTEST_EQUAL(pace->rp_ent[i]->p_on_queue_cnt, 0); + KTEST_NEQUAL(pace->rp_ent[i]->p_cur_slot, + pace->rp_ent[i]->p_prev_slot); + } + + /* Cleanup */ + for (i = 0; i < num_tcpcbs; i++) { + INP_WLOCK(&tcpcbs[i]->t_inpcb); + if (tcpcbs[i]->t_in_hpts != IHPTS_NONE) { + tcp_hpts_remove(pace, tcpcbs[i]); + } + INP_WUNLOCK(&tcpcbs[i]->t_inpcb); + test_hpts_free_tcpcb(tcpcbs[i]); + } + free(tcpcbs, M_TCPHPTS); + tcp_hptsi_stop(pace); + tcp_hptsi_destroy(pace); + + return (0); +} + +/* + * Validates proper handling of tcpcbs in the IHPTS_MOVING state, which occurs + * when a tcpcb is being processed by the HPTS thread but gets removed. 
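For reference, the t_in_hpts states this test manipulates (the enum itself lives in tcp_var.h; the semantics below are inferred from the test's own usage):

	/*
	 * IHPTS_NONE    - tcpcb is not on the wheel.
	 * IHPTS_ONQUEUE - tcpcb is linked into a wheel slot awaiting service.
	 * IHPTS_MOVING  - the hpts thread holds the tcpcb aside while the
	 *                 slot is processed; t_hpts_slot == -1 additionally
	 *                 marks it for removal instead of re-insertion.
	 */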
+ */ +KTEST_FUNC(tcpcb_moving_state) +{ + struct epoch_tracker et; + struct tcp_hptsi *pace; + struct tcpcb *tp1, *tp2; + struct tcp_hpts_entry *hpts; + int32_t slots_ran; + + test_hpts_init(); + + pace = tcp_hptsi_create(&test_funcs, false); + KTEST_NEQUAL(pace, NULL); + tcp_hptsi_start(pace); + + /* Create two tcpcbs on the same CPU/slot */ + tp1 = test_hpts_create_tcpcb(ctx, pace); + tp2 = test_hpts_create_tcpcb(ctx, pace); + KTEST_NEQUAL(tp1, NULL); + KTEST_NEQUAL(tp2, NULL); + + /* Force them to the same CPU for predictable testing */ + tp1->t_hpts_cpu = 0; + tp2->t_hpts_cpu = 0; + + /* Insert both into the same slot */ + INP_WLOCK(&tp1->t_inpcb); + tp1->t_flags2 |= TF2_HPTS_CALLS; + tcp_hpts_insert(pace, tp1, 100, NULL); + INP_WUNLOCK(&tp1->t_inpcb); + + INP_WLOCK(&tp2->t_inpcb); + tp2->t_flags2 |= TF2_HPTS_CALLS; + tcp_hpts_insert(pace, tp2, 100, NULL); + INP_WUNLOCK(&tp2->t_inpcb); + + hpts = pace->rp_ent[0]; + + /* Manually transition tp1 to MOVING state to simulate race condition */ + HPTS_LOCK(hpts); + tp1->t_in_hpts = IHPTS_MOVING; + tp1->t_hpts_slot = -1; /* Mark for removal */ + HPTS_UNLOCK(hpts); + + /* Set time and run HPTS to process the moving state */ + test_time_usec += 100; + HPTS_LOCK(hpts); + NET_EPOCH_ENTER(et); + slots_ran = tcp_hptsi(hpts, true); + HPTS_UNLOCK(hpts); + NET_EPOCH_EXIT(et); + + KTEST_VERIFY(slots_ran >= 0); + KTEST_EQUAL(call_counts[CCNT_TCP_OUTPUT], 1); /* Shouldn't call on both */ + + /* tp1 should be cleaned up and removed */ + KTEST_EQUAL(tp1->t_in_hpts, IHPTS_NONE); + /* tp2 should have been processed normally */ + KTEST_EQUAL(tp2->t_in_hpts, IHPTS_NONE); + + test_hpts_free_tcpcb(tp1); + test_hpts_free_tcpcb(tp2); + tcp_hptsi_stop(pace); + tcp_hptsi_destroy(pace); + + return (0); +} + +/* + * Validates that tcpcbs with deferred requests (t_hpts_request > 0) are + * properly handled and re-inserted into appropriate future slots after + * the wheel processes enough slots to accommodate the original request. 
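Rough numbers behind the two oversized requests used below (10 usec per slot):

	/*
	 * large: (102400 + 5000) slots requested vs. 102400 on the wheel
	 *        -> ~5000 slots are parked in t_hpts_request at insert.
	 * huge:  3 * 102400 slots -> ~2 * 102400 remain after the insert,
	 *        and each full wheel pass retires at most NUM_OF_HPTSI_SLOTS
	 *        of the remainder, hence the multi-cycle loop in the test.
	 */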
+ */ +KTEST_FUNC(deferred_requests) +{ + struct epoch_tracker et; + struct tcp_hptsi *pace; + struct tcpcb *tp, *tp2; + struct tcp_hpts_entry *hpts; + uint32_t large_timeout_usecs = (NUM_OF_HPTSI_SLOTS + 5000) * HPTS_USECS_PER_SLOT; /* Beyond wheel capacity */ + uint32_t huge_timeout_usecs = (NUM_OF_HPTSI_SLOTS * 3) * HPTS_USECS_PER_SLOT; /* 3x wheel capacity */ + uint32_t initial_request; + int32_t slots_ran; + + test_hpts_init(); + + pace = tcp_hptsi_create(&test_funcs, false); + KTEST_NEQUAL(pace, NULL); + tcp_hptsi_start(pace); + + tp = test_hpts_create_tcpcb(ctx, pace); + KTEST_NEQUAL(tp, NULL); + + /* Insert with a request that exceeds current wheel capacity */ + INP_WLOCK(&tp->t_inpcb); + tp->t_flags2 |= TF2_HPTS_CALLS; + tcp_hpts_insert(pace, tp, large_timeout_usecs, NULL); + INP_WUNLOCK(&tp->t_inpcb); + + /* Verify it was inserted with a deferred request */ + dump_tcpcb(tp); + KTEST_EQUAL(tp->t_in_hpts, IHPTS_ONQUEUE); + KTEST_VERIFY(tp->t_hpts_request > 0); + KTEST_VERIFY(tp->t_hpts_slot < NUM_OF_HPTSI_SLOTS); + + hpts = pace->rp_ent[tp->t_hpts_cpu]; + + /* Advance time to process deferred requests */ + test_time_usec += NUM_OF_HPTSI_SLOTS * HPTS_USECS_PER_SLOT; + + /* Process the wheel to handle deferred requests */ + HPTS_LOCK(hpts); + NET_EPOCH_ENTER(et); + slots_ran = tcp_hptsi(hpts, true); + HPTS_UNLOCK(hpts); + NET_EPOCH_EXIT(et); + + dump_hpts_entry(ctx, hpts); + KTEST_GREATER_THAN(slots_ran, 0); + dump_tcpcb(tp); + KTEST_EQUAL(tp->t_hpts_request, 0); + + /* Test incremental deferred request processing over multiple cycles */ + KTEST_LOG(ctx, "Testing incremental deferred request processing"); + + /* Create a new connection with an even larger request */ + tp2 = test_hpts_create_tcpcb(ctx, pace); + KTEST_NEQUAL(tp2, NULL); + tp2->t_hpts_cpu = tp->t_hpts_cpu; /* Same CPU for predictable testing */ + + INP_WLOCK(&tp2->t_inpcb); + tp2->t_flags2 |= TF2_HPTS_CALLS; + tcp_hpts_insert(pace, tp2, huge_timeout_usecs, NULL); + INP_WUNLOCK(&tp2->t_inpcb); + + /* Verify initial deferred request */ + initial_request = tp2->t_hpts_request; + KTEST_VERIFY(initial_request > NUM_OF_HPTSI_SLOTS); + + /* Process one wheel cycle - should reduce but not eliminate request */ + test_time_usec += NUM_OF_HPTSI_SLOTS * HPTS_USECS_PER_SLOT; + HPTS_LOCK(hpts); + NET_EPOCH_ENTER(et); + slots_ran = tcp_hptsi(hpts, true); + HPTS_UNLOCK(hpts); + NET_EPOCH_EXIT(et); + + /* Request should be reduced but not zero */ + KTEST_GREATER_THAN(initial_request, tp2->t_hpts_request); + KTEST_VERIFY(tp2->t_hpts_request > 0); + KTEST_EQUAL(tp2->t_in_hpts, IHPTS_ONQUEUE); /* Still queued */ + + /* For huge_timeout_usecs = NUM_OF_HPTSI_SLOTS * 3 * HPTS_USECS_PER_SLOT, we need ~3 cycles to complete. + * Each cycle can reduce the request by at most NUM_OF_HPTSI_SLOTS. 
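+	 * Roughly, in slot units:
+	 *	after cycle 1: request ~= 2 * NUM_OF_HPTSI_SLOTS
+	 *	after cycle 2: request ~= 1 * NUM_OF_HPTSI_SLOTS
+	 *	after cycle 3: request reaches 0 and the tcpcb is due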
*/ + test_time_usec += NUM_OF_HPTSI_SLOTS * HPTS_USECS_PER_SLOT; + HPTS_LOCK(hpts); + NET_EPOCH_ENTER(et); + slots_ran = tcp_hptsi(hpts, true); + HPTS_UNLOCK(hpts); + NET_EPOCH_EXIT(et); + + /* After second cycle, request should be reduced significantly (likely by ~NUM_OF_HPTSI_SLOTS) */ + KTEST_VERIFY(tp2->t_hpts_request < initial_request); + KTEST_VERIFY(tp2->t_hpts_request > 0); /* But not yet zero for such a large request */ + + /* Clean up second connection */ + INP_WLOCK(&tp2->t_inpcb); + if (tp2->t_in_hpts != IHPTS_NONE) { + tcp_hpts_remove(pace, tp2); + } + INP_WUNLOCK(&tp2->t_inpcb); + test_hpts_free_tcpcb(tp2); + + /* Clean up */ + INP_WLOCK(&tp->t_inpcb); + if (tp->t_in_hpts != IHPTS_NONE) { + tcp_hpts_remove(pace, tp); + } + INP_WUNLOCK(&tp->t_inpcb); + test_hpts_free_tcpcb(tp); + tcp_hptsi_stop(pace); + tcp_hptsi_destroy(pace); + + return (0); +} + +/* + * Validates CPU assignment and affinity mechanisms, including flowid-based + * assignment, random fallback scenarios, and explicit CPU setting. Tests + * the actual cpu assignment logic in hpts_cpuid via tcp_set_hpts. + */ +KTEST_FUNC(cpu_assignment) +{ + struct tcp_hptsi *pace; + struct tcpcb *tp1, *tp2, *tp2_dup, *tp3; + + test_hpts_init(); + + pace = tcp_hptsi_create(&test_funcs, false); + KTEST_NEQUAL(pace, NULL); + + /* Test random CPU assignment (no flowid) */ + tp1 = test_hpts_create_tcpcb(ctx, pace); + KTEST_NEQUAL(tp1, NULL); + tp1->t_inpcb.inp_flowtype = M_HASHTYPE_NONE; + INP_WLOCK(&tp1->t_inpcb); + tcp_set_hpts(pace, tp1); + INP_WUNLOCK(&tp1->t_inpcb); + KTEST_VERIFY(tp1->t_hpts_cpu < pace->rp_num_hptss); + KTEST_VERIFY(tp1->t_flags2 & TF2_HPTS_CPU_SET); + + /* Test flowid-based assignment */ + tp2 = test_hpts_create_tcpcb(ctx, pace); + KTEST_NEQUAL(tp2, NULL); + tp2->t_inpcb.inp_flowtype = M_HASHTYPE_RSS_TCP_IPV4; + tp2->t_inpcb.inp_flowid = 12345; + INP_WLOCK(&tp2->t_inpcb); + tcp_set_hpts(pace, tp2); + INP_WUNLOCK(&tp2->t_inpcb); + KTEST_VERIFY(tp2->t_hpts_cpu < pace->rp_num_hptss); + KTEST_VERIFY(tp2->t_flags2 & TF2_HPTS_CPU_SET); + + /* With the same flowid, should get same CPU assignment */ + tp2_dup = test_hpts_create_tcpcb(ctx, pace); + KTEST_NEQUAL(tp2_dup, NULL); + tp2_dup->t_inpcb.inp_flowtype = M_HASHTYPE_RSS_TCP_IPV4; + tp2_dup->t_inpcb.inp_flowid = 12345; + INP_WLOCK(&tp2_dup->t_inpcb); + tcp_set_hpts(pace, tp2_dup); + INP_WUNLOCK(&tp2_dup->t_inpcb); + KTEST_EQUAL(tp2_dup->t_hpts_cpu, tp2->t_hpts_cpu); + + /* Test explicit CPU setting */ + tp3 = test_hpts_create_tcpcb(ctx, pace); + KTEST_NEQUAL(tp3, NULL); + tp3->t_hpts_cpu = 1; /* Assume we have at least 2 CPUs */ + tp3->t_flags2 |= TF2_HPTS_CPU_SET; + INP_WLOCK(&tp3->t_inpcb); + tcp_set_hpts(pace, tp3); + INP_WUNLOCK(&tp3->t_inpcb); + KTEST_EQUAL(tp3->t_hpts_cpu, 1); + + test_hpts_free_tcpcb(tp1); + test_hpts_free_tcpcb(tp2); + test_hpts_free_tcpcb(tp2_dup); + test_hpts_free_tcpcb(tp3); + tcp_hptsi_destroy(pace); + + return (0); +} + +/* + * Validates edge cases in slot calculation including boundary conditions + * around slot 0, maximum slots, and slot wrapping arithmetic. 
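+ *
+ * Conceptually the slot math is modular (a sketch; the real conversion
+ * lives in the HPTS insert path):
+ *
+ *	slot = (p_cur_slot + timeout_usecs / HPTS_USECS_PER_SLOT)
+ *	    % NUM_OF_HPTSI_SLOTS;
+ *
+ * so a timeout of 0 maps onto the current slot and the largest in-wheel
+ * timeout, (NUM_OF_HPTSI_SLOTS - 1) * HPTS_USECS_PER_SLOT, lands on the
+ * slot just behind it.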
+ */ +KTEST_FUNC(slot_boundary_conditions) +{ + struct tcp_hptsi *pace; + struct tcpcb *tp; + + test_hpts_init(); + + pace = tcp_hptsi_create(&test_funcs, false); + KTEST_NEQUAL(pace, NULL); + tcp_hptsi_start(pace); + + /* Test insertion at slot 0 */ + tp = test_hpts_create_tcpcb(ctx, pace); + KTEST_NEQUAL(tp, NULL); + INP_WLOCK(&tp->t_inpcb); + tp->t_flags2 |= TF2_HPTS_CALLS; + tcp_hpts_insert(pace, tp, 0, NULL); /* Should insert immediately (0 timeout) */ + INP_WUNLOCK(&tp->t_inpcb); + KTEST_EQUAL(tp->t_in_hpts, IHPTS_ONQUEUE); + KTEST_VERIFY(tp->t_hpts_slot < NUM_OF_HPTSI_SLOTS); + + INP_WLOCK(&tp->t_inpcb); + tcp_hpts_remove(pace, tp); + INP_WUNLOCK(&tp->t_inpcb); + + /* Test insertion at maximum slot value */ + INP_WLOCK(&tp->t_inpcb); + tp->t_flags2 |= TF2_HPTS_CALLS; + tcp_hpts_insert(pace, tp, (NUM_OF_HPTSI_SLOTS - 1) * HPTS_USECS_PER_SLOT, NULL); + INP_WUNLOCK(&tp->t_inpcb); + KTEST_EQUAL(tp->t_in_hpts, IHPTS_ONQUEUE); + + INP_WLOCK(&tp->t_inpcb); + tcp_hpts_remove(pace, tp); + INP_WUNLOCK(&tp->t_inpcb); + + /* Test very small timeout values */ + INP_WLOCK(&tp->t_inpcb); + tp->t_flags2 |= TF2_HPTS_CALLS; + tcp_hpts_insert(pace, tp, 1, NULL); + INP_WUNLOCK(&tp->t_inpcb); + KTEST_EQUAL(tp->t_in_hpts, IHPTS_ONQUEUE); + KTEST_EQUAL(tp->t_hpts_slot, HPTS_USEC_TO_SLOTS(1)); /* Should convert 1 usec to slot */ + + INP_WLOCK(&tp->t_inpcb); + tcp_hpts_remove(pace, tp); + INP_WUNLOCK(&tp->t_inpcb); + + test_hpts_free_tcpcb(tp); + tcp_hptsi_stop(pace); + tcp_hptsi_destroy(pace); + + return (0); +} + +/* + * Validates HPTS behavior under high load conditions, including proper + * processing of many connections and connection count tracking. + */ +KTEST_FUNC(dynamic_sleep_adjustment) +{ + struct epoch_tracker et; + struct tcp_hptsi *pace; + struct tcpcb **tcpcbs; + struct tcp_hpts_entry *hpts; + uint32_t i, num_tcpcbs = DEFAULT_CONNECTION_THRESHOLD + 50; + int32_t slots_ran; + + test_hpts_init(); + + pace = tcp_hptsi_create(&test_funcs, false); + KTEST_NEQUAL(pace, NULL); + tcp_hptsi_start(pace); + + /* Create many connections to exceed threshold */ + tcpcbs = malloc(num_tcpcbs * sizeof(struct tcpcb *), M_TCPHPTS, M_WAITOK | M_ZERO); + KTEST_VERIFY_RET(tcpcbs != NULL, ENOMEM); + + for (i = 0; i < num_tcpcbs; i++) { + tcpcbs[i] = test_hpts_create_tcpcb(ctx, pace); + KTEST_NEQUAL(tcpcbs[i], NULL); + tcpcbs[i]->t_hpts_cpu = 0; /* Force all to CPU 0 */ + INP_WLOCK(&tcpcbs[i]->t_inpcb); + tcpcbs[i]->t_flags2 |= TF2_HPTS_CALLS; + TP_REMOVE_FROM_HPTS(tcpcbs[i]) = 1; /* Will be removed after output */ + tcp_hpts_insert(pace, tcpcbs[i], 100, NULL); + INP_WUNLOCK(&tcpcbs[i]->t_inpcb); + } + + hpts = pace->rp_ent[0]; + dump_hpts_entry(ctx, hpts); + + /* Verify we're above threshold */ + KTEST_GREATER_THAN(hpts->p_on_queue_cnt, DEFAULT_CONNECTION_THRESHOLD); + + /* Run HPTS to process many connections */ + test_time_usec += 100; + HPTS_LOCK(hpts); + NET_EPOCH_ENTER(et); + slots_ran = tcp_hptsi(hpts, true); + HPTS_UNLOCK(hpts); + NET_EPOCH_EXIT(et); + + /* Verify HPTS processed slots and connections correctly */ + KTEST_GREATER_THAN(slots_ran, 0); + KTEST_EQUAL(call_counts[CCNT_TCP_OUTPUT], num_tcpcbs); + + /* Verify all connections were removed from queue */ + KTEST_EQUAL(hpts->p_on_queue_cnt, 0); + + /* Cleanup */ + for (i = 0; i < num_tcpcbs; i++) { + test_hpts_free_tcpcb(tcpcbs[i]); + } + free(tcpcbs, M_TCPHPTS); + tcp_hptsi_stop(pace); + tcp_hptsi_destroy(pace); + + return (0); +} + +/* + * Validates handling of concurrent insert/remove operations and race conditions + * between HPTS processing and 
user operations. + */ +KTEST_FUNC(concurrent_operations) +{ + struct tcp_hptsi *pace; + struct tcpcb *tp1, *tp2; + struct tcp_hpts_entry *hpts; + + test_hpts_init(); + + pace = tcp_hptsi_create(&test_funcs, false); + KTEST_NEQUAL(pace, NULL); + tcp_hptsi_start(pace); + + tp1 = test_hpts_create_tcpcb(ctx, pace); + tp2 = test_hpts_create_tcpcb(ctx, pace); + KTEST_NEQUAL(tp1, NULL); + KTEST_NEQUAL(tp2, NULL); + + /* Force all to CPU 0 */ + tp1->t_hpts_cpu = 0; + tp2->t_hpts_cpu = 0; + + /* Insert tp1 */ + INP_WLOCK(&tp1->t_inpcb); + tp1->t_flags2 |= TF2_HPTS_CALLS; + tcp_hpts_insert(pace, tp1, 100, NULL); + INP_WUNLOCK(&tp1->t_inpcb); + + /* Insert tp2 into same slot */ + INP_WLOCK(&tp2->t_inpcb); + tp2->t_flags2 |= TF2_HPTS_CALLS; + tcp_hpts_insert(pace, tp2, 100, NULL); + INP_WUNLOCK(&tp2->t_inpcb); + + /* Verify both are inserted */ + KTEST_EQUAL(tp1->t_in_hpts, IHPTS_ONQUEUE); + KTEST_EQUAL(tp2->t_in_hpts, IHPTS_ONQUEUE); + + /* Verify they're both assigned to the same slot */ + KTEST_EQUAL(tp1->t_hpts_slot, tp2->t_hpts_slot); + + /* Verify queue count reflects both connections */ + KTEST_EQUAL(tp1->t_hpts_cpu, tp2->t_hpts_cpu); /* Should be on same CPU */ + hpts = pace->rp_ent[tp1->t_hpts_cpu]; + KTEST_EQUAL(hpts->p_on_queue_cnt, 2); + + /* Remove tp1 while tp2 is still there */ + INP_WLOCK(&tp1->t_inpcb); + tcp_hpts_remove(pace, tp1); + INP_WUNLOCK(&tp1->t_inpcb); + + /* Verify tp1 removed, tp2 still there */ + KTEST_EQUAL(tp1->t_in_hpts, IHPTS_NONE); + KTEST_EQUAL(tp2->t_in_hpts, IHPTS_ONQUEUE); + + /* Verify queue count decreased by one */ + KTEST_EQUAL(hpts->p_on_queue_cnt, 1); + + /* Remove tp2 */ + INP_WLOCK(&tp2->t_inpcb); + tcp_hpts_remove(pace, tp2); + INP_WUNLOCK(&tp2->t_inpcb); + + KTEST_EQUAL(tp2->t_in_hpts, IHPTS_NONE); + + /* Verify queue is now completely empty */ + KTEST_EQUAL(hpts->p_on_queue_cnt, 0); + + test_hpts_free_tcpcb(tp1); + test_hpts_free_tcpcb(tp2); + tcp_hptsi_stop(pace); + tcp_hptsi_destroy(pace); + + return (0); +} + +/* + * Validates the queued segments processing path via tfb_do_queued_segments, + * which is an alternative to direct tcp_output calls. 
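+ *
+ * A stack opts in to this path essentially the way the test does below
+ * (in real use the mbufs are queued by LRO rather than hand-rolled):
+ *
+ *	tp->t_flags2 |= TF2_SUPPORTS_MBUFQ;
+ *	STAILQ_INSERT_TAIL(&tp->t_inqueue, m, m_stailqpkt);
+ *
+ * after which HPTS invokes tp->t_fb->tfb_do_queued_segments() for the
+ * connection instead of tcp_output().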
+ */ +KTEST_FUNC(queued_segments_processing) +{ + struct epoch_tracker et; + struct tcp_hptsi *pace; + struct tcpcb *tp; + struct tcp_hpts_entry *hpts; + struct mbuf *fake_mbuf; + int32_t slots_ran; + + test_hpts_init(); + + pace = tcp_hptsi_create(&test_funcs, false); + KTEST_NEQUAL(pace, NULL); + tcp_hptsi_start(pace); + + tp = test_hpts_create_tcpcb(ctx, pace); + KTEST_NEQUAL(tp, NULL); + + /* Create a minimal fake mbuf that has valid STAILQ pointers */ + fake_mbuf = malloc(sizeof(struct mbuf), M_TCPHPTS, M_WAITOK | M_ZERO); + KTEST_NEQUAL(fake_mbuf, NULL); + + /* Set up for queued segments path */ + tp->t_flags2 |= (TF2_HPTS_CALLS | TF2_SUPPORTS_MBUFQ); + STAILQ_INSERT_TAIL(&tp->t_inqueue, fake_mbuf, m_stailqpkt); + + INP_WLOCK(&tp->t_inpcb); + tcp_hpts_insert(pace, tp, 100, NULL); + INP_WUNLOCK(&tp->t_inpcb); + + hpts = pace->rp_ent[tp->t_hpts_cpu]; + + /* Run HPTS and verify queued segments path is taken */ + test_time_usec += 100; + HPTS_LOCK(hpts); + NET_EPOCH_ENTER(et); + slots_ran = tcp_hptsi(hpts, true); + HPTS_UNLOCK(hpts); + NET_EPOCH_EXIT(et); + + KTEST_VERIFY(slots_ran >= 0); + KTEST_EQUAL(call_counts[CCNT_TCP_TFB_DO_QUEUED_SEGMENTS], 1); + + /* Connection should be removed from HPTS after processing */ + KTEST_EQUAL(tp->t_in_hpts, IHPTS_NONE); + + /* Clean up the fake mbuf if it's still in the queue */ + if (!STAILQ_EMPTY(&tp->t_inqueue)) { + struct mbuf *m = STAILQ_FIRST(&tp->t_inqueue); + STAILQ_REMOVE_HEAD(&tp->t_inqueue, m_stailqpkt); + free(m, M_TCPHPTS); + } + + test_hpts_free_tcpcb(tp); + tcp_hptsi_stop(pace); + tcp_hptsi_destroy(pace); + + return (0); +} + +/* + * Validates the direct wake mechanism and wake inhibition logic when + * the connection count exceeds thresholds. + */ +KTEST_FUNC(direct_wake_mechanism) +{ + struct tcp_hptsi *pace; + struct tcpcb *tp; + struct tcp_hpts_entry *hpts; + + test_hpts_init(); + + pace = tcp_hptsi_create(&test_funcs, false); + KTEST_NEQUAL(pace, NULL); + tcp_hptsi_start(pace); + + tp = test_hpts_create_tcpcb(ctx, pace); + KTEST_NEQUAL(tp, NULL); + hpts = pace->rp_ent[tp->t_hpts_cpu]; + + /* Test direct wake when not over threshold */ + HPTS_LOCK(hpts); + hpts->p_on_queue_cnt = 50; /* Below threshold */ + hpts->p_hpts_wake_scheduled = 0; + tcp_hpts_wake(hpts); + KTEST_EQUAL(hpts->p_hpts_wake_scheduled, 1); + KTEST_EQUAL(call_counts[CCNT_SWI_SCHED], 1); + HPTS_UNLOCK(hpts); + + /* Reset for next test */ + hpts->p_hpts_wake_scheduled = 0; + call_counts[CCNT_SWI_SCHED] = 0; + + /* Test wake inhibition when over threshold */ + HPTS_LOCK(hpts); + hpts->p_on_queue_cnt = 200; /* Above threshold */ + hpts->p_direct_wake = 1; /* Request direct wake */ + tcp_hpts_wake(hpts); + KTEST_EQUAL(hpts->p_hpts_wake_scheduled, 0); /* Should be inhibited */ + KTEST_EQUAL(hpts->p_direct_wake, 0); /* Should be cleared */ + KTEST_EQUAL(call_counts[CCNT_SWI_SCHED], 0); /* No SWI scheduled */ + HPTS_UNLOCK(hpts); + + test_hpts_free_tcpcb(tp); + tcp_hptsi_stop(pace); + tcp_hptsi_destroy(pace); + + return (0); +} + +/* + * Validates HPTS collision detection when attempting to run HPTS while + * it's already active. 
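+ *
+ * The guard amounts to checking p_hpts_active under HPTS_LOCK, along
+ * the lines of (sketch):
+ *
+ *	if (hpts->p_hpts_active && from_callout == false)
+ *		return (0);	-- someone else already owns the wheel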
+ */ +KTEST_FUNC(hpts_collision_detection) +{ + struct epoch_tracker et; + struct tcp_hptsi *pace; + struct tcp_hpts_entry *hpts; + int32_t slots_ran; + + test_hpts_init(); + + pace = tcp_hptsi_create(&test_funcs, false); + KTEST_NEQUAL(pace, NULL); + tcp_hptsi_start(pace); + + hpts = pace->rp_ent[0]; + + /* Mark HPTS as active */ + HPTS_LOCK(hpts); + hpts->p_hpts_active = 1; + HPTS_UNLOCK(hpts); + + /* Attempt to run HPTS again - should detect collision */ + HPTS_LOCK(hpts); + NET_EPOCH_ENTER(et); + slots_ran = tcp_hptsi(hpts, false); /* from_callout = false */ + HPTS_UNLOCK(hpts); + NET_EPOCH_EXIT(et); + + /* Should return 0 indicating no work done due to collision */ + KTEST_EQUAL(slots_ran, 0); + + tcp_hptsi_stop(pace); + tcp_hptsi_destroy(pace); + + return (0); +} + +/* + * Validates generation count handling for race condition detection between + * HPTS processing and connection insertion/removal operations. + */ +KTEST_FUNC(generation_count_validation) +{ + struct epoch_tracker et; + struct tcp_hptsi *pace; + struct tcp_hpts_entry *hpts; + struct tcpcb *tp1, *tp2; + uint32_t initial_gencnt, slot_to_test = 10; + uint32_t timeout_usecs = slot_to_test * HPTS_USECS_PER_SLOT; + uint32_t tp2_original_gencnt; + int32_t slots_ran; + + test_hpts_init(); + + pace = tcp_hptsi_create(&test_funcs, false); + KTEST_NEQUAL(pace, NULL); + tcp_hptsi_start(pace); + + hpts = pace->rp_ent[0]; + + /* Record initial generation count for the test slot */ + initial_gencnt = hpts->p_hptss[slot_to_test].gencnt; + + /* Create and insert first connection */ + tp1 = test_hpts_create_tcpcb(ctx, pace); + KTEST_NEQUAL(tp1, NULL); + tp1->t_hpts_cpu = 0; /* Force to CPU 0 */ + + INP_WLOCK(&tp1->t_inpcb); + tp1->t_flags2 |= TF2_HPTS_CALLS; + tcp_hpts_insert(pace, tp1, timeout_usecs, NULL); + INP_WUNLOCK(&tp1->t_inpcb); + + /* Verify connection stored the generation count */ + KTEST_EQUAL(tp1->t_in_hpts, IHPTS_ONQUEUE); + KTEST_EQUAL(tp1->t_hpts_slot, slot_to_test); + KTEST_EQUAL(tp1->t_hpts_gencnt, initial_gencnt); + + /* Create second connection but don't insert yet */ + tp2 = test_hpts_create_tcpcb(ctx, pace); + KTEST_NEQUAL(tp2, NULL); + tp2->t_hpts_cpu = 0; /* Force to CPU 0 */ + + /* Force generation count increment by processing the slot */ + test_time_usec += (slot_to_test + 1) * HPTS_USECS_PER_SLOT; + HPTS_LOCK(hpts); + NET_EPOCH_ENTER(et); + slots_ran = tcp_hptsi(hpts, true); + HPTS_UNLOCK(hpts); + NET_EPOCH_EXIT(et); + + /* Verify processing occurred */ + KTEST_VERIFY(slots_ran > 0); + KTEST_EQUAL(call_counts[CCNT_TCP_OUTPUT], 1); + + /* Verify generation count was incremented */ + KTEST_EQUAL(hpts->p_hptss[slot_to_test].gencnt, initial_gencnt + 1); + + /* Verify first connection was processed and removed */ + KTEST_EQUAL(tp1->t_in_hpts, IHPTS_NONE); + + /* Insert second connection and record its generation count */ + INP_WLOCK(&tp2->t_inpcb); + tp2->t_flags2 |= TF2_HPTS_CALLS; + tcp_hpts_insert(pace, tp2, timeout_usecs, NULL); + INP_WUNLOCK(&tp2->t_inpcb); + + /* Verify connection was inserted successfully */ + KTEST_EQUAL(tp2->t_in_hpts, IHPTS_ONQUEUE); + + /* Record the generation count that tp2 received */ + tp2_original_gencnt = tp2->t_hpts_gencnt; + + /* Test generation count mismatch detection during processing */ + /* Manually set stale generation count to simulate race condition */ + tp2->t_hpts_gencnt = tp2_original_gencnt + 100; /* Force a mismatch */ + + /* Process the slot to trigger generation count validation */ + test_time_usec += (slot_to_test + 1) * HPTS_USECS_PER_SLOT; + HPTS_LOCK(hpts); + 
NET_EPOCH_ENTER(et); + slots_ran = tcp_hptsi(hpts, true); + HPTS_UNLOCK(hpts); + NET_EPOCH_EXIT(et); + + /* Connection should be processed despite generation count mismatch */ + KTEST_EQUAL(tp2->t_in_hpts, IHPTS_NONE); /* Processed and released */ + + /* The key test: HPTS should handle mismatched generation counts gracefully */ + KTEST_VERIFY(slots_ran > 0); /* Processing should still occur */ + + test_hpts_free_tcpcb(tp1); + test_hpts_free_tcpcb(tp2); + tcp_hptsi_stop(pace); + tcp_hptsi_destroy(pace); + + return (0); +} + +static const struct ktest_test_info tests[] = { + KTEST_INFO(module_load), + KTEST_INFO(hptsi_create_destroy), + KTEST_INFO(hptsi_start_stop), + KTEST_INFO(hptsi_independence), + KTEST_INFO(function_injection), + KTEST_INFO(tcpcb_initialization), + KTEST_INFO(tcpcb_insertion), + KTEST_INFO(timer_functionality), + KTEST_INFO(scalability_tcpcbs), + KTEST_INFO(wheel_wrap_recovery), + KTEST_INFO(tcpcb_moving_state), + KTEST_INFO(deferred_requests), + KTEST_INFO(cpu_assignment), + KTEST_INFO(slot_boundary_conditions), + KTEST_INFO(dynamic_sleep_adjustment), + KTEST_INFO(concurrent_operations), + KTEST_INFO(queued_segments_processing), + KTEST_INFO(direct_wake_mechanism), + KTEST_INFO(hpts_collision_detection), + KTEST_INFO(generation_count_validation), +}; + +#else /* TCP_HPTS_KTEST */ + +/* + * Stub to indicate that the TCP HPTS ktest is not enabled. + */ +KTEST_FUNC(module_load_without_tests) +{ + KTEST_LOG(ctx, "Warning: TCP HPTS ktest is not enabled"); + return (0); +} + +static const struct ktest_test_info tests[] = { + KTEST_INFO(module_load_without_tests), +}; + +#endif + +KTEST_MODULE_DECLARE(ktest_tcphpts, tests); +KTEST_MODULE_DEPEND(ktest_tcphpts, tcphpts); diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c index dd27ec77c1af..2146b0cac48f 100644 --- a/sys/netinet/tcp_input.c +++ b/sys/netinet/tcp_input.c @@ -219,7 +219,7 @@ SYSCTL_INT(_net_inet_tcp, OID_AUTO, recvbuf_auto, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_do_autorcvbuf), 0, "Enable automatic receive buffer sizing"); -VNET_DEFINE(int, tcp_autorcvbuf_max) = 2*1024*1024; +VNET_DEFINE(int, tcp_autorcvbuf_max) = 8*1024*1024; SYSCTL_INT(_net_inet_tcp, OID_AUTO, recvbuf_max, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_autorcvbuf_max), 0, "Max size of automatic receive buffer"); diff --git a/sys/netinet/tcp_lro.c b/sys/netinet/tcp_lro.c index 64efa4bf060f..9b5baf115855 100644 --- a/sys/netinet/tcp_lro.c +++ b/sys/netinet/tcp_lro.c @@ -1475,10 +1475,11 @@ tcp_lro_queue_mbuf(struct lro_ctrl *lc, struct mbuf *mb) } /* create sequence number */ - lc->lro_mbuf_data[lc->lro_mbuf_count].seq = - (((uint64_t)M_HASHTYPE_GET(mb)) << 56) | - (((uint64_t)mb->m_pkthdr.flowid) << 24) | - ((uint64_t)lc->lro_mbuf_count); + lc->lro_mbuf_data[lc->lro_mbuf_count].seq = lc->lro_mbuf_count; + if (M_HASHTYPE_ISHASH(mb)) + lc->lro_mbuf_data[lc->lro_mbuf_count].seq |= + (((uint64_t)M_HASHTYPE_GET(mb)) << 56) | + (((uint64_t)mb->m_pkthdr.flowid) << 24); /* enter mbuf */ lc->lro_mbuf_data[lc->lro_mbuf_count].mb = mb; diff --git a/sys/netinet/tcp_lro_hpts.c b/sys/netinet/tcp_lro_hpts.c index 43587285fe26..ac1a27a4290a 100644 --- a/sys/netinet/tcp_lro_hpts.c +++ b/sys/netinet/tcp_lro_hpts.c @@ -29,6 +29,8 @@ #include "opt_inet6.h" #include <sys/param.h> +#include <sys/bus.h> +#include <sys/interrupt.h> #include <sys/systm.h> #include <sys/kernel.h> #include <sys/malloc.h> @@ -62,6 +64,7 @@ #include <netinet/tcp_lro.h> #include <netinet/tcp_var.h> #include <netinet/tcp_hpts.h> +#include <netinet/tcp_hpts_internal.h> #ifdef 
TCP_BLACKBOX #include <netinet/tcp_log_buf.h> #endif diff --git a/sys/netinet/tcp_output.c b/sys/netinet/tcp_output.c index 2dfb7faf56e3..208f72c4661c 100644 --- a/sys/netinet/tcp_output.c +++ b/sys/netinet/tcp_output.c @@ -123,7 +123,7 @@ SYSCTL_INT(_net_inet_tcp, OID_AUTO, sendbuf_inc, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_autosndbuf_inc), 0, "Incrementor step size of automatic send buffer"); -VNET_DEFINE(int, tcp_autosndbuf_max) = 2*1024*1024; +VNET_DEFINE(int, tcp_autosndbuf_max) = 8*1024*1024; SYSCTL_INT(_net_inet_tcp, OID_AUTO, sendbuf_max, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_autosndbuf_max), 0, "Max size of automatic send buffer"); diff --git a/sys/netinet/tcp_stacks/bbr.c b/sys/netinet/tcp_stacks/bbr.c index f2d7867df9b4..66983edcdd73 100644 --- a/sys/netinet/tcp_stacks/bbr.c +++ b/sys/netinet/tcp_stacks/bbr.c @@ -480,7 +480,7 @@ bbr_find_lowest_rsm(struct tcp_bbr *bbr); static __inline uint32_t bbr_get_rtt(struct tcp_bbr *bbr, int32_t rtt_type); static void -bbr_log_to_start(struct tcp_bbr *bbr, uint32_t cts, uint32_t to, int32_t slot, +bbr_log_to_start(struct tcp_bbr *bbr, uint32_t cts, uint32_t to, int32_t pacing_delay, uint8_t which); static void bbr_log_timer_var(struct tcp_bbr *bbr, int mode, uint32_t cts, @@ -489,7 +489,7 @@ bbr_log_timer_var(struct tcp_bbr *bbr, int mode, uint32_t cts, static void bbr_log_hpts_diag(struct tcp_bbr *bbr, uint32_t cts, struct hpts_diag *diag); static void -bbr_log_type_bbrsnd(struct tcp_bbr *bbr, uint32_t len, uint32_t slot, +bbr_log_type_bbrsnd(struct tcp_bbr *bbr, uint32_t len, uint32_t pacing_delay, uint32_t del_by, uint32_t cts, uint32_t sloton, uint32_t prev_delay); static void @@ -724,7 +724,7 @@ bbr_minseg(struct tcp_bbr *bbr) } static void -bbr_start_hpts_timer(struct tcp_bbr *bbr, struct tcpcb *tp, uint32_t cts, int32_t frm, int32_t slot, uint32_t tot_len) +bbr_start_hpts_timer(struct tcp_bbr *bbr, struct tcpcb *tp, uint32_t cts, int32_t frm, int32_t pacing_delay, uint32_t tot_len) { struct inpcb *inp = tptoinpcb(tp); struct hpts_diag diag; @@ -751,40 +751,40 @@ bbr_start_hpts_timer(struct tcp_bbr *bbr, struct tcpcb *tp, uint32_t cts, int32_ bbr->r_ctl.rc_timer_exp = 0; prev_delay = bbr->r_ctl.rc_last_delay_val; if (bbr->r_ctl.rc_last_delay_val && - (slot == 0)) { + (pacing_delay == 0)) { /* * If a previous pacer delay was in place we * are not coming from the output side (where * we calculate a delay, more likely a timer). */ - slot = bbr->r_ctl.rc_last_delay_val; + pacing_delay = bbr->r_ctl.rc_last_delay_val; if (TSTMP_GT(cts, bbr->rc_pacer_started)) { /* Compensate for time passed */ delay_calc = cts - bbr->rc_pacer_started; - if (delay_calc <= slot) - slot -= delay_calc; + if (delay_calc <= pacing_delay) + pacing_delay -= delay_calc; } } /* Do we have early to make up for by pushing out the pacing time? */ if (bbr->r_agg_early_set) { - bbr_log_pacing_delay_calc(bbr, 0, bbr->r_ctl.rc_agg_early, cts, slot, 0, bbr->r_agg_early_set, 2); - slot += bbr->r_ctl.rc_agg_early; + bbr_log_pacing_delay_calc(bbr, 0, bbr->r_ctl.rc_agg_early, cts, pacing_delay, 0, bbr->r_agg_early_set, 2); + pacing_delay += bbr->r_ctl.rc_agg_early; bbr->r_ctl.rc_agg_early = 0; bbr->r_agg_early_set = 0; } /* Are we running a total debt that needs to be compensated for? 
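 * (Worked illustration with made-up numbers: with a 700 usec debt, a
 * computed pacing_delay of 1000 usec shrinks to 300 usec and the debt
 * clears; a pacing_delay of 500 usec instead pays 500 off the debt and
 * we pace for the minimal 100 usec.)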
*/ if (bbr->r_ctl.rc_hptsi_agg_delay) { - if (slot > bbr->r_ctl.rc_hptsi_agg_delay) { + if (pacing_delay > bbr->r_ctl.rc_hptsi_agg_delay) { /* We nuke the delay */ - slot -= bbr->r_ctl.rc_hptsi_agg_delay; + pacing_delay -= bbr->r_ctl.rc_hptsi_agg_delay; bbr->r_ctl.rc_hptsi_agg_delay = 0; } else { /* We nuke some of the delay, put in a minimal 100usecs */ - bbr->r_ctl.rc_hptsi_agg_delay -= slot; - bbr->r_ctl.rc_last_delay_val = slot = 100; + bbr->r_ctl.rc_hptsi_agg_delay -= pacing_delay; + bbr->r_ctl.rc_last_delay_val = pacing_delay = 100; } } - bbr->r_ctl.rc_last_delay_val = slot; + bbr->r_ctl.rc_last_delay_val = pacing_delay; hpts_timeout = bbr_timer_start(tp, bbr, cts); if (tp->t_flags & TF_DELACK) { if (bbr->rc_in_persist == 0) { @@ -810,7 +810,7 @@ bbr_start_hpts_timer(struct tcp_bbr *bbr, struct tcpcb *tp, uint32_t cts, int32_ bbr->r_ctl.rc_hpts_flags = PACE_TMR_DELACK; hpts_timeout = delayed_ack; } - if (slot) { + if (pacing_delay) { /* Mark that we have a pacing timer up */ BBR_STAT_INC(bbr_paced_segments); bbr->r_ctl.rc_hpts_flags |= PACE_PKT_OUTPUT; @@ -820,7 +820,7 @@ bbr_start_hpts_timer(struct tcp_bbr *bbr, struct tcpcb *tp, uint32_t cts, int32_ * wheel, we resort to a keep-alive timer if its configured. */ if ((hpts_timeout == 0) && - (slot == 0)) { + (pacing_delay == 0)) { if ((V_tcp_always_keepalive || inp->inp_socket->so_options & SO_KEEPALIVE) && (tp->t_state <= TCPS_CLOSING)) { /* @@ -849,7 +849,7 @@ bbr_start_hpts_timer(struct tcp_bbr *bbr, struct tcpcb *tp, uint32_t cts, int32_ if (left < hpts_timeout) hpts_timeout = left; } - if (bbr->r_ctl.rc_incr_tmrs && slot && + if (bbr->r_ctl.rc_incr_tmrs && pacing_delay && (bbr->r_ctl.rc_hpts_flags & (PACE_TMR_TLP|PACE_TMR_RXT))) { /* * If configured to do so, and the timer is either @@ -867,7 +867,7 @@ bbr_start_hpts_timer(struct tcp_bbr *bbr, struct tcpcb *tp, uint32_t cts, int32_ * this extra delay but this is easier and being more * conservative is probably better. */ - hpts_timeout += slot; + hpts_timeout += pacing_delay; } if (hpts_timeout) { /* @@ -879,10 +879,10 @@ bbr_start_hpts_timer(struct tcp_bbr *bbr, struct tcpcb *tp, uint32_t cts, int32_ bbr->r_ctl.rc_timer_exp = cts + hpts_timeout; } else bbr->r_ctl.rc_timer_exp = 0; - if ((slot) && + if ((pacing_delay) && (bbr->rc_use_google || bbr->output_error_seen || - (slot <= hpts_timeout)) ) { + (pacing_delay <= hpts_timeout)) ) { /* * Tell LRO that it can queue packets while * we pace. @@ -900,17 +900,15 @@ bbr_start_hpts_timer(struct tcp_bbr *bbr, struct tcpcb *tp, uint32_t cts, int32_ tp->t_flags2 &= ~TF2_DONT_SACK_QUEUE; bbr->rc_pacer_started = cts; - (void)tcp_hpts_insert_diag(tp, HPTS_USEC_TO_SLOTS(slot), - __LINE__, &diag); + tcp_hpts_insert(tp, pacing_delay, &diag); bbr->rc_timer_first = 0; bbr->bbr_timer_src = frm; - bbr_log_to_start(bbr, cts, hpts_timeout, slot, 1); + bbr_log_to_start(bbr, cts, hpts_timeout, pacing_delay, 1); bbr_log_hpts_diag(bbr, cts, &diag); } else if (hpts_timeout) { - (void)tcp_hpts_insert_diag(tp, HPTS_USEC_TO_SLOTS(hpts_timeout), - __LINE__, &diag); + tcp_hpts_insert(tp, hpts_timeout, &diag); /* - * We add the flag here as well if the slot is set, + * We add the flag here as well if the pacing delay is set, * since hpts will call in to clear the queue first before * calling the output routine (which does our timers). * We don't want to set the flag if its just a timer @@ -919,7 +917,7 @@ bbr_start_hpts_timer(struct tcp_bbr *bbr, struct tcpcb *tp, uint32_t cts, int32_ * on a keep-alive timer and a request comes in for * more data. 
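 * (Illustration: both arms funnel into the same call,
 *	tcp_hpts_insert(tp, pacing_delay, &diag);	pacing path
 *	tcp_hpts_insert(tp, hpts_timeout, &diag);	timer path
 * and rc_pacer_started is only restarted when a pacing delay is
 * actually in effect.)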
*/ - if (slot) + if (pacing_delay) bbr->rc_pacer_started = cts; if ((bbr->r_ctl.rc_hpts_flags & PACE_TMR_RACK) && (bbr->rc_cwnd_limited == 0)) { @@ -936,12 +934,12 @@ bbr_start_hpts_timer(struct tcp_bbr *bbr, struct tcpcb *tp, uint32_t cts, int32_ TF2_DONT_SACK_QUEUE); } bbr->bbr_timer_src = frm; - bbr_log_to_start(bbr, cts, hpts_timeout, slot, 0); + bbr_log_to_start(bbr, cts, hpts_timeout, pacing_delay, 0); bbr_log_hpts_diag(bbr, cts, &diag); bbr->rc_timer_first = 1; } bbr->rc_tmr_stopped = 0; - bbr_log_type_bbrsnd(bbr, tot_len, slot, delay_calc, cts, frm, prev_delay); + bbr_log_type_bbrsnd(bbr, tot_len, pacing_delay, delay_calc, cts, frm, prev_delay); } static void @@ -1033,8 +1031,8 @@ bbr_timer_audit(struct tcpcb *tp, struct tcp_bbr *bbr, uint32_t cts, struct sock } /* * Ok the timer originally started is not what we want now. We will - * force the hpts to be stopped if any, and restart with the slot - * set to what was in the saved slot. + * force the hpts to be stopped if any, and restart with the pacing + * delay set to what was in the saved delay. */ wrong_timer: if ((bbr->r_ctl.rc_hpts_flags & PACE_PKT_OUTPUT) == 0) { @@ -2397,7 +2395,7 @@ bbr_log_hpts_diag(struct tcp_bbr *bbr, uint32_t cts, struct hpts_diag *diag) log.u_bbr.flex2 = diag->p_cur_slot; log.u_bbr.flex3 = diag->slot_req; log.u_bbr.flex4 = diag->inp_hptsslot; - log.u_bbr.flex5 = diag->slot_remaining; + log.u_bbr.flex5 = diag->time_remaining; log.u_bbr.flex6 = diag->need_new_to; log.u_bbr.flex7 = diag->p_hpts_active; log.u_bbr.flex8 = diag->p_on_min_sleep; @@ -2411,9 +2409,6 @@ bbr_log_hpts_diag(struct tcp_bbr *bbr, uint32_t cts, struct hpts_diag *diag) log.u_bbr.bw_inuse = diag->wheel_slot; log.u_bbr.rttProp = diag->wheel_cts; log.u_bbr.delRate = diag->maxslots; - log.u_bbr.cur_del_rate = diag->p_curtick; - log.u_bbr.cur_del_rate <<= 32; - log.u_bbr.cur_del_rate |= diag->p_lasttick; TCP_LOG_EVENTP(bbr->rc_tp, NULL, &bbr->rc_inp->inp_socket->so_rcv, &bbr->rc_inp->inp_socket->so_snd, @@ -2473,7 +2468,7 @@ bbr_log_pacing_delay_calc(struct tcp_bbr *bbr, uint16_t gain, uint32_t len, } static void -bbr_log_to_start(struct tcp_bbr *bbr, uint32_t cts, uint32_t to, int32_t slot, uint8_t which) +bbr_log_to_start(struct tcp_bbr *bbr, uint32_t cts, uint32_t to, int32_t pacing_delay, uint8_t which) { if (tcp_bblogging_on(bbr->rc_tp)) { union tcp_log_stackspecific log; @@ -2483,7 +2478,7 @@ bbr_log_to_start(struct tcp_bbr *bbr, uint32_t cts, uint32_t to, int32_t slot, u log.u_bbr.flex1 = bbr->bbr_timer_src; log.u_bbr.flex2 = to; log.u_bbr.flex3 = bbr->r_ctl.rc_hpts_flags; - log.u_bbr.flex4 = slot; + log.u_bbr.flex4 = pacing_delay; log.u_bbr.flex5 = bbr->rc_tp->t_hpts_slot; log.u_bbr.flex6 = TICKS_2_USEC(bbr->rc_tp->t_rxtcur); log.u_bbr.pkts_out = bbr->rc_tp->t_flags2; @@ -2733,13 +2728,13 @@ bbr_type_log_hdwr_pacing(struct tcp_bbr *bbr, const struct ifnet *ifp, } static void -bbr_log_type_bbrsnd(struct tcp_bbr *bbr, uint32_t len, uint32_t slot, uint32_t del_by, uint32_t cts, uint32_t line, uint32_t prev_delay) +bbr_log_type_bbrsnd(struct tcp_bbr *bbr, uint32_t len, uint32_t pacing_delay, uint32_t del_by, uint32_t cts, uint32_t line, uint32_t prev_delay) { if (tcp_bblogging_on(bbr->rc_tp)) { union tcp_log_stackspecific log; bbr_fill_in_logging_data(bbr, &log.u_bbr, cts); - log.u_bbr.flex1 = slot; + log.u_bbr.flex1 = pacing_delay; log.u_bbr.flex2 = del_by; log.u_bbr.flex3 = prev_delay; log.u_bbr.flex4 = line; @@ -5205,7 +5200,7 @@ bbr_process_timers(struct tcpcb *tp, struct tcp_bbr *bbr, uint32_t cts, uint8_t left = 
bbr->r_ctl.rc_timer_exp - cts; ret = -3; bbr_log_to_processing(bbr, cts, ret, left, hpts_calling); - tcp_hpts_insert(tp, HPTS_USEC_TO_SLOTS(left)); + tcp_hpts_insert(tp, left, NULL); return (1); } bbr->rc_tmr_stopped = 0; @@ -5254,7 +5249,7 @@ bbr_timer_cancel(struct tcp_bbr *bbr, int32_t line, uint32_t cts) else time_since_send = 0; if (bbr->r_ctl.rc_last_delay_val > time_since_send) { - /* Cut down our slot time */ + /* Cut down our pacing_delay time */ bbr->r_ctl.rc_last_delay_val -= time_since_send; } else { bbr->r_ctl.rc_last_delay_val = 0; @@ -5888,7 +5883,7 @@ bbr_log_output(struct tcp_bbr *bbr, struct tcpcb *tp, struct tcpopt *to, int32_t * sequence 1 for 10 bytes. In such an example the r_start would be * 1 (starting sequence) but the r_end would be r_start+len i.e. 11. * This means that r_end is actually the first sequence for the next - * slot (11). + * pacing delay (11). * */ INP_WLOCK_ASSERT(tptoinpcb(tp)); @@ -11856,7 +11851,7 @@ bbr_output_wtime(struct tcpcb *tp, const struct timeval *tv) struct bbr_sendmap *rsm = NULL; int32_t tso, mtu; struct tcpopt to; - int32_t slot = 0; + int32_t pacing_delay = 0; struct inpcb *inp; struct sockbuf *sb; bool hpts_calling; @@ -11986,8 +11981,7 @@ bbr_output_wtime(struct tcpcb *tp, const struct timeval *tv) delay_calc -= bbr->r_ctl.rc_last_delay_val; else { /* - * We are early setup to adjust - * our slot time. + * We are early setup to adjust our pacing delay. */ uint64_t merged_val; @@ -12104,7 +12098,7 @@ again: #endif error = 0; tso = 0; - slot = 0; + pacing_delay = 0; mtu = 0; sendwin = min(tp->snd_wnd, tp->snd_cwnd); sb_offset = tp->snd_max - tp->snd_una; @@ -12126,7 +12120,7 @@ recheck_resend: tot_len = tp->t_maxseg; if (hpts_calling) /* Retry in a ms */ - slot = 1001; + pacing_delay = 1001; goto just_return_nolock; } TAILQ_INSERT_TAIL(&bbr->r_ctl.rc_free, rsm, r_next); @@ -12699,9 +12693,9 @@ just_return: SOCK_SENDBUF_UNLOCK(so); just_return_nolock: if (tot_len) - slot = bbr_get_pacing_delay(bbr, bbr->r_ctl.rc_bbr_hptsi_gain, tot_len, cts, 0); + pacing_delay = bbr_get_pacing_delay(bbr, bbr->r_ctl.rc_bbr_hptsi_gain, tot_len, cts, 0); if (bbr->rc_no_pacing) - slot = 0; + pacing_delay = 0; if (tot_len == 0) { if ((ctf_outstanding(tp) + min((bbr->r_ctl.rc_high_rwnd/2), bbr_minseg(bbr))) >= tp->snd_wnd) { @@ -12751,7 +12745,7 @@ just_return_nolock: /* Dont update the time if we did not send */ bbr->r_ctl.rc_last_delay_val = 0; bbr->rc_output_starts_timer = 1; - bbr_start_hpts_timer(bbr, tp, cts, 9, slot, tot_len); + bbr_start_hpts_timer(bbr, tp, cts, 9, pacing_delay, tot_len); bbr_log_type_just_return(bbr, cts, tot_len, hpts_calling, app_limited, p_maxseg, len); if (SEQ_LT(tp->snd_nxt, tp->snd_max)) { /* Make sure snd_nxt is drug up */ @@ -12787,7 +12781,7 @@ send: flags &= ~TH_FIN; if ((len == 0) && ((tp->t_flags & TF_ACKNOW) == 0)) { /* Lets not send this */ - slot = 0; + pacing_delay = 0; goto just_return; } } @@ -13053,7 +13047,7 @@ send: /* * We have outstanding data, don't send a fin by itself!.
*/ - slot = 0; + pacing_delay = 0; goto just_return; } /* @@ -13763,7 +13757,7 @@ nomore: if (tp->snd_cwnd < maxseg) tp->snd_cwnd = maxseg; } - slot = (bbr_error_base_paceout + 1) << bbr->oerror_cnt; + pacing_delay = (bbr_error_base_paceout + 1) << bbr->oerror_cnt; BBR_STAT_INC(bbr_saw_enobuf); if (bbr->bbr_hdrw_pacing) counter_u64_add(bbr_hdwr_pacing_enobuf, 1); @@ -13812,18 +13806,18 @@ nomore: } /* * Nuke all other things that can interfere - * with slot + * with pacing delay */ if ((tot_len + len) && (len >= tp->t_maxseg)) { - slot = bbr_get_pacing_delay(bbr, + pacing_delay = bbr_get_pacing_delay(bbr, bbr->r_ctl.rc_bbr_hptsi_gain, (tot_len + len), cts, 0); - if (slot < bbr_error_base_paceout) - slot = (bbr_error_base_paceout + 2) << bbr->oerror_cnt; + if (pacing_delay < bbr_error_base_paceout) + pacing_delay = (bbr_error_base_paceout + 2) << bbr->oerror_cnt; } else - slot = (bbr_error_base_paceout + 2) << bbr->oerror_cnt; + pacing_delay = (bbr_error_base_paceout + 2) << bbr->oerror_cnt; bbr->rc_output_starts_timer = 1; - bbr_start_hpts_timer(bbr, tp, cts, 10, slot, + bbr_start_hpts_timer(bbr, tp, cts, 10, pacing_delay, tot_len); return (error); } @@ -13841,9 +13835,9 @@ nomore: } /* FALLTHROUGH */ default: - slot = (bbr_error_base_paceout + 3) << bbr->oerror_cnt; + pacing_delay = (bbr_error_base_paceout + 3) << bbr->oerror_cnt; bbr->rc_output_starts_timer = 1; - bbr_start_hpts_timer(bbr, tp, cts, 11, slot, 0); + bbr_start_hpts_timer(bbr, tp, cts, 11, pacing_delay, 0); return (error); } #ifdef STATS @@ -13981,12 +13975,12 @@ skip_again: tcp_log_end_status(tp, TCP_EI_STATUS_SERVER_RST); if (((flags & (TH_RST | TH_SYN | TH_FIN)) == 0) && tot_len) { /* - * Calculate/Re-Calculate the hptsi slot in usecs based on + * Calculate/Re-Calculate the hptsi timeout in usecs based on * what we have sent so far */ - slot = bbr_get_pacing_delay(bbr, bbr->r_ctl.rc_bbr_hptsi_gain, tot_len, cts, 0); + pacing_delay = bbr_get_pacing_delay(bbr, bbr->r_ctl.rc_bbr_hptsi_gain, tot_len, cts, 0); if (bbr->rc_no_pacing) - slot = 0; + pacing_delay = 0; } tp->t_flags &= ~(TF_ACKNOW | TF_DELACK); enobufs: @@ -13999,8 +13993,8 @@ enobufs: (more_to_rxt || ((bbr->r_ctl.rc_resend = bbr_check_recovery_mode(tp, bbr, cts)) != NULL))) { /* Rack cheats and shotguns out all rxt's 1ms apart */ - if (slot > 1000) - slot = 1000; + if (pacing_delay > 1000) + pacing_delay = 1000; } if (bbr->bbr_hdrw_pacing && (bbr->hw_pacing_set == 0)) { /* @@ -14014,7 +14008,7 @@ enobufs: tcp_bbr_tso_size_check(bbr, cts); } } - bbr_start_hpts_timer(bbr, tp, cts, 12, slot, tot_len); + bbr_start_hpts_timer(bbr, tp, cts, 12, pacing_delay, tot_len); if (SEQ_LT(tp->snd_nxt, tp->snd_max)) { /* Make sure snd_nxt is drug up */ tp->snd_nxt = tp->snd_max; @@ -14132,8 +14126,7 @@ bbr_switch_failed(struct tcpcb *tp) } } else toval = HPTS_USECS_PER_SLOT; - (void)tcp_hpts_insert_diag(tp, HPTS_USEC_TO_SLOTS(toval), - __LINE__, &diag); + tcp_hpts_insert(tp, toval, &diag); bbr_log_hpts_diag(bbr, cts, &diag); } diff --git a/sys/netinet/tcp_stacks/rack.c b/sys/netinet/tcp_stacks/rack.c index 11ef5ba706c5..c7962b57a69e 100644 --- a/sys/netinet/tcp_stacks/rack.c +++ b/sys/netinet/tcp_stacks/rack.c @@ -250,11 +250,11 @@ static int32_t rack_non_rxt_use_cr = 0; /* does a non-rxt in recovery use the co static int32_t rack_persist_min = 250000; /* 250usec */ static int32_t rack_persist_max = 2000000; /* 2 Second in usec's */ static int32_t rack_honors_hpts_min_to = 1; /* Do we honor the hpts minimum time out for pacing timers */ -static uint32_t rack_max_reduce = 10; /* 
Percent we can reduce slot by */ +static uint32_t rack_max_reduce = 10; /* Percent we can reduce pacing delay by */ static int32_t rack_sack_not_required = 1; /* set to one to allow non-sack to use rack */ static int32_t rack_limit_time_with_srtt = 0; static int32_t rack_autosndbuf_inc = 20; /* In percentage form */ -static int32_t rack_enobuf_hw_boost_mult = 0; /* How many times the hw rate we boost slot using time_between */ +static int32_t rack_enobuf_hw_boost_mult = 0; /* How many times the hw rate we boost pacing delay using time_between */ static int32_t rack_enobuf_hw_max = 12000; /* 12 ms in usecs */ static int32_t rack_enobuf_hw_min = 10000; /* 10 ms in usecs */ static int32_t rack_hw_rwnd_factor = 2; /* How many max_segs the rwnd must be before we hold off sending */ @@ -278,7 +278,7 @@ static int32_t rack_hptsi_segments = 40; static int32_t rack_rate_sample_method = USE_RTT_LOW; static int32_t rack_pace_every_seg = 0; static int32_t rack_delayed_ack_time = 40000; /* 40ms in usecs */ -static int32_t rack_slot_reduction = 4; +static int32_t rack_pacing_delay_reduction = 4; static int32_t rack_wma_divisor = 8; /* For WMA calculation */ static int32_t rack_cwnd_block_ends_measure = 0; static int32_t rack_rwnd_block_ends_measure = 0; @@ -478,7 +478,7 @@ rack_log_alt_to_to_cancel(struct tcp_rack *rack, uint16_t flex7, uint8_t mod); static void -rack_log_pacing_delay_calc(struct tcp_rack *rack, uint32_t len, uint32_t slot, +rack_log_pacing_delay_calc(struct tcp_rack *rack, uint32_t len, uint32_t pacing_delay, uint64_t bw_est, uint64_t bw, uint64_t len_time, int method, int line, struct rack_sendmap *rsm, uint8_t quality); static struct rack_sendmap * @@ -1107,7 +1107,7 @@ rack_init_sysctls(void) SYSCTL_ADD_S32(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_pacing), OID_AUTO, "burst_reduces", CTLFLAG_RW, - &rack_slot_reduction, 4, + &rack_pacing_delay_reduction, 4, "When doing only burst mitigation what is the reduce divisor"); SYSCTL_ADD_S32(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), @@ -1399,7 +1399,7 @@ rack_init_sysctls(void) SYSCTL_CHILDREN(rack_timers), OID_AUTO, "hpts_max_reduce", CTLFLAG_RW, &rack_max_reduce, 10, - "Max percentage we will reduce slot by for pacing when we are behind"); + "Max percentage we will reduce pacing delay by for pacing when we are behind"); SYSCTL_ADD_U32(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_timers), OID_AUTO, "persmin", CTLFLAG_RW, @@ -2700,7 +2700,7 @@ rack_log_retran_reason(struct tcp_rack *rack, struct rack_sendmap *rsm, uint32_t } static void -rack_log_to_start(struct tcp_rack *rack, uint32_t cts, uint32_t to, int32_t slot, uint8_t which) +rack_log_to_start(struct tcp_rack *rack, uint32_t cts, uint32_t to, int32_t pacing_delay, uint8_t which) { if (tcp_bblogging_on(rack->rc_tp)) { union tcp_log_stackspecific log; @@ -2710,7 +2710,7 @@ rack_log_to_start(struct tcp_rack *rack, uint32_t cts, uint32_t to, int32_t slot log.u_bbr.flex1 = rack->rc_tp->t_srtt; log.u_bbr.flex2 = to; log.u_bbr.flex3 = rack->r_ctl.rc_hpts_flags; - log.u_bbr.flex4 = slot; + log.u_bbr.flex4 = pacing_delay; log.u_bbr.flex5 = rack->rc_tp->t_hpts_slot; log.u_bbr.flex6 = rack->rc_tp->t_rxtcur; log.u_bbr.flex7 = rack->rc_in_persist; @@ -3034,14 +3034,14 @@ rack_log_progress_event(struct tcp_rack *rack, struct tcpcb *tp, uint32_t tick, } static void -rack_log_type_bbrsnd(struct tcp_rack *rack, uint32_t len, uint32_t slot, uint32_t cts, struct timeval *tv, int line) +rack_log_type_bbrsnd(struct tcp_rack *rack, uint32_t len, uint32_t pacing_delay, uint32_t cts, struct timeval *tv, 
int line) { if (rack_verbose_logging && tcp_bblogging_on(rack->rc_tp)) { union tcp_log_stackspecific log; memset(&log, 0, sizeof(log)); log.u_bbr.inhpts = tcp_in_hpts(rack->rc_tp); - log.u_bbr.flex1 = slot; + log.u_bbr.flex1 = pacing_delay; if (rack->rack_no_prr) log.u_bbr.flex2 = 0; else @@ -3139,7 +3139,7 @@ rack_log_type_pacing_sizes(struct tcpcb *tp, struct tcp_rack *rack, uint32_t arg } static void -rack_log_type_just_return(struct tcp_rack *rack, uint32_t cts, uint32_t tlen, uint32_t slot, +rack_log_type_just_return(struct tcp_rack *rack, uint32_t cts, uint32_t tlen, uint32_t pacing_delay, uint8_t hpts_calling, int reason, uint32_t cwnd_to_use) { if (tcp_bblogging_on(rack->rc_tp)) { @@ -3148,7 +3148,7 @@ rack_log_type_just_return(struct tcp_rack *rack, uint32_t cts, uint32_t tlen, ui memset(&log, 0, sizeof(log)); log.u_bbr.inhpts = tcp_in_hpts(rack->rc_tp); - log.u_bbr.flex1 = slot; + log.u_bbr.flex1 = pacing_delay; log.u_bbr.flex2 = rack->r_ctl.rc_hpts_flags; log.u_bbr.flex4 = reason; if (rack->rack_no_prr) @@ -6482,7 +6482,7 @@ rack_log_hpts_diag(struct tcp_rack *rack, uint32_t cts, log.u_bbr.flex2 = diag->p_cur_slot; log.u_bbr.flex3 = diag->slot_req; log.u_bbr.flex4 = diag->inp_hptsslot; - log.u_bbr.flex5 = diag->slot_remaining; + log.u_bbr.flex5 = diag->time_remaining; log.u_bbr.flex6 = diag->need_new_to; log.u_bbr.flex7 = diag->p_hpts_active; log.u_bbr.flex8 = diag->p_on_min_sleep; @@ -6497,9 +6497,6 @@ rack_log_hpts_diag(struct tcp_rack *rack, uint32_t cts, log.u_bbr.rttProp = diag->wheel_cts; log.u_bbr.timeStamp = cts; log.u_bbr.delRate = diag->maxslots; - log.u_bbr.cur_del_rate = diag->p_curtick; - log.u_bbr.cur_del_rate <<= 32; - log.u_bbr.cur_del_rate |= diag->p_lasttick; TCP_LOG_EVENTP(rack->rc_tp, NULL, &rack->rc_inp->inp_socket->so_rcv, &rack->rc_inp->inp_socket->so_snd, @@ -6532,14 +6529,14 @@ rack_log_wakeup(struct tcpcb *tp, struct tcp_rack *rack, struct sockbuf *sb, uin static void rack_start_hpts_timer (struct tcp_rack *rack, struct tcpcb *tp, uint32_t cts, - int32_t slot, uint32_t tot_len_this_send, int sup_rack) + int32_t usecs, uint32_t tot_len_this_send, int sup_rack) { struct hpts_diag diag; struct inpcb *inp = tptoinpcb(tp); struct timeval tv; uint32_t delayed_ack = 0; uint32_t hpts_timeout; - uint32_t entry_slot = slot; + uint32_t entry_usecs = usecs; uint8_t stopped; uint32_t left = 0; uint32_t us_cts; @@ -6560,7 +6557,7 @@ rack_start_hpts_timer (struct tcp_rack *rack, struct tcpcb *tp, uint32_t cts, rack->r_ctl.rc_hpts_flags = 0; us_cts = tcp_get_usecs(&tv); /* Now early/late accounting */ - rack_log_pacing_delay_calc(rack, entry_slot, slot, 0, 0, 0, 26, __LINE__, NULL, 0); + rack_log_pacing_delay_calc(rack, entry_usecs, usecs, 0, 0, 0, 26, __LINE__, NULL, 0); if (rack->r_early && (rack->rc_ack_can_sendout_data == 0)) { /* * We have a early carry over set, @@ -6571,7 +6568,7 @@ rack_start_hpts_timer (struct tcp_rack *rack, struct tcpcb *tp, uint32_t cts, * penalize the next timer for being awoke * by an ack aka the rc_agg_early (non-paced mode). */ - slot += rack->r_ctl.rc_agg_early; + usecs += rack->r_ctl.rc_agg_early; rack->r_early = 0; rack->r_ctl.rc_agg_early = 0; } @@ -6583,29 +6580,29 @@ rack_start_hpts_timer (struct tcp_rack *rack, struct tcpcb *tp, uint32_t cts, * really depends on what * the current pacing time is. */ - if (rack->r_ctl.rc_agg_delayed >= slot) { + if (rack->r_ctl.rc_agg_delayed >= usecs) { /* * We can't compensate for it all. * And we have to have some time * on the clock. We always have a min - * 10 slots (10 x 10 i.e. 100 usecs). 
+ * 10 HPTS timer units (10 x 10 i.e. 100 usecs). */ - if (slot <= HPTS_USECS_PER_SLOT) { + if (usecs <= HPTS_USECS_PER_SLOT) { /* We gain delay */ - rack->r_ctl.rc_agg_delayed += (HPTS_USECS_PER_SLOT - slot); - slot = HPTS_USECS_PER_SLOT; + rack->r_ctl.rc_agg_delayed += (HPTS_USECS_PER_SLOT - usecs); + usecs = HPTS_USECS_PER_SLOT; } else { /* We take off some */ - rack->r_ctl.rc_agg_delayed -= (slot - HPTS_USECS_PER_SLOT); - slot = HPTS_USECS_PER_SLOT; + rack->r_ctl.rc_agg_delayed -= (usecs - HPTS_USECS_PER_SLOT); + usecs = HPTS_USECS_PER_SLOT; } } else { - slot -= rack->r_ctl.rc_agg_delayed; + usecs -= rack->r_ctl.rc_agg_delayed; rack->r_ctl.rc_agg_delayed = 0; /* Make sure we have 100 useconds at minimum */ - if (slot < HPTS_USECS_PER_SLOT) { - rack->r_ctl.rc_agg_delayed = HPTS_USECS_PER_SLOT - slot; - slot = HPTS_USECS_PER_SLOT; + if (usecs < HPTS_USECS_PER_SLOT) { + rack->r_ctl.rc_agg_delayed = HPTS_USECS_PER_SLOT - usecs; + usecs = HPTS_USECS_PER_SLOT; } if (rack->r_ctl.rc_agg_delayed == 0) rack->r_late = 0; @@ -6614,17 +6611,17 @@ rack_start_hpts_timer (struct tcp_rack *rack, struct tcpcb *tp, uint32_t cts, /* r_use_hpts_min is on and so is DGP */ uint32_t max_red; - max_red = (slot * rack->r_ctl.max_reduction) / 100; + max_red = (usecs * rack->r_ctl.max_reduction) / 100; if (max_red >= rack->r_ctl.rc_agg_delayed) { - slot -= rack->r_ctl.rc_agg_delayed; + usecs -= rack->r_ctl.rc_agg_delayed; rack->r_ctl.rc_agg_delayed = 0; } else { - slot -= max_red; + usecs -= max_red; rack->r_ctl.rc_agg_delayed -= max_red; } } if ((rack->r_use_hpts_min == 1) && - (slot > 0) && + (usecs > 0) && (rack->dgp_on == 1)) { /* * We are enforcing a min pacing timer @@ -6633,8 +6630,8 @@ rack_start_hpts_timer (struct tcp_rack *rack, struct tcpcb *tp, uint32_t cts, uint32_t min; min = get_hpts_min_sleep_time(); - if (min > slot) { - slot = min; + if (min > usecs) { + usecs = min; } } hpts_timeout = rack_timer_start(tp, rack, cts, sup_rack); @@ -6652,7 +6649,7 @@ rack_start_hpts_timer (struct tcp_rack *rack, struct tcpcb *tp, uint32_t cts, * wheel, we resort to a keep-alive timer if its configured. */ if ((hpts_timeout == 0) && - (slot == 0)) { + (usecs == 0)) { if ((V_tcp_always_keepalive || inp->inp_socket->so_options & SO_KEEPALIVE) && (tp->t_state <= TCPS_CLOSING)) { /* @@ -6709,10 +6706,10 @@ rack_start_hpts_timer (struct tcp_rack *rack, struct tcpcb *tp, uint32_t cts, hpts_timeout = 0x7ffffffe; rack->r_ctl.rc_timer_exp = cts + hpts_timeout; } - rack_log_pacing_delay_calc(rack, entry_slot, slot, hpts_timeout, 0, 0, 27, __LINE__, NULL, 0); + rack_log_pacing_delay_calc(rack, entry_usecs, usecs, hpts_timeout, 0, 0, 27, __LINE__, NULL, 0); if ((rack->gp_ready == 0) && (rack->use_fixed_rate == 0) && - (hpts_timeout < slot) && + (hpts_timeout < usecs) && (rack->r_ctl.rc_hpts_flags & (PACE_TMR_TLP|PACE_TMR_RXT))) { /* * We have no good estimate yet for the @@ -6722,7 +6719,7 @@ rack_start_hpts_timer (struct tcp_rack *rack, struct tcpcb *tp, uint32_t cts, * pace that long since we know the calculation * so far is not accurate. */ - slot = hpts_timeout; + usecs = hpts_timeout; } /** * Turn off all the flags for queuing by default. The @@ -6754,11 +6751,11 @@ rack_start_hpts_timer (struct tcp_rack *rack, struct tcpcb *tp, uint32_t cts, * so LRO can call into us. 
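 * (Sketch of the flag interplay: TF2_MBUF_QUEUE_READY lets LRO keep
 * queuing mbufs while a pacing timer runs, and TF2_DONT_SACK_QUEUE
 * additionally suppresses wakeups for anything short of a SACK; both
 * are cleared here and re-set below only as the timers require.)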
*/ tp->t_flags2 &= ~(TF2_DONT_SACK_QUEUE|TF2_MBUF_QUEUE_READY); - if (slot) { + if (usecs) { rack->r_ctl.rc_hpts_flags |= PACE_PKT_OUTPUT; - rack->r_ctl.rc_last_output_to = us_cts + slot; + rack->r_ctl.rc_last_output_to = us_cts + usecs; /* - * A pacing timer (slot) is being set, in + * A pacing timer (usecs microseconds) is being set, in * such a case we cannot send (we are blocked by * the timer). So lets tell LRO that it should not * wake us unless there is a SACK. Note this only @@ -6799,20 +6796,18 @@ rack_start_hpts_timer (struct tcp_rack *rack, struct tcpcb *tp, uint32_t cts, } if ((rack->use_rack_rr) && (rack->r_rr_config < 2) && - ((hpts_timeout) && (hpts_timeout < slot))) { + ((hpts_timeout) && (hpts_timeout < usecs))) { /* * Arrange for the hpts to kick back in after the * t-o if the t-o does not cause a send. */ - (void)tcp_hpts_insert_diag(tp, HPTS_USEC_TO_SLOTS(hpts_timeout), - __LINE__, &diag); + tcp_hpts_insert(tp, hpts_timeout, &diag); rack_log_hpts_diag(rack, us_cts, &diag, &tv); - rack_log_to_start(rack, cts, hpts_timeout, slot, 0); + rack_log_to_start(rack, cts, hpts_timeout, usecs, 0); } else { - (void)tcp_hpts_insert_diag(tp, HPTS_USEC_TO_SLOTS(slot), - __LINE__, &diag); + tcp_hpts_insert(tp, usecs, &diag); rack_log_hpts_diag(rack, us_cts, &diag, &tv); - rack_log_to_start(rack, cts, hpts_timeout, slot, 1); + rack_log_to_start(rack, cts, hpts_timeout, usecs, 1); } } else if (hpts_timeout) { /* @@ -6824,22 +6819,21 @@ rack_start_hpts_timer (struct tcp_rack *rack, struct tcpcb *tp, uint32_t cts, * at the start of this block) are good enough. */ rack->r_ctl.rc_hpts_flags &= ~PACE_PKT_OUTPUT; - (void)tcp_hpts_insert_diag(tp, HPTS_USEC_TO_SLOTS(hpts_timeout), - __LINE__, &diag); + tcp_hpts_insert(tp, hpts_timeout, &diag); rack_log_hpts_diag(rack, us_cts, &diag, &tv); - rack_log_to_start(rack, cts, hpts_timeout, slot, 0); + rack_log_to_start(rack, cts, hpts_timeout, usecs, 0); } else { /* No timer starting */ #ifdef INVARIANTS if (SEQ_GT(tp->snd_max, tp->snd_una)) { - panic("tp:%p rack:%p tlts:%d cts:%u slot:%u pto:%u -- no timer started?", - tp, rack, tot_len_this_send, cts, slot, hpts_timeout); + panic("tp:%p rack:%p tlts:%d cts:%u usecs:%u pto:%u -- no timer started?", + tp, rack, tot_len_this_send, cts, usecs, hpts_timeout); } #endif } rack->rc_tmr_stopped = 0; - if (slot) - rack_log_type_bbrsnd(rack, tot_len_this_send, slot, us_cts, &tv, __LINE__); + if (usecs) + rack_log_type_bbrsnd(rack, tot_len_this_send, usecs, us_cts, &tv, __LINE__); } static void @@ -8016,7 +8010,7 @@ rack_process_timers(struct tcpcb *tp, struct tcp_rack *rack, uint32_t cts, uint8 rack->rc_tp->t_flags2 &= ~TF2_DONT_SACK_QUEUE; ret = -3; left = rack->r_ctl.rc_timer_exp - cts; - tcp_hpts_insert(tp, HPTS_MS_TO_SLOTS(left)); + tcp_hpts_insert(tp, left, NULL); rack_log_to_processing(rack, cts, ret, left); return (1); } @@ -14377,8 +14371,7 @@ rack_switch_failed(struct tcpcb *tp) } } else toval = HPTS_USECS_PER_SLOT; - (void)tcp_hpts_insert_diag(tp, HPTS_USEC_TO_SLOTS(toval), - __LINE__, &diag); + tcp_hpts_insert(tp, toval, &diag); rack_log_hpts_diag(rack, cts, &diag, &tv); } @@ -14973,8 +14966,7 @@ rack_init(struct tcpcb *tp, void **ptr) if (tov) { struct hpts_diag diag; - (void)tcp_hpts_insert_diag(tp, HPTS_USEC_TO_SLOTS(tov), - __LINE__, &diag); + tcp_hpts_insert(tp, tov, &diag); rack_log_hpts_diag(rack, us_cts, &diag, &rack->r_ctl.act_rcv_time); } } @@ -16367,7 +16359,7 @@ rack_do_segment_nounlock(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th, struct rack_sendmap *rsm; int32_t prev_state = 0; 
int no_output = 0; - int slot_remaining = 0; + int time_remaining = 0; #ifdef TCP_ACCOUNTING int ack_val_set = 0xf; #endif @@ -16416,7 +16408,7 @@ rack_do_segment_nounlock(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th, * could be, if a sack is present, we want to be awoken and * so should process the packets. */ - slot_remaining = rack->r_ctl.rc_last_output_to - us_cts; + time_remaining = rack->r_ctl.rc_last_output_to - us_cts; if (rack->rc_tp->t_flags2 & TF2_DONT_SACK_QUEUE) { no_output = 1; } else { @@ -16436,7 +16428,7 @@ rack_do_segment_nounlock(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th, (*ts_ptr == TCP_LRO_TS_OPTION))) no_output = 1; } - if ((no_output == 1) && (slot_remaining < tcp_min_hptsi_time)) { + if ((no_output == 1) && (time_remaining < tcp_min_hptsi_time)) { /* * It is unrealistic to think we can pace in less than * the minimum granularity of the pacer (def:250usec). So @@ -16919,10 +16911,10 @@ do_output_now: (tcp_in_hpts(rack->rc_tp) == 0)) { /* * We are not in hpts and we had a pacing timer up. Use - * the remaining time (slot_remaining) to restart the timer. + * the remaining time (time_remaining) to restart the timer. */ - KASSERT ((slot_remaining != 0), ("slot remaining is zero for rack:%p tp:%p", rack, tp)); - rack_start_hpts_timer(rack, tp, cts, slot_remaining, 0, 0); + KASSERT ((time_remaining != 0), ("time remaining is zero for rack:%p tp:%p", rack, tp)); + rack_start_hpts_timer(rack, tp, cts, time_remaining, 0, 0); rack_free_trim(rack); } /* Clear the flag, it may have been cleared by output but we may not have */ @@ -17102,7 +17094,7 @@ check_it: } static void -rack_log_pacing_delay_calc (struct tcp_rack *rack, uint32_t len, uint32_t slot, +rack_log_pacing_delay_calc (struct tcp_rack *rack, uint32_t len, uint32_t pacing_delay, uint64_t bw_est, uint64_t bw, uint64_t len_time, int method, int line, struct rack_sendmap *rsm, uint8_t quality) { @@ -17125,7 +17117,7 @@ rack_log_pacing_delay_calc (struct tcp_rack *rack, uint32_t len, uint32_t slot, } } memset(&log, 0, sizeof(log)); - log.u_bbr.flex1 = slot; + log.u_bbr.flex1 = pacing_delay; log.u_bbr.flex2 = len; log.u_bbr.flex3 = rack->r_ctl.rc_pace_min_segs; log.u_bbr.flex4 = rack->r_ctl.rc_pace_max_segs; @@ -17284,25 +17276,25 @@ rack_arrive_at_discounted_rate(struct tcp_rack *rack, uint64_t window_input, uin } static int32_t -pace_to_fill_cwnd(struct tcp_rack *rack, int32_t slot, uint32_t len, uint32_t segsiz, int *capped, uint64_t *rate_wanted, uint8_t non_paced) +pace_to_fill_cwnd(struct tcp_rack *rack, int32_t pacing_delay, uint32_t len, uint32_t segsiz, int *capped, uint64_t *rate_wanted, uint8_t non_paced) { uint64_t lentim, fill_bw; rack->r_via_fill_cw = 0; if (ctf_flight_size(rack->rc_tp, rack->r_ctl.rc_sacked) > rack->r_ctl.cwnd_to_use) - return (slot); + return (pacing_delay); if ((ctf_outstanding(rack->rc_tp) + (segsiz-1)) > rack->rc_tp->snd_wnd) - return (slot); + return (pacing_delay); if (rack->r_ctl.rc_last_us_rtt == 0) - return (slot); + return (pacing_delay); if (rack->rc_pace_fill_if_rttin_range && (rack->r_ctl.rc_last_us_rtt >= (get_filter_value_small(&rack->r_ctl.rc_gp_min_rtt) * rack->rtt_limit_mul))) { /* The rtt is huge, N * smallest, lets not fill */ - return (slot); + return (pacing_delay); } if (rack->r_ctl.fillcw_cap && *rate_wanted >= rack->r_ctl.fillcw_cap) - return (slot); + return (pacing_delay); /* * first lets calculate the b/w based on the last us-rtt * and the smallest send window.
@@ -17368,7 +17360,7 @@ at_lt_bw: if (non_paced) *rate_wanted = fill_bw; if ((fill_bw < RACK_MIN_BW) || (fill_bw < *rate_wanted)) - return (slot); + return (pacing_delay); rack->r_via_fill_cw = 1; if (rack->r_rack_hw_rate_caps && (rack->r_ctl.crte != NULL)) { @@ -17423,19 +17415,19 @@ at_lt_bw: lentim = (uint64_t)(len) * (uint64_t)HPTS_USEC_IN_SEC; lentim /= fill_bw; *rate_wanted = fill_bw; - if (non_paced || (lentim < slot)) { - rack_log_pacing_delay_calc(rack, len, slot, fill_bw, + if (non_paced || (lentim < pacing_delay)) { + rack_log_pacing_delay_calc(rack, len, pacing_delay, fill_bw, 0, lentim, 12, __LINE__, NULL, 0); return ((int32_t)lentim); } else - return (slot); + return (pacing_delay); } static int32_t rack_get_pacing_delay(struct tcp_rack *rack, struct tcpcb *tp, uint32_t len, struct rack_sendmap *rsm, uint32_t segsiz, int line) { uint64_t srtt; - int32_t slot = 0; + int32_t pacing_delay = 0; int can_start_hw_pacing = 1; int err; int pace_one; @@ -17483,25 +17475,25 @@ rack_get_pacing_delay(struct tcp_rack *rack, struct tcpcb *tp, uint32_t len, str * cwnd. Which in that case we are just waiting for * a ACK. */ - slot = len / tr_perms; + pacing_delay = len / tr_perms; /* Now do we reduce the time so we don't run dry? */ - if (slot && rack_slot_reduction) { - reduce = (slot / rack_slot_reduction); - if (reduce < slot) { - slot -= reduce; + if (pacing_delay && rack_pacing_delay_reduction) { + reduce = (pacing_delay / rack_pacing_delay_reduction); + if (reduce < pacing_delay) { + pacing_delay -= reduce; } else - slot = 0; + pacing_delay = 0; } else reduce = 0; - slot *= HPTS_USEC_IN_MSEC; + pacing_delay *= HPTS_USEC_IN_MSEC; if (rack->rc_pace_to_cwnd) { uint64_t rate_wanted = 0; - slot = pace_to_fill_cwnd(rack, slot, len, segsiz, NULL, &rate_wanted, 1); + pacing_delay = pace_to_fill_cwnd(rack, pacing_delay, len, segsiz, NULL, &rate_wanted, 1); rack->rc_ack_can_sendout_data = 1; - rack_log_pacing_delay_calc(rack, len, slot, rate_wanted, 0, 0, 14, __LINE__, NULL, 0); + rack_log_pacing_delay_calc(rack, len, pacing_delay, rate_wanted, 0, 0, 14, __LINE__, NULL, 0); } else - rack_log_pacing_delay_calc(rack, len, slot, tr_perms, reduce, 0, 7, __LINE__, NULL, 0); + rack_log_pacing_delay_calc(rack, len, pacing_delay, tr_perms, reduce, 0, 7, __LINE__, NULL, 0); /*******************************************************/ /* RRS: We insert non-paced call to stats here for len */ /*******************************************************/ @@ -17575,7 +17567,7 @@ rack_get_pacing_delay(struct tcp_rack *rack, struct tcpcb *tp, uint32_t len, str segs *= oh; lentim = (uint64_t)(len + segs) * (uint64_t)HPTS_USEC_IN_SEC; res = lentim / rate_wanted; - slot = (uint32_t)res; + pacing_delay = (uint32_t)res; if (rack_hw_rate_min && (rate_wanted < rack_hw_rate_min)) { can_start_hw_pacing = 0; @@ -17635,7 +17627,7 @@ rack_get_pacing_delay(struct tcp_rack *rack, struct tcpcb *tp, uint32_t len, str * We want to pace at our rate *or* faster to * fill the cwnd to the max if its not full. 
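 * (Worked example, made-up numbers: sending len = 64000 bytes at a
 * fill_bw of 125000000 bytes/sec gives
 *	lentim = 64000 * HPTS_USEC_IN_SEC / 125000000 = 512 usec,
 * so when the gain-based pacing_delay is longer than that we switch
 * to the shorter fill-cw schedule.)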
*/ - slot = pace_to_fill_cwnd(rack, slot, (len+segs), segsiz, &capped, &rate_wanted, 0); + pacing_delay = pace_to_fill_cwnd(rack, pacing_delay, (len+segs), segsiz, &capped, &rate_wanted, 0); /* Re-check to make sure we are not exceeding our max b/w */ if ((rack->r_ctl.crte != NULL) && (tcp_hw_highest_rate(rack->r_ctl.crte) < rate_wanted)) { @@ -17786,15 +17778,15 @@ rack_get_pacing_delay(struct tcp_rack *rack, struct tcpcb *tp, uint32_t len, str srtt = rack->rc_tp->t_srtt; else srtt = RACK_INITIAL_RTO * HPTS_USEC_IN_MSEC; /* its in ms convert */ - if (srtt < (uint64_t)slot) { - rack_log_pacing_delay_calc(rack, srtt, slot, rate_wanted, bw_est, lentim, 99, __LINE__, NULL, 0); - slot = srtt; + if (srtt < (uint64_t)pacing_delay) { + rack_log_pacing_delay_calc(rack, srtt, pacing_delay, rate_wanted, bw_est, lentim, 99, __LINE__, NULL, 0); + pacing_delay = srtt; } } /*******************************************************************/ /* RRS: We insert paced call to stats here for len and rate_wanted */ /*******************************************************************/ - rack_log_pacing_delay_calc(rack, len, slot, rate_wanted, bw_est, lentim, 2, __LINE__, rsm, 0); + rack_log_pacing_delay_calc(rack, len, pacing_delay, rate_wanted, bw_est, lentim, 2, __LINE__, rsm, 0); } if (rack->r_ctl.crte && (rack->r_ctl.crte->rs_num_enobufs > 0)) { /* @@ -17811,9 +17803,9 @@ rack_get_pacing_delay(struct tcp_rack *rack, struct tcpcb *tp, uint32_t len, str hw_boost_delay = rack_enobuf_hw_max; else if (hw_boost_delay < rack_enobuf_hw_min) hw_boost_delay = rack_enobuf_hw_min; - slot += hw_boost_delay; + pacing_delay += hw_boost_delay; } - return (slot); + return (pacing_delay); } static void @@ -18482,7 +18474,7 @@ rack_fast_rsm_output(struct tcpcb *tp, struct tcp_rack *rack, struct rack_sendma struct tcpopt to; u_char opt[TCP_MAXOLEN]; uint32_t hdrlen, optlen; - int32_t slot, segsiz, max_val, tso = 0, error = 0, ulen = 0; + int32_t pacing_delay, segsiz, max_val, tso = 0, error = 0, ulen = 0; uint16_t flags; uint32_t if_hw_tsomaxsegcount = 0, startseq; uint32_t if_hw_tsomaxsegsize; @@ -18688,9 +18680,9 @@ rack_fast_rsm_output(struct tcpcb *tp, struct tcp_rack *rack, struct rack_sendma } if (rack->r_ctl.crte != NULL) { /* See if we can send via the hw queue */ - slot = rack_check_queue_level(rack, tp, tv, cts, len, segsiz); + pacing_delay = rack_check_queue_level(rack, tp, tv, cts, len, segsiz); /* If there is nothing in queue (no pacing time) we can send via the hw queue */ - if (slot == 0) + if (pacing_delay == 0) ip_sendflag = 0; } tcp_set_flags(th, flags); @@ -18955,20 +18947,20 @@ rack_fast_rsm_output(struct tcpcb *tp, struct tcp_rack *rack, struct rack_sendma rack_log_queue_level(tp, rack, len, tv, cts); } else tcp_trace_point(rack->rc_tp, TCP_TP_ENOBUF); - slot = ((1 + rack->rc_enobuf) * HPTS_USEC_IN_MSEC); + pacing_delay = ((1 + rack->rc_enobuf) * HPTS_USEC_IN_MSEC); if (rack->rc_enobuf < 0x7f) rack->rc_enobuf++; - if (slot < (10 * HPTS_USEC_IN_MSEC)) - slot = 10 * HPTS_USEC_IN_MSEC; + if (pacing_delay < (10 * HPTS_USEC_IN_MSEC)) + pacing_delay = 10 * HPTS_USEC_IN_MSEC; if (rack->r_ctl.crte != NULL) { counter_u64_add(rack_saw_enobuf_hw, 1); tcp_rl_log_enobuf(rack->r_ctl.crte); } counter_u64_add(rack_saw_enobuf, 1); } else { - slot = rack_get_pacing_delay(rack, tp, len, NULL, segsiz, __LINE__); + pacing_delay = rack_get_pacing_delay(rack, tp, len, NULL, segsiz, __LINE__); } - rack_start_hpts_timer(rack, tp, cts, slot, len, 0); + rack_start_hpts_timer(rack, tp, cts, pacing_delay, len, 0); #ifdef 
TCP_ACCOUNTING crtsc = get_cyclecount(); if (tp->t_flags2 & TF2_TCP_ACCOUNTING) { @@ -19071,7 +19063,7 @@ rack_fast_output(struct tcpcb *tp, struct tcp_rack *rack, uint64_t ts_val, #ifdef TCP_ACCOUNTING int cnt_thru = 1; #endif - int32_t slot, segsiz, len, max_val, tso = 0, sb_offset, error, ulen = 0; + int32_t pacing_delay, segsiz, len, max_val, tso = 0, sb_offset, error, ulen = 0; uint16_t flags; uint32_t s_soff; uint32_t if_hw_tsomaxsegcount = 0, startseq; @@ -19519,8 +19511,8 @@ again: } tp->t_flags &= ~(TF_ACKNOW | TF_DELACK); counter_u64_add(rack_fto_send, 1); - slot = rack_get_pacing_delay(rack, tp, *tot_len, NULL, segsiz, __LINE__); - rack_start_hpts_timer(rack, tp, cts, slot, *tot_len, 0); + pacing_delay = rack_get_pacing_delay(rack, tp, *tot_len, NULL, segsiz, __LINE__); + rack_start_hpts_timer(rack, tp, cts, pacing_delay, *tot_len, 0); #ifdef TCP_ACCOUNTING crtsc = get_cyclecount(); if (tp->t_flags2 & TF2_TCP_ACCOUNTING) { @@ -19707,7 +19699,7 @@ rack_output(struct tcpcb *tp) struct rack_sendmap *rsm = NULL; int32_t tso, mtu; struct tcpopt to; - int32_t slot = 0; + int32_t pacing_delay = 0; int32_t sup_rack = 0; uint32_t cts, ms_cts, delayed, early; uint32_t add_flag = RACK_SENT_SP; @@ -20070,7 +20062,7 @@ again: if (rsm == NULL) { if (hpts_calling) /* Retry in a ms */ - slot = (1 * HPTS_USEC_IN_MSEC); + pacing_delay = (1 * HPTS_USEC_IN_MSEC); so = inp->inp_socket; sb = &so->so_snd; goto just_return_nolock; @@ -20877,7 +20869,7 @@ just_return_nolock: } if (tot_len_this_send > 0) { rack->r_ctl.fsb.recwin = recwin; - slot = rack_get_pacing_delay(rack, tp, tot_len_this_send, NULL, segsiz, __LINE__); + pacing_delay = rack_get_pacing_delay(rack, tp, tot_len_this_send, NULL, segsiz, __LINE__); if ((error == 0) && rack_use_rfo && ((flags & (TH_SYN|TH_FIN)) == 0) && @@ -21060,8 +21052,8 @@ just_return_nolock: /* Yes lets make sure to move to persist before timer-start */ rack_enter_persist(tp, rack, rack->r_ctl.rc_rcvtime, tp->snd_una); } - rack_start_hpts_timer(rack, tp, cts, slot, tot_len_this_send, sup_rack); - rack_log_type_just_return(rack, cts, tot_len_this_send, slot, hpts_calling, app_limited, cwnd_to_use); + rack_start_hpts_timer(rack, tp, cts, pacing_delay, tot_len_this_send, sup_rack); + rack_log_type_just_return(rack, cts, tot_len_this_send, pacing_delay, hpts_calling, app_limited, cwnd_to_use); } #ifdef NETFLIX_SHARED_CWND if ((sbavail(sb) == 0) && @@ -21100,8 +21092,8 @@ send: * we come around to again, the flag will be clear. 
*/ check_done = 1; - slot = rack_check_queue_level(rack, tp, &tv, cts, len, segsiz); - if (slot) { + pacing_delay = rack_check_queue_level(rack, tp, &tv, cts, len, segsiz); + if (pacing_delay) { rack->r_ctl.rc_agg_delayed = 0; rack->r_ctl.rc_agg_early = 0; rack->r_early = 0; @@ -22358,11 +22350,11 @@ nomore: rack_log_queue_level(tp, rack, len, &tv, cts); } else tcp_trace_point(rack->rc_tp, TCP_TP_ENOBUF); - slot = ((1 + rack->rc_enobuf) * HPTS_USEC_IN_MSEC); + pacing_delay = ((1 + rack->rc_enobuf) * HPTS_USEC_IN_MSEC); if (rack->rc_enobuf < 0x7f) rack->rc_enobuf++; - if (slot < (10 * HPTS_USEC_IN_MSEC)) - slot = 10 * HPTS_USEC_IN_MSEC; + if (pacing_delay < (10 * HPTS_USEC_IN_MSEC)) + pacing_delay = 10 * HPTS_USEC_IN_MSEC; if (rack->r_ctl.crte != NULL) { counter_u64_add(rack_saw_enobuf_hw, 1); tcp_rl_log_enobuf(rack->r_ctl.crte); @@ -22389,8 +22381,8 @@ nomore: goto again; } } - slot = 10 * HPTS_USEC_IN_MSEC; - rack_start_hpts_timer(rack, tp, cts, slot, 0, 0); + pacing_delay = 10 * HPTS_USEC_IN_MSEC; + rack_start_hpts_timer(rack, tp, cts, pacing_delay, 0, 0); #ifdef TCP_ACCOUNTING crtsc = get_cyclecount(); if (tp->t_flags2 & TF2_TCP_ACCOUNTING) { @@ -22412,8 +22404,8 @@ nomore: } /* FALLTHROUGH */ default: - slot = 10 * HPTS_USEC_IN_MSEC; - rack_start_hpts_timer(rack, tp, cts, slot, 0, 0); + pacing_delay = 10 * HPTS_USEC_IN_MSEC; + rack_start_hpts_timer(rack, tp, cts, pacing_delay, 0, 0); #ifdef TCP_ACCOUNTING crtsc = get_cyclecount(); if (tp->t_flags2 & TF2_TCP_ACCOUNTING) { @@ -22456,18 +22448,18 @@ enobufs: /* * We don't send again after sending a RST. */ - slot = 0; + pacing_delay = 0; sendalot = 0; if (error == 0) tcp_log_end_status(tp, TCP_EI_STATUS_SERVER_RST); - } else if ((slot == 0) && (sendalot == 0) && tot_len_this_send) { + } else if ((pacing_delay == 0) && (sendalot == 0) && tot_len_this_send) { /* * Get our pacing rate, if an error * occurred in sending (ENOBUF) we would * hit the else if with pacing_delay preset. Other * errors return. */ - slot = rack_get_pacing_delay(rack, tp, tot_len_this_send, rsm, segsiz, __LINE__); + pacing_delay = rack_get_pacing_delay(rack, tp, tot_len_this_send, rsm, segsiz, __LINE__); } /* We have sent, clear the flag */ rack->r_ent_rec_ns = 0; @@ -22499,7 +22491,7 @@ enobufs: */ tp->t_flags &= ~(TF_WASCRECOVERY|TF_WASFRECOVERY); } - if (slot) { + if (pacing_delay) { /* set the rack tcb into the slot N */ if ((error == 0) && rack_use_rfo && @@ -22564,7 +22556,7 @@ skip_all_send: /* Assure when we leave that snd_nxt will point to top */ if (SEQ_GT(tp->snd_max, tp->snd_nxt)) tp->snd_nxt = tp->snd_max; - rack_start_hpts_timer(rack, tp, cts, slot, tot_len_this_send, 0); + rack_start_hpts_timer(rack, tp, cts, pacing_delay, tot_len_this_send, 0); #ifdef TCP_ACCOUNTING crtsc = get_cyclecount() - ts_val; if (tot_len_this_send) { diff --git a/sys/netinet/tcp_syncache.c b/sys/netinet/tcp_syncache.c index f842a5678fa1..be20fb44a820 100644 --- a/sys/netinet/tcp_syncache.c +++ b/sys/netinet/tcp_syncache.c @@ -1046,6 +1046,8 @@ abort: * * On syncache_socket() success the newly created socket * has its underlying inp locked. + * + * *lsop is updated, if and only if 1 is returned.
*/ int syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, @@ -1094,12 +1096,14 @@ syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, */ SCH_UNLOCK(sch); TCPSTAT_INC(tcps_sc_spurcookie); - if ((s = tcp_log_addrs(inc, th, NULL, NULL))) + if ((s = tcp_log_addrs(inc, th, NULL, NULL))) { log(LOG_DEBUG, "%s; %s: Spurious ACK, " "segment rejected " "(syncookies disabled)\n", s, __func__); - goto failed; + free(s, M_TCPLOG); + } + return (0); } if (sch->sch_last_overflow < time_uptime - SYNCOOKIE_LIFETIME) { @@ -1109,12 +1113,14 @@ syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, */ SCH_UNLOCK(sch); TCPSTAT_INC(tcps_sc_spurcookie); - if ((s = tcp_log_addrs(inc, th, NULL, NULL))) + if ((s = tcp_log_addrs(inc, th, NULL, NULL))) { log(LOG_DEBUG, "%s; %s: Spurious ACK, " "segment rejected " "(no syncache entry)\n", s, __func__); - goto failed; + free(s, M_TCPLOG); + } + return (0); } SCH_UNLOCK(sch); } @@ -1128,11 +1134,13 @@ syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, TCPSTAT_INC(tcps_sc_recvcookie); } else { TCPSTAT_INC(tcps_sc_failcookie); - if ((s = tcp_log_addrs(inc, th, NULL, NULL))) + if ((s = tcp_log_addrs(inc, th, NULL, NULL))) { log(LOG_DEBUG, "%s; %s: Segment failed " "SYNCOOKIE authentication, segment rejected " "(probably spoofed)\n", s, __func__); - goto failed; + free(s, M_TCPLOG); + } + return (0); } #if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE) /* If received ACK has MD5 signature, check it. */ @@ -1206,9 +1214,9 @@ syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, "%s; %s: SEG.TSval %u < TS.Recent %u, " "segment dropped\n", s, __func__, to->to_tsval, sc->sc_tsreflect); - free(s, M_TCPLOG); } SCH_UNLOCK(sch); + free(s, M_TCPLOG); return (-1); /* Do not send RST */ } @@ -1225,7 +1233,6 @@ syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, "expected, segment processed normally\n", s, __func__); free(s, M_TCPLOG); - s = NULL; } } @@ -1312,16 +1319,6 @@ syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, if (sc != &scs) syncache_free(sc); return (1); -failed: - if (sc != NULL) { - TCPSTATES_DEC(TCPS_SYN_RECEIVED); - if (sc != &scs) - syncache_free(sc); - } - if (s != NULL) - free(s, M_TCPLOG); - *lsop = NULL; - return (0); } static struct socket * diff --git a/sys/netinet6/in6_fib_algo.c b/sys/netinet6/in6_fib_algo.c index 10ffe7ab0265..ef5cfc6d5ef6 100644 --- a/sys/netinet6/in6_fib_algo.c +++ b/sys/netinet6/in6_fib_algo.c @@ -351,7 +351,7 @@ struct fib_lookup_module flm_radix6 = { }; static void -fib6_algo_init(void) +fib6_algo_init(void *dummy __unused) { fib_module_register(&flm_radix6_lockless); diff --git a/sys/netipsec/xform_ipcomp.c b/sys/netipsec/xform_ipcomp.c index 737d4a50098a..05a01b75e0bb 100644 --- a/sys/netipsec/xform_ipcomp.c +++ b/sys/netipsec/xform_ipcomp.c @@ -750,7 +750,7 @@ static struct xformsw ipcomp_xformsw = { }; static void -ipcomp_attach(void) +ipcomp_attach(void *dummy __unused) { #ifdef INET @@ -763,7 +763,7 @@ ipcomp_attach(void) } static void -ipcomp_detach(void) +ipcomp_detach(void *dummy __unused) { #ifdef INET diff --git a/sys/netpfil/ipfw/ip_dn_io.c b/sys/netpfil/ipfw/ip_dn_io.c index 03116cb0641c..3a8de2b2bfee 100644 --- a/sys/netpfil/ipfw/ip_dn_io.c +++ b/sys/netpfil/ipfw/ip_dn_io.c @@ -43,6 +43,7 @@ #include <sys/priv.h> #include <sys/proc.h> #include <sys/rwlock.h> +#include <sys/sdt.h> #include <sys/socket.h> #include <sys/time.h> #include 
<sys/sysctl.h> @@ -70,6 +71,9 @@ #endif #include <netpfil/ipfw/dn_sched.h> +SDT_PROVIDER_DEFINE(dummynet); +SDT_PROBE_DEFINE2(dummynet, , , drop, "struct mbuf *", "struct dn_queue *"); + /* * We keep a private variable for the simulation time, but we could * probably use an existing one ("softticks" in sys/kern/kern_timeout.c) @@ -545,6 +549,7 @@ dn_enqueue(struct dn_queue *q, struct mbuf* m, int drop) drop: V_dn_cfg.io_pkt_drop++; + SDT_PROBE2(dummynet, , , drop, m, q); q->ni.drops++; ni->drops++; FREE_PKT(m); @@ -1001,6 +1006,7 @@ done: dropit: V_dn_cfg.io_pkt_drop++; + SDT_PROBE2(dummynet, , , drop, m, q); DN_BH_WUNLOCK(); if (m) FREE_PKT(m); diff --git a/sys/netpfil/ipfw/ip_dummynet.c b/sys/netpfil/ipfw/ip_dummynet.c index b3f52322425f..d522f9da0fbe 100644 --- a/sys/netpfil/ipfw/ip_dummynet.c +++ b/sys/netpfil/ipfw/ip_dummynet.c @@ -1150,7 +1150,7 @@ copy_data_helper(void *_o, void *_arg) return 0; /* not a pipe */ /* see if the object is within one of our ranges */ - for (;r < lim; r += 2) { + for (; r < lim; r += 2) { if (n < r[0] || n > r[1]) continue; /* Found a valid entry, copy and we are done */ @@ -1183,7 +1183,7 @@ copy_data_helper(void *_o, void *_arg) if (n >= DN_MAX_ID) return 0; /* see if the object is within one of our ranges */ - for (;r < lim; r += 2) { + for (; r < lim; r += 2) { if (n < r[0] || n > r[1]) continue; if (copy_flowset(a, fs, 0)) diff --git a/sys/netpfil/ipfw/ip_fw2.c b/sys/netpfil/ipfw/ip_fw2.c index b59d8d08bf80..d15d7760d7f1 100644 --- a/sys/netpfil/ipfw/ip_fw2.c +++ b/sys/netpfil/ipfw/ip_fw2.c @@ -3578,11 +3578,9 @@ sysctl_ipfw_tables_sets(SYSCTL_HANDLER_ARGS) /* * Stuff that must be initialised only on boot or module load */ -static int -ipfw_init(void) +static void +ipfw_init(void *dummy __unused) { - int error = 0; - /* * Only print out this stuff the first time around, * when called from the sysinit code. @@ -3627,14 +3625,13 @@ ipfw_init(void) ipfw_init_sopt_handler(); ipfw_init_obj_rewriter(); ipfw_iface_init(); - return (error); } /* * Called for the removal of the last instance only on module unload. 
*/ static void -ipfw_destroy(void) +ipfw_destroy(void *dummy __unused) { ipfw_iface_destroy(); diff --git a/sys/netpfil/ipfw/ip_fw_nat.c b/sys/netpfil/ipfw/ip_fw_nat.c index 1e2ff1bca290..8bd27f6885ab 100644 --- a/sys/netpfil/ipfw/ip_fw_nat.c +++ b/sys/netpfil/ipfw/ip_fw_nat.c @@ -999,9 +999,11 @@ ipfw_nat_del(struct sockopt *sopt) { struct cfg_nat *ptr; struct ip_fw_chain *chain = &V_layer3_chain; - int i; + int error, i; - sooptcopyin(sopt, &i, sizeof i, sizeof i); + error = sooptcopyin(sopt, &i, sizeof i, sizeof i); + if (error != 0) + return (error); /* XXX validate i */ IPFW_UH_WLOCK(chain); ptr = lookup_nat(&chain->nat, i); @@ -1104,7 +1106,7 @@ ipfw_nat_get_log(struct sockopt *sopt) { uint8_t *data; struct cfg_nat *ptr; - int i, size; + int error, i, size; struct ip_fw_chain *chain; IPFW_RLOCK_TRACKER; @@ -1134,9 +1136,9 @@ ipfw_nat_get_log(struct sockopt *sopt) i += LIBALIAS_BUF_SIZE; } IPFW_RUNLOCK(chain); - sooptcopyout(sopt, data, size); + error = sooptcopyout(sopt, data, size); free(data, M_IPFW); - return(0); + return (error); } static int @@ -1166,7 +1168,7 @@ vnet_ipfw_nat_uninit(const void *arg __unused) } static void -ipfw_nat_init(void) +ipfw_nat_init(void *dummy __unused) { /* init ipfw hooks */ @@ -1183,7 +1185,7 @@ ipfw_nat_init(void) } static void -ipfw_nat_destroy(void) +ipfw_nat_destroy(void *dummy __unused) { EVENTHANDLER_DEREGISTER(ifaddr_event, ifaddr_event_tag); diff --git a/sys/netpfil/pf/pf_ioctl.c b/sys/netpfil/pf/pf_ioctl.c index d58af6e5ec4d..a4557f139ae5 100644 --- a/sys/netpfil/pf/pf_ioctl.c +++ b/sys/netpfil/pf/pf_ioctl.c @@ -259,7 +259,7 @@ static void dehook_pf_eth(void); static void dehook_pf(void); static int shutdown_pf(void); static int pf_load(void); -static void pf_unload(void); +static void pf_unload(void *); static struct cdevsw pf_cdevsw = { .d_ioctl = pfioctl, @@ -7082,7 +7082,7 @@ pf_unload_vnet(void) } static void -pf_unload(void) +pf_unload(void *dummy __unused) { sx_xlock(&pf_end_lock); diff --git a/sys/nfs/nfs_diskless.c b/sys/nfs/nfs_diskless.c index 42cfee63d184..0f0cf80feeec 100644 --- a/sys/nfs/nfs_diskless.c +++ b/sys/nfs/nfs_diskless.c @@ -428,7 +428,7 @@ decode_nfshandle(char *ev, u_char *fh, int maxfh) #if !defined(BOOTP_NFSROOT) static void -nfs_rootconf(void) +nfs_rootconf(void *dummy __unused) { nfs_setup_diskless(); diff --git a/sys/powerpc/aim/mmu_oea64.c b/sys/powerpc/aim/mmu_oea64.c index 796b1719b8ba..01bf4c7e90a8 100644 --- a/sys/powerpc/aim/mmu_oea64.c +++ b/sys/powerpc/aim/mmu_oea64.c @@ -297,7 +297,7 @@ static u_int moea64_clear_bit(vm_page_t, uint64_t); static void moea64_kremove(vm_offset_t); static void moea64_syncicache(pmap_t pmap, vm_offset_t va, vm_paddr_t pa, vm_size_t sz); -static void moea64_pmap_init_qpages(void); +static void moea64_pmap_init_qpages(void *); static void moea64_remove_locked(pmap_t, vm_offset_t, vm_offset_t, struct pvo_dlist *); @@ -1284,7 +1284,7 @@ moea64_late_bootstrap(vm_offset_t kernelstart, vm_offset_t kernelend) } static void -moea64_pmap_init_qpages(void) +moea64_pmap_init_qpages(void *dummy __unused) { struct pcpu *pc; int i; diff --git a/sys/powerpc/cpufreq/pmcr.c b/sys/powerpc/cpufreq/pmcr.c index dd489b607606..6ae0777a8ac7 100644 --- a/sys/powerpc/cpufreq/pmcr.c +++ b/sys/powerpc/cpufreq/pmcr.c @@ -40,7 +40,8 @@ static int pstate_ids[256]; static int pstate_freqs[256]; static int npstates; -static void parse_pstates(void) +static void +parse_pstates(void *dummy __unused) { phandle_t node; diff --git a/sys/riscv/include/vmm.h b/sys/riscv/include/vmm.h index 
de7119dd534a..e227dd825966 100644 --- a/sys/riscv/include/vmm.h +++ b/sys/riscv/include/vmm.h @@ -123,10 +123,33 @@ struct vm_eventinfo { int *iptr; /* reqidle cookie */ }; +#define DECLARE_VMMOPS_FUNC(ret_type, opname, args) \ + ret_type vmmops_##opname args + +DECLARE_VMMOPS_FUNC(int, modinit, (void)); +DECLARE_VMMOPS_FUNC(int, modcleanup, (void)); +DECLARE_VMMOPS_FUNC(void *, init, (struct vm *vm, struct pmap *pmap)); +DECLARE_VMMOPS_FUNC(int, gla2gpa, (void *vcpui, struct vm_guest_paging *paging, + uint64_t gla, int prot, uint64_t *gpa, int *is_fault)); +DECLARE_VMMOPS_FUNC(int, run, (void *vcpui, register_t pc, struct pmap *pmap, + struct vm_eventinfo *info)); +DECLARE_VMMOPS_FUNC(void, cleanup, (void *vmi)); +DECLARE_VMMOPS_FUNC(void *, vcpu_init, (void *vmi, struct vcpu *vcpu, + int vcpu_id)); +DECLARE_VMMOPS_FUNC(void, vcpu_cleanup, (void *vcpui)); +DECLARE_VMMOPS_FUNC(int, exception, (void *vcpui, uint64_t scause)); +DECLARE_VMMOPS_FUNC(int, getreg, (void *vcpui, int num, uint64_t *retval)); +DECLARE_VMMOPS_FUNC(int, setreg, (void *vcpui, int num, uint64_t val)); +DECLARE_VMMOPS_FUNC(int, getcap, (void *vcpui, int num, int *retval)); +DECLARE_VMMOPS_FUNC(int, setcap, (void *vcpui, int num, int val)); +DECLARE_VMMOPS_FUNC(struct vmspace *, vmspace_alloc, (vm_offset_t min, + vm_offset_t max)); +DECLARE_VMMOPS_FUNC(void, vmspace_free, (struct vmspace *vmspace)); + int vm_create(const char *name, struct vm **retvm); struct vcpu *vm_alloc_vcpu(struct vm *vm, int vcpuid); void vm_disable_vcpu_creation(struct vm *vm); -void vm_slock_vcpus(struct vm *vm); +void vm_lock_vcpus(struct vm *vm); void vm_unlock_vcpus(struct vm *vm); void vm_destroy(struct vm *vm); int vm_reinit(struct vm *vm); @@ -212,7 +235,6 @@ vcpu_should_yield(struct vcpu *vcpu) void *vcpu_stats(struct vcpu *vcpu); void vcpu_notify_event(struct vcpu *vcpu); -struct vmspace *vm_vmspace(struct vm *vm); struct vm_mem *vm_mem(struct vm *vm); enum vm_reg_name vm_segment_name(int seg_encoding); diff --git a/sys/riscv/vmm/riscv.h b/sys/riscv/vmm/riscv.h index 870d0d6c5cd1..917a333520ed 100644 --- a/sys/riscv/vmm/riscv.h +++ b/sys/riscv/vmm/riscv.h @@ -122,29 +122,6 @@ struct hyptrap { uint64_t htinst; }; -#define DEFINE_VMMOPS_IFUNC(ret_type, opname, args) \ - ret_type vmmops_##opname args; - -DEFINE_VMMOPS_IFUNC(int, modinit, (void)) -DEFINE_VMMOPS_IFUNC(int, modcleanup, (void)) -DEFINE_VMMOPS_IFUNC(void *, init, (struct vm *vm, struct pmap *pmap)) -DEFINE_VMMOPS_IFUNC(int, gla2gpa, (void *vcpui, struct vm_guest_paging *paging, - uint64_t gla, int prot, uint64_t *gpa, int *is_fault)) -DEFINE_VMMOPS_IFUNC(int, run, (void *vcpui, register_t pc, struct pmap *pmap, - struct vm_eventinfo *info)) -DEFINE_VMMOPS_IFUNC(void, cleanup, (void *vmi)) -DEFINE_VMMOPS_IFUNC(void *, vcpu_init, (void *vmi, struct vcpu *vcpu, - int vcpu_id)) -DEFINE_VMMOPS_IFUNC(void, vcpu_cleanup, (void *vcpui)) -DEFINE_VMMOPS_IFUNC(int, exception, (void *vcpui, uint64_t scause)) -DEFINE_VMMOPS_IFUNC(int, getreg, (void *vcpui, int num, uint64_t *retval)) -DEFINE_VMMOPS_IFUNC(int, setreg, (void *vcpui, int num, uint64_t val)) -DEFINE_VMMOPS_IFUNC(int, getcap, (void *vcpui, int num, int *retval)) -DEFINE_VMMOPS_IFUNC(int, setcap, (void *vcpui, int num, int val)) -DEFINE_VMMOPS_IFUNC(struct vmspace *, vmspace_alloc, (vm_offset_t min, - vm_offset_t max)) -DEFINE_VMMOPS_IFUNC(void, vmspace_free, (struct vmspace *vmspace)) - #define dprintf(fmt, ...) 
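The DECLARE_VMMOPS_FUNC() macro introduced above (replacing the DEFINE_VMMOPS_IFUNC() block removed from riscv.h) only stamps out ordinary prototypes; note that the semicolon now lives at the invocation site rather than inside the macro. Hand-expanding one invocation makes that concrete (illustrative expansion, not compiler output):

/* DECLARE_VMMOPS_FUNC(int, getreg, (void *vcpui, int num, uint64_t *retval)); */
/* expands, after token pasting, to the plain declaration: */
int vmmops_getreg(void *vcpui, int num, uint64_t *retval);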
struct hypctx *riscv_get_active_vcpu(void); diff --git a/sys/riscv/vmm/vmm.c b/sys/riscv/vmm/vmm.c index ec4514f70fa6..4c9b1fa53f7a 100644 --- a/sys/riscv/vmm/vmm.c +++ b/sys/riscv/vmm/vmm.c @@ -92,7 +92,6 @@ struct vcpu { struct fpreg *guestfpu; /* (a,i) guest fpu state */ }; -#define vcpu_lock_initialized(v) mtx_initialized(&((v)->mtx)) #define vcpu_lock_init(v) mtx_init(&((v)->mtx), "vcpu lock", 0, MTX_SPIN) #define vcpu_lock_destroy(v) mtx_destroy(&((v)->mtx)) #define vcpu_lock(v) mtx_lock_spin(&((v)->mtx)) @@ -121,7 +120,6 @@ struct vm { bool dying; /* (o) is dying */ volatile cpuset_t suspended_cpus; /* (i) suspended vcpus */ volatile cpuset_t halted_cpus; /* (x) cpus in a hard halt */ - struct vmspace *vmspace; /* (o) guest's address space */ struct vm_mem mem; /* (i) [m+v] guest memory */ char name[VM_MAX_NAMELEN]; /* (o) virtual machine name */ struct vcpu **vcpu; /* (i) guest vcpus */ @@ -174,6 +172,7 @@ vcpu_cleanup(struct vcpu *vcpu, bool destroy) vmm_stat_free(vcpu->stats); fpu_save_area_free(vcpu->guestfpu); vcpu_lock_destroy(vcpu); + free(vcpu, M_VMM); } } @@ -285,7 +284,7 @@ vm_init(struct vm *vm, bool create) { int i; - vm->cookie = vmmops_init(vm, vmspace_pmap(vm->vmspace)); + vm->cookie = vmmops_init(vm, vmspace_pmap(vm_vmspace(vm))); MPASS(vm->cookie != NULL); CPU_ZERO(&vm->active_cpus); @@ -347,9 +346,9 @@ vm_alloc_vcpu(struct vm *vm, int vcpuid) } void -vm_slock_vcpus(struct vm *vm) +vm_lock_vcpus(struct vm *vm) { - sx_slock(&vm->vcpus_init_lock); + sx_xlock(&vm->vcpus_init_lock); } void @@ -362,7 +361,7 @@ int vm_create(const char *name, struct vm **retvm) { struct vm *vm; - struct vmspace *vmspace; + int error; /* * If vmm.ko could not be successfully initialized then don't attempt @@ -374,14 +373,13 @@ vm_create(const char *name, struct vm **retvm) if (name == NULL || strlen(name) >= VM_MAX_NAMELEN) return (EINVAL); - vmspace = vmmops_vmspace_alloc(0, 1ul << 39); - if (vmspace == NULL) - return (ENOMEM); - vm = malloc(sizeof(struct vm), M_VMM, M_WAITOK | M_ZERO); + error = vm_mem_init(&vm->mem, 0, 1ul << 39); + if (error != 0) { + free(vm, M_VMM); + return (error); + } strcpy(vm->name, name); - vm->vmspace = vmspace; - vm_mem_init(&vm->mem); sx_init(&vm->vcpus_init_lock, "vm vcpus"); vm->sockets = 1; @@ -450,11 +448,6 @@ vm_cleanup(struct vm *vm, bool destroy) if (destroy) { vm_mem_destroy(vm); - vmmops_vmspace_free(vm->vmspace); - vm->vmspace = NULL; - - for (i = 0; i < vm->maxcpus; i++) - free(vm->vcpu[i], M_VMM); free(vm->vcpu, M_VMM); sx_destroy(&vm->vcpus_init_lock); } @@ -760,12 +753,6 @@ vcpu_notify_event(struct vcpu *vcpu) vcpu_unlock(vcpu); } -struct vmspace * -vm_vmspace(struct vm *vm) -{ - return (vm->vmspace); -} - struct vm_mem * vm_mem(struct vm *vm) { @@ -1084,7 +1071,7 @@ vm_handle_paging(struct vcpu *vcpu, bool *retu) vm = vcpu->vm; vme = &vcpu->exitinfo; - pmap = vmspace_pmap(vm->vmspace); + pmap = vmspace_pmap(vm_vmspace(vm)); addr = (vme->htval << 2) & ~(PAGE_SIZE - 1); dprintf("%s: %lx\n", __func__, addr); @@ -1107,7 +1094,7 @@ vm_handle_paging(struct vcpu *vcpu, bool *retu) if (pmap_fault(pmap, addr, ftype)) return (0); - map = &vm->vmspace->vm_map; + map = &vm_vmspace(vm)->vm_map; rv = vm_fault(map, addr, ftype, VM_FAULT_NORMAL, NULL); if (rv != KERN_SUCCESS) { printf("%s: vm_fault failed, addr %lx, ftype %d, err %d\n", @@ -1189,7 +1176,7 @@ vm_run(struct vcpu *vcpu) if (CPU_ISSET(vcpuid, &vm->suspended_cpus)) return (EINVAL); - pmap = vmspace_pmap(vm->vmspace); + pmap = vmspace_pmap(vm_vmspace(vm)); vme = &vcpu->exitinfo; evinfo.rptr = NULL; 
evinfo.sptr = &vm->suspend; diff --git a/sys/rpc/auth.h b/sys/rpc/auth.h index 33c33ffd594d..648fb99a3a27 100644 --- a/sys/rpc/auth.h +++ b/sys/rpc/auth.h @@ -354,6 +354,10 @@ __END_DECLS #define RPCSEC_GSS 6 /* RPCSEC_GSS */ #define AUTH_TLS 7 /* Initiate RPC-over-TLS */ +/* RFC 5531's prescribed limits for variable-length arrays. */ +#define AUTH_SYS_MAX_HOSTNAME 255 +#define AUTH_SYS_MAX_GROUPS 16 /* Supplementary groups. */ + /* * Pseudo auth flavors for RPCSEC_GSS. */ diff --git a/sys/rpc/authunix_prot.c b/sys/rpc/authunix_prot.c index b107d5541c50..ff4c12c3f52e 100644 --- a/sys/rpc/authunix_prot.c +++ b/sys/rpc/authunix_prot.c @@ -30,7 +30,6 @@ * POSSIBILITY OF SUCH DAMAGE. */ -#include <sys/cdefs.h> /* * authunix_prot.c * XDR for UNIX style authentication parameters for RPC */ @@ -40,8 +39,7 @@ #include <sys/param.h> #include <sys/jail.h> -#include <sys/kernel.h> -#include <sys/systm.h> +#include <sys/libkern.h> #include <sys/ucred.h> #include <rpc/types.h> @@ -50,9 +48,6 @@ #include <rpc/rpc_com.h> -/* gids compose part of a credential; there may not be more than 16 of them */ -#define NGRPS 16 - /* * XDR for unix authentication parameters. */ @@ -60,25 +55,23 @@ bool_t xdr_authunix_parms(XDR *xdrs, uint32_t *time, struct xucred *cred) { uint32_t namelen; - uint32_t ngroups, i; + uint32_t supp_ngroups, i; uint32_t junk; char hostbuf[MAXHOSTNAMELEN]; + if (xdrs->x_op == XDR_FREE) + /* This function does not allocate auxiliary memory. */ + return (TRUE); + if (xdrs->x_op == XDR_ENCODE) { - /* - * Restrict name length to 255 according to RFC 1057. - */ getcredhostname(NULL, hostbuf, sizeof(hostbuf)); namelen = strlen(hostbuf); - if (namelen > 255) - namelen = 255; - } else { + if (namelen > AUTH_SYS_MAX_HOSTNAME) + namelen = AUTH_SYS_MAX_HOSTNAME; + } else namelen = 0; - } - junk = 0; - if (!xdr_uint32_t(xdrs, time) - || !xdr_uint32_t(xdrs, &namelen)) + if (!xdr_uint32_t(xdrs, time) || !xdr_uint32_t(xdrs, &namelen)) return (FALSE); /* @@ -88,43 +81,65 @@ xdr_authunix_parms(XDR *xdrs, uint32_t *time, struct xucred *cred) if (!xdr_opaque(xdrs, hostbuf, namelen)) return (FALSE); } else { + if (namelen > AUTH_SYS_MAX_HOSTNAME) + return (FALSE); xdr_setpos(xdrs, xdr_getpos(xdrs) + RNDUP(namelen)); } if (!xdr_uint32_t(xdrs, &cred->cr_uid)) return (FALSE); + + /* + * Safety check: The protocol needs at least one group (access to + * 'cr_gid', the decrement of 'cr_ngroups' below). + */ + if (xdrs->x_op == XDR_ENCODE && cred->cr_ngroups == 0) + return (FALSE); if (!xdr_uint32_t(xdrs, &cred->cr_gid)) return (FALSE); if (xdrs->x_op == XDR_ENCODE) { /* - * Note that this is a `struct xucred`, which maintains its - * historical layout of preserving the egid in cr_ngroups and - * cr_groups[0] == egid. + * Note that this is a 'struct xucred', which still has the + * historical layout where the effective GID is in cr_groups[0] + * and is accounted in 'cr_ngroups'. We subtract 1 to obtain + * the number of "supplementary" groups, passed in the AUTH_SYS + * credentials variable-length array called gids[] in RFC 5531. */ - ngroups = cred->cr_ngroups - 1; - if (ngroups > NGRPS) - ngroups = NGRPS; + MPASS(cred->cr_ngroups <= XU_NGROUPS); + supp_ngroups = cred->cr_ngroups - 1; + if (supp_ngroups > AUTH_SYS_MAX_GROUPS) + /* With current values, this should never execute.
*/ + supp_ngroups = AUTH_SYS_MAX_GROUPS; } - if (!xdr_uint32_t(xdrs, &ngroups)) + if (!xdr_uint32_t(xdrs, &supp_ngroups)) return (FALSE); - for (i = 0; i < ngroups; i++) { - if (i < ngroups_max) { - if (!xdr_uint32_t(xdrs, &cred->cr_groups[i + 1])) - return (FALSE); - } else { - if (!xdr_uint32_t(xdrs, &junk)) - return (FALSE); - } - } - if (xdrs->x_op == XDR_DECODE) { - if (ngroups > ngroups_max) - cred->cr_ngroups = ngroups_max + 1; - else - cred->cr_ngroups = ngroups + 1; - } + /* + * Because we cannot store more than XU_NGROUPS in total (16 at the time + * of this writing), for now we choose to be strict with respect to RFC + * 5531's maximum number of supplementary groups (AUTH_SYS_MAX_GROUPS). + * That also acts as an accidental DoS prevention measure, in case the + * request handling code were to reassemble it in full without any + * size limits. Although AUTH_SYS_MAX_GROUPS and XU_NGROUPS are equal, + * since the latter includes the "effective" GID, we cannot store the + * last group of a message with exactly AUTH_SYS_MAX_GROUPS + * supplementary groups. We accept such messages so as not to violate + * the protocol, silently dropping the last group on the floor. + */ + + if (xdrs->x_op != XDR_ENCODE && supp_ngroups > AUTH_SYS_MAX_GROUPS) + return (FALSE); + + junk = 0; + for (i = 0; i < supp_ngroups; ++i) + if (!xdr_uint32_t(xdrs, i < XU_NGROUPS - 1 ? + &cred->cr_sgroups[i] : &junk)) + return (FALSE); + + if (xdrs->x_op != XDR_ENCODE) + cred->cr_ngroups = MIN(supp_ngroups + 1, XU_NGROUPS); return (TRUE); } diff --git a/sys/rpc/svc_auth_unix.c b/sys/rpc/svc_auth_unix.c index 963f4f272964..aa0fc585865f 100644 --- a/sys/rpc/svc_auth_unix.c +++ b/sys/rpc/svc_auth_unix.c @@ -41,18 +41,12 @@ */ #include <sys/param.h> -#include <sys/lock.h> -#include <sys/mutex.h> -#include <sys/systm.h> #include <sys/ucred.h> #include <rpc/rpc.h> #include <rpc/rpc_com.h> -#define MAX_MACHINE_NAME 255 -#define NGRPS 16 - /* * Unix longhand authenticator */ @@ -62,11 +56,8 @@ _svcauth_unix(struct svc_req *rqst, struct rpc_msg *msg) enum auth_stat stat; XDR xdrs; int32_t *buf; - uint32_t time; struct xucred *xcr; - u_int auth_len; - size_t str_len, gid_len; - u_int i; + uint32_t auth_len, time; xcr = rqst->rq_clntcred; auth_len = (u_int)msg->rm_call.cb_cred.oa_length; @@ -74,51 +65,58 @@ XDR_DECODE); buf = XDR_INLINE(&xdrs, auth_len); if (buf != NULL) { + /* 'time', 'str_len', UID, GID and 'supp_ngroups'. */ + const uint32_t min_len = 5 * BYTES_PER_XDR_UNIT; + uint32_t str_len, supp_ngroups; + + if (auth_len < min_len) + goto badcred; time = IXDR_GET_UINT32(buf); - str_len = (size_t)IXDR_GET_UINT32(buf); - if (str_len > MAX_MACHINE_NAME) { - stat = AUTH_BADCRED; - goto done; - } + str_len = IXDR_GET_UINT32(buf); + if (str_len > AUTH_SYS_MAX_HOSTNAME) + goto badcred; str_len = RNDUP(str_len); + /* + * Recheck message length now that we know the value of + * 'str_len' (and that it won't cause an overflow in additions + * below) to protect access to the credentials part. + */ + if (auth_len < min_len + str_len) + goto badcred; buf += str_len / sizeof (int32_t); xcr->cr_uid = IXDR_GET_UINT32(buf); xcr->cr_gid = IXDR_GET_UINT32(buf); - gid_len = (size_t)IXDR_GET_UINT32(buf); - if (gid_len > NGRPS) { - stat = AUTH_BADCRED; - goto done; - } - for (i = 0; i < gid_len; i++) { - /* - * Note that this is a `struct xucred`, which maintains - * its historical layout of preserving the egid in - * cr_ngroups and cr_groups[0] == egid.
- */ - if (i + 1 < XU_NGROUPS) - xcr->cr_groups[i + 1] = IXDR_GET_INT32(buf); - else - buf++; - } - if (gid_len + 1 > XU_NGROUPS) - xcr->cr_ngroups = XU_NGROUPS; - else - xcr->cr_ngroups = gid_len + 1; + supp_ngroups = IXDR_GET_UINT32(buf); + /* + * See the comment before a similar test at the end of + * xdr_authunix_parms() for why we strictly respect RFC 5531 and + * why we may have to drop the last supplementary group when + * there are AUTH_SYS_MAX_GROUPS of them. + */ + if (supp_ngroups > AUTH_SYS_MAX_GROUPS) + goto badcred; + /* + * Final message length check, as we now know how much we will + * read in total. + */ + if (auth_len < min_len + str_len + + supp_ngroups * BYTES_PER_XDR_UNIT) + goto badcred; /* - * five is the smallest unix credentials structure - - * timestamp, hostname len (0), uid, gid, and gids len (0). + * Note that 'xcr' is a 'struct xucred', which still has the + * historical layout where the effective GID is in cr_groups[0] + * and is accounted in 'cr_ngroups'. */ - if ((5 + gid_len) * BYTES_PER_XDR_UNIT + str_len > auth_len) { - (void) printf("bad auth_len gid %ld str %ld auth %u\n", - (long)gid_len, (long)str_len, auth_len); - stat = AUTH_BADCRED; - goto done; + for (uint32_t i = 0; i < supp_ngroups; ++i) { + if (i < XU_NGROUPS - 1) + xcr->cr_sgroups[i] = IXDR_GET_INT32(buf); + else + buf++; } - } else if (! xdr_authunix_parms(&xdrs, &time, xcr)) { - stat = AUTH_BADCRED; - goto done; - } + xcr->cr_ngroups = MIN(supp_ngroups + 1, XU_NGROUPS); + } else if (!xdr_authunix_parms(&xdrs, &time, xcr)) + goto badcred; rqst->rq_verf = _null_auth; stat = AUTH_OK; @@ -126,6 +124,10 @@ done: XDR_DESTROY(&xdrs); return (stat); + +badcred: + stat = AUTH_BADCRED; + goto done; } diff --git a/sys/security/audit/audit.c b/sys/security/audit/audit.c index 7ec50d990d4e..876776e5f62e 100644 --- a/sys/security/audit/audit.c +++ b/sys/security/audit/audit.c @@ -329,7 +329,7 @@ audit_record_dtor(void *mem, int size, void *arg) * call into the BSM assembly code to initialize it. */ static void -audit_init(void) +audit_init(void *dummy __unused) { audit_trail_enabled = 0; diff --git a/sys/security/mac/mac_framework.c b/sys/security/mac/mac_framework.c index d742b5dcbc3a..b0776160cc74 100644 --- a/sys/security/mac/mac_framework.c +++ b/sys/security/mac/mac_framework.c @@ -320,7 +320,7 @@ mac_policy_xlock_assert(void) * Initialize the MAC subsystem, including appropriate SMP locks. */ static void -mac_init(void) +mac_init(void *dummy __unused) { LIST_INIT(&mac_static_policy_list); @@ -340,7 +340,7 @@ mac_init(void) * kernel, or loaded before the kernel startup. */ static void -mac_late_init(void) +mac_late_init(void *dummy __unused) { mac_late = 1; diff --git a/sys/sys/imgact_elf.h b/sys/sys/imgact_elf.h index 2845a9dbc1e2..9e2a233248b4 100644 --- a/sys/sys/imgact_elf.h +++ b/sys/sys/imgact_elf.h @@ -86,7 +86,7 @@ typedef struct { struct sysentvec *sysvec; const char *interp_newpath; int flags; - Elf_Brandnote *brand_note; + const Elf_Brandnote *brand_note; bool (*header_supported)(const struct image_params *, const int32_t *, const uint32_t *); /* High 8 bits of flags is private to the ABI */ @@ -111,9 +111,9 @@ struct sseg_closure { size_t size; /* Total size of all writable segments.
*/ }; -bool __elfN(brand_inuse)(Elf_Brandinfo *entry); -int __elfN(insert_brand_entry)(Elf_Brandinfo *entry); -int __elfN(remove_brand_entry)(Elf_Brandinfo *entry); +bool __elfN(brand_inuse)(const Elf_Brandinfo *entry); +int __elfN(insert_brand_entry)(const Elf_Brandinfo *entry); +int __elfN(remove_brand_entry)(const Elf_Brandinfo *entry); int __elfN(freebsd_fixup)(uintptr_t *, struct image_params *); int __elfN(coredump)(struct thread *, struct coredump_writer *, off_t, int); size_t __elfN(populate_note)(int, void *, void *, size_t, void **); diff --git a/sys/sys/proc.h b/sys/sys/proc.h index 9140cee56885..8c0729d3ec66 100644 --- a/sys/sys/proc.h +++ b/sys/sys/proc.h @@ -741,7 +741,7 @@ struct proc { reaper which spawned our subtree. */ uint64_t p_elf_flags; /* (x) ELF flags */ - void *p_elf_brandinfo; /* (x) Elf_Brandinfo, NULL for + const void *p_elf_brandinfo; /* (x) Elf_Brandinfo, NULL for non ELF binaries. */ sbintime_t p_umtx_min_timeout; /* End area that is copied on creation. */ diff --git a/sys/sys/sockbuf.h b/sys/sys/sockbuf.h index b4593f38f592..739723754b7d 100644 --- a/sys/sys/sockbuf.h +++ b/sys/sys/sockbuf.h @@ -62,7 +62,7 @@ #include <sys/_sx.h> #include <sys/_task.h> -#define SB_MAX (2*1024*1024) /* default for max chars in sockbuf */ +#define SB_MAX (8*1024*1024) /* default for max chars in sockbuf */ struct ktls_session; struct mbuf; diff --git a/sys/sys/socket.h b/sys/sys/socket.h index cdd4fa3b4b89..cf1d95da6168 100644 --- a/sys/sys/socket.h +++ b/sys/sys/socket.h @@ -396,6 +396,7 @@ struct sockproto { #define PF_NETLINK AF_NETLINK #define PF_INET_SDP AF_INET_SDP #define PF_INET6_SDP AF_INET6_SDP +#define PF_HYPERV AF_HYPERV #define PF_DIVERT AF_DIVERT #define PF_IPFWLOG AF_IPFWLOG diff --git a/sys/sys/sockopt.h b/sys/sys/sockopt.h index bfe12d8510d7..d2b0ff5ed2c8 100644 --- a/sys/sys/sockopt.h +++ b/sys/sys/sockopt.h @@ -57,8 +57,10 @@ struct sockopt { int sosetopt(struct socket *so, struct sockopt *sopt); int sogetopt(struct socket *so, struct sockopt *sopt); -int sooptcopyin(struct sockopt *sopt, void *buf, size_t len, size_t minlen); -int sooptcopyout(struct sockopt *sopt, const void *buf, size_t len); +int __result_use_check sooptcopyin(struct sockopt *sopt, void *buf, size_t len, + size_t minlen); +int __result_use_check sooptcopyout(struct sockopt *sopt, const void *buf, + size_t len); int soopt_getm(struct sockopt *sopt, struct mbuf **mp); int soopt_mcopyin(struct sockopt *sopt, struct mbuf *m); int soopt_mcopyout(struct sockopt *sopt, struct mbuf *m); diff --git a/sys/sys/sysent.h b/sys/sys/sysent.h index 1714fa5a7416..6de391dcc03e 100644 --- a/sys/sys/sysent.h +++ b/sys/sys/sysent.h @@ -343,8 +343,7 @@ void exec_free_abi_mappings(struct proc *p); void exec_onexec_old(struct thread *td); #define INIT_SYSENTVEC(name, sv) \ - SYSINIT(name, SI_SUB_EXEC, SI_ORDER_ANY, \ - (sysinit_cfunc_t)exec_sysvec_init, sv); + SYSINIT(name, SI_SUB_EXEC, SI_ORDER_ANY, exec_sysvec_init, sv) #endif /* _KERNEL */ diff --git a/sys/sys/tree.h b/sys/sys/tree.h index c11bccfb387c..194ad505b038 100644 --- a/sys/sys/tree.h +++ b/sys/sys/tree.h @@ -334,10 +334,13 @@ struct { \ #define _RB_L ((__uintptr_t)1) #define _RB_R ((__uintptr_t)2) #define _RB_LR ((__uintptr_t)3) -#define _RB_BITS(elm) (*(__uintptr_t *)&elm) +#define _RB_BITS(elm) ((__uintptr_t)elm) #define _RB_BITSUP(elm, field) _RB_BITS(_RB_UP(elm, field)) -#define _RB_PTR(elm) (__typeof(elm)) \ - ((__uintptr_t)elm & ~_RB_LR) +#define _RB_PTR_OP(elm, op, dir) ((__typeof(elm)) \ + ((__uintptr_t)(elm) op (dir))) +#define 
_RB_PTR(elm) _RB_PTR_OP((elm), &, ~_RB_LR) +#define _RB_MOD_OR(elm, dir) ((elm) = _RB_PTR_OP((elm), |, (dir))) +#define _RB_MOD_XOR(elm, dir) ((elm) = _RB_PTR_OP((elm), ^, (dir))) #define RB_PARENT(elm, field) _RB_PTR(_RB_UP(elm, field)) #define RB_LEFT(elm, field) _RB_LINK(elm, _RB_L, field) @@ -346,8 +349,8 @@ struct { \ #define RB_EMPTY(head) (RB_ROOT(head) == NULL) #define RB_SET_PARENT(dst, src, field) do { \ - _RB_BITSUP(dst, field) = (__uintptr_t)src | \ - (_RB_BITSUP(dst, field) & _RB_LR); \ + _RB_UP(dst, field) = (__typeof(src))((__uintptr_t)src | \ + (_RB_BITSUP(dst, field) & _RB_LR)); \ } while (/*CONSTCOND*/ 0) #define RB_SET(elm, parent, field) do { \ @@ -546,12 +549,12 @@ name##_RB_INSERT_COLOR(struct name *head, \ elmdir = RB_RIGHT(parent, field) == elm ? _RB_R : _RB_L; \ if (_RB_BITS(gpar) & elmdir) { \ /* shorten the parent-elm edge to rebalance */ \ - _RB_BITSUP(parent, field) ^= elmdir; \ + _RB_MOD_XOR(_RB_UP(parent, field), elmdir); \ return (NULL); \ } \ sibdir = elmdir ^ _RB_LR; \ /* the other edge must change length */ \ - _RB_BITSUP(parent, field) ^= sibdir; \ + _RB_MOD_XOR(_RB_UP(parent, field), sibdir); \ if ((_RB_BITS(gpar) & _RB_LR) == 0) { \ /* both edges now short, retry from parent */ \ child = elm; \ @@ -583,11 +586,14 @@ name##_RB_INSERT_COLOR(struct name *head, \ RB_ROTATE(elm, child, elmdir, field); \ child_up = _RB_UP(child, field); \ if (_RB_BITS(child_up) & sibdir) \ - _RB_BITSUP(parent, field) ^= elmdir; \ + _RB_MOD_XOR(_RB_UP(parent, field), \ + elmdir); \ if (_RB_BITS(child_up) & elmdir) \ - _RB_BITSUP(elm, field) ^= _RB_LR; \ + _RB_MOD_XOR(_RB_UP(elm, field), \ + _RB_LR); \ else \ - _RB_BITSUP(elm, field) ^= elmdir; \ + _RB_MOD_XOR(_RB_UP(elm, field), \ + elmdir); \ /* if child is a leaf, don't augment elm, \ * since it is restored to be a leaf again. */ \ if ((_RB_BITS(child_up) & _RB_LR) == 0) \ @@ -656,7 +662,7 @@ name##_RB_REMOVE_COLOR(struct name *head, \ /* the rank of the tree rooted at elm shrank */ \ gpar = _RB_UP(parent, field); \ elmdir = RB_RIGHT(parent, field) == elm ? _RB_R : _RB_L; \ - _RB_BITS(gpar) ^= elmdir; \ + _RB_MOD_XOR(gpar, elmdir); \ if (_RB_BITS(gpar) & elmdir) { \ /* lengthen the parent-elm edge to rebalance */ \ _RB_UP(parent, field) = gpar; \ @@ -664,7 +670,7 @@ name##_RB_REMOVE_COLOR(struct name *head, \ } \ if (_RB_BITS(gpar) & _RB_LR) { \ /* shorten other edge, retry from parent */ \ - _RB_BITS(gpar) ^= _RB_LR; \ + _RB_MOD_XOR(gpar, _RB_LR); \ _RB_UP(parent, field) = gpar; \ gpar = _RB_PTR(gpar); \ continue; \ @@ -672,7 +678,7 @@ name##_RB_REMOVE_COLOR(struct name *head, \ sibdir = elmdir ^ _RB_LR; \ sib = _RB_LINK(parent, sibdir, field); \ up = _RB_UP(sib, field); \ - _RB_BITS(up) ^= _RB_LR; \ + _RB_MOD_XOR(up, _RB_LR); \ if ((_RB_BITS(up) & _RB_LR) == 0) { \ /* shorten edges descending from sib, retry */ \ _RB_UP(sib, field) = up; \ @@ -703,24 +709,29 @@ name##_RB_REMOVE_COLOR(struct name *head, \ /* elm is a 1-child. First rotate at elm. */ \ RB_ROTATE(sib, elm, sibdir, field); \ up = _RB_UP(elm, field); \ - _RB_BITSUP(parent, field) ^= \ - (_RB_BITS(up) & elmdir) ? _RB_LR : elmdir; \ - _RB_BITSUP(sib, field) ^= \ - (_RB_BITS(up) & sibdir) ? _RB_LR : sibdir; \ - _RB_BITSUP(elm, field) |= _RB_LR; \ + _RB_MOD_XOR(_RB_UP(parent, field), \ + (_RB_BITS(up) & elmdir) ? _RB_LR : elmdir); \ + _RB_MOD_XOR(_RB_UP(sib, field), \ + (_RB_BITS(up) & sibdir) ? 
_RB_LR : sibdir); \ + _RB_MOD_OR(_RB_UP(elm, field), _RB_LR); \ } else { \ if ((_RB_BITS(up) & elmdir) == 0 && \ RB_STRICT_HST && elm != NULL) { \ /* if parent does not become a leaf, \ do not demote parent yet. */ \ - _RB_BITSUP(parent, field) ^= sibdir; \ - _RB_BITSUP(sib, field) ^= _RB_LR; \ + _RB_MOD_XOR(_RB_UP(parent, field), \ + sibdir); \ + _RB_MOD_XOR(_RB_UP(sib, field), \ + _RB_LR); \ } else if ((_RB_BITS(up) & elmdir) == 0) { \ /* demote parent. */ \ - _RB_BITSUP(parent, field) ^= elmdir; \ - _RB_BITSUP(sib, field) ^= sibdir; \ + _RB_MOD_XOR(_RB_UP(parent, field), \ + elmdir); \ + _RB_MOD_XOR(_RB_UP(sib, field), \ + sibdir); \ } else \ - _RB_BITSUP(sib, field) ^= sibdir; \ + _RB_MOD_XOR(_RB_UP(sib, field), \ + sibdir); \ elm = sib; \ } \ \ diff --git a/sys/sys/user.h b/sys/sys/user.h index 3183f0792256..1704bc089d85 100644 --- a/sys/sys/user.h +++ b/sys/sys/user.h @@ -617,7 +617,8 @@ struct kinfo_vmobject { } kvo_type_spec; /* Type-specific union */ uint64_t kvo_me; /* Uniq handle for anon obj */ uint64_t kvo_laundry; /* Number of laundry pages. */ - uint64_t _kvo_qspare[5]; + uint64_t kvo_wired; /* Number of wired pages. */ + uint64_t _kvo_qspare[4]; uint32_t kvo_swapped; /* Number of swapped pages */ uint32_t kvo_flags; uint32_t _kvo_ispare[6]; diff --git a/sys/tests/ktest.h b/sys/tests/ktest.h index c767aa31e8e5..75d7a75e2fff 100644 --- a/sys/tests/ktest.h +++ b/sys/tests/ktest.h @@ -57,6 +57,8 @@ struct ktest_test_info { ktest_parse_t parse; }; +#define KTEST_FUNC(X) static int __ktest_##X(struct ktest_test_context *ctx) + struct ktest_module_info { const char *name; const struct ktest_test_info *tests; @@ -64,6 +66,8 @@ struct ktest_module_info { void *module_ptr; }; +#define KTEST_INFO(X) { "test_" #X, "Test " #X, __ktest_##X, NULL } + int ktest_default_modevent(module_t mod, int type, void *arg); bool ktest_start_msg(struct ktest_test_context *ctx); @@ -84,6 +88,9 @@ void ktest_end_msg(struct ktest_test_context *ctx); #define KTEST_LOG(_ctx, _fmt, ...) \ KTEST_LOG_LEVEL(_ctx, LOG_DEBUG, _fmt, ## __VA_ARGS__) +#define KTEST_ERR(_ctx, _fmt, ...) \ + KTEST_LOG_LEVEL(_ctx, LOG_ERR, _fmt, ## __VA_ARGS__) + #define KTEST_MAX_BUF 512 #define KTEST_MODULE_DECLARE(_n, _t) \ @@ -104,6 +111,9 @@ MODULE_VERSION(ktest_##_n, 1); \ MODULE_DEPEND(ktest_##_n, ktestmod, 1, 1, 1); \ MODULE_DEPEND(ktest_##_n, netlink, 1, 1, 1); \ +#define KTEST_MODULE_DEPEND(_n, _d) \ +MODULE_DEPEND(ktest_##_n, _d, 1, 1, 1); \ + #endif /* _KERNEL */ /* genetlink definitions */ diff --git a/sys/ufs/ffs/ffs_inode.c b/sys/ufs/ffs/ffs_inode.c index 970536a13aa5..f47cfd08f75a 100644 --- a/sys/ufs/ffs/ffs_inode.c +++ b/sys/ufs/ffs/ffs_inode.c @@ -653,8 +653,8 @@ done: for (i = 0; i < UFS_NDADDR; i++) if (newblks[i] != DIP(ip, i_db[i])) panic("ffs_truncate2: blkno %d newblks %jd != i_db %jd", - i, (intmax_t)newblks[UFS_NDADDR + level], - (intmax_t)DIP(ip, i_ib[level])); + i, (intmax_t)newblks[i], + (intmax_t)DIP(ip, i_db[i])); BO_LOCK(bo); if (length == 0 && (fs->fs_magic != FS_UFS2_MAGIC || ip->i_din2->di_extsize == 0) && diff --git a/sys/vm/uma_core.c b/sys/vm/uma_core.c index 679b2e20e88b..b80b5cc781f7 100644 --- a/sys/vm/uma_core.c +++ b/sys/vm/uma_core.c @@ -4009,21 +4009,15 @@ restart: /* * Use the keg's policy if upper layers haven't already specified a * domain (as happens with first-touch zones). - * - * To avoid races we run the iterator with the keg lock held, but that - * means that we cannot allow the vm_domainset layer to sleep. Thus, - * clear M_WAITOK and handle low memory conditions locally. 
*/ rr = rdomain == UMA_ANYDOMAIN; + aflags = flags; if (rr) { - aflags = (flags & ~M_WAITOK) | M_NOWAIT; if (vm_domainset_iter_policy_ref_init(&di, &keg->uk_dr, &domain, &aflags) != 0) return (NULL); - } else { - aflags = flags; + } else domain = rdomain; - } for (;;) { slab = keg_fetch_free_slab(keg, domain, rr, flags); @@ -4053,13 +4047,8 @@ restart: if ((flags & M_WAITOK) == 0) break; vm_wait_domain(domain); - } else if (vm_domainset_iter_policy(&di, &domain) != 0) { - if ((flags & M_WAITOK) != 0) { - vm_wait_doms(&keg->uk_dr.dr_policy->ds_mask, 0); - goto restart; - } + } else if (vm_domainset_iter_policy(&di, &domain) != 0) break; - } } /* @@ -5245,7 +5234,7 @@ uma_prealloc(uma_zone_t zone, int items) KEG_GET(zone, keg); slabs = howmany(items, keg->uk_ipers); while (slabs-- > 0) { - aflags = M_NOWAIT; + aflags = M_WAITOK; if (vm_domainset_iter_policy_ref_init(&di, &keg->uk_dr, &domain, &aflags) != 0) panic("%s: Domainset is empty", __func__); @@ -5266,7 +5255,8 @@ uma_prealloc(uma_zone_t zone, int items) break; } if (vm_domainset_iter_policy(&di, &domain) != 0) - vm_wait_doms(&keg->uk_dr.dr_policy->ds_mask, 0); + panic("%s: Cannot allocate from any domain", + __func__); } } } diff --git a/sys/vm/vm_domainset.c b/sys/vm/vm_domainset.c index 9fa17da954f7..c25ed0cc2267 100644 --- a/sys/vm/vm_domainset.c +++ b/sys/vm/vm_domainset.c @@ -113,7 +113,6 @@ vm_domainset_iter_interleave(struct vm_domainset_iter *di, int *domain) int d; d = di->di_offset % di->di_domain->ds_cnt; - *di->di_iter = d; *domain = di->di_domain->ds_order[d]; } @@ -260,9 +259,14 @@ vm_domainset_iter_page_init(struct vm_domainset_iter *di, struct vm_object *obj, * are immutable and unsynchronized. Updates can race but pointer * loads are assumed to be atomic. */ - if (obj != NULL && obj->domain.dr_policy != NULL) + if (obj != NULL && obj->domain.dr_policy != NULL) { + /* + * This write lock protects non-atomic increments of the + * iterator index in vm_domainset_iter_rr(). + */ + VM_OBJECT_ASSERT_WLOCKED(obj); dr = &obj->domain; - else + } else dr = &curthread->td_domain; vm_domainset_iter_init(di, dr->dr_policy, &dr->dr_iter, obj, pindex); diff --git a/sys/vm/vm_glue.c b/sys/vm/vm_glue.c index e0f1807a1b32..18d789c59281 100644 --- a/sys/vm/vm_glue.c +++ b/sys/vm/vm_glue.c @@ -441,19 +441,16 @@ vm_thread_kstack_arena_release(void *arena, vmem_addr_t addr, vmem_size_t size) * Create the kernel stack for a new thread. */ static vm_offset_t -vm_thread_stack_create(struct domainset *ds, int pages) +vm_thread_stack_create(struct domainset *ds, int pages, int flags) { vm_page_t ma[KSTACK_MAX_PAGES]; struct vm_domainset_iter di; - int req = VM_ALLOC_NORMAL; - vm_object_t obj; + int req; vm_offset_t ks; int domain, i; - obj = vm_thread_kstack_size_to_obj(pages); - if (vm_ndomains > 1) - obj->domain.dr_policy = ds; - vm_domainset_iter_page_init(&di, obj, 0, &domain, &req); + vm_domainset_iter_policy_init(&di, ds, &domain, &flags); + req = malloc2vm_flags(flags); do { /* * Get a kernel virtual address for this thread's kstack. 
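The uma_core.c hunks above and the vm_glue.c rework of vm_thread_stack_create() converge on one vm_domainset iterator idiom: initialize the iterator from a policy, attempt the allocation once per domain, and stop when the iterator is exhausted instead of special-casing M_WAITOK locally. A condensed sketch of that loop shape follows; try_alloc_from() is an assumed placeholder for the per-domain attempt, while the iterator calls are the interfaces used in the hunks above:

/*
 * Sketch of the domainset-iterator allocation loop (kernel context
 * assumed; the real iterator lives in sys/vm/vm_domainset.c).
 */
static void *
alloc_with_policy(struct domainset_ref *dr, int flags)
{
	struct vm_domainset_iter di;
	void *res;
	int domain;

	if (vm_domainset_iter_policy_ref_init(&di, dr, &domain, &flags) != 0)
		return (NULL);			/* empty domain set */
	do {
		/* try_alloc_from() stands in for the real allocation. */
		res = try_alloc_from(domain, flags);
	} while (res == NULL &&
	    vm_domainset_iter_policy(&di, &domain) == 0);
	return (res);
}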
@@ -480,7 +477,7 @@ vm_thread_stack_create(struct domainset *ds, int pages) vm_page_valid(ma[i]); pmap_qenter(ks, ma, pages); return (ks); - } while (vm_domainset_iter_page(&di, obj, &domain, NULL) == 0); + } while (vm_domainset_iter_policy(&di, &domain) == 0); return (0); } @@ -532,15 +529,9 @@ vm_thread_new(struct thread *td, int pages) ks = 0; if (pages == kstack_pages && kstack_cache != NULL) ks = (vm_offset_t)uma_zalloc(kstack_cache, M_NOWAIT); - - /* - * Ensure that kstack objects can draw pages from any memory - * domain. Otherwise a local memory shortage can block a process - * swap-in. - */ if (ks == 0) ks = vm_thread_stack_create(DOMAINSET_PREF(PCPU_GET(domain)), - pages); + pages, M_NOWAIT); if (ks == 0) return (0); @@ -660,7 +651,8 @@ kstack_import(void *arg, void **store, int cnt, int domain, int flags) ds = DOMAINSET_PREF(domain); for (i = 0; i < cnt; i++) { - store[i] = (void *)vm_thread_stack_create(ds, kstack_pages); + store[i] = (void *)vm_thread_stack_create(ds, kstack_pages, + flags); if (store[i] == NULL) break; } diff --git a/sys/vm/vm_meter.c b/sys/vm/vm_meter.c index fef28bb883e4..fee50f49c844 100644 --- a/sys/vm/vm_meter.c +++ b/sys/vm/vm_meter.c @@ -96,7 +96,7 @@ struct vmmeter __read_mostly vm_cnt = { u_long __exclusive_cache_line vm_user_wire_count; static void -vmcounter_startup(void) +vmcounter_startup(void *dummy __unused) { counter_u64_t *cnt = (counter_u64_t *)&vm_cnt; diff --git a/sys/vm/vm_object.c b/sys/vm/vm_object.c index 6d9ea8bf9d93..5b4517d2bf0c 100644 --- a/sys/vm/vm_object.c +++ b/sys/vm/vm_object.c @@ -2522,15 +2522,13 @@ vm_object_list_handler(struct sysctl_req *req, bool swap_only) continue; } mtx_unlock(&vm_object_list_mtx); + + memset(kvo, 0, sizeof(*kvo)); kvo->kvo_size = ptoa(obj->size); kvo->kvo_resident = obj->resident_page_count; kvo->kvo_ref_count = obj->ref_count; kvo->kvo_shadow_count = atomic_load_int(&obj->shadow_count); kvo->kvo_memattr = obj->memattr; - kvo->kvo_active = 0; - kvo->kvo_inactive = 0; - kvo->kvo_laundry = 0; - kvo->kvo_flags = 0; if (!swap_only) { vm_page_iter_init(&pages, obj); VM_RADIX_FOREACH(m, &pages) { @@ -2549,12 +2547,12 @@ vm_object_list_handler(struct sysctl_req *req, bool swap_only) kvo->kvo_inactive++; else if (vm_page_in_laundry(m)) kvo->kvo_laundry++; + + if (vm_page_wired(m)) + kvo->kvo_wired++; } } - kvo->kvo_vn_fileid = 0; - kvo->kvo_vn_fsid = 0; - kvo->kvo_vn_fsid_freebsd11 = 0; freepath = NULL; fullpath = ""; vp = NULL; diff --git a/sys/vm/vm_pageout.c b/sys/vm/vm_pageout.c index 3f1be78342c9..418a9cff8abf 100644 --- a/sys/vm/vm_pageout.c +++ b/sys/vm/vm_pageout.c @@ -120,7 +120,7 @@ /* the kernel process "vm_pageout"*/ static void vm_pageout(void); -static void vm_pageout_init(void); +static void vm_pageout_init(void *); static int vm_pageout_clean(vm_page_t m, int *numpagedout); static int vm_pageout_cluster(vm_page_t m); static void vm_pageout_mightbe_oom(struct vm_domain *vmd, int page_shortage, @@ -2333,7 +2333,7 @@ vm_pageout_init_domain(int domain) } static void -vm_pageout_init(void) +vm_pageout_init(void *dummy __unused) { u_long freecount; int i; diff --git a/sys/x86/x86/mca.c b/sys/x86/x86/mca.c index 4b40f343ac90..735efe307215 100644 --- a/sys/x86/x86/mca.c +++ b/sys/x86/x86/mca.c @@ -46,9 +46,11 @@ #include <sys/malloc.h> #include <sys/mutex.h> #include <sys/proc.h> +#include <sys/sbuf.h> #include <sys/sched.h> #include <sys/smp.h> #include <sys/sysctl.h> +#include <sys/syslog.h> #include <sys/systm.h> #include <sys/taskqueue.h> #include <machine/intr_machdep.h> @@ -135,6 +137,11 @@ 
SYSCTL_INT(_hw_mca, OID_AUTO, fake_bank, CTLFLAG_RW, "Bank to use for artificial MCAs (testing purpose only)"); #endif +static bool mca_uselog = false; +SYSCTL_BOOL(_hw_mca, OID_AUTO, uselog, CTLFLAG_RWTUN, &mca_uselog, 0, + "Should the system send non-fatal machine check errors to the log " + "(instead of the console)?"); + static STAILQ_HEAD(, mca_internal) mca_freelist; static int mca_freecount; static STAILQ_HEAD(, mca_internal) mca_records; @@ -147,12 +154,40 @@ static struct timeout_task mca_scan_task; static struct mtx mca_lock; static bool mca_startup_done = false; -/* Statistics on number of MCA events by type, updated atomically. */ +/* Static buffer to compose messages while in an interrupt context. */ +static char mca_msg_buf[1024]; +static struct mtx mca_msg_buf_lock; + +/* Statistics on number of MCA events by type, updated with the mca_lock. */ static uint64_t mca_stats[MCA_T_COUNT]; SYSCTL_OPAQUE(_hw_mca, OID_AUTO, stats, CTLFLAG_RD | CTLFLAG_SKIP, mca_stats, MCA_T_COUNT * sizeof(mca_stats[0]), "S", "Array of MCA events by type"); +/* Variables to track and control message rate limiting. */ +static struct timeval mca_last_log_time; +static struct timeval mca_log_interval; +static int mca_log_skipped; + +static int +sysctl_mca_log_interval(SYSCTL_HANDLER_ARGS) +{ + int error; + u_int val; + + val = mca_log_interval.tv_sec; + error = sysctl_handle_int(oidp, &val, 0, req); + if (error != 0 || req->newptr == NULL) + return (error); + mca_log_interval.tv_sec = val; + return (0); +} +SYSCTL_PROC(_hw_mca, OID_AUTO, log_interval, + CTLTYPE_UINT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, &mca_log_interval, 0, + sysctl_mca_log_interval, "IU", + "Minimum number of seconds between logging correctable MCAs" + " (0 = no limit)"); + static unsigned int mca_ia32_ctl_reg(int bank) { @@ -448,98 +483,111 @@ mca_mute(const struct mca_record *rec) /* Dump details about a single machine check. */ static void -mca_log(const struct mca_record *rec) +mca_log(enum scan_mode mode, const struct mca_record *rec, bool fatal) { + int error, numskipped; uint16_t mca_error; enum mca_stat_types event_type; + struct sbuf sb; + bool uncor, using_shared_buf; if (mca_mute(rec)) return; - if (!log_corrected && (rec->mr_status & MC_STATUS_UC) == 0 && - (!tes_supported(rec->mr_mcg_cap) || + uncor = (rec->mr_status & MC_STATUS_UC) != 0; + + if (!log_corrected && !uncor && (!tes_supported(rec->mr_mcg_cap) || ((rec->mr_status & MC_STATUS_TES_STATUS) >> 53) != 0x2)) return; - printf("MCA: Bank %d, Status 0x%016llx\n", rec->mr_bank, + /* Try to use an allocated buffer when not in an interrupt context. 
*/ + if (mode == POLLED && sbuf_new(&sb, NULL, 512, SBUF_AUTOEXTEND) != NULL) + using_shared_buf = false; + else { + using_shared_buf = true; + mtx_lock_spin(&mca_msg_buf_lock); + sbuf_new(&sb, mca_msg_buf, sizeof(mca_msg_buf), SBUF_FIXEDLEN); + } + + sbuf_printf(&sb, "MCA: Bank %d, Status 0x%016llx\n", rec->mr_bank, (long long)rec->mr_status); - printf("MCA: Global Cap 0x%016llx, Status 0x%016llx\n", + sbuf_printf(&sb, "MCA: Global Cap 0x%016llx, Status 0x%016llx\n", (long long)rec->mr_mcg_cap, (long long)rec->mr_mcg_status); - printf("MCA: Vendor \"%s\", ID 0x%x, APIC ID %d\n", cpu_vendor, - rec->mr_cpu_id, rec->mr_apic_id); - printf("MCA: CPU %d ", rec->mr_cpu); + sbuf_printf(&sb, "MCA: Vendor \"%s\", ID 0x%x, APIC ID %d\n", + cpu_vendor, rec->mr_cpu_id, rec->mr_apic_id); + sbuf_printf(&sb, "MCA: CPU %d ", rec->mr_cpu); if (rec->mr_status & MC_STATUS_UC) - printf("UNCOR "); + sbuf_printf(&sb, "UNCOR "); else { - printf("COR "); + sbuf_printf(&sb, "COR "); if (cmci_supported(rec->mr_mcg_cap)) - printf("(%lld) ", ((long long)rec->mr_status & + sbuf_printf(&sb, "(%lld) ", ((long long)rec->mr_status & MC_STATUS_COR_COUNT) >> 38); if (tes_supported(rec->mr_mcg_cap)) { switch ((rec->mr_status & MC_STATUS_TES_STATUS) >> 53) { case 0x1: - printf("(Green) "); + sbuf_printf(&sb, "(Green) "); break; case 0x2: - printf("(Yellow) "); + sbuf_printf(&sb, "(Yellow) "); break; } } } if (rec->mr_status & MC_STATUS_EN) - printf("EN "); + sbuf_printf(&sb, "EN "); if (rec->mr_status & MC_STATUS_PCC) - printf("PCC "); + sbuf_printf(&sb, "PCC "); if (ser_supported(rec->mr_mcg_cap)) { if (rec->mr_status & MC_STATUS_S) - printf("S "); + sbuf_printf(&sb, "S "); if (rec->mr_status & MC_STATUS_AR) - printf("AR "); + sbuf_printf(&sb, "AR "); } if (rec->mr_status & MC_STATUS_OVER) - printf("OVER "); + sbuf_printf(&sb, "OVER "); mca_error = rec->mr_status & MC_STATUS_MCA_ERROR; event_type = MCA_T_COUNT; switch (mca_error) { /* Simple error codes. 
@@ -548,7 +596,8 @@ mca_log(const struct mca_record *rec)
 		break;
 	default:
 		if ((mca_error & 0xfc00) == 0x0400) {
-			printf("internal error %x", mca_error & 0x03ff);
+			sbuf_printf(&sb, "internal error %x",
+			    mca_error & 0x03ff);
 			event_type = MCA_T_INTERNAL;
 			break;
 		}
@@ -557,14 +606,16 @@ mca_log(const struct mca_record *rec)
 
 		/* Memory hierarchy error. */
 		if ((mca_error & 0xeffc) == 0x000c) {
-			printf("%s memory error", mca_error_level(mca_error));
+			sbuf_printf(&sb, "%s memory error",
+			    mca_error_level(mca_error));
 			event_type = MCA_T_MEMORY;
 			break;
 		}
 
 		/* TLB error. */
 		if ((mca_error & 0xeff0) == 0x0010) {
-			printf("%sTLB %s error", mca_error_ttype(mca_error),
+			sbuf_printf(&sb, "%sTLB %s error",
+			    mca_error_ttype(mca_error),
 			    mca_error_level(mca_error));
 			event_type = MCA_T_TLB;
 			break;
@@ -572,19 +623,19 @@ mca_log(const struct mca_record *rec)
 
 		/* Memory controller error. */
 		if ((mca_error & 0xef80) == 0x0080) {
-			printf("%s channel ", mca_error_mmtype(mca_error,
-			    &event_type));
+			sbuf_printf(&sb, "%s channel ",
+			    mca_error_mmtype(mca_error, &event_type));
 			if ((mca_error & 0x000f) != 0x000f)
-				printf("%d", mca_error & 0x000f);
+				sbuf_printf(&sb, "%d", mca_error & 0x000f);
 			else
-				printf("??");
-			printf(" memory error");
+				sbuf_printf(&sb, "??");
+			sbuf_printf(&sb, " memory error");
 			break;
 		}
 
 		/* Cache error. */
 		if ((mca_error & 0xef00) == 0x0100) {
-			printf("%sCACHE %s %s error",
+			sbuf_printf(&sb, "%sCACHE %s %s error",
 			    mca_error_ttype(mca_error),
 			    mca_error_level(mca_error),
 			    mca_error_request(mca_error));
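The compound-error cases above all follow one shape: mask the 16-bit MCA error code and compare against a pattern. A small self-contained classifier using the same mask/pattern pairs is sketched below; mca_error_class() is a hypothetical helper for illustration, and the simple codes (0x0000-0x0006 and so on) that the driver handles in the switch are deliberately left out.

#include <stdint.h>
#include <stdio.h>

/* Hypothetical helper mirroring the mask/pattern tests above. */
static const char *
mca_error_class(uint16_t mca_error)
{
	if ((mca_error & 0xeffc) == 0x000c)
		return ("memory hierarchy");
	if ((mca_error & 0xeff0) == 0x0010)
		return ("TLB");
	if ((mca_error & 0xef80) == 0x0080)
		return ("memory controller");
	if ((mca_error & 0xef00) == 0x0100)
		return ("cache");
	if ((mca_error & 0xef80) == 0x0280)
		return ("extended memory");
	if ((mca_error & 0xe800) == 0x0800)
		return ("bus/interconnect");
	return ("unknown or simple code");
}

int
main(void)
{
	/* 0x0151 matches the cache pattern; 0x0800 the bus pattern. */
	printf("0x0151: %s\n", mca_error_class(0x0151));
	printf("0x0800: %s\n", mca_error_class(0x0800));
	return (0);
}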
@@ -594,77 +645,129 @@ mca_log(const struct mca_record *rec)
 
 		/* Extended memory error. */
 		if ((mca_error & 0xef80) == 0x0280) {
-			printf("%s channel ", mca_error_mmtype(mca_error,
-			    &event_type));
+			sbuf_printf(&sb, "%s channel ",
+			    mca_error_mmtype(mca_error, &event_type));
 			if ((mca_error & 0x000f) != 0x000f)
-				printf("%d", mca_error & 0x000f);
+				sbuf_printf(&sb, "%d", mca_error & 0x000f);
 			else
-				printf("??");
-			printf(" extended memory error");
+				sbuf_printf(&sb, "??");
+			sbuf_printf(&sb, " extended memory error");
 			break;
 		}
 
 		/* Bus and/or Interconnect error. */
 		if ((mca_error & 0xe800) == 0x0800) {
-			printf("BUS%s ", mca_error_level(mca_error));
+			sbuf_printf(&sb, "BUS%s ", mca_error_level(mca_error));
 			event_type = MCA_T_BUS;
 			switch ((mca_error & 0x0600) >> 9) {
 			case 0:
-				printf("Source");
+				sbuf_printf(&sb, "Source");
 				break;
 			case 1:
-				printf("Responder");
+				sbuf_printf(&sb, "Responder");
 				break;
 			case 2:
-				printf("Observer");
+				sbuf_printf(&sb, "Observer");
 				break;
 			default:
-				printf("???");
+				sbuf_printf(&sb, "???");
 				break;
 			}
-			printf(" %s ", mca_error_request(mca_error));
+			sbuf_printf(&sb, " %s ", mca_error_request(mca_error));
 			switch ((mca_error & 0x000c) >> 2) {
 			case 0:
-				printf("Memory");
+				sbuf_printf(&sb, "Memory");
 				break;
 			case 2:
-				printf("I/O");
+				sbuf_printf(&sb, "I/O");
 				break;
 			case 3:
-				printf("Other");
+				sbuf_printf(&sb, "Other");
 				break;
 			default:
-				printf("???");
+				sbuf_printf(&sb, "???");
 				break;
 			}
 			if (mca_error & 0x0100)
-				printf(" timed out");
+				sbuf_printf(&sb, " timed out");
 			break;
 		}
 
-		printf("unknown error %x", mca_error);
+		sbuf_printf(&sb, "unknown error %x", mca_error);
 		event_type = MCA_T_UNKNOWN;
 		break;
 	}
-	printf("\n");
+	sbuf_printf(&sb, "\n");
 	if (rec->mr_status & MC_STATUS_ADDRV) {
-		printf("MCA: Address 0x%llx", (long long)rec->mr_addr);
+		sbuf_printf(&sb, "MCA: Address 0x%llx",
+		    (long long)rec->mr_addr);
 		if (ser_supported(rec->mr_mcg_cap) &&
 		    (rec->mr_status & MC_STATUS_MISCV)) {
-			printf(" (Mode: %s, LSB: %d)",
+			sbuf_printf(&sb, " (Mode: %s, LSB: %d)",
 			    mca_addres_mode(rec->mr_misc),
 			    (int)(rec->mr_misc & MC_MISC_RA_LSB));
 		}
-		printf("\n");
+		sbuf_printf(&sb, "\n");
 	}
 	if (rec->mr_status & MC_STATUS_MISCV)
-		printf("MCA: Misc 0x%llx\n", (long long)rec->mr_misc);
+		sbuf_printf(&sb, "MCA: Misc 0x%llx\n", (long long)rec->mr_misc);
+
 	if (event_type < 0 || event_type >= MCA_T_COUNT) {
 		KASSERT(0, ("%s: invalid event type (%d)", __func__,
 		    event_type));
 		event_type = MCA_T_UNKNOWN;
 	}
-	atomic_add_64(&mca_stats[event_type], 1);
+	numskipped = 0;
+	if (!fatal && !uncor) {
+		/*
+		 * Update statistics and check the rate limit for
+		 * correctable errors. The rate limit is only applied
+		 * after the system records a reasonable number of errors
+		 * of the same type. The goal is to reduce the impact of
+		 * the system seeing and attempting to log a burst of
+		 * similar errors, which (especially when printed to the
+		 * console) can be expensive.
+		 */
+		mtx_lock_spin(&mca_lock);
+		mca_stats[event_type]++;
+		if (mca_log_interval.tv_sec > 0 && mca_stats[event_type] > 50 &&
+		    ratecheck(&mca_last_log_time, &mca_log_interval) == 0) {
+			mca_log_skipped++;
+			mtx_unlock_spin(&mca_lock);
+			goto done;
+		}
+		numskipped = mca_log_skipped;
+		mca_log_skipped = 0;
+		mtx_unlock_spin(&mca_lock);
+	}
+
+	error = sbuf_finish(&sb);
+	if (fatal || !mca_uselog) {
+		if (numskipped > 0)
+			printf("MCA: %d events skipped due to rate limit\n",
+			    numskipped);
+		if (error)
+			printf("MCA: error logging message (sbuf error %d)\n",
+			    error);
+		else
+			sbuf_putbuf(&sb);
+	} else {
+		if (numskipped > 0)
+			log(LOG_ERR,
+			    "MCA: %d events skipped due to rate limit\n",
+			    numskipped);
+		if (error)
+			log(LOG_ERR,
+			    "MCA: error logging message (sbuf error %d)\n",
+			    error);
+		else
+			log(uncor ? LOG_CRIT : LOG_ERR, "%s", sbuf_data(&sb));
	}
+
+done:
+	sbuf_delete(&sb);
+	if (using_shared_buf)
+		mtx_unlock_spin(&mca_msg_buf_lock);
 }
 
 static bool
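Rate limiting here hinges on ratecheck(9): once more than 50 events of a type have been recorded, a correctable error is logged only if at least hw.mca.log_interval seconds have elapsed, and skipped messages are counted so the next report can mention them. A rough userland analogue is sketched below; my_ratecheck() is a simplified stand-in for the kernel routine, not its actual implementation.

#include <sys/time.h>
#include <stdio.h>

static int
my_ratecheck(struct timeval *lasttime, const struct timeval *mininterval)
{
	struct timeval now, delta;

	gettimeofday(&now, NULL);
	timersub(&now, lasttime, &delta);
	if (timercmp(&delta, mininterval, <))
		return (0);	/* too soon: caller should suppress */
	*lasttime = now;
	return (1);
}

int
main(void)
{
	struct timeval last = { 0, 0 };
	const struct timeval interval = { 1, 0 };	/* 1 second */
	int i, skipped = 0;

	/* A tight burst: only the first event in each interval is logged. */
	for (i = 0; i < 1000000; i++) {
		if (my_ratecheck(&last, &interval) == 0) {
			skipped++;	/* mirrors mca_log_skipped */
			continue;
		}
		printf("event logged, %d skipped since last\n", skipped);
		skipped = 0;
	}
	return (0);
}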
@@ -825,7 +928,7 @@ mca_record_entry(enum scan_mode mode, const struct mca_record *record)
 	if (rec == NULL) {
 		mtx_unlock_spin(&mca_lock);
 		printf("MCA: Unable to allocate space for an event.\n");
-		mca_log(record);
+		mca_log(mode, record, false);
 		return;
 	}
 	STAILQ_REMOVE_HEAD(&mca_freelist, link);
@@ -982,7 +1085,7 @@ mca_scan(enum scan_mode mode, bool *recoverablep)
 		if (*recoverablep)
 			mca_record_entry(mode, &rec);
 		else
-			mca_log(&rec);
+			mca_log(mode, &rec, true);
 	}
 
 #ifdef DEV_APIC
@@ -1066,7 +1169,7 @@ mca_process_records(enum scan_mode mode)
 	mtx_unlock_spin(&mca_lock);
 
 	STAILQ_FOREACH(mca, &tmplist, link)
-		mca_log(&mca->rec);
+		mca_log(mode, &mca->rec, false);
 
 	mtx_lock_spin(&mca_lock);
 	while ((mca = STAILQ_FIRST(&tmplist)) != NULL) {
@@ -1231,6 +1334,7 @@ mca_setup(uint64_t mcg_cap)
 
 	mca_banks = mcg_cap & MCG_CAP_COUNT;
 	mtx_init(&mca_lock, "mca", NULL, MTX_SPIN);
+	mtx_init(&mca_msg_buf_lock, "mca_msg_buf", NULL, MTX_SPIN);
 	STAILQ_INIT(&mca_records);
 	STAILQ_INIT(&mca_pending);
 	mca_tq = taskqueue_create_fast("mca", M_WAITOK,
diff --git a/sys/x86/x86/tsc.c b/sys/x86/x86/tsc.c
index a1a5d8140b14..3b873d9dae73 100644
--- a/sys/x86/x86/tsc.c
+++ b/sys/x86/x86/tsc.c
@@ -650,7 +650,7 @@ retry:
 #endif /* SMP */
 
 static void
-init_TSC_tc(void)
+init_TSC_tc(void *dummy __unused)
 {
 	uint64_t max_freq;
 	int shift;
diff --git a/sys/x86/xen/xen_apic.c b/sys/x86/xen/xen_apic.c
index 994dc3e0804c..43a253cc2860 100644
--- a/sys/x86/xen/xen_apic.c
+++ b/sys/x86/xen/xen_apic.c
@@ -330,7 +330,7 @@ xen_cpu_ipi_init(int cpu)
 }
 
 static void
-xen_setup_cpus(void)
+xen_setup_cpus(void *dummy __unused)
 {
 	uint32_t regs[4];
 	int i;
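The tsc.c and xen_apic.c hunks give SYSINIT-style handlers the argument-taking signature the dispatcher expects, rather than relying on a cast from void (*)(void); calling a function through a pointer of an incompatible type is undefined behavior. The sketch below illustrates the general principle with a hypothetical initfunc_t type and demo functions; it is not the kernel's actual SYSINIT machinery.

#include <stdio.h>

/* Callbacks are invoked with one argument, so declare them that way. */
typedef void (*initfunc_t)(void *);

/* Correct: the signature matches the registration type exactly. */
static void
init_tsc_demo(void *dummy)
{
	(void)dummy;	/* unused, like the kernel's __unused marker */
	printf("initialized\n");
}

static void
run_init(initfunc_t fn, void *arg)
{
	fn(arg);	/* always called with an argument, as declared */
}

int
main(void)
{
	/* No cast needed; the compiler can check the pointer type. */
	run_init(init_tsc_demo, NULL);
	return (0);
}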