Diffstat (limited to 'sys')
144 files changed, 3707 insertions, 1716 deletions
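Editor's note: the largest change below reworks the amd64 KVA layout so that region bounds which used to be compile-time macros (KASAN_MIN_ADDRESS, KMSAN_SHAD_MIN_ADDRESS, the sysctl_kmaps sentinel, and so on) are instead read from the boot-selected kva_layout structure, letting one kernel handle both 4-level and 5-level (LA57) paging. A minimal sketch of the consumer pattern this enables; the helper name is hypothetical and not part of the commit:

	/* Hypothetical helper; kva_layout is declared in <machine/pmap.h>. */
	static inline bool
	va_in_kasan_shadow(vm_offset_t va)
	{
		/* Bounds are picked at boot: kva_layout vs. kva_layout_la57. */
		return (va >= kva_layout.kasan_shadow_low &&
		    va < kva_layout.kasan_shadow_high);
	}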
diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c
index cd8ab58a07ab..8df082f6c5dc 100644
--- a/sys/amd64/amd64/pmap.c
+++ b/sys/amd64/amd64/pmap.c
@@ -481,6 +481,8 @@ vm_paddr_t KERNend;	/* and the end */
 struct kva_layout_s kva_layout = {
 	.kva_min = KV4ADDR(PML4PML4I, 0, 0, 0),
+	.kva_max = KV4ADDR(NPML4EPG - 1, NPDPEPG - 1,
+	    NPDEPG - 1, NPTEPG - 1),
 	.dmap_low = KV4ADDR(DMPML4I, 0, 0, 0),
 	.dmap_high = KV4ADDR(DMPML4I + NDMPML4E, 0, 0, 0),
 	.lm_low = KV4ADDR(LMSPML4I, 0, 0, 0),
@@ -489,10 +491,20 @@ struct kva_layout_s kva_layout = {
 	.km_high = KV4ADDR(KPML4BASE + NKPML4E - 1, NPDPEPG - 1,
 	    NPDEPG - 1, NPTEPG - 1),
 	.rec_pt = KV4ADDR(PML4PML4I, 0, 0, 0),
+	.kasan_shadow_low = KV4ADDR(KASANPML4I, 0, 0, 0),
+	.kasan_shadow_high = KV4ADDR(KASANPML4I + NKASANPML4E, 0, 0, 0),
+	.kmsan_shadow_low = KV4ADDR(KMSANSHADPML4I, 0, 0, 0),
+	.kmsan_shadow_high = KV4ADDR(KMSANSHADPML4I + NKMSANSHADPML4E,
+	    0, 0, 0),
+	.kmsan_origin_low = KV4ADDR(KMSANORIGPML4I, 0, 0, 0),
+	.kmsan_origin_high = KV4ADDR(KMSANORIGPML4I + NKMSANORIGPML4E,
+	    0, 0, 0),
 };

 struct kva_layout_s kva_layout_la57 = {
 	.kva_min = KV5ADDR(NPML5EPG / 2, 0, 0, 0, 0),	/* == rec_pt */
+	.kva_max = KV5ADDR(NPML5EPG - 1, NPML4EPG - 1, NPDPEPG - 1,
+	    NPDEPG - 1, NPTEPG - 1),
 	.dmap_low = KV5ADDR(DMPML5I, 0, 0, 0, 0),
 	.dmap_high = KV5ADDR(DMPML5I + NDMPML5E, 0, 0, 0, 0),
 	.lm_low = KV5ADDR(LMSPML5I, 0, 0, 0, 0),
@@ -501,6 +513,14 @@ struct kva_layout_s kva_layout_la57 = {
 	.km_high = KV4ADDR(KPML4BASE + NKPML4E - 1, NPDPEPG - 1,
 	    NPDEPG - 1, NPTEPG - 1),
 	.rec_pt = KV5ADDR(PML5PML5I, 0, 0, 0, 0),
+	.kasan_shadow_low = KV4ADDR(KASANPML4I, 0, 0, 0),
+	.kasan_shadow_high = KV4ADDR(KASANPML4I + NKASANPML4E, 0, 0, 0),
+	.kmsan_shadow_low = KV4ADDR(KMSANSHADPML4I, 0, 0, 0),
+	.kmsan_shadow_high = KV4ADDR(KMSANSHADPML4I + NKMSANSHADPML4E,
+	    0, 0, 0),
+	.kmsan_origin_low = KV4ADDR(KMSANORIGPML4I, 0, 0, 0),
+	.kmsan_origin_high = KV4ADDR(KMSANORIGPML4I + NKMSANORIGPML4E,
+	    0, 0, 0),
 };

 /*
@@ -2003,7 +2023,7 @@ create_pagetables(vm_paddr_t *firstaddr)
 			 */
 			p5_p[i] = KPML5phys | X86_PG_RW | X86_PG_A |
 			    X86_PG_M | X86_PG_V | pg_nx;
-		} else if (i >= DMPML5I && i < DMPML5I + NDMPML5E) {
+		} else if (i >= DMPML5I && i < DMPML5I + ndmpml4phys) {
 			/* Connect DMAP pml4 pages to PML5. */
 			p5_p[i] = (DMPML4phys + ptoa(i - DMPML5I)) |
 			    X86_PG_RW | X86_PG_V | pg_nx;
@@ -5942,17 +5962,18 @@ pmap_demote_pde_mpte(pmap_t pmap, pd_entry_t *pde, vm_offset_t va,
 	if (mpte == NULL) {
 		/*
 		 * Invalidate the 2MB page mapping and return "failure" if the
-		 * mapping was never accessed.
+		 * mapping was never accessed and not wired.
 		 */
 		if ((oldpde & PG_A) == 0) {
-			KASSERT((oldpde & PG_W) == 0,
-			    ("pmap_demote_pde: a wired mapping is missing PG_A"));
-			pmap_demote_pde_abort(pmap, va, pde, oldpde, lockp);
-			return (false);
-		}
-
-		mpte = pmap_remove_pt_page(pmap, va);
-		if (mpte == NULL) {
+			if ((oldpde & PG_W) == 0) {
+				pmap_demote_pde_abort(pmap, va, pde, oldpde,
+				    lockp);
+				return (false);
+			}
+			mpte = pmap_remove_pt_page(pmap, va);
+			/* Fill the PTP with PTEs that have PG_A cleared. */
+			mpte->valid = 0;
+		} else if ((mpte = pmap_remove_pt_page(pmap, va)) == NULL) {
 			KASSERT((oldpde & PG_W) == 0,
 			    ("pmap_demote_pde: page table page for a wired mapping is missing"));
@@ -6004,7 +6025,7 @@ pmap_demote_pde_mpte(pmap_t pmap, pd_entry_t *pde, vm_offset_t va,
 	/*
 	 * If the PTP is not leftover from an earlier promotion or it does not
 	 * have PG_A set in every PTE, then fill it.  The new PTEs will all
-	 * have PG_A set.
+	 * have PG_A set, unless this is a wired mapping with PG_A clear.
 	 */
 	if (!vm_page_all_valid(mpte))
 		pmap_fill_ptp(firstpte, newpte);
@@ -11880,9 +11901,7 @@ sysctl_kmaps_dump(struct sbuf *sb, struct pmap_kernel_map_range *range,
 	    mode, range->pdpes, range->pdes, range->ptes);

 	/* Reset to sentinel value. */
-	range->sva = la57 ? KV5ADDR(NPML5EPG - 1, NPML4EPG - 1, NPDPEPG - 1,
-	    NPDEPG - 1, NPTEPG - 1) : KV4ADDR(NPML4EPG - 1, NPDPEPG - 1,
-	    NPDEPG - 1, NPTEPG - 1);
+	range->sva = kva_layout.kva_max;
 }

 /*
@@ -11923,12 +11942,18 @@ sysctl_kmaps_reinit(struct pmap_kernel_map_range *range, vm_offset_t va,
  */
 static void
 sysctl_kmaps_check(struct sbuf *sb, struct pmap_kernel_map_range *range,
-    vm_offset_t va, pml4_entry_t pml4e, pdp_entry_t pdpe, pd_entry_t pde,
-    pt_entry_t pte)
+    vm_offset_t va, pml5_entry_t pml5e, pml4_entry_t pml4e, pdp_entry_t pdpe,
+    pd_entry_t pde, pt_entry_t pte)
 {
 	pt_entry_t attrs;

-	attrs = pml4e & (X86_PG_RW | X86_PG_U | pg_nx);
+	if (la57) {
+		attrs = pml5e & (X86_PG_RW | X86_PG_U | pg_nx);
+		attrs |= pml4e & pg_nx;
+		attrs &= pg_nx | (pml4e & (X86_PG_RW | X86_PG_U));
+	} else {
+		attrs = pml4e & (X86_PG_RW | X86_PG_U | pg_nx);
+	}

 	attrs |= pdpe & pg_nx;
 	attrs &= pg_nx | (pdpe & (X86_PG_RW | X86_PG_U));
@@ -11961,13 +11986,15 @@ sysctl_kmaps(SYSCTL_HANDLER_ARGS)
 {
 	struct pmap_kernel_map_range range;
 	struct sbuf sbuf, *sb;
+	pml5_entry_t pml5e;
 	pml4_entry_t pml4e;
 	pdp_entry_t *pdp, pdpe;
 	pd_entry_t *pd, pde;
 	pt_entry_t *pt, pte;
 	vm_offset_t sva;
 	vm_paddr_t pa;
-	int error, i, j, k, l;
+	int error, j, k, l;
+	bool first;

 	error = sysctl_wire_old_buffer(req, 0);
 	if (error != 0)
@@ -11976,9 +12003,8 @@ sysctl_kmaps(SYSCTL_HANDLER_ARGS)
 	sbuf_new_for_sysctl(sb, NULL, PAGE_SIZE, req);

 	/* Sentinel value. */
-	range.sva = la57 ? KV5ADDR(NPML5EPG - 1, NPML4EPG - 1, NPDPEPG - 1,
-	    NPDEPG - 1, NPTEPG - 1) : KV4ADDR(NPML4EPG - 1, NPDPEPG - 1,
-	    NPDEPG - 1, NPTEPG - 1);
+	range.sva = kva_layout.kva_max;
+	pml5e = 0;	/* no UB for la48 */

 	/*
 	 * Iterate over the kernel page tables without holding the kernel pmap
@@ -11987,44 +12013,50 @@ sysctl_kmaps(SYSCTL_HANDLER_ARGS)
 	 * Within the large map, ensure that PDP and PD page addresses are
 	 * valid before descending.
 	 */
-	for (sva = 0, i = pmap_pml4e_index(sva); i < NPML4EPG; i++) {
-		switch (i) {
-		case PML4PML4I:
-			if (!la57)
-				sbuf_printf(sb, "\nRecursive map:\n");
-			break;
-		case DMPML4I:
-			if (!la57)
-				sbuf_printf(sb, "\nDirect map:\n");
-			break;
+	for (first = true, sva = 0; sva != 0 || first; first = false) {
+		if (sva == kva_layout.rec_pt)
+			sbuf_printf(sb, "\nRecursive map:\n");
+		else if (sva == kva_layout.dmap_low)
+			sbuf_printf(sb, "\nDirect map:\n");
 #ifdef KASAN
-		case KASANPML4I:
+		else if (sva == kva_layout.kasan_shadow_low)
 			sbuf_printf(sb, "\nKASAN shadow map:\n");
-			break;
 #endif
 #ifdef KMSAN
-		case KMSANSHADPML4I:
+		else if (sva == kva_layout.kmsan_shadow_low)
 			sbuf_printf(sb, "\nKMSAN shadow map:\n");
-			break;
-		case KMSANORIGPML4I:
+		else if (sva == kva_layout.kmsan_origin_low)
 			sbuf_printf(sb, "\nKMSAN origin map:\n");
-			break;
 #endif
-		case KPML4BASE:
+		else if (sva == kva_layout.km_low)
 			sbuf_printf(sb, "\nKernel map:\n");
-			break;
-		case LMSPML4I:
-			if (!la57)
-				sbuf_printf(sb, "\nLarge map:\n");
-			break;
-		}
+		else if (sva == kva_layout.lm_low)
+			sbuf_printf(sb, "\nLarge map:\n");

 		/* Convert to canonical form.
 		 */
-		if (sva == 1ul << 47)
-			sva |= -1ul << 48;
+		if (la57) {
+			if (sva == 1ul << 56) {
+				sva |= -1ul << 57;
+				continue;
+			}
+		} else {
+			if (sva == 1ul << 47) {
+				sva |= -1ul << 48;
+				continue;
+			}
+		}

 restart:
-		pml4e = kernel_pml4[i];
+		if (la57) {
+			pml5e = *pmap_pml5e(kernel_pmap, sva);
+			if ((pml5e & X86_PG_V) == 0) {
+				sva = rounddown2(sva, NBPML5);
+				sysctl_kmaps_dump(sb, &range, sva);
+				sva += NBPML5;
+				continue;
+			}
+		}
+		pml4e = *pmap_pml4e(kernel_pmap, sva);
 		if ((pml4e & X86_PG_V) == 0) {
 			sva = rounddown2(sva, NBPML4);
 			sysctl_kmaps_dump(sb, &range, sva);
@@ -12045,8 +12077,8 @@ restart:
 			pa = pdpe & PG_FRAME;
 			if ((pdpe & PG_PS) != 0) {
 				sva = rounddown2(sva, NBPDP);
-				sysctl_kmaps_check(sb, &range, sva, pml4e, pdpe,
-				    0, 0);
+				sysctl_kmaps_check(sb, &range, sva, pml5e,
+				    pml4e, pdpe, 0, 0);
 				range.pdpes++;
 				sva += NBPDP;
 				continue;
@@ -12058,6 +12090,7 @@ restart:
 				 * freed. Validate the next-level address
 				 * before descending.
 				 */
+				sva += NBPDP;
 				goto restart;
 			}
 			pd = (pd_entry_t *)PHYS_TO_DMAP(pa);
@@ -12074,7 +12107,7 @@ restart:
 				if ((pde & PG_PS) != 0) {
 					sva = rounddown2(sva, NBPDR);
 					sysctl_kmaps_check(sb, &range, sva,
-					    pml4e, pdpe, pde, 0);
+					    pml5e, pml4e, pdpe, pde, 0);
 					range.pdes++;
 					sva += NBPDR;
 					continue;
@@ -12086,6 +12119,7 @@ restart:
 					 * may be freed. Validate the
 					 * next-level address before descending.
 					 */
+					sva += NBPDR;
 					goto restart;
 				}
 				pt = (pt_entry_t *)PHYS_TO_DMAP(pa);
@@ -12099,7 +12133,7 @@ restart:
 					continue;
 				}
 				sysctl_kmaps_check(sb, &range, sva,
-				    pml4e, pdpe, pde, pte);
+				    pml5e, pml4e, pdpe, pde, pte);
 				range.ptes++;
 			}
 		}
diff --git a/sys/amd64/include/pmap.h b/sys/amd64/include/pmap.h
index a0ca97f2d5a0..e2f97442c10f 100644
--- a/sys/amd64/include/pmap.h
+++ b/sys/amd64/include/pmap.h
@@ -557,6 +557,7 @@ pmap_pml5e_index(vm_offset_t va)

 struct kva_layout_s {
 	vm_offset_t kva_min;
+	vm_offset_t kva_max;
 	vm_offset_t dmap_low;	/* DMAP_MIN_ADDRESS */
 	vm_offset_t dmap_high;	/* DMAP_MAX_ADDRESS */
 	vm_offset_t lm_low;	/* LARGEMAP_MIN_ADDRESS */
@@ -564,6 +565,12 @@ struct kva_layout_s {
 	vm_offset_t km_low;	/* VM_MIN_KERNEL_ADDRESS */
 	vm_offset_t km_high;	/* VM_MAX_KERNEL_ADDRESS */
 	vm_offset_t rec_pt;
+	vm_offset_t kasan_shadow_low;	/* KASAN_MIN_ADDRESS */
+	vm_offset_t kasan_shadow_high;	/* KASAN_MAX_ADDRESS */
+	vm_offset_t kmsan_shadow_low;	/* KMSAN_SHAD_MIN_ADDRESS */
+	vm_offset_t kmsan_shadow_high;	/* KMSAN_SHAD_MAX_ADDRESS */
+	vm_offset_t kmsan_origin_low;	/* KMSAN_ORIG_MIN_ADDRESS */
+	vm_offset_t kmsan_origin_high;	/* KMSAN_ORIG_MAX_ADDRESS */
 };
 extern struct kva_layout_s kva_layout;
diff --git a/sys/amd64/include/vmm.h b/sys/amd64/include/vmm.h
index a9c73b75213b..0b3daed4f69e 100644
--- a/sys/amd64/include/vmm.h
+++ b/sys/amd64/include/vmm.h
@@ -649,6 +649,8 @@ struct vm_inout_str {
 	int		addrsize;
 	enum vm_reg_name seg_name;
 	struct seg_desc seg_desc;
+	int		cs_d;
+	uint64_t	cs_base;
 };

 enum task_switch_reason {
diff --git a/sys/amd64/include/vmm_dev.h b/sys/amd64/include/vmm_dev.h
index 1f86538ce5f3..441330fd57b8 100644
--- a/sys/amd64/include/vmm_dev.h
+++ b/sys/amd64/include/vmm_dev.h
@@ -29,6 +29,8 @@
 #ifndef _VMM_DEV_H_
 #define	_VMM_DEV_H_

+#include <sys/domainset.h>
+
 #include <machine/vmm.h>
 #include <machine/vmm_snapshot.h>

@@ -52,7 +54,10 @@ struct vm_munmap {
 struct vm_memseg {
 	int		segid;
 	size_t		len;
-	char		name[VM_MAX_SUFFIXLEN + 1];
+	char		name[VM_MAX_SUFFIXLEN + 1];
+	domainset_t	*ds_mask;
+	size_t		ds_mask_size;
+	int		ds_policy;
 };

 struct vm_register {
diff --git a/sys/amd64/include/vmm_instruction_emul.h b/sys/amd64/include/vmm_instruction_emul.h
index d5f0363cfb41..1fb0f97682a7 100644
--- a/sys/amd64/include/vmm_instruction_emul.h
+++ b/sys/amd64/include/vmm_instruction_emul.h
@@ -31,6 +31,31 @@

 #include <sys/mman.h>

+/* struct vie_op.op_type */
+enum {
+	VIE_OP_TYPE_NONE = 0,
+	VIE_OP_TYPE_MOV,
+	VIE_OP_TYPE_MOVSX,
+	VIE_OP_TYPE_MOVZX,
+	VIE_OP_TYPE_AND,
+	VIE_OP_TYPE_OR,
+	VIE_OP_TYPE_SUB,
+	VIE_OP_TYPE_TWO_BYTE,
+	VIE_OP_TYPE_PUSH,
+	VIE_OP_TYPE_CMP,
+	VIE_OP_TYPE_POP,
+	VIE_OP_TYPE_MOVS,
+	VIE_OP_TYPE_GROUP1,
+	VIE_OP_TYPE_STOS,
+	VIE_OP_TYPE_BITTEST,
+	VIE_OP_TYPE_TWOB_GRP15,
+	VIE_OP_TYPE_ADD,
+	VIE_OP_TYPE_TEST,
+	VIE_OP_TYPE_BEXTR,
+	VIE_OP_TYPE_OUTS,
+	VIE_OP_TYPE_LAST
+};
+
 /*
  * Callback functions to read and write memory regions.
  */
diff --git a/sys/amd64/include/vmparam.h b/sys/amd64/include/vmparam.h
index ef352e776af6..d2ac3c6648b2 100644
--- a/sys/amd64/include/vmparam.h
+++ b/sys/amd64/include/vmparam.h
@@ -200,16 +200,14 @@
 #define	VM_MIN_KERNEL_ADDRESS	kva_layout.km_low
 #define	VM_MAX_KERNEL_ADDRESS	kva_layout.km_high

-#define	KASAN_MIN_ADDRESS	KV4ADDR(KASANPML4I, 0, 0, 0)
-#define	KASAN_MAX_ADDRESS	KV4ADDR(KASANPML4I + NKASANPML4E, 0, 0, 0)
+#define	KASAN_MIN_ADDRESS	(kva_layout.kasan_shadow_low)
+#define	KASAN_MAX_ADDRESS	(kva_layout.kasan_shadow_high)

-#define	KMSAN_SHAD_MIN_ADDRESS	KV4ADDR(KMSANSHADPML4I, 0, 0, 0)
-#define	KMSAN_SHAD_MAX_ADDRESS	KV4ADDR(KMSANSHADPML4I + NKMSANSHADPML4E, \
-	0, 0, 0)
+#define	KMSAN_SHAD_MIN_ADDRESS	(kva_layout.kmsan_shadow_low)
+#define	KMSAN_SHAD_MAX_ADDRESS	(kva_layout.kmsan_shadow_high)

-#define	KMSAN_ORIG_MIN_ADDRESS	KV4ADDR(KMSANORIGPML4I, 0, 0, 0)
-#define	KMSAN_ORIG_MAX_ADDRESS	KV4ADDR(KMSANORIGPML4I + NKMSANORIGPML4E, \
-	0, 0, 0)
+#define	KMSAN_ORIG_MIN_ADDRESS	(kva_layout.kmsan_origin_low)
+#define	KMSAN_ORIG_MAX_ADDRESS	(kva_layout.kmsan_origin_high)

 /*
  * Formally kernel mapping starts at KERNBASE, but kernel linker
diff --git a/sys/amd64/vmm/amd/svm.c b/sys/amd64/vmm/amd/svm.c
index 6c16daaa47c2..2fe6a5bc3584 100644
--- a/sys/amd64/vmm/amd/svm.c
+++ b/sys/amd64/vmm/amd/svm.c
@@ -317,6 +317,33 @@ svm_set_tsc_offset(struct svm_vcpu *vcpu, uint64_t offset)
 #define	MSR_AMD7TH_START	0xC0010000UL
 #define	MSR_AMD7TH_END		0xC0011FFFUL

+static void
+svm_get_cs_info(struct vmcb *vmcb, struct vm_guest_paging *paging, int *cs_d,
+    uint64_t *base)
+{
+	struct vmcb_segment seg;
+	int error __diagused;
+
+	error = vmcb_seg(vmcb, VM_REG_GUEST_CS, &seg);
+	KASSERT(error == 0, ("%s: vmcb_seg error %d", __func__, error));
+
+	switch (paging->cpu_mode) {
+	case CPU_MODE_REAL:
+		*base = seg.base;
+		*cs_d = 0;
+		break;
+	case CPU_MODE_PROTECTED:
+	case CPU_MODE_COMPATIBILITY:
+		*cs_d = !!(seg.attrib & VMCB_CS_ATTRIB_D);
+		*base = seg.base;
+		break;
+	default:
+		*base = 0;
+		*cs_d = 0;
+		break;
+	}
+}
+
 /*
  * Get the index and bit position for a MSR in permission bitmap.
  * Two bits are used for each MSR: lower bit for read and higher bit for write.
@@ -735,10 +762,29 @@ svm_inout_str_seginfo(struct svm_vcpu *vcpu, int64_t info1, int in,

 	if (in) {
 		vis->seg_name = VM_REG_GUEST_ES;
-	} else {
-		/* The segment field has standard encoding */
+	} else if (decode_assist()) {
+		/*
+		 * The effective segment number in EXITINFO1[12:10] is populated
+		 * only if the processor has the DecodeAssist capability.
+		 *
+		 * XXX this is not specified explicitly in APMv2 but can be
+		 * verified empirically.
+		 */
 		s = (info1 >> 10) & 0x7;
+
+		/* The segment field has standard encoding */
 		vis->seg_name = vm_segment_name(s);
+	} else {
+		/*
+		 * The segment register needs to be manually decoded by fetching
+		 * the instructions near ip. However, we are unable to fetch it
+		 * while the interrupts are disabled. Therefore, we leave the
+		 * value unset until the generic ins/outs handler runs.
+		 */
+		vis->seg_name = VM_REG_LAST;
+		svm_get_cs_info(vcpu->vmcb, &vis->paging, &vis->cs_d,
+		    &vis->cs_base);
+		return;
 	}

 	error = svm_getdesc(vcpu, vis->seg_name, &vis->seg_desc);
@@ -798,16 +844,6 @@ svm_handle_io(struct svm_vcpu *vcpu, struct vm_exit *vmexit)
 	info1 = ctrl->exitinfo1;
 	inout_string = info1 & BIT(2) ? 1 : 0;

-	/*
-	 * The effective segment number in EXITINFO1[12:10] is populated
-	 * only if the processor has the DecodeAssist capability.
-	 *
-	 * XXX this is not specified explicitly in APMv2 but can be verified
-	 * empirically.
-	 */
-	if (inout_string && !decode_assist())
-		return (UNHANDLED);
-
 	vmexit->exitcode = VM_EXITCODE_INOUT;
 	vmexit->u.inout.in = (info1 & BIT(0)) ? 1 : 0;
 	vmexit->u.inout.string = inout_string;
@@ -825,6 +861,8 @@ svm_handle_io(struct svm_vcpu *vcpu, struct vm_exit *vmexit)
 		vis->index = svm_inout_str_index(regs, vmexit->u.inout.in);
 		vis->count = svm_inout_str_count(regs, vmexit->u.inout.rep);
 		vis->addrsize = svm_inout_str_addrsize(info1);
+		vis->cs_d = 0;
+		vis->cs_base = 0;
 		svm_inout_str_seginfo(vcpu, info1, vmexit->u.inout.in, vis);
 	}
@@ -866,10 +904,9 @@ static void
 svm_handle_inst_emul(struct vmcb *vmcb, uint64_t gpa, struct vm_exit *vmexit)
 {
 	struct vm_guest_paging *paging;
-	struct vmcb_segment seg;
 	struct vmcb_ctrl *ctrl;
 	char *inst_bytes;
-	int error __diagused, inst_len;
+	int inst_len;

 	ctrl = &vmcb->ctrl;
 	paging = &vmexit->u.inst_emul.paging;
@@ -879,29 +916,8 @@ svm_handle_inst_emul(struct vmcb *vmcb, uint64_t gpa, struct vm_exit *vmexit)
 	vmexit->u.inst_emul.gla = VIE_INVALID_GLA;
 	svm_paging_info(vmcb, paging);

-	error = vmcb_seg(vmcb, VM_REG_GUEST_CS, &seg);
-	KASSERT(error == 0, ("%s: vmcb_seg(CS) error %d", __func__, error));
-
-	switch(paging->cpu_mode) {
-	case CPU_MODE_REAL:
-		vmexit->u.inst_emul.cs_base = seg.base;
-		vmexit->u.inst_emul.cs_d = 0;
-		break;
-	case CPU_MODE_PROTECTED:
-	case CPU_MODE_COMPATIBILITY:
-		vmexit->u.inst_emul.cs_base = seg.base;
-
-		/*
-		 * Section 4.8.1 of APM2, Default Operand Size or D bit.
-		 */
-		vmexit->u.inst_emul.cs_d = (seg.attrib & VMCB_CS_ATTRIB_D) ?
-		    1 : 0;
-		break;
-	default:
-		vmexit->u.inst_emul.cs_base = 0;
-		vmexit->u.inst_emul.cs_d = 0;
-		break;
-	}
+	svm_get_cs_info(vmcb, paging, &vmexit->u.inst_emul.cs_d,
+	    &vmexit->u.inst_emul.cs_base);

 	/*
 	 * Copy the instruction bytes into 'vie' if available.
 	 */
diff --git a/sys/amd64/vmm/intel/vmx.c b/sys/amd64/vmm/intel/vmx.c
index 957217ab2258..842281ab862e 100644
--- a/sys/amd64/vmm/intel/vmx.c
+++ b/sys/amd64/vmm/intel/vmx.c
@@ -2659,6 +2659,8 @@ vmx_exit_process(struct vmx *vmx, struct vmx_vcpu *vcpu, struct vm_exit *vmexit)
 			vis->index = inout_str_index(vcpu, in);
 			vis->count = inout_str_count(vcpu, vis->inout.rep);
 			vis->addrsize = inout_str_addrsize(inst_info);
+			vis->cs_d = 0;
+			vis->cs_base = 0;
 			inout_str_seginfo(vcpu, inst_info, in, vis);
 		}
 		SDT_PROBE3(vmm, vmx, exit, inout, vmx, vcpuid, vmexit);
diff --git a/sys/amd64/vmm/vmm_instruction_emul.c b/sys/amd64/vmm/vmm_instruction_emul.c
index c53e32889000..c54b6e6d0074 100644
--- a/sys/amd64/vmm/vmm_instruction_emul.c
+++ b/sys/amd64/vmm/vmm_instruction_emul.c
@@ -65,30 +65,6 @@
 #include <x86/psl.h>
 #include <x86/specialreg.h>

-/* struct vie_op.op_type */
-enum {
-	VIE_OP_TYPE_NONE = 0,
-	VIE_OP_TYPE_MOV,
-	VIE_OP_TYPE_MOVSX,
-	VIE_OP_TYPE_MOVZX,
-	VIE_OP_TYPE_AND,
-	VIE_OP_TYPE_OR,
-	VIE_OP_TYPE_SUB,
-	VIE_OP_TYPE_TWO_BYTE,
-	VIE_OP_TYPE_PUSH,
-	VIE_OP_TYPE_CMP,
-	VIE_OP_TYPE_POP,
-	VIE_OP_TYPE_MOVS,
-	VIE_OP_TYPE_GROUP1,
-	VIE_OP_TYPE_STOS,
-	VIE_OP_TYPE_BITTEST,
-	VIE_OP_TYPE_TWOB_GRP15,
-	VIE_OP_TYPE_ADD,
-	VIE_OP_TYPE_TEST,
-	VIE_OP_TYPE_BEXTR,
-	VIE_OP_TYPE_LAST
-};
-
 /* struct vie_op.op_flags */
 #define	VIE_OP_F_IMM		(1 << 0)  /* 16/32-bit immediate operand */
 #define	VIE_OP_F_IMM8		(1 << 1)  /* 8-bit immediate operand */
@@ -152,6 +128,16 @@ static const struct vie_op one_byte_opcodes[256] = {
 		.op_byte = 0x3B,
 		.op_type = VIE_OP_TYPE_CMP,
 	},
+	[0x6E] = {
+		.op_byte = 0x6E,
+		.op_type = VIE_OP_TYPE_OUTS,
+		.op_flags = VIE_OP_F_NO_MODRM | VIE_OP_F_NO_GLA_VERIFICATION,
+	},
+	[0x6F] = {
+		.op_byte = 0x6F,
+		.op_type = VIE_OP_TYPE_OUTS,
+		.op_flags = VIE_OP_F_NO_MODRM | VIE_OP_F_NO_GLA_VERIFICATION,
+	},
 	[0x88] = {
 		.op_byte = 0x88,
 		.op_type = VIE_OP_TYPE_MOV,
diff --git a/sys/amd64/vmm/vmm_ioport.c b/sys/amd64/vmm/vmm_ioport.c
index fc1ecab9f209..8aab28f5e68e 100644
--- a/sys/amd64/vmm/vmm_ioport.c
+++ b/sys/amd64/vmm/vmm_ioport.c
@@ -145,9 +145,49 @@ emulate_inout_port(struct vcpu *vcpu, struct vm_exit *vmexit, bool *retu)
 }

 static int
+decode_segment(struct vcpu *vcpu, enum vm_reg_name *segment)
+{
+	struct vm_guest_paging *paging;
+	struct vie vie;
+	struct vm_exit *vme;
+	int err;
+	int fault;
+
+	vme = vm_exitinfo(vcpu);
+	paging = &vme->u.inout_str.paging;
+
+	vie_init(&vie, NULL, 0);
+	err = vmm_fetch_instruction(vcpu, paging,
+	    vme->rip + vme->u.inout_str.cs_base, VIE_INST_SIZE, &vie, &fault);
+	if (err || fault)
+		return (err);
+
+	err = vmm_decode_instruction(vcpu, VIE_INVALID_GLA, paging->cpu_mode,
+	    vme->u.inout_str.cs_d, &vie);
+
+	if (err || vie.op.op_type != VIE_OP_TYPE_OUTS)
+		return (EINVAL);
+	if (vie.segment_override)
+		*segment = vie.segment_register;
+	else
+		*segment = VM_REG_GUEST_DS;
+
+	return (0);
+}
+
+static int
 emulate_inout_str(struct vcpu *vcpu, struct vm_exit *vmexit, bool *retu)
 {
+	int err;
+
 	*retu = true;
+	if (vmexit->u.inout_str.seg_name == VM_REG_LAST) {
+		err = decode_segment(vcpu, &vmexit->u.inout_str.seg_name);
+		if (err)
+			return (err);
+		return (vm_get_seg_desc(vcpu, vmexit->u.inout_str.seg_name,
+		    &vmexit->u.inout_str.seg_desc));
+	}
 	return (0);	/* Return to userspace to finish emulation */
 }
diff --git a/sys/arm/allwinner/aw_mmc.c b/sys/arm/allwinner/aw_mmc.c
index 6bebf5e5fb5e..a8add957dc74 100644
--- a/sys/arm/allwinner/aw_mmc.c
+++ b/sys/arm/allwinner/aw_mmc.c
@@ -84,21 +84,26 @@

 struct aw_mmc_conf {
 	uint32_t	dma_xferlen;
+	uint32_t	dma_desc_shift;
 	bool		mask_data0;
 	bool		can_calibrate;
 	bool		new_timing;
+	bool		zero_is_skip;
 };

 static const struct aw_mmc_conf a10_mmc_conf = {
 	.dma_xferlen = 0x2000,
+	.dma_desc_shift = 0,
 };

 static const struct aw_mmc_conf a13_mmc_conf = {
 	.dma_xferlen = 0x10000,
+	.dma_desc_shift = 0,
 };

 static const struct aw_mmc_conf a64_mmc_conf = {
 	.dma_xferlen = 0x10000,
+	.dma_desc_shift = 0,
 	.mask_data0 = true,
 	.can_calibrate = true,
 	.new_timing = true,
@@ -106,13 +111,24 @@ static const struct aw_mmc_conf a64_mmc_conf = {

 static const struct aw_mmc_conf a64_emmc_conf = {
 	.dma_xferlen = 0x2000,
+	.dma_desc_shift = 0,
 	.can_calibrate = true,
 };

+static const struct aw_mmc_conf d1_mmc_conf = {
+	.dma_xferlen = 0x1000,
+	.dma_desc_shift = 2,
+	.mask_data0 = true,
+	.can_calibrate = true,
+	.new_timing = true,
+	.zero_is_skip = true,
+};
+
 static struct ofw_compat_data compat_data[] = {
 	{"allwinner,sun4i-a10-mmc", (uintptr_t)&a10_mmc_conf},
 	{"allwinner,sun5i-a13-mmc", (uintptr_t)&a13_mmc_conf},
 	{"allwinner,sun7i-a20-mmc", (uintptr_t)&a13_mmc_conf},
+	{"allwinner,sun20i-d1-mmc", (uintptr_t)&d1_mmc_conf},
 	{"allwinner,sun50i-a64-mmc", (uintptr_t)&a64_mmc_conf},
 	{"allwinner,sun50i-a64-emmc", (uintptr_t)&a64_emmc_conf},
 	{NULL, 0}
@@ -607,16 +623,18 @@ aw_dma_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int err)
 	dma_desc = sc->aw_dma_desc;
 	for (i = 0; i < nsegs; i++) {
-		if (segs[i].ds_len == sc->aw_mmc_conf->dma_xferlen)
+		if ((segs[i].ds_len == sc->aw_mmc_conf->dma_xferlen) &&
+		    !sc->aw_mmc_conf->zero_is_skip)
 			dma_desc[i].buf_size = 0; /* Size of 0 indicates max len */
 		else
 			dma_desc[i].buf_size = segs[i].ds_len;
-		dma_desc[i].buf_addr = segs[i].ds_addr;
+		dma_desc[i].buf_addr = segs[i].ds_addr >>
+		    sc->aw_mmc_conf->dma_desc_shift;
 		dma_desc[i].config = AW_MMC_DMA_CONFIG_CH |
-		    AW_MMC_DMA_CONFIG_OWN | AW_MMC_DMA_CONFIG_DIC;
-
-		dma_desc[i].next = sc->aw_dma_desc_phys +
-		    ((i + 1) * sizeof(struct aw_mmc_dma_desc));
+		    AW_MMC_DMA_CONFIG_OWN | AW_MMC_DMA_CONFIG_DIC;
+		dma_desc[i].next = (sc->aw_dma_desc_phys +
+		    (i + 1) * sizeof(struct aw_mmc_dma_desc)) >>
+		    sc->aw_mmc_conf->dma_desc_shift;
 	}

 	dma_desc[0].config |= AW_MMC_DMA_CONFIG_FD;
@@ -678,7 +696,8 @@ aw_mmc_prepare_dma(struct aw_mmc_softc *sc)
 	AW_MMC_WRITE_4(sc, AW_MMC_IDIE, val);

 	/* Set DMA descriptor list address */
-	AW_MMC_WRITE_4(sc, AW_MMC_DLBA, sc->aw_dma_desc_phys);
+	AW_MMC_WRITE_4(sc, AW_MMC_DLBA, sc->aw_dma_desc_phys >>
+	    sc->aw_mmc_conf->dma_desc_shift);

 	/* FIFO trigger level */
 	AW_MMC_WRITE_4(sc, AW_MMC_FWLR, AW_MMC_DMA_FTRGLEVEL);
diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c
index 459cc8ebe505..2152f7fcc1c6 100644
--- a/sys/arm64/arm64/pmap.c
+++ b/sys/arm64/arm64/pmap.c
@@ -8501,18 +8501,20 @@ pmap_demote_l2_locked(pmap_t pmap, pt_entry_t *l2, vm_offset_t va,

 		/*
 		 * Invalidate the 2MB page mapping and return "failure" if the
-		 * mapping was never accessed.
+		 * mapping was never accessed and not wired.
 		 */
 		if ((oldl2 & ATTR_AF) == 0) {
-			KASSERT((oldl2 & ATTR_SW_WIRED) == 0,
-			    ("pmap_demote_l2: a wired mapping is missing ATTR_AF"));
-			pmap_demote_l2_abort(pmap, va, l2, lockp);
-			CTR2(KTR_PMAP, "pmap_demote_l2: failure for va %#lx in pmap %p",
-			    va, pmap);
-			goto fail;
-		}
-
-		if ((ml3 = pmap_remove_pt_page(pmap, va)) == NULL) {
+			if ((oldl2 & ATTR_SW_WIRED) == 0) {
+				pmap_demote_l2_abort(pmap, va, l2, lockp);
+				CTR2(KTR_PMAP,
+				    "pmap_demote_l2: failure for va %#lx in pmap %p",
+				    va, pmap);
+				goto fail;
+			}
+			ml3 = pmap_remove_pt_page(pmap, va);
+			/* Fill the PTP with L3Es that have ATTR_AF cleared.
+			 */
+			ml3->valid = 0;
+		} else if ((ml3 = pmap_remove_pt_page(pmap, va)) == NULL) {
 			KASSERT((oldl2 & ATTR_SW_WIRED) == 0,
 			    ("pmap_demote_l2: page table page for a wired mapping"
 			    " is missing"));
@@ -8568,7 +8570,7 @@ pmap_demote_l2_locked(pmap_t pmap, pt_entry_t *l2, vm_offset_t va,
 	/*
 	 * If the PTP is not leftover from an earlier promotion or it does not
 	 * have ATTR_AF set in every L3E, then fill it.  The new L3Es will all
-	 * have ATTR_AF set.
+	 * have ATTR_AF set, unless this is a wired mapping with ATTR_AF clear.
 	 *
 	 * When pmap_update_entry() clears the old L2 mapping, it (indirectly)
 	 * performs a dsb().  That dsb() ensures that the stores for filling
diff --git a/sys/arm64/include/vmm_dev.h b/sys/arm64/include/vmm_dev.h
index 938bea47c7f8..219f1116c728 100644
--- a/sys/arm64/include/vmm_dev.h
+++ b/sys/arm64/include/vmm_dev.h
@@ -27,6 +27,8 @@
 #ifndef _VMM_DEV_H_
 #define	_VMM_DEV_H_

+#include <sys/domainset.h>
+
 #include <machine/vmm.h>

 struct vm_memmap {
@@ -49,6 +51,9 @@ struct vm_memseg {
 	int		segid;
 	size_t		len;
 	char		name[VM_MAX_SUFFIXLEN + 1];
+	domainset_t	*ds_mask;
+	size_t		ds_mask_size;
+	int		ds_policy;
 };

 struct vm_register {
diff --git a/sys/cam/cam_xpt.c b/sys/cam/cam_xpt.c
index 2ec736e7f4ac..cae29226d13c 100644
--- a/sys/cam/cam_xpt.c
+++ b/sys/cam/cam_xpt.c
@@ -2515,6 +2515,15 @@ xpt_action(union ccb *start_ccb)
 	    ("xpt_action: func %#x %s\n", start_ccb->ccb_h.func_code,
 		xpt_action_name(start_ccb->ccb_h.func_code)));

+	/*
+	 * Either it isn't queued, or it has a real priority. There are still
+	 * too many places that reuse CCBs with a real priority to do immediate
+	 * queries to do the other side of this assert.
+	 */
+	KASSERT((start_ccb->ccb_h.func_code & XPT_FC_QUEUED) == 0 ||
+	    start_ccb->ccb_h.pinfo.priority != CAM_PRIORITY_NONE,
+	    ("%s: queued ccb and CAM_PRIORITY_NONE illegal.", __func__));
+
 	start_ccb->ccb_h.status = CAM_REQ_INPROG;
 	(*(start_ccb->ccb_h.path->bus->xport->ops->action))(start_ccb);
 }
diff --git a/sys/cam/mmc/mmc_da.c b/sys/cam/mmc/mmc_da.c
index 7f8bf3516804..322141a72707 100644
--- a/sys/cam/mmc/mmc_da.c
+++ b/sys/cam/mmc/mmc_da.c
@@ -1081,7 +1081,7 @@ sdda_start_init_task(void *context, int pending)
 	CAM_DEBUG(periph->path, CAM_DEBUG_TRACE, ("sdda_start_init_task\n"));
 	new_ccb = xpt_alloc_ccb();
 	xpt_setup_ccb(&new_ccb->ccb_h, periph->path,
-	    CAM_PRIORITY_NONE);
+	    CAM_PRIORITY_NORMAL);

 	cam_periph_lock(periph);
 	cam_periph_hold(periph, PRIBIO|PCATCH);
diff --git a/sys/cam/mmc/mmc_xpt.c b/sys/cam/mmc/mmc_xpt.c
index 4fce03004994..f5f66f5214a8 100644
--- a/sys/cam/mmc/mmc_xpt.c
+++ b/sys/cam/mmc/mmc_xpt.c
@@ -610,7 +610,6 @@ mmcprobe_start(struct cam_periph *periph, union ccb *start_ccb)
 		CAM_DEBUG(start_ccb->ccb_h.path, CAM_DEBUG_PROBE,
 		    ("Start with PROBE_RESET\n"));
 		/* FALLTHROUGH */
 	case PROBE_IDENTIFY:
-		xpt_path_inq(&start_ccb->cpi, periph->path);
 		CAM_DEBUG(start_ccb->ccb_h.path, CAM_DEBUG_PROBE,
 		    ("Start with PROBE_IDENTIFY\n"));
 		init_standard_ccb(start_ccb, XPT_MMC_GET_TRAN_SETTINGS);
 		break;
diff --git a/sys/cddl/dev/sdt/sdt.c b/sys/cddl/dev/sdt/sdt.c
index a8da618204af..0a9059104671 100644
--- a/sys/cddl/dev/sdt/sdt.c
+++ b/sys/cddl/dev/sdt/sdt.c
@@ -72,6 +72,7 @@ static void	sdt_load(void);
 static int	sdt_unload(void);
 static void	sdt_create_provider(struct sdt_provider *);
 static void	sdt_create_probe(struct sdt_probe *);
+static void	sdt_init_probe(struct sdt_probe *, linker_file_t);
 static void	sdt_kld_load(void *, struct linker_file *);
 static void	sdt_kld_unload_try(void *, struct linker_file *, int *);
@@ -204,6 +205,14 @@ sdt_create_probe(struct sdt_probe *probe)
 	(void)dtrace_probe_create(prov->id, mod, func, name, aframes, probe);
 }

+static void
+sdt_init_probe(struct sdt_probe *probe, linker_file_t lf)
+{
+	probe->sdtp_lf = lf;
+	TAILQ_INIT(&probe->argtype_list);
+	STAILQ_INIT(&probe->tracepoint_list);
+}
+
 /*
  * Probes are created through the SDT module load/unload hook, so this function
  * has nothing to do.  It only exists because the DTrace provider framework
@@ -361,12 +370,19 @@ static void
 sdt_kld_load_providers(struct linker_file *lf)
 {
 	struct sdt_provider **prov, **begin, **end;
+	struct sdt_probe **p_begin, **p_end;

 	if (linker_file_lookup_set(lf, "sdt_providers_set", &begin, &end,
 	    NULL) == 0) {
 		for (prov = begin; prov < end; prov++)
 			sdt_create_provider(*prov);
 	}
+
+	if (linker_file_lookup_set(lf, "sdt_probes_set", &p_begin, &p_end,
+	    NULL) == 0) {
+		for (struct sdt_probe **probe = p_begin; probe < p_end; probe++)
+			sdt_init_probe(*probe, lf);
+	}
 }

 static void
@@ -378,13 +394,8 @@ sdt_kld_load_probes(struct linker_file *lf)

 	if (linker_file_lookup_set(lf, "sdt_probes_set", &p_begin, &p_end,
 	    NULL) == 0) {
-		for (struct sdt_probe **probe = p_begin; probe < p_end;
-		    probe++) {
-			(*probe)->sdtp_lf = lf;
+		for (struct sdt_probe **probe = p_begin; probe < p_end; probe++)
 			sdt_create_probe(*probe);
-			TAILQ_INIT(&(*probe)->argtype_list);
-			STAILQ_INIT(&(*probe)->tracepoint_list);
-		}
 	}

 	if (linker_file_lookup_set(lf, "sdt_argtypes_set", &a_begin, &a_end,
diff --git a/sys/compat/linprocfs/linprocfs.c b/sys/compat/linprocfs/linprocfs.c
index cfb054235489..1c6d64d6b8bc 100644
--- a/sys/compat/linprocfs/linprocfs.c
+++ b/sys/compat/linprocfs/linprocfs.c
@@ -1911,7 +1911,7 @@ linprocfs_doproclimits(PFS_FILL_ARGS)
 			    "kern.sigqueue.max_pending_per_proc",
 			    &res, &size, 0, 0, 0, 0);
 			if (error != 0)
-				goto out;
+				continue;
 			rl.rlim_cur = res;
 			rl.rlim_max = res;
 			break;
@@ -1919,7 +1919,7 @@ linprocfs_doproclimits(PFS_FILL_ARGS)
 			error = kernel_sysctlbyname(td, "kern.ipc.msgmnb",
 			    &res, &size, 0, 0, 0, 0);
 			if (error != 0)
-				goto out;
+				continue;
 			rl.rlim_cur = res;
 			rl.rlim_max = res;
 			break;
@@ -1941,9 +1941,9 @@ linprocfs_doproclimits(PFS_FILL_ARGS)
 		    li->desc, (unsigned long long)rl.rlim_cur,
 		    (unsigned long long)rl.rlim_max, li->unit);
 	}
-out:
+
 	lim_free(limp);
-	return (error);
+	return (0);
 }

 /*
diff --git a/sys/compat/linux/linux_file.c b/sys/compat/linux/linux_file.c
index 86834a7ecea8..a4be5313aa96 100644
--- a/sys/compat/linux/linux_file.c
+++ b/sys/compat/linux/linux_file.c
@@ -1792,7 +1792,7 @@ linux_memfd_create(struct thread *td, struct linux_memfd_create_args *args)
 	if ((flags & MFD_ALLOW_SEALING) != 0)
 		shmflags |= SHM_ALLOW_SEALING;
 	return (kern_shm_open2(td, SHM_ANON, oflags, 0, shmflags, NULL,
-	    memfd_name));
+	    memfd_name, NULL));
 }

 int
diff --git a/sys/conf/files b/sys/conf/files
index dd0d390962f2..b7c19fae0b8e 100644
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -3768,6 +3768,7 @@ gnu/gcov/gcov_subr.c		optional gcov
 kern/bus_if.m			standard
 kern/clock_if.m			standard
+kern/coredump_vnode.c		standard
 kern/cpufreq_if.m		standard
 kern/device_if.m		standard
 kern/imgact_binmisc.c		optional imgact_binmisc
@@ -3856,6 +3857,7 @@ kern/kern_time.c		standard
 kern/kern_timeout.c		standard
 kern/kern_tslog.c		optional tslog
 kern/kern_ubsan.c		optional kubsan
+kern/kern_ucoredump.c		standard
 kern/kern_umtx.c		standard
 kern/kern_uuid.c		standard
 kern/kern_vnodedumper.c		standard
diff --git a/sys/conf/files.arm64 b/sys/conf/files.arm64
index 901da27e63f2..641001efab5e 100644
--- a/sys/conf/files.arm64
+++ b/sys/conf/files.arm64
@@ -368,6 +368,10 @@ dev/ice/irdma_di_if.m		optional ice pci \
 	compile-with "${NORMAL_M} -I$S/dev/ice"
 dev/ice/ice_ddp_common.c	optional ice pci \
 	compile-with "${NORMAL_C} -I$S/dev/ice"
+dev/ice/ice_iov.c		optional ice pci pci_iov \
+	compile-with "${NORMAL_C} -I$S/dev/ice"
+dev/ice/ice_vf_mbx.c		optional ice pci pci_iov \
+	compile-with "${NORMAL_C} -I$S/dev/ice"
 ice_ddp.c			optional ice_ddp \
 	compile-with "${AWK} -f $S/tools/fw_stub.awk ice_ddp.fw:ice_ddp:0x01032900 -mice_ddp -c${.TARGET}" \
 	no-ctfconvert no-implicit-rule before-depend local \
diff --git a/sys/conf/files.x86 b/sys/conf/files.x86
index df206b314b38..9976e9cfec5d 100644
--- a/sys/conf/files.x86
+++ b/sys/conf/files.x86
@@ -62,6 +62,7 @@ dev/acpi_support/acpi_wmi_if.m	standard
 dev/agp/agp_amd64.c		optional agp
 dev/agp/agp_i810.c		optional agp
 dev/agp/agp_via.c		optional agp
+dev/amdsmu/amdsmu.c		optional amdsmu pci
 dev/amdsbwd/amdsbwd.c		optional amdsbwd
 dev/amdsmn/amdsmn.c		optional amdsmn | amdtemp
 dev/amdtemp/amdtemp.c		optional amdtemp
diff --git a/sys/dev/amdsmu/amdsmu.c b/sys/dev/amdsmu/amdsmu.c
new file mode 100644
index 000000000000..416f875c6176
--- /dev/null
+++ b/sys/dev/amdsmu/amdsmu.c
@@ -0,0 +1,466 @@
+/*
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2025 The FreeBSD Foundation
+ *
+ * This software was developed by Aymeric Wibo <obiwac@freebsd.org>
+ * under sponsorship from the FreeBSD Foundation.
+ */
+
+#include <sys/param.h>
+#include <sys/bus.h>
+#include <sys/kernel.h>
+#include <sys/module.h>
+#include <sys/rman.h>
+#include <sys/sysctl.h>
+
+#include <dev/pci/pcivar.h>
+#include <dev/amdsmu/amdsmu.h>
+
+static bool
+amdsmu_match(device_t dev, const struct amdsmu_product **product_out)
+{
+	const uint16_t vendorid = pci_get_vendor(dev);
+	const uint16_t deviceid = pci_get_device(dev);
+
+	for (size_t i = 0; i < nitems(amdsmu_products); i++) {
+		const struct amdsmu_product *prod = &amdsmu_products[i];
+
+		if (vendorid == prod->amdsmu_vendorid &&
+		    deviceid == prod->amdsmu_deviceid) {
+			if (product_out != NULL)
+				*product_out = prod;
+			return (true);
+		}
+	}
+	return (false);
+}
+
+static void
+amdsmu_identify(driver_t *driver, device_t parent)
+{
+	if (device_find_child(parent, "amdsmu", -1) != NULL)
+		return;
+
+	if (amdsmu_match(parent, NULL)) {
+		if (device_add_child(parent, "amdsmu", -1) == NULL)
+			device_printf(parent, "add amdsmu child failed\n");
+	}
+}
+
+static int
+amdsmu_probe(device_t dev)
+{
+	if (resource_disabled("amdsmu", 0))
+		return (ENXIO);
+	if (!amdsmu_match(device_get_parent(dev), NULL))
+		return (ENXIO);
+	device_set_descf(dev, "AMD System Management Unit");
+
+	return (BUS_PROBE_GENERIC);
+}
+
+static enum amdsmu_res
+amdsmu_wait_res(device_t dev)
+{
+	struct amdsmu_softc *sc = device_get_softc(dev);
+	enum amdsmu_res res;
+
+	/*
+	 * The SMU has a response ready for us when the response register is
+	 * set.  Otherwise, we must wait.
+	 */
+	for (size_t i = 0; i < SMU_RES_READ_MAX; i++) {
+		res = amdsmu_read4(sc, SMU_REG_RESPONSE);
+		if (res != SMU_RES_WAIT)
+			return (res);
+		pause_sbt("amdsmu", ustosbt(SMU_RES_READ_PERIOD_US), 0,
+		    C_HARDCLOCK);
+	}
+	device_printf(dev, "timed out waiting for response from SMU\n");
+	return (SMU_RES_WAIT);
+}
+
+static int
+amdsmu_cmd(device_t dev, enum amdsmu_msg msg, uint32_t arg, uint32_t *ret)
+{
+	struct amdsmu_softc *sc = device_get_softc(dev);
+	enum amdsmu_res res;
+
+	/* Wait for SMU to be ready. */
+	if (amdsmu_wait_res(dev) == SMU_RES_WAIT)
+		return (ETIMEDOUT);
+
+	/* Clear previous response.
+	 */
+	amdsmu_write4(sc, SMU_REG_RESPONSE, SMU_RES_WAIT);
+
+	/* Write out command to registers. */
+	amdsmu_write4(sc, SMU_REG_MESSAGE, msg);
+	amdsmu_write4(sc, SMU_REG_ARGUMENT, arg);
+
+	/* Wait for SMU response and handle it. */
+	res = amdsmu_wait_res(dev);
+
+	switch (res) {
+	case SMU_RES_WAIT:
+		return (ETIMEDOUT);
+	case SMU_RES_OK:
+		if (ret != NULL)
+			*ret = amdsmu_read4(sc, SMU_REG_ARGUMENT);
+		return (0);
+	case SMU_RES_REJECT_BUSY:
+		device_printf(dev, "SMU is busy\n");
+		return (EBUSY);
+	case SMU_RES_REJECT_PREREQ:
+	case SMU_RES_UNKNOWN:
+	case SMU_RES_FAILED:
+		device_printf(dev, "SMU error: %02x\n", res);
+		return (EIO);
+	}
+
+	return (EINVAL);
+}
+
+static int
+amdsmu_get_vers(device_t dev)
+{
+	int err;
+	uint32_t smu_vers;
+	struct amdsmu_softc *sc = device_get_softc(dev);
+
+	err = amdsmu_cmd(dev, SMU_MSG_GETSMUVERSION, 0, &smu_vers);
+	if (err != 0) {
+		device_printf(dev, "failed to get SMU version\n");
+		return (err);
+	}
+	sc->smu_program = (smu_vers >> 24) & 0xFF;
+	sc->smu_maj = (smu_vers >> 16) & 0xFF;
+	sc->smu_min = (smu_vers >> 8) & 0xFF;
+	sc->smu_rev = smu_vers & 0xFF;
+	device_printf(dev, "SMU version: %d.%d.%d (program %d)\n",
+	    sc->smu_maj, sc->smu_min, sc->smu_rev, sc->smu_program);
+
+	return (0);
+}
+
+static int
+amdsmu_get_ip_blocks(device_t dev)
+{
+	struct amdsmu_softc *sc = device_get_softc(dev);
+	const uint16_t deviceid = pci_get_device(dev);
+	int err;
+	struct amdsmu_metrics *m = &sc->metrics;
+	bool active;
+	char sysctl_descr[32];
+
+	/* Get IP block count. */
+	switch (deviceid) {
+	case PCI_DEVICEID_AMD_REMBRANDT_ROOT:
+		sc->ip_block_count = 12;
+		break;
+	case PCI_DEVICEID_AMD_PHOENIX_ROOT:
+		sc->ip_block_count = 21;
+		break;
+	/* TODO How many IP blocks does Strix Point (and the others) have? */
+	case PCI_DEVICEID_AMD_STRIX_POINT_ROOT:
+	default:
+		sc->ip_block_count = nitems(amdsmu_ip_blocks_names);
+	}
+	KASSERT(sc->ip_block_count <= nitems(amdsmu_ip_blocks_names),
+	    ("too many IP blocks for array"));
+
+	/* Get and print out IP blocks. */
+	err = amdsmu_cmd(dev, SMU_MSG_GET_SUP_CONSTRAINTS, 0,
+	    &sc->active_ip_blocks);
+	if (err != 0) {
+		device_printf(dev, "failed to get IP blocks\n");
+		return (err);
+	}
+	device_printf(dev, "Active IP blocks: ");
+	for (size_t i = 0; i < sc->ip_block_count; i++) {
+		active = (sc->active_ip_blocks & (1 << i)) != 0;
+		sc->ip_blocks_active[i] = active;
+		if (!active)
+			continue;
+		printf("%s%s", amdsmu_ip_blocks_names[i],
+		    i + 1 < sc->ip_block_count ? " " : "\n");
+	}
+
+	/* Create a sysctl node for IP blocks. */
+	sc->ip_blocks_sysctlnode = SYSCTL_ADD_NODE(sc->sysctlctx,
+	    SYSCTL_CHILDREN(sc->sysctlnode), OID_AUTO, "ip_blocks",
+	    CTLFLAG_RD, NULL, "SMU metrics");
+	if (sc->ip_blocks_sysctlnode == NULL) {
+		device_printf(dev, "could not add sysctl node for IP blocks\n");
+		return (ENOMEM);
+	}
+
+	/* Create a sysctl node for each IP block. */
+	for (size_t i = 0; i < sc->ip_block_count; i++) {
+		/* Create the sysctl node itself for the IP block. */
+		snprintf(sysctl_descr, sizeof sysctl_descr,
+		    "Metrics about the %s AMD IP block",
+		    amdsmu_ip_blocks_names[i]);
+		sc->ip_block_sysctlnodes[i] = SYSCTL_ADD_NODE(sc->sysctlctx,
+		    SYSCTL_CHILDREN(sc->ip_blocks_sysctlnode), OID_AUTO,
+		    amdsmu_ip_blocks_names[i], CTLFLAG_RD, NULL, sysctl_descr);
+		if (sc->ip_block_sysctlnodes[i] == NULL) {
+			device_printf(dev,
+			    "could not add sysctl node for \"%s\"\n", sysctl_descr);
+			continue;
+		}
+		/*
+		 * Create sysctls for if the IP block is currently active, last
+		 * active time, and total active time.
+		 */
+		SYSCTL_ADD_BOOL(sc->sysctlctx,
+		    SYSCTL_CHILDREN(sc->ip_block_sysctlnodes[i]), OID_AUTO,
+		    "active", CTLFLAG_RD, &sc->ip_blocks_active[i], 0,
+		    "IP block is currently active");
+		SYSCTL_ADD_U64(sc->sysctlctx,
+		    SYSCTL_CHILDREN(sc->ip_block_sysctlnodes[i]), OID_AUTO,
+		    "last_time", CTLFLAG_RD, &m->ip_block_last_active_time[i],
+		    0, "How long the IP block was active for during the last"
+		    " sleep (us)");
+#ifdef IP_BLOCK_TOTAL_ACTIVE_TIME
+		SYSCTL_ADD_U64(sc->sysctlctx,
+		    SYSCTL_CHILDREN(sc->ip_block_sysctlnodes[i]), OID_AUTO,
+		    "total_time", CTLFLAG_RD, &m->ip_block_total_active_time[i],
+		    0, "How long the IP block was active for during sleep in"
+		    " total (us)");
+#endif
+	}
+	return (0);
+}
+
+static int
+amdsmu_init_metrics(device_t dev)
+{
+	struct amdsmu_softc *sc = device_get_softc(dev);
+	int err;
+	uint32_t metrics_addr_lo, metrics_addr_hi;
+	uint64_t metrics_addr;
+
+	/* Get physical address of logging buffer. */
+	err = amdsmu_cmd(dev, SMU_MSG_LOG_GETDRAM_ADDR_LO, 0, &metrics_addr_lo);
+	if (err != 0)
+		return (err);
+	err = amdsmu_cmd(dev, SMU_MSG_LOG_GETDRAM_ADDR_HI, 0, &metrics_addr_hi);
+	if (err != 0)
+		return (err);
+	metrics_addr = ((uint64_t) metrics_addr_hi << 32) | metrics_addr_lo;
+
+	/* Map memory of logging buffer. */
+	err = bus_space_map(sc->bus_tag, metrics_addr,
+	    sizeof(struct amdsmu_metrics), 0, &sc->metrics_space);
+	if (err != 0) {
+		device_printf(dev, "could not map bus space for SMU metrics\n");
+		return (err);
+	}
+
+	/* Start logging for metrics. */
+	amdsmu_cmd(dev, SMU_MSG_LOG_RESET, 0, NULL);
+	amdsmu_cmd(dev, SMU_MSG_LOG_START, 0, NULL);
+	return (0);
+}
+
+static int
+amdsmu_dump_metrics(device_t dev)
+{
+	struct amdsmu_softc *sc = device_get_softc(dev);
+	int err;
+
+	err = amdsmu_cmd(dev, SMU_MSG_LOG_DUMP_DATA, 0, NULL);
+	if (err != 0) {
+		device_printf(dev, "failed to dump metrics\n");
+		return (err);
+	}
+	bus_space_read_region_4(sc->bus_tag, sc->metrics_space, 0,
+	    (uint32_t *)&sc->metrics, sizeof(sc->metrics) / sizeof(uint32_t));
+
+	return (0);
+}
+
+static void
+amdsmu_fetch_idlemask(device_t dev)
+{
+	struct amdsmu_softc *sc = device_get_softc(dev);
+
+	sc->idlemask = amdsmu_read4(sc, SMU_REG_IDLEMASK);
+}
+
+static int
+amdsmu_attach(device_t dev)
+{
+	struct amdsmu_softc *sc = device_get_softc(dev);
+	int err;
+	uint32_t physbase_addr_lo, physbase_addr_hi;
+	uint64_t physbase_addr;
+	int rid = 0;
+	struct sysctl_oid *node;
+
+	/*
+	 * Find physical base address for SMU.
+	 * XXX I am a little confused about the masks here.  I'm just copying
+	 * what Linux does in the amd-pmc driver to get the base address.
+	 */
+	pci_write_config(dev, SMU_INDEX_ADDRESS, SMU_PHYSBASE_ADDR_LO, 4);
+	physbase_addr_lo = pci_read_config(dev, SMU_INDEX_DATA, 4) & 0xFFF00000;
+
+	pci_write_config(dev, SMU_INDEX_ADDRESS, SMU_PHYSBASE_ADDR_HI, 4);
+	physbase_addr_hi = pci_read_config(dev, SMU_INDEX_DATA, 4) & 0x0000FFFF;
+
+	physbase_addr = (uint64_t)physbase_addr_hi << 32 | physbase_addr_lo;
+
+	/* Map memory for SMU and its registers.
+	 */
+	sc->res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE);
+	if (sc->res == NULL) {
+		device_printf(dev, "could not allocate resource\n");
+		return (ENXIO);
+	}
+
+	sc->bus_tag = rman_get_bustag(sc->res);
+
+	if (bus_space_map(sc->bus_tag, physbase_addr,
+	    SMU_MEM_SIZE, 0, &sc->smu_space) != 0) {
+		device_printf(dev, "could not map bus space for SMU\n");
+		err = ENXIO;
+		goto err_smu_space;
+	}
+	if (bus_space_map(sc->bus_tag, physbase_addr + SMU_REG_SPACE_OFF,
+	    SMU_MEM_SIZE, 0, &sc->reg_space) != 0) {
+		device_printf(dev, "could not map bus space for SMU regs\n");
+		err = ENXIO;
+		goto err_reg_space;
+	}
+
+	/* sysctl stuff. */
+	sc->sysctlctx = device_get_sysctl_ctx(dev);
+	sc->sysctlnode = device_get_sysctl_tree(dev);
+
+	/* Get version & add sysctls. */
+	if ((err = amdsmu_get_vers(dev)) != 0)
+		goto err_dump;
+
+	SYSCTL_ADD_U8(sc->sysctlctx, SYSCTL_CHILDREN(sc->sysctlnode), OID_AUTO,
+	    "program", CTLFLAG_RD, &sc->smu_program, 0, "SMU program number");
+	SYSCTL_ADD_U8(sc->sysctlctx, SYSCTL_CHILDREN(sc->sysctlnode), OID_AUTO,
+	    "version_major", CTLFLAG_RD, &sc->smu_maj, 0,
+	    "SMU firmware major version number");
+	SYSCTL_ADD_U8(sc->sysctlctx, SYSCTL_CHILDREN(sc->sysctlnode), OID_AUTO,
+	    "version_minor", CTLFLAG_RD, &sc->smu_min, 0,
+	    "SMU firmware minor version number");
+	SYSCTL_ADD_U8(sc->sysctlctx, SYSCTL_CHILDREN(sc->sysctlnode), OID_AUTO,
+	    "version_revision", CTLFLAG_RD, &sc->smu_rev, 0,
+	    "SMU firmware revision number");
+
+	/* Set up for getting metrics & add sysctls. */
+	if ((err = amdsmu_init_metrics(dev)) != 0)
+		goto err_dump;
+	if ((err = amdsmu_dump_metrics(dev)) != 0)
+		goto err_dump;
+
+	node = SYSCTL_ADD_NODE(sc->sysctlctx, SYSCTL_CHILDREN(sc->sysctlnode),
+	    OID_AUTO, "metrics", CTLFLAG_RD, NULL, "SMU metrics");
+	if (node == NULL) {
+		device_printf(dev, "could not add sysctl node for metrics\n");
+		err = ENOMEM;
+		goto err_dump;
+	}
+
+	SYSCTL_ADD_U32(sc->sysctlctx, SYSCTL_CHILDREN(node), OID_AUTO,
+	    "table_version", CTLFLAG_RD, &sc->metrics.table_version, 0,
+	    "SMU metrics table version");
+	SYSCTL_ADD_U32(sc->sysctlctx, SYSCTL_CHILDREN(node), OID_AUTO,
+	    "hint_count", CTLFLAG_RD, &sc->metrics.hint_count, 0,
+	    "How many times the sleep hint was set");
+	SYSCTL_ADD_U32(sc->sysctlctx, SYSCTL_CHILDREN(node), OID_AUTO,
+	    "s0i3_last_entry_status", CTLFLAG_RD,
+	    &sc->metrics.s0i3_last_entry_status, 0,
+	    "1 if last S0i3 entry was successful");
+	SYSCTL_ADD_U32(sc->sysctlctx, SYSCTL_CHILDREN(node), OID_AUTO,
+	    "time_last_in_s0i2", CTLFLAG_RD, &sc->metrics.time_last_in_s0i2, 0,
+	    "Time spent in S0i2 during last sleep (us)");
+	SYSCTL_ADD_U64(sc->sysctlctx, SYSCTL_CHILDREN(node), OID_AUTO,
+	    "time_last_entering_s0i3", CTLFLAG_RD,
+	    &sc->metrics.time_last_entering_s0i3, 0,
+	    "Time spent entering S0i3 during last sleep (us)");
+	SYSCTL_ADD_U64(sc->sysctlctx, SYSCTL_CHILDREN(node), OID_AUTO,
+	    "total_time_entering_s0i3", CTLFLAG_RD,
+	    &sc->metrics.total_time_entering_s0i3, 0,
+	    "Total time spent entering S0i3 (us)");
+	SYSCTL_ADD_U64(sc->sysctlctx, SYSCTL_CHILDREN(node), OID_AUTO,
+	    "time_last_resuming", CTLFLAG_RD, &sc->metrics.time_last_resuming,
+	    0, "Time spent resuming from last sleep (us)");
+	SYSCTL_ADD_U64(sc->sysctlctx, SYSCTL_CHILDREN(node), OID_AUTO,
+	    "total_time_resuming", CTLFLAG_RD, &sc->metrics.total_time_resuming,
+	    0, "Total time spent resuming from sleep (us)");
+	SYSCTL_ADD_U64(sc->sysctlctx, SYSCTL_CHILDREN(node), OID_AUTO,
+	    "time_last_in_s0i3", CTLFLAG_RD, &sc->metrics.time_last_in_s0i3, 0,
+	    "Time spent in S0i3 during last sleep (us)");
+	SYSCTL_ADD_U64(sc->sysctlctx, SYSCTL_CHILDREN(node), OID_AUTO,
+	    "total_time_in_s0i3", CTLFLAG_RD, &sc->metrics.total_time_in_s0i3,
+	    0, "Total time spent in S0i3 (us)");
+	SYSCTL_ADD_U64(sc->sysctlctx, SYSCTL_CHILDREN(node), OID_AUTO,
+	    "time_last_in_sw_drips", CTLFLAG_RD,
+	    &sc->metrics.time_last_in_sw_drips, 0,
+	    "Time spent in awake during last sleep (us)");
+	SYSCTL_ADD_U64(sc->sysctlctx, SYSCTL_CHILDREN(node), OID_AUTO,
+	    "total_time_in_sw_drips", CTLFLAG_RD,
+	    &sc->metrics.total_time_in_sw_drips, 0,
+	    "Total time spent awake (us)");
+
+	/* Get IP blocks & add sysctls. */
+	err = amdsmu_get_ip_blocks(dev);
+	if (err != 0)
+		goto err_dump;
+
+	/* Get idlemask & add sysctl. */
+	amdsmu_fetch_idlemask(dev);
+	SYSCTL_ADD_U32(sc->sysctlctx, SYSCTL_CHILDREN(sc->sysctlnode), OID_AUTO,
+	    "idlemask", CTLFLAG_RD, &sc->idlemask, 0, "SMU idlemask. This "
+	    "value is not documented - only used to help AMD internally debug "
+	    "issues");
+
+	return (0);
+err_dump:
+	bus_space_unmap(sc->bus_tag, sc->reg_space, SMU_MEM_SIZE);
+err_reg_space:
+	bus_space_unmap(sc->bus_tag, sc->smu_space, SMU_MEM_SIZE);
+err_smu_space:
+	bus_release_resource(dev, SYS_RES_MEMORY, rid, sc->res);
+	return (err);
+}
+
+static int
+amdsmu_detach(device_t dev)
+{
+	struct amdsmu_softc *sc = device_get_softc(dev);
+	int rid = 0;
+
+	bus_space_unmap(sc->bus_tag, sc->smu_space, SMU_MEM_SIZE);
+	bus_space_unmap(sc->bus_tag, sc->reg_space, SMU_MEM_SIZE);
+
+	bus_release_resource(dev, SYS_RES_MEMORY, rid, sc->res);
+	return (0);
+}
+
+static device_method_t amdsmu_methods[] = {
+	DEVMETHOD(device_identify, amdsmu_identify),
+	DEVMETHOD(device_probe, amdsmu_probe),
+	DEVMETHOD(device_attach, amdsmu_attach),
+	DEVMETHOD(device_detach, amdsmu_detach),
+	DEVMETHOD_END
+};
+
+static driver_t amdsmu_driver = {
+	"amdsmu",
+	amdsmu_methods,
+	sizeof(struct amdsmu_softc),
+};
+
+DRIVER_MODULE(amdsmu, hostb, amdsmu_driver, NULL, NULL);
+MODULE_VERSION(amdsmu, 1);
+MODULE_DEPEND(amdsmu, amdsmn, 1, 1, 1);
+MODULE_PNP_INFO("U16:vendor;U16:device", pci, amdsmu, amdsmu_products,
+    nitems(amdsmu_products));
diff --git a/sys/dev/amdsmu/amdsmu.h b/sys/dev/amdsmu/amdsmu.h
new file mode 100644
index 000000000000..025887f7fe5a
--- /dev/null
+++ b/sys/dev/amdsmu/amdsmu.h
@@ -0,0 +1,95 @@
+/*
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2025 The FreeBSD Foundation
+ *
+ * This software was developed by Aymeric Wibo <obiwac@freebsd.org>
+ * under sponsorship from the FreeBSD Foundation.
+ */
+#ifndef _AMDSMU_H_
+#define	_AMDSMU_H_
+
+#include <sys/param.h>
+#include <sys/bus.h>
+#include <sys/kernel.h>
+#include <machine/bus.h>
+#include <x86/cputypes.h>
+
+#include <dev/amdsmu/amdsmu_reg.h>
+
+#define	SMU_RES_READ_PERIOD_US	50
+#define	SMU_RES_READ_MAX	20000
+
+static const struct amdsmu_product {
+	uint16_t	amdsmu_vendorid;
+	uint16_t	amdsmu_deviceid;
+} amdsmu_products[] = {
+	{ CPU_VENDOR_AMD,	PCI_DEVICEID_AMD_REMBRANDT_ROOT },
+	{ CPU_VENDOR_AMD,	PCI_DEVICEID_AMD_PHOENIX_ROOT },
+	{ CPU_VENDOR_AMD,	PCI_DEVICEID_AMD_STRIX_POINT_ROOT },
+};
+
+static const char *const amdsmu_ip_blocks_names[] = {
+	"DISPLAY",
+	"CPU",
+	"GFX",
+	"VDD",
+	"ACP",
+	"VCN",
+	"ISP",
+	"NBIO",
+	"DF",
+	"USB3_0",
+	"USB3_1",
+	"LAPIC",
+	"USB3_2",
+	"USB3_3",
+	"USB3_4",
+	"USB4_0",
+	"USB4_1",
+	"MPM",
+	"JPEG",
+	"IPU",
+	"UMSCH",
+	"VPE",
+};
+
+CTASSERT(nitems(amdsmu_ip_blocks_names) <= 32);
+
+struct amdsmu_softc {
+	struct sysctl_ctx_list	*sysctlctx;
+	struct sysctl_oid	*sysctlnode;
+
+	struct resource		*res;
+	bus_space_tag_t		bus_tag;
+
+	bus_space_handle_t	smu_space;
+	bus_space_handle_t	reg_space;
+
+	uint8_t			smu_program;
+	uint8_t			smu_maj, smu_min, smu_rev;
+
+	uint32_t		active_ip_blocks;
+	struct sysctl_oid	*ip_blocks_sysctlnode;
+	size_t			ip_block_count;
+	struct sysctl_oid	*ip_block_sysctlnodes[nitems(amdsmu_ip_blocks_names)];
+	bool			ip_blocks_active[nitems(amdsmu_ip_blocks_names)];
+
+	bus_space_handle_t	metrics_space;
+	struct amdsmu_metrics	metrics;
+	uint32_t		idlemask;
+};
+
+static inline uint32_t
+amdsmu_read4(const struct amdsmu_softc *sc, bus_size_t reg)
+{
+	return (bus_space_read_4(sc->bus_tag, sc->reg_space, reg));
+}
+
+static inline void
+amdsmu_write4(const struct amdsmu_softc *sc, bus_size_t reg, uint32_t val)
+{
+	bus_space_write_4(sc->bus_tag, sc->reg_space, reg, val);
+}
+
+#endif /* _AMDSMU_H_ */
diff --git a/sys/dev/amdsmu/amdsmu_reg.h b/sys/dev/amdsmu/amdsmu_reg.h
new file mode 100644
index 000000000000..e685b34e6883
--- /dev/null
+++ b/sys/dev/amdsmu/amdsmu_reg.h
@@ -0,0 +1,84 @@
+/*
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2025 The FreeBSD Foundation
+ *
+ * This software was developed by Aymeric Wibo <obiwac@freebsd.org>
+ * under sponsorship from the FreeBSD Foundation.
+ */
+#ifndef _AMDSMU_REG_H_
+#define	_AMDSMU_REG_H_
+
+#include <sys/types.h>
+
+/*
+ * TODO These are in common with amdtemp; should we find a way to factor these
+ * out?  Also, there are way more of these.  I couldn't find a centralized place
+ * which lists them though.
+ */
+#define	PCI_DEVICEID_AMD_REMBRANDT_ROOT		0x14B5
+#define	PCI_DEVICEID_AMD_PHOENIX_ROOT		0x14E8
+#define	PCI_DEVICEID_AMD_STRIX_POINT_ROOT	0x14A4
+
+#define	SMU_INDEX_ADDRESS	0xB8
+#define	SMU_INDEX_DATA		0xBC
+
+#define	SMU_PHYSBASE_ADDR_LO	0x13B102E8
+#define	SMU_PHYSBASE_ADDR_HI	0x13B102EC
+
+#define	SMU_MEM_SIZE		0x1000
+#define	SMU_REG_SPACE_OFF	0x10000
+
+#define	SMU_REG_MESSAGE		0x538
+#define	SMU_REG_RESPONSE	0x980
+#define	SMU_REG_ARGUMENT	0x9BC
+#define	SMU_REG_IDLEMASK	0xD14
+
+enum amdsmu_res {
+	SMU_RES_WAIT		= 0x00,
+	SMU_RES_OK		= 0x01,
+	SMU_RES_REJECT_BUSY	= 0xFC,
+	SMU_RES_REJECT_PREREQ	= 0xFD,
+	SMU_RES_UNKNOWN		= 0xFE,
+	SMU_RES_FAILED		= 0xFF,
+};
+
+enum amdsmu_msg {
+	SMU_MSG_GETSMUVERSION		= 0x02,
+	SMU_MSG_LOG_GETDRAM_ADDR_HI	= 0x04,
+	SMU_MSG_LOG_GETDRAM_ADDR_LO	= 0x05,
+	SMU_MSG_LOG_START		= 0x06,
+	SMU_MSG_LOG_RESET		= 0x07,
+	SMU_MSG_LOG_DUMP_DATA		= 0x08,
+	SMU_MSG_GET_SUP_CONSTRAINTS	= 0x09,
+};
+
+/* XXX Copied from Linux struct smu_metrics.
+ */
+struct amdsmu_metrics {
+	uint32_t	table_version;
+	uint32_t	hint_count;
+	uint32_t	s0i3_last_entry_status;
+	uint32_t	time_last_in_s0i2;
+	uint64_t	time_last_entering_s0i3;
+	uint64_t	total_time_entering_s0i3;
+	uint64_t	time_last_resuming;
+	uint64_t	total_time_resuming;
+	uint64_t	time_last_in_s0i3;
+	uint64_t	total_time_in_s0i3;
+	uint64_t	time_last_in_sw_drips;
+	uint64_t	total_time_in_sw_drips;
+	/*
+	 * This is how long each IP block was active for (us), i.e., blocking
+	 * entry to S0i3.  In Linux, these are called "timecondition_notmet_*".
+	 *
+	 * XXX Total active time for IP blocks seems to be buggy and reporting
+	 * garbage (at least on Phoenix), so it's disabled for now.  The last
+	 * active time for the USB4_0 IP block also seems to be buggy.
+	 */
+	uint64_t	ip_block_last_active_time[32];
+#ifdef IP_BLOCK_TOTAL_ACTIVE_TIME
+	uint64_t	ip_block_total_active_time[32];
+#endif
+} __attribute__((packed));
+
+#endif /* _AMDSMU_REG_H_ */
diff --git a/sys/dev/cxgbe/tom/t4_cpl_io.c b/sys/dev/cxgbe/tom/t4_cpl_io.c
index 8547f21586e1..7a6b1cbdd736 100644
--- a/sys/dev/cxgbe/tom/t4_cpl_io.c
+++ b/sys/dev/cxgbe/tom/t4_cpl_io.c
@@ -703,7 +703,7 @@ t4_push_frames(struct adapter *sc, struct toepcb *toep, int drop)
 		for (m = sndptr; m != NULL; m = m->m_next) {
 			int n;

-			if ((m->m_flags & M_NOTAVAIL) != 0)
+			if ((m->m_flags & M_NOTREADY) != 0)
 				break;
 			if (m->m_flags & M_EXTPG) {
 #ifdef KERN_TLS
@@ -787,7 +787,7 @@ t4_push_frames(struct adapter *sc, struct toepcb *toep, int drop)

 		/* nothing to send */
 		if (plen == 0) {
-			KASSERT(m == NULL || (m->m_flags & M_NOTAVAIL) != 0,
+			KASSERT(m == NULL || (m->m_flags & M_NOTREADY) != 0,
 			    ("%s: nothing to send, but m != NULL is ready",
 			    __func__));
 			break;
@@ -880,7 +880,7 @@ t4_push_frames(struct adapter *sc, struct toepcb *toep, int drop)
 		toep->txsd_avail--;

 		t4_l2t_send(sc, wr, toep->l2te);
-	} while (m != NULL && (m->m_flags & M_NOTAVAIL) == 0);
+	} while (m != NULL && (m->m_flags & M_NOTREADY) == 0);

 	/* Send a FIN if requested, but only if there's no more data to send */
 	if (m == NULL && toep->flags & TPF_SEND_FIN)
diff --git a/sys/dev/cxgbe/tom/t4_tls.c b/sys/dev/cxgbe/tom/t4_tls.c
index c6377980fca9..27c16b9988ae 100644
--- a/sys/dev/cxgbe/tom/t4_tls.c
+++ b/sys/dev/cxgbe/tom/t4_tls.c
@@ -563,7 +563,7 @@ t4_push_ktls(struct adapter *sc, struct toepcb *toep, int drop)
 		 * If there is no ready data to send, wait until more
 		 * data arrives.
 		 */
-		if (m == NULL || (m->m_flags & M_NOTAVAIL) != 0) {
+		if (m == NULL || (m->m_flags & M_NOTREADY) != 0) {
 			if (sowwakeup)
 				sowwakeup_locked(so);
 			else
@@ -614,7 +614,7 @@ t4_push_ktls(struct adapter *sc, struct toepcb *toep, int drop)

 		/* Shove if there is no additional data pending. */
 		shove = ((m->m_next == NULL ||
-		    (m->m_next->m_flags & M_NOTAVAIL) != 0)) &&
+		    (m->m_next->m_flags & M_NOTREADY) != 0)) &&
 		    (tp->t_flags & TF_MORETOCOME) == 0;

 		if (sb->sb_flags & SB_AUTOSIZE &&
diff --git a/sys/dev/drm2/drm_fb_helper.c b/sys/dev/drm2/drm_fb_helper.c
index f67cc9f60d02..1f4abd255690 100644
--- a/sys/dev/drm2/drm_fb_helper.c
+++ b/sys/dev/drm2/drm_fb_helper.c
@@ -51,7 +51,7 @@ struct vt_kms_softc {
 	struct task	fb_mode_task;
 };

-/* Call restore out of vt(9) locks. */
+/* Call restore out of vt(4) locks.
+ */
 static void
 vt_restore_fbdev_mode(void *arg, int pending)
 {
diff --git a/sys/dev/efidev/efirt.c b/sys/dev/efidev/efirt.c
index b0fa33daeca7..b55c1c191077 100644
--- a/sys/dev/efidev/efirt.c
+++ b/sys/dev/efidev/efirt.c
@@ -107,7 +107,8 @@ static int efi_status2err[25] = {

 enum efi_table_type {
 	TYPE_ESRT = 0,
-	TYPE_PROP
+	TYPE_PROP,
+	TYPE_MEMORY_ATTR
 };

 static int efi_enter(void);
@@ -445,6 +446,42 @@ get_table_length(enum efi_table_type type, size_t *table_len, void **taddr)
 		free(buf, M_TEMP);
 		return (0);
 	}
+	case TYPE_MEMORY_ATTR:
+	{
+		efi_guid_t guid = EFI_MEMORY_ATTRIBUTES_TABLE;
+		struct efi_memory_attribute_table *tbl_addr, *mem_addr;
+		int error;
+		void *buf;
+		size_t len = sizeof(struct efi_memory_attribute_table);
+
+		error = efi_get_table(&guid, (void **)&tbl_addr);
+		if (error)
+			return (error);
+
+		buf = malloc(len, M_TEMP, M_WAITOK);
+		error = physcopyout((vm_paddr_t)tbl_addr, buf, len);
+		if (error) {
+			free(buf, M_TEMP);
+			return (error);
+		}
+
+		mem_addr = (struct efi_memory_attribute_table *)buf;
+		if (mem_addr->version != 2) {
+			free(buf, M_TEMP);
+			return (EINVAL);
+		}
+		len += mem_addr->descriptor_size * mem_addr->num_ents;
+		if (len > EFI_TABLE_ALLOC_MAX) {
+			free(buf, M_TEMP);
+			return (ENOMEM);
+		}
+
+		*table_len = len;
+		if (taddr != NULL)
+			*taddr = tbl_addr;
+		free(buf, M_TEMP);
+		return (0);
+	}
 	}
 	return (ENOENT);
 }
@@ -457,7 +494,8 @@ copy_table(efi_guid_t *guid, void **buf, size_t buf_len, size_t *table_len)
 		enum efi_table_type type;
 	} tables[] = {
 		{ EFI_TABLE_ESRT, TYPE_ESRT },
-		{ EFI_PROPERTIES_TABLE, TYPE_PROP }
+		{ EFI_PROPERTIES_TABLE, TYPE_PROP },
+		{ EFI_MEMORY_ATTRIBUTES_TABLE, TYPE_MEMORY_ATTR }
 	};
 	size_t table_idx;
 	void *taddr;
diff --git a/sys/dev/iicbus/iichid.c b/sys/dev/iicbus/iichid.c
index 9c0324a24685..3f1d7a0cefba 100644
--- a/sys/dev/iicbus/iichid.c
+++ b/sys/dev/iicbus/iichid.c
@@ -275,62 +275,36 @@ iichid_cmd_read(struct iichid_softc* sc, void *buf, iichid_size_t maxlen,
 	 * 6.1.3 - Retrieval of Input Reports
 	 * DEVICE returns the length (2 Bytes) and the entire Input Report.
 	 */
-	uint8_t actbuf[2] = { 0, 0 };
-	/* Read actual input report length. */
+
+	memset(buf, 0xaa, 2); // In case nothing gets read
 	struct iic_msg msgs[] = {
-	    { sc->addr, IIC_M_RD | IIC_M_NOSTOP, sizeof(actbuf), actbuf },
+	    { sc->addr, IIC_M_RD, maxlen, buf },
 	};
-	uint16_t actlen;
 	int error;

 	error = iicbus_transfer(sc->dev, msgs, nitems(msgs));
 	if (error != 0)
 		return (error);

-	actlen = actbuf[0] | actbuf[1] << 8;
-#ifdef IICHID_SAMPLING
-	if ((actlen == 0 && sc->sampling_rate_slow < 0) ||
-	    (maxlen == 0 && sc->sampling_rate_slow >= 0)) {
-#else
+	DPRINTFN(sc, 5, "%*D\n", msgs[0].len, msgs[0].buf, " ");
+
+	uint16_t actlen = le16dec(buf);
+	if (actlen == 0) {
-#endif
-		/* Read and discard reset command response. */
-		msgs[0] = (struct iic_msg)
-		    { sc->addr, IIC_M_RD | IIC_M_NOSTART,
-		      le16toh(sc->desc.wMaxInputLength) - 2, sc->intr_buf };
-		actlen = 0;
 		if (!sc->reset_acked) {
 			mtx_lock(&sc->mtx);
 			sc->reset_acked = true;
 			wakeup(&sc->reset_acked);
 			mtx_unlock(&sc->mtx);
 		}
-#ifdef IICHID_SAMPLING
-	} else if ((actlen <= 2 || actlen == 0xFFFF) &&
-	    sc->sampling_rate_slow >= 0) {
-		/* Read and discard 1 byte to send I2C STOP condition. */
-		msgs[0] = (struct iic_msg)
-		    { sc->addr, IIC_M_RD | IIC_M_NOSTART, 1, actbuf };
-		actlen = 0;
-#endif
-	} else {
-		actlen -= 2;
-		if (actlen > maxlen) {
-			DPRINTF(sc, "input report too big. requested=%d "
-			    "received=%d\n", maxlen, actlen);
-			actlen = maxlen;
-		}
-		/* Read input report itself. */
-		msgs[0] = (struct iic_msg)
-		    { sc->addr, IIC_M_RD | IIC_M_NOSTART, actlen, buf };
 	}
-	error = iicbus_transfer(sc->dev, msgs, 1);
-	if (error == 0 && actual_len != NULL)
+	if (actlen <= 2 || actlen > maxlen) {
+		actlen = 0;
+	}
+	if (actual_len != NULL) {
 		*actual_len = actlen;
-
-	DPRINTFN(sc, 5,
-	    "%*D - %*D\n", 2, actbuf, " ", msgs[0].len, msgs[0].buf, " ");
+	}

 	return (error);
 }
@@ -566,7 +540,7 @@ iichid_sampling_task(void *context, int pending)
 	error = iichid_cmd_read(sc, sc->intr_buf, sc->intr_bufsize, &actual);
 	if (error == 0) {
 		if (actual > 0) {
-			sc->intr_handler(sc->intr_ctx, sc->intr_buf, actual);
+			sc->intr_handler(sc->intr_ctx, sc->intr_buf + 2, actual);
 			sc->missing_samples = 0;
 			if (sc->dup_size != actual ||
 			    memcmp(sc->dup_buf, sc->intr_buf, actual) != 0) {
@@ -577,7 +551,7 @@ iichid_sampling_task(void *context, int pending)
 				++sc->dup_samples;
 		} else {
 			if (++sc->missing_samples == 1)
-				sc->intr_handler(sc->intr_ctx, sc->intr_buf, 0);
+				sc->intr_handler(sc->intr_ctx, sc->intr_buf + 2, 0);
 			sc->dup_samples = 0;
 		}
 	} else
@@ -632,7 +606,7 @@ iichid_intr(void *context)
 	if (error == 0) {
 		if (sc->power_on && sc->open) {
 			if (actual != 0)
-				sc->intr_handler(sc->intr_ctx, sc->intr_buf,
+				sc->intr_handler(sc->intr_ctx, sc->intr_buf + 2,
 				    actual);
 			else
 				DPRINTF(sc, "no data received\n");
@@ -842,11 +816,12 @@ iichid_intr_setup(device_t dev, device_t child __unused, hid_intr_t intr,
 	sc = device_get_softc(dev);
 	/*
-	 * Do not rely on wMaxInputLength, as some devices may set it to
-	 * a wrong length. Find the longest input report in report descriptor.
+	 * Do not rely just on wMaxInputLength, as some devices (which?)
+	 * may set it to a wrong length. Also find the longest input report
+	 * in report descriptor, and add two for the length field.
 	 */
-	rdesc->rdsize =
-	    MAX(rdesc->isize, le16toh(sc->desc.wMaxInputLength) - 2);
+	rdesc->rdsize = 2 +
+	    MAX(rdesc->isize, le16toh(sc->desc.wMaxInputLength));
 	/* Write and get/set_report sizes are limited by I2C-HID protocol.
 	 */
 	rdesc->grsize = rdesc->srsize = IICHID_SIZE_MAX;
 	rdesc->wrsize = IICHID_SIZE_MAX;
@@ -919,7 +894,7 @@ iichid_intr_poll(device_t dev, device_t child __unused)
 	sc = device_get_softc(dev);
 	error = iichid_cmd_read(sc, sc->intr_buf, sc->intr_bufsize, &actual);
 	if (error == 0 && actual != 0)
-		sc->intr_handler(sc->intr_ctx, sc->intr_buf, actual);
+		sc->intr_handler(sc->intr_ctx, sc->intr_buf + 2, actual);
 }

 /*
@@ -946,6 +921,7 @@ iichid_read(device_t dev, device_t child __unused, void *buf,
 {
 	struct iichid_softc *sc;
 	device_t parent;
+	uint8_t *tmpbuf;
 	int error;

 	if (maxlen > IICHID_SIZE_MAX)
@@ -954,8 +930,12 @@ iichid_read(device_t dev, device_t child __unused, void *buf,
 	parent = device_get_parent(sc->dev);
 	error = iicbus_request_bus(parent, sc->dev, IIC_WAIT);
 	if (error == 0) {
-		error = iichid_cmd_read(sc, buf, maxlen, actlen);
+		tmpbuf = malloc(maxlen + 2, M_DEVBUF, M_WAITOK | M_ZERO);
+		error = iichid_cmd_read(sc, tmpbuf, maxlen + 2, actlen);
 		iicbus_release_bus(parent, sc->dev);
+		if (*actlen > 0)
+			memcpy(buf, tmpbuf + 2, *actlen);
+		free(tmpbuf, M_DEVBUF);
 	}
 	return (iic2errno(error));
 }
diff --git a/sys/dev/md/md.c b/sys/dev/md/md.c
index 29dc0c880e3a..ec1664fac701 100644
--- a/sys/dev/md/md.c
+++ b/sys/dev/md/md.c
@@ -89,6 +89,8 @@
 #include <sys/unistd.h>
 #include <sys/vnode.h>
 #include <sys/disk.h>
+#include <sys/param.h>
+#include <sys/bus.h>

 #include <geom/geom.h>
 #include <geom/geom_int.h>
@@ -2082,8 +2084,10 @@ g_md_init(struct g_class *mp __unused)
 {
 	caddr_t mod;
 	u_char *ptr, *name, *type;
+	u_char scratch[40];
 	unsigned len;
 	int i;
+	vm_offset_t paddr;

 	/* figure out log2(NINDIR) */
 	for (i = NINDIR, nshift = -1; i; nshift++)
@@ -2123,6 +2127,25 @@ g_md_init(struct g_class *mp __unused)
 			sx_xunlock(&md_sx);
 		}
 	}
+
+	/*
+	 * Load up to 32 pre-loaded disks
+	 */
+	for (int i = 0; i < 32; i++) {
+		if (resource_long_value("md", i, "physaddr",
+		    (long *) &paddr) != 0 ||
+		    resource_int_value("md", i, "len", &len) != 0)
+			break;
+		ptr = (char *)pmap_map(NULL, paddr, paddr + len, VM_PROT_READ);
+		if (ptr != NULL && len != 0) {
+			sprintf(scratch, "preload%d 0x%016jx", i,
+			    (uintmax_t)paddr);
+			sx_xlock(&md_sx);
+			md_preloaded(ptr, len, scratch);
+			sx_xunlock(&md_sx);
+		}
+	}
+
 	status_dev = make_dev(&mdctl_cdevsw, INT_MAX, UID_ROOT, GID_WHEEL,
 	    0600, MDCTL_NAME);
 	g_topology_lock();
diff --git a/sys/dev/nvme/nvme_ctrlr.c b/sys/dev/nvme/nvme_ctrlr.c
index 73a7cee4aad0..fd7f00ced14b 100644
--- a/sys/dev/nvme/nvme_ctrlr.c
+++ b/sys/dev/nvme/nvme_ctrlr.c
@@ -48,7 +48,7 @@
 #define B4_CHK_RDY_DELAY_MS	2300		/* work around controller bug */

 static void nvme_ctrlr_construct_and_submit_aer(struct nvme_controller *ctrlr,
-    struct nvme_async_event_request *aer);
+    struct nvme_async_event_request *aer);

 static void
 nvme_ctrlr_barrier(struct nvme_controller *ctrlr, int flags)
@@ -680,96 +680,6 @@ nvme_ctrlr_log_critical_warnings(struct nvme_controller *ctrlr,
 }

 static void
-nvme_ctrlr_async_event_log_page_cb(void *arg, const struct nvme_completion *cpl)
-{
-	struct nvme_async_event_request *aer = arg;
-	struct nvme_health_information_page *health_info;
-	struct nvme_ns_list *nsl;
-	struct nvme_error_information_entry *err;
-	int i;
-
-	/*
-	 * If the log page fetch for some reason completed with an error,
-	 * don't pass log page data to the consumers. In practice, this case
-	 * should never happen.
-	 */
-	if (nvme_completion_is_error(cpl))
-		nvme_notify_async_consumers(aer->ctrlr, &aer->cpl,
-		    aer->log_page_id, NULL, 0);
-	else {
-		/* Convert data to host endian */
-		switch (aer->log_page_id) {
-		case NVME_LOG_ERROR:
-			err = (struct nvme_error_information_entry *)aer->log_page_buffer;
-			for (i = 0; i < (aer->ctrlr->cdata.elpe + 1); i++)
-				nvme_error_information_entry_swapbytes(err++);
-			break;
-		case NVME_LOG_HEALTH_INFORMATION:
-			nvme_health_information_page_swapbytes(
-			    (struct nvme_health_information_page *)aer->log_page_buffer);
-			break;
-		case NVME_LOG_CHANGED_NAMESPACE:
-			nvme_ns_list_swapbytes(
-			    (struct nvme_ns_list *)aer->log_page_buffer);
-			break;
-		case NVME_LOG_COMMAND_EFFECT:
-			nvme_command_effects_page_swapbytes(
-			    (struct nvme_command_effects_page *)aer->log_page_buffer);
-			break;
-		case NVME_LOG_RES_NOTIFICATION:
-			nvme_res_notification_page_swapbytes(
-			    (struct nvme_res_notification_page *)aer->log_page_buffer);
-			break;
-		case NVME_LOG_SANITIZE_STATUS:
-			nvme_sanitize_status_page_swapbytes(
-			    (struct nvme_sanitize_status_page *)aer->log_page_buffer);
-			break;
-		default:
-			break;
-		}
-
-		if (aer->log_page_id == NVME_LOG_HEALTH_INFORMATION) {
-			health_info = (struct nvme_health_information_page *)
-			    aer->log_page_buffer;
-			nvme_ctrlr_log_critical_warnings(aer->ctrlr,
-			    health_info->critical_warning);
-			/*
-			 * Critical warnings reported through the
-			 * SMART/health log page are persistent, so
-			 * clear the associated bits in the async event
-			 * config so that we do not receive repeated
-			 * notifications for the same event.
-			 */
-			aer->ctrlr->async_event_config &=
-			    ~health_info->critical_warning;
-			nvme_ctrlr_cmd_set_async_event_config(aer->ctrlr,
-			    aer->ctrlr->async_event_config, NULL, NULL);
-		} else if (aer->log_page_id == NVME_LOG_CHANGED_NAMESPACE &&
-		    !nvme_use_nvd) {
-			nsl = (struct nvme_ns_list *)aer->log_page_buffer;
-			for (i = 0; i < nitems(nsl->ns) && nsl->ns[i] != 0; i++) {
-				if (nsl->ns[i] > NVME_MAX_NAMESPACES)
-					break;
-				nvme_notify_ns(aer->ctrlr, nsl->ns[i]);
-			}
-		}
-
-		/*
-		 * Pass the cpl data from the original async event completion,
-		 * not the log page fetch.
-		 */
-		nvme_notify_async_consumers(aer->ctrlr, &aer->cpl,
-		    aer->log_page_id, aer->log_page_buffer, aer->log_page_size);
-	}
-
-	/*
-	 * Repost another asynchronous event request to replace the one
-	 * that just completed.
-	 */
-	nvme_ctrlr_construct_and_submit_aer(aer->ctrlr, aer);
-}
-
-static void
 nvme_ctrlr_async_event_cb(void *arg, const struct nvme_completion *cpl)
 {
 	struct nvme_async_event_request *aer = arg;
@@ -784,33 +694,18 @@ nvme_ctrlr_async_event_cb(void *arg, const struct nvme_completion *cpl)
 		return;
 	}
 
-	/* Associated log page is in bits 23:16 of completion entry dw0. */
+	/*
+	 * Save the completion status; the id of the associated log page is
+	 * in bits 23:16 of completion entry dw0. Print a message and queue
+	 * it for further processing.
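+	 * The log page itself is fetched from the task, since that can
+	 * sleep and this completion callback cannot.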
+	 */
+	memcpy(&aer->cpl, cpl, sizeof(*cpl));
 	aer->log_page_id = NVMEV(NVME_ASYNC_EVENT_LOG_PAGE_ID, cpl->cdw0);
-
 	nvme_printf(aer->ctrlr, "async event occurred (type 0x%x, info 0x%02x,"
 	    " page 0x%02x)\n", NVMEV(NVME_ASYNC_EVENT_TYPE, cpl->cdw0),
 	    NVMEV(NVME_ASYNC_EVENT_INFO, cpl->cdw0), aer->log_page_id);
-
-	if (is_log_page_id_valid(aer->log_page_id)) {
-		aer->log_page_size = nvme_ctrlr_get_log_page_size(aer->ctrlr,
-		    aer->log_page_id);
-		memcpy(&aer->cpl, cpl, sizeof(*cpl));
-		nvme_ctrlr_cmd_get_log_page(aer->ctrlr, aer->log_page_id,
-		    NVME_GLOBAL_NAMESPACE_TAG, aer->log_page_buffer,
-		    aer->log_page_size, nvme_ctrlr_async_event_log_page_cb,
-		    aer);
-		/* Wait to notify consumers until after log page is fetched. */
-	} else {
-		nvme_notify_async_consumers(aer->ctrlr, cpl, aer->log_page_id,
-		    NULL, 0);
-
-		/*
-		 * Repost another asynchronous event request to replace the one
-		 * that just completed.
-		 */
-		nvme_ctrlr_construct_and_submit_aer(aer->ctrlr, aer);
-	}
+	taskqueue_enqueue(aer->ctrlr->taskqueue, &aer->task);
 }
 
 static void
@@ -819,15 +714,21 @@ nvme_ctrlr_construct_and_submit_aer(struct nvme_controller *ctrlr,
 {
 	struct nvme_request *req;
 
-	aer->ctrlr = ctrlr;
 	/*
-	 * XXX-MJ this should be M_WAITOK but we might be in a non-sleepable
-	 * callback context. AER completions should be handled on a dedicated
-	 * thread.
+	 * We're racing the reset thread, so let that process submit this again.
+	 * XXX does this really solve that race? And is that race even possible,
+	 * since we only reset when we've not heard from the card in a long
+	 * time? Why would we get an AER in the middle of that just before we
+	 * kick off the reset?
 	 */
-	req = nvme_allocate_request_null(M_NOWAIT, nvme_ctrlr_async_event_cb,
+	if (ctrlr->is_resetting)
+		return;
+
+	aer->ctrlr = ctrlr;
+	req = nvme_allocate_request_null(M_WAITOK, nvme_ctrlr_async_event_cb,
 	    aer);
 	aer->req = req;
+	aer->log_page_id = 0;	/* Not a valid page */
 
 	/*
 	 * Disable timeout here, since asynchronous event requests should by
@@ -1203,6 +1104,140 @@ nvme_ctrlr_reset_task(void *arg, int pending)
 	atomic_cmpset_32(&ctrlr->is_resetting, 1, 0);
 }
 
+static void
+nvme_ctrlr_aer_done(void *arg, const struct nvme_completion *cpl)
+{
+	struct nvme_async_event_request *aer = arg;
+
+	mtx_lock(&aer->mtx);
+	if (nvme_completion_is_error(cpl))
+		aer->log_page_size = (uint32_t)-1;
+	else
+		aer->log_page_size = nvme_ctrlr_get_log_page_size(
+		    aer->ctrlr, aer->log_page_id);
+	wakeup(aer);
+	mtx_unlock(&aer->mtx);
+}
+
+static void
+nvme_ctrlr_aer_task(void *arg, int pending)
+{
+	struct nvme_async_event_request *aer = arg;
+	struct nvme_controller *ctrlr = aer->ctrlr;
+	uint32_t len;
+
+	/*
+	 * We're resetting, so just punt.
+	 */
+	if (ctrlr->is_resetting)
+		return;
+
+	if (!is_log_page_id_valid(aer->log_page_id)) {
+		/*
+		 * Repost another asynchronous event request to replace the one
+		 * that just completed.
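+		 * (The repost itself happens at the "out" label below, once
+		 * the consumers have been notified.)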
+ */ + nvme_notify_async_consumers(ctrlr, &aer->cpl, aer->log_page_id, + NULL, 0); + nvme_ctrlr_construct_and_submit_aer(ctrlr, aer); + goto out; + } + + aer->log_page_size = 0; + len = nvme_ctrlr_get_log_page_size(aer->ctrlr, aer->log_page_id); + nvme_ctrlr_cmd_get_log_page(aer->ctrlr, aer->log_page_id, + NVME_GLOBAL_NAMESPACE_TAG, aer->log_page_buffer, len, + nvme_ctrlr_aer_done, aer); + mtx_lock(&aer->mtx); + while (aer->log_page_size == 0) + mtx_sleep(aer, &aer->mtx, PRIBIO, "nvme_pt", 0); + mtx_unlock(&aer->mtx); + + if (aer->log_page_size != (uint32_t)-1) { + /* + * If the log page fetch for some reason completed with an + * error, don't pass log page data to the consumers. In + * practice, this case should never happen. + */ + nvme_notify_async_consumers(aer->ctrlr, &aer->cpl, + aer->log_page_id, NULL, 0); + goto out; + } + + /* Convert data to host endian */ + switch (aer->log_page_id) { + case NVME_LOG_ERROR: { + struct nvme_error_information_entry *err = + (struct nvme_error_information_entry *)aer->log_page_buffer; + for (int i = 0; i < (aer->ctrlr->cdata.elpe + 1); i++) + nvme_error_information_entry_swapbytes(err++); + break; + } + case NVME_LOG_HEALTH_INFORMATION: + nvme_health_information_page_swapbytes( + (struct nvme_health_information_page *)aer->log_page_buffer); + break; + case NVME_LOG_CHANGED_NAMESPACE: + nvme_ns_list_swapbytes( + (struct nvme_ns_list *)aer->log_page_buffer); + break; + case NVME_LOG_COMMAND_EFFECT: + nvme_command_effects_page_swapbytes( + (struct nvme_command_effects_page *)aer->log_page_buffer); + break; + case NVME_LOG_RES_NOTIFICATION: + nvme_res_notification_page_swapbytes( + (struct nvme_res_notification_page *)aer->log_page_buffer); + break; + case NVME_LOG_SANITIZE_STATUS: + nvme_sanitize_status_page_swapbytes( + (struct nvme_sanitize_status_page *)aer->log_page_buffer); + break; + default: + break; + } + + if (aer->log_page_id == NVME_LOG_HEALTH_INFORMATION) { + struct nvme_health_information_page *health_info = + (struct nvme_health_information_page *)aer->log_page_buffer; + + /* + * Critical warnings reported through the SMART/health log page + * are persistent, so clear the associated bits in the async + * event config so that we do not receive repeated notifications + * for the same event. + */ + nvme_ctrlr_log_critical_warnings(aer->ctrlr, + health_info->critical_warning); + aer->ctrlr->async_event_config &= + ~health_info->critical_warning; + nvme_ctrlr_cmd_set_async_event_config(aer->ctrlr, + aer->ctrlr->async_event_config, NULL, NULL); + } else if (aer->log_page_id == NVME_LOG_CHANGED_NAMESPACE) { + struct nvme_ns_list *nsl = + (struct nvme_ns_list *)aer->log_page_buffer; + for (int i = 0; i < nitems(nsl->ns) && nsl->ns[i] != 0; i++) { + if (nsl->ns[i] > NVME_MAX_NAMESPACES) + break; + nvme_notify_ns(aer->ctrlr, nsl->ns[i]); + } + } + + /* + * Pass the cpl data from the original async event completion, not the + * log page fetch. + */ + nvme_notify_async_consumers(aer->ctrlr, &aer->cpl, + aer->log_page_id, aer->log_page_buffer, aer->log_page_size); + + /* + * Repost another asynchronous event request to replace the one + * that just completed. + */ +out: + nvme_ctrlr_construct_and_submit_aer(ctrlr, aer); +} + /* * Poll all the queues enabled on the device for completion. */ @@ -1574,13 +1609,8 @@ nvme_ctrlr_construct(struct nvme_controller *ctrlr, device_t dev) /* * Create 2 threads for the taskqueue. The reset thread will block when * it detects that the controller has failed until all I/O has been - * failed up the stack. 
The fail_req task needs to be able to run in - * this case to finish the request failure for some cases. - * - * We could partially solve this race by draining the failed requeust - * queue before proceding to free the sim, though nothing would stop - * new I/O from coming in after we do that drain, but before we reach - * cam_sim_free, so this big hammer is used instead. + * failed up the stack. The second thread is used for AER events, which + * can block, but only briefly for memory and log page fetching. */ ctrlr->taskqueue = taskqueue_create("nvme_taskq", M_WAITOK, taskqueue_thread_enqueue, &ctrlr->taskqueue); @@ -1590,7 +1620,12 @@ nvme_ctrlr_construct(struct nvme_controller *ctrlr, device_t dev) ctrlr->is_initialized = false; ctrlr->notification_sent = 0; TASK_INIT(&ctrlr->reset_task, 0, nvme_ctrlr_reset_task, ctrlr); - STAILQ_INIT(&ctrlr->fail_req); + for (int i = 0; i < NVME_MAX_ASYNC_EVENTS; i++) { + struct nvme_async_event_request *aer = &ctrlr->aer[i]; + + TASK_INIT(&aer->task, 0, nvme_ctrlr_aer_task, aer); + mtx_init(&aer->mtx, "AER mutex", NULL, MTX_DEF); + } ctrlr->is_failed = false; make_dev_args_init(&md_args); @@ -1678,8 +1713,14 @@ nvme_ctrlr_destruct(struct nvme_controller *ctrlr, device_t dev) } noadminq: - if (ctrlr->taskqueue) + if (ctrlr->taskqueue) { taskqueue_free(ctrlr->taskqueue); + for (int i = 0; i < NVME_MAX_ASYNC_EVENTS; i++) { + struct nvme_async_event_request *aer = &ctrlr->aer[i]; + + mtx_destroy(&aer->mtx); + } + } if (ctrlr->tag) bus_teardown_intr(ctrlr->dev, ctrlr->res, ctrlr->tag); diff --git a/sys/dev/nvme/nvme_private.h b/sys/dev/nvme/nvme_private.h index 949e69ec9290..36f00fedc48e 100644 --- a/sys/dev/nvme/nvme_private.h +++ b/sys/dev/nvme/nvme_private.h @@ -123,6 +123,8 @@ struct nvme_request { struct nvme_async_event_request { struct nvme_controller *ctrlr; struct nvme_request *req; + struct task task; + struct mtx mtx; struct nvme_completion cpl; uint32_t log_page_id; uint32_t log_page_size; @@ -307,8 +309,6 @@ struct nvme_controller { bool isr_warned; bool is_initialized; - STAILQ_HEAD(, nvme_request) fail_req; - /* Host Memory Buffer */ int hmb_nchunks; size_t hmb_chunk; diff --git a/sys/dev/nvmf/controller/nvmft_subr.c b/sys/dev/nvmf/controller/nvmft_subr.c index bb2bc0988e81..245971813854 100644 --- a/sys/dev/nvmf/controller/nvmft_subr.c +++ b/sys/dev/nvmf/controller/nvmft_subr.c @@ -26,46 +26,6 @@ nvmf_nqn_valid(const char *nqn) len = strnlen(nqn, NVME_NQN_FIELD_SIZE); if (len == 0 || len > NVMF_NQN_MAX_LEN) return (false); - -#ifdef STRICT_CHECKS - /* - * Stricter checks from the spec. Linux does not seem to - * require these. - */ - - /* - * NVMF_NQN_MIN_LEN does not include '.', and require at least - * one character of a domain name. - */ - if (len < NVMF_NQN_MIN_LEN + 2) - return (false); - if (memcmp("nqn.", nqn, strlen("nqn.")) != 0) - return (false); - nqn += strlen("nqn."); - - /* Next 4 digits must be a year. */ - for (u_int i = 0; i < 4; i++) { - if (!isdigit(nqn[i])) - return (false); - } - nqn += 4; - - /* '-' between year and month. */ - if (nqn[0] != '-') - return (false); - nqn++; - - /* 2 digit month. */ - for (u_int i = 0; i < 2; i++) { - if (!isdigit(nqn[i])) - return (false); - } - nqn += 2; - - /* '.' between month and reverse domain name. 
*/ - if (nqn[0] != '.') - return (false); -#endif return (true); } diff --git a/sys/dev/pci/pci_iov.c b/sys/dev/pci/pci_iov.c index 1f72391fb6b4..0efcfeac9eff 100644 --- a/sys/dev/pci/pci_iov.c +++ b/sys/dev/pci/pci_iov.c @@ -734,11 +734,18 @@ pci_iov_config(struct cdev *cdev, struct pci_iov_arg *arg) first_rid = pci_get_rid(dev) + rid_off; last_rid = first_rid + (num_vfs - 1) * rid_stride; - /* We don't yet support allocating extra bus numbers for VFs. */ if (pci_get_bus(dev) != PCI_RID2BUS(last_rid)) { - device_printf(dev, "not enough PCIe bus numbers for VFs\n"); - error = ENOSPC; - goto out; + int rid = 0; + uint16_t last_rid_bus = PCI_RID2BUS(last_rid); + + iov->iov_bus_res = bus_alloc_resource(bus, PCI_RES_BUS, &rid, + last_rid_bus, last_rid_bus, 1, RF_ACTIVE); + if (iov->iov_bus_res == NULL) { + device_printf(dev, + "failed to allocate PCIe bus number for VFs\n"); + error = ENOSPC; + goto out; + } } if (!ari_enabled && PCI_RID2SLOT(last_rid) != 0) { @@ -786,6 +793,11 @@ out: } } + if (iov->iov_bus_res != NULL) { + bus_release_resource(bus, iov->iov_bus_res); + iov->iov_bus_res = NULL; + } + if (iov->iov_flags & IOV_RMAN_INITED) { rman_fini(&iov->rman); iov->iov_flags &= ~IOV_RMAN_INITED; @@ -896,6 +908,11 @@ pci_iov_delete_iov_children(struct pci_devinfo *dinfo) } } + if (iov->iov_bus_res != NULL) { + bus_release_resource(bus, iov->iov_bus_res); + iov->iov_bus_res = NULL; + } + if (iov->iov_flags & IOV_RMAN_INITED) { rman_fini(&iov->rman); iov->iov_flags &= ~IOV_RMAN_INITED; diff --git a/sys/dev/pci/pci_iov_private.h b/sys/dev/pci/pci_iov_private.h index 7ae2219b936d..ecf0a9b21be5 100644 --- a/sys/dev/pci/pci_iov_private.h +++ b/sys/dev/pci/pci_iov_private.h @@ -39,6 +39,8 @@ struct pcicfg_iov { struct cdev *iov_cdev; nvlist_t *iov_schema; + struct resource *iov_bus_res; + struct pci_iov_bar iov_bar[PCIR_MAX_BAR_0 + 1]; struct rman rman; char rman_name[64]; diff --git a/sys/dev/qlnx/qlnxe/qlnx_os.c b/sys/dev/qlnx/qlnxe/qlnx_os.c index 9d23d5df1d2b..4ad190374f87 100644 --- a/sys/dev/qlnx/qlnxe/qlnx_os.c +++ b/sys/dev/qlnx/qlnxe/qlnx_os.c @@ -2308,8 +2308,6 @@ qlnx_init_ifnet(device_t dev, qlnx_host_t *ha) else if (device_id == QLOGIC_PCI_DEVICE_ID_1644) if_setbaudrate(ifp, IF_Gbps(100)); - if_setcapabilities(ifp, IFCAP_LINKSTATE); - if_setinitfn(ifp, qlnx_init); if_setsoftc(ifp, ha); if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST); @@ -2343,7 +2341,6 @@ qlnx_init_ifnet(device_t dev, qlnx_host_t *ha) if_setcapabilities(ifp, IFCAP_HWCSUM); if_setcapabilitiesbit(ifp, IFCAP_JUMBO_MTU, 0); - if_setcapabilitiesbit(ifp, IFCAP_VLAN_MTU, 0); if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWTAGGING, 0); if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWFILTER, 0); @@ -2352,6 +2349,8 @@ qlnx_init_ifnet(device_t dev, qlnx_host_t *ha) if_setcapabilitiesbit(ifp, IFCAP_TSO4, 0); if_setcapabilitiesbit(ifp, IFCAP_TSO6, 0); if_setcapabilitiesbit(ifp, IFCAP_LRO, 0); + if_setcapabilitiesbit(ifp, IFCAP_LINKSTATE, 0); + if_setcapabilitiesbit(ifp, IFCAP_HWSTATS, 0); if_sethwtsomax(ifp, QLNX_MAX_TSO_FRAME_SIZE - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN)); diff --git a/sys/dev/vmm/vmm_dev.c b/sys/dev/vmm/vmm_dev.c index 819debadd1ac..9f2b009d02ec 100644 --- a/sys/dev/vmm/vmm_dev.c +++ b/sys/dev/vmm/vmm_dev.c @@ -30,7 +30,8 @@ #include <dev/vmm/vmm_mem.h> #include <dev/vmm/vmm_stat.h> -#if defined(__amd64__) && defined(COMPAT_FREEBSD12) +#ifdef __amd64__ +#ifdef COMPAT_FREEBSD12 struct vm_memseg_12 { int segid; size_t len; @@ -42,7 +43,22 @@ _Static_assert(sizeof(struct vm_memseg_12) == 80, "COMPAT_FREEBSD12 ABI"); 
_IOW('v', IOCNUM_ALLOC_MEMSEG, struct vm_memseg_12) #define VM_GET_MEMSEG_12 \ _IOWR('v', IOCNUM_GET_MEMSEG, struct vm_memseg_12) -#endif +#endif /* COMPAT_FREEBSD12 */ +#ifdef COMPAT_FREEBSD14 +struct vm_memseg_14 { + int segid; + size_t len; + char name[VM_MAX_SUFFIXLEN + 1]; +}; +_Static_assert(sizeof(struct vm_memseg_14) == (VM_MAX_SUFFIXLEN + 1 + 16), + "COMPAT_FREEBSD14 ABI"); + +#define VM_ALLOC_MEMSEG_14 \ + _IOW('v', IOCNUM_ALLOC_MEMSEG, struct vm_memseg_14) +#define VM_GET_MEMSEG_14 \ + _IOWR('v', IOCNUM_GET_MEMSEG, struct vm_memseg_14) +#endif /* COMPAT_FREEBSD14 */ +#endif /* __amd64__ */ struct devmem_softc { int segid; @@ -257,7 +273,8 @@ get_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg, size_t len) } static int -alloc_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg, size_t len) +alloc_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg, size_t len, + struct domainset *domainset) { char *name; int error; @@ -278,8 +295,7 @@ alloc_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg, size_t len) if (error) goto done; } - - error = vm_alloc_memseg(sc->vm, mseg->segid, mseg->len, sysmem); + error = vm_alloc_memseg(sc->vm, mseg->segid, mseg->len, sysmem, domainset); if (error) goto done; @@ -295,6 +311,20 @@ done: return (error); } +#if defined(__amd64__) && \ + (defined(COMPAT_FREEBSD14) || defined(COMPAT_FREEBSD12)) +/* + * Translate pre-15.0 memory segment identifiers into their 15.0 counterparts. + */ +static void +adjust_segid(struct vm_memseg *mseg) +{ + if (mseg->segid != VM_SYSMEM) { + mseg->segid += (VM_BOOTROM - 1); + } +} +#endif + static int vm_get_register_set(struct vcpu *vcpu, unsigned int count, int *regnum, uint64_t *regval) @@ -353,10 +383,16 @@ static const struct vmmdev_ioctl vmmdev_ioctls[] = { VMMDEV_IOCTL(VM_STATS, VMMDEV_IOCTL_LOCK_ONE_VCPU), VMMDEV_IOCTL(VM_STAT_DESC, 0), -#if defined(__amd64__) && defined(COMPAT_FREEBSD12) +#ifdef __amd64__ +#ifdef COMPAT_FREEBSD12 VMMDEV_IOCTL(VM_ALLOC_MEMSEG_12, VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS), #endif +#ifdef COMPAT_FREEBSD14 + VMMDEV_IOCTL(VM_ALLOC_MEMSEG_14, + VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS), +#endif +#endif /* __amd64__ */ VMMDEV_IOCTL(VM_ALLOC_MEMSEG, VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS), VMMDEV_IOCTL(VM_MMAP_MEMSEG, @@ -366,9 +402,14 @@ static const struct vmmdev_ioctl vmmdev_ioctls[] = { VMMDEV_IOCTL(VM_REINIT, VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS), -#if defined(__amd64__) && defined(COMPAT_FREEBSD12) +#ifdef __amd64__ +#if defined(COMPAT_FREEBSD12) VMMDEV_IOCTL(VM_GET_MEMSEG_12, VMMDEV_IOCTL_SLOCK_MEMSEGS), #endif +#ifdef COMPAT_FREEBSD14 + VMMDEV_IOCTL(VM_GET_MEMSEG_14, VMMDEV_IOCTL_SLOCK_MEMSEGS), +#endif +#endif /* __amd64__ */ VMMDEV_IOCTL(VM_GET_MEMSEG, VMMDEV_IOCTL_SLOCK_MEMSEGS), VMMDEV_IOCTL(VM_MMAP_GETNEXT, VMMDEV_IOCTL_SLOCK_MEMSEGS), @@ -388,6 +429,7 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag, struct vmmdev_softc *sc; struct vcpu *vcpu; const struct vmmdev_ioctl *ioctl; + struct vm_memseg *mseg; int error, vcpuid; sc = vmmdev_lookup2(cdev); @@ -499,20 +541,77 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag, error = vm_munmap_memseg(sc->vm, mu->gpa, mu->len); break; } -#if defined(__amd64__) && defined(COMPAT_FREEBSD12) +#ifdef __amd64__ +#ifdef COMPAT_FREEBSD12 case VM_ALLOC_MEMSEG_12: - error = alloc_memseg(sc, (struct vm_memseg *)data, - sizeof(((struct vm_memseg_12 *)0)->name)); + mseg = (struct vm_memseg *)data; + + 
adjust_segid(mseg); + error = alloc_memseg(sc, mseg, + sizeof(((struct vm_memseg_12 *)0)->name), NULL); break; case VM_GET_MEMSEG_12: - error = get_memseg(sc, (struct vm_memseg *)data, + mseg = (struct vm_memseg *)data; + + adjust_segid(mseg); + error = get_memseg(sc, mseg, sizeof(((struct vm_memseg_12 *)0)->name)); break; -#endif - case VM_ALLOC_MEMSEG: - error = alloc_memseg(sc, (struct vm_memseg *)data, - sizeof(((struct vm_memseg *)0)->name)); +#endif /* COMPAT_FREEBSD12 */ +#ifdef COMPAT_FREEBSD14 + case VM_ALLOC_MEMSEG_14: + mseg = (struct vm_memseg *)data; + + adjust_segid(mseg); + error = alloc_memseg(sc, mseg, + sizeof(((struct vm_memseg_14 *)0)->name), NULL); + break; + case VM_GET_MEMSEG_14: + mseg = (struct vm_memseg *)data; + + adjust_segid(mseg); + error = get_memseg(sc, mseg, + sizeof(((struct vm_memseg_14 *)0)->name)); + break; +#endif /* COMPAT_FREEBSD14 */ +#endif /* __amd64__ */ + case VM_ALLOC_MEMSEG: { + domainset_t *mask; + struct domainset *domainset, domain; + + domainset = NULL; + mseg = (struct vm_memseg *)data; + if (mseg->ds_policy != DOMAINSET_POLICY_INVALID && mseg->ds_mask != NULL) { + if (mseg->ds_mask_size < sizeof(domainset_t) || + mseg->ds_mask_size > DOMAINSET_MAXSIZE / NBBY) { + error = ERANGE; + break; + } + memset(&domain, 0, sizeof(domain)); + mask = malloc(mseg->ds_mask_size, M_VMMDEV, M_WAITOK); + error = copyin(mseg->ds_mask, mask, mseg->ds_mask_size); + if (error) { + free(mask, M_VMMDEV); + break; + } + error = domainset_populate(&domain, mask, mseg->ds_policy, + mseg->ds_mask_size); + if (error) { + free(mask, M_VMMDEV); + break; + } + domainset = domainset_create(&domain); + if (domainset == NULL) { + error = EINVAL; + free(mask, M_VMMDEV); + break; + } + free(mask, M_VMMDEV); + } + error = alloc_memseg(sc, mseg, sizeof(mseg->name), domainset); + break; + } case VM_GET_MEMSEG: error = get_memseg(sc, (struct vm_memseg *)data, sizeof(((struct vm_memseg *)0)->name)); @@ -820,7 +919,6 @@ sysctl_vmm_destroy(SYSCTL_HANDLER_ARGS) buflen = VM_MAX_NAMELEN + 1; buf = malloc(buflen, M_VMMDEV, M_WAITOK | M_ZERO); - strlcpy(buf, "beavis", buflen); error = sysctl_handle_string(oidp, buf, buflen, req); if (error == 0 && req->newptr != NULL) error = vmmdev_lookup_and_destroy(buf, req->td->td_ucred); @@ -830,7 +928,7 @@ sysctl_vmm_destroy(SYSCTL_HANDLER_ARGS) SYSCTL_PROC(_hw_vmm, OID_AUTO, destroy, CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE, NULL, 0, sysctl_vmm_destroy, "A", - NULL); + "Destroy a vmm(4) instance (legacy interface)"); static struct cdevsw vmmdevsw = { .d_name = "vmmdev", @@ -909,7 +1007,6 @@ sysctl_vmm_create(SYSCTL_HANDLER_ARGS) buflen = VM_MAX_NAMELEN + 1; buf = malloc(buflen, M_VMMDEV, M_WAITOK | M_ZERO); - strlcpy(buf, "beavis", buflen); error = sysctl_handle_string(oidp, buf, buflen, req); if (error == 0 && req->newptr != NULL) error = vmmdev_create(buf, req->td->td_ucred); @@ -919,7 +1016,7 @@ sysctl_vmm_create(SYSCTL_HANDLER_ARGS) SYSCTL_PROC(_hw_vmm, OID_AUTO, create, CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE, NULL, 0, sysctl_vmm_create, "A", - NULL); + "Create a vmm(4) instance (legacy interface)"); static int vmmctl_open(struct cdev *cdev, int flags, int fmt, struct thread *td) diff --git a/sys/dev/vmm/vmm_mem.c b/sys/dev/vmm/vmm_mem.c index c61ae2d44b96..be59e37de33d 100644 --- a/sys/dev/vmm/vmm_mem.c +++ b/sys/dev/vmm/vmm_mem.c @@ -7,6 +7,7 @@ #include <sys/types.h> #include <sys/lock.h> +#include <sys/malloc.h> #include <sys/sx.h> #include <sys/systm.h> @@ -156,10 +157,11 @@ 
vm_mem_allocated(struct vcpu *vcpu, vm_paddr_t gpa) } int -vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem) +vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem, + struct domainset *obj_domainset) { - struct vm_mem *mem; struct vm_mem_seg *seg; + struct vm_mem *mem; vm_object_t obj; mem = vm_mem(vm); @@ -179,13 +181,22 @@ vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem) return (EINVAL); } + /* + * When given an impossible policy, signal an + * error to the user. + */ + if (obj_domainset != NULL && domainset_empty_vm(obj_domainset)) + return (EINVAL); obj = vm_object_allocate(OBJT_SWAP, len >> PAGE_SHIFT); if (obj == NULL) return (ENOMEM); seg->len = len; seg->object = obj; + if (obj_domainset != NULL) + seg->object->domain.dr_policy = obj_domainset; seg->sysmem = sysmem; + return (0); } diff --git a/sys/dev/vmm/vmm_mem.h b/sys/dev/vmm/vmm_mem.h index a4be4c1c57aa..856470cf2590 100644 --- a/sys/dev/vmm/vmm_mem.h +++ b/sys/dev/vmm/vmm_mem.h @@ -8,6 +8,27 @@ #ifndef _DEV_VMM_MEM_H_ #define _DEV_VMM_MEM_H_ +/* Maximum number of NUMA domains in a guest. */ +#define VM_MAXMEMDOM 8 +#define VM_MAXSYSMEM VM_MAXMEMDOM + +/* + * Identifiers for memory segments. + * Each guest NUMA domain is represented by a single system + * memory segment from [VM_SYSMEM, VM_MAXSYSMEM). + * The remaining identifiers can be used to create devmem segments. + */ +enum { + VM_SYSMEM = 0, + VM_BOOTROM = VM_MAXSYSMEM, + VM_FRAMEBUFFER, + VM_PCIROM, + VM_MEMSEG_END +}; + +#define VM_MAX_MEMSEGS VM_MEMSEG_END +#define VM_MAX_MEMMAPS (VM_MAX_MEMSEGS * 2) + #ifdef _KERNEL #include <sys/types.h> @@ -31,9 +52,6 @@ struct vm_mem_map { int flags; }; -#define VM_MAX_MEMSEGS 4 -#define VM_MAX_MEMMAPS 8 - struct vm_mem { struct vm_mem_map mem_maps[VM_MAX_MEMMAPS]; struct vm_mem_seg mem_segs[VM_MAX_MEMSEGS]; @@ -55,7 +73,8 @@ void vm_assert_memseg_xlocked(struct vm *vm); int vm_mmap_memseg(struct vm *vm, vm_paddr_t gpa, int segid, vm_ooffset_t off, size_t len, int prot, int flags); int vm_munmap_memseg(struct vm *vm, vm_paddr_t gpa, size_t len); -int vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem); +int vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem, + struct domainset *obj_domainset); void vm_free_memseg(struct vm *vm, int ident); /* diff --git a/sys/dev/vt/hw/vga/vt_vga.c b/sys/dev/vt/hw/vga/vt_vga.c index 64039575c0ad..675c0573bd7e 100644 --- a/sys/dev/vt/hw/vga/vt_vga.c +++ b/sys/dev/vt/hw/vga/vt_vga.c @@ -1347,7 +1347,7 @@ vga_postswitch(struct vt_device *vd) /* Reinit VGA mode, to restore view after app which change mode. */ vga_initialize(vd, (vd->vd_flags & VDF_TEXTMODE)); - /* Ask vt(9) to update chars on visible area. */ + /* Ask vt(4) to update chars on visible area. 
*/ vd->vd_flags |= VDF_INVALID; } diff --git a/sys/dev/vt/vt_core.c b/sys/dev/vt/vt_core.c index b0f58b38a6f1..b51ef6766de4 100644 --- a/sys/dev/vt/vt_core.c +++ b/sys/dev/vt/vt_core.c @@ -125,10 +125,10 @@ static const struct terminal_class vt_termclass = { (vw)->vw_number) static SYSCTL_NODE(_kern, OID_AUTO, vt, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, - "vt(9) parameters"); + "vt(4) parameters"); static VT_SYSCTL_INT(enable_altgr, 1, "Enable AltGr key (Do not assume R.Alt as Alt)"); static VT_SYSCTL_INT(enable_bell, 0, "Enable bell"); -static VT_SYSCTL_INT(debug, 0, "vt(9) debug level"); +static VT_SYSCTL_INT(debug, 0, "vt(4) debug level"); static VT_SYSCTL_INT(deadtimer, 15, "Time to wait busy process in VT_PROCESS mode"); static VT_SYSCTL_INT(suspendswitch, 1, "Switch to VT0 before suspend"); diff --git a/sys/fs/fuse/fuse_internal.h b/sys/fs/fuse/fuse_internal.h index cddf88095840..932012b5f52a 100644 --- a/sys/fs/fuse/fuse_internal.h +++ b/sys/fs/fuse/fuse_internal.h @@ -208,9 +208,9 @@ fuse_match_cred(struct ucred *basecred, struct ucred *usercred) if (basecred->cr_uid == usercred->cr_uid && basecred->cr_uid == usercred->cr_ruid && basecred->cr_uid == usercred->cr_svuid && - basecred->cr_groups[0] == usercred->cr_groups[0] && - basecred->cr_groups[0] == usercred->cr_rgid && - basecred->cr_groups[0] == usercred->cr_svgid) + basecred->cr_gid == usercred->cr_gid && + basecred->cr_gid == usercred->cr_rgid && + basecred->cr_gid == usercred->cr_svgid) return (0); return (EPERM); diff --git a/sys/fs/fuse/fuse_ipc.c b/sys/fs/fuse/fuse_ipc.c index 0b6048644d32..a751c09159ff 100644 --- a/sys/fs/fuse/fuse_ipc.c +++ b/sys/fs/fuse/fuse_ipc.c @@ -868,7 +868,7 @@ fuse_setup_ihead(struct fuse_in_header *ihead, struct fuse_ticket *ftick, ihead->pid = pid; ihead->uid = cred->cr_uid; - ihead->gid = cred->cr_groups[0]; + ihead->gid = cred->cr_gid; } /* diff --git a/sys/fs/fuse/fuse_vnops.c b/sys/fs/fuse/fuse_vnops.c index ae28617537fd..32872e8f3f3a 100644 --- a/sys/fs/fuse/fuse_vnops.c +++ b/sys/fs/fuse/fuse_vnops.c @@ -884,7 +884,7 @@ fuse_vnop_copy_file_range(struct vop_copy_file_range_args *ap) return (EXTERROR(ENOSYS, "FUSE_COPY_FILE_RANGE does not " "support different credentials for infd and outfd")); - if (incred->cr_groups[0] != outcred->cr_groups[0]) + if (incred->cr_gid != outcred->cr_gid) return (EXTERROR(ENOSYS, "FUSE_COPY_FILE_RANGE does not " "support different credentials for infd and outfd")); diff --git a/sys/fs/nfs/nfs_commonport.c b/sys/fs/nfs/nfs_commonport.c index 0c94f4e7dc52..222cfc03e4b3 100644 --- a/sys/fs/nfs/nfs_commonport.c +++ b/sys/fs/nfs/nfs_commonport.c @@ -379,7 +379,8 @@ newnfs_setroot(struct ucred *cred) { cred->cr_uid = 0; - cred->cr_groups[0] = 0; + cred->cr_gid = 0; + /* XXXKE Fix this if cr_gid gets separated out. */ cred->cr_ngroups = 1; } diff --git a/sys/fs/nfsclient/nfs_clrpcops.c b/sys/fs/nfsclient/nfs_clrpcops.c index 2f3c59b68518..36b534be531e 100644 --- a/sys/fs/nfsclient/nfs_clrpcops.c +++ b/sys/fs/nfsclient/nfs_clrpcops.c @@ -6933,7 +6933,8 @@ nfscl_dofflayoutio(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit, if ((dp->nfsdi_flags & NFSDI_TIGHTCOUPLED) == 0) { tcred = NFSNEWCRED(cred); tcred->cr_uid = flp->nfsfl_ffm[mirror].user; - tcred->cr_groups[0] = flp->nfsfl_ffm[mirror].group; + tcred->cr_gid = flp->nfsfl_ffm[mirror].group; + /* XXXKE Fix this if cr_gid gets separated out. 
*/ tcred->cr_ngroups = 1; } else tcred = cred; diff --git a/sys/fs/pseudofs/pseudofs_vnops.c b/sys/fs/pseudofs/pseudofs_vnops.c index 0bdfedffafcb..8cd092118d0e 100644 --- a/sys/fs/pseudofs/pseudofs_vnops.c +++ b/sys/fs/pseudofs/pseudofs_vnops.c @@ -850,7 +850,7 @@ pfs_readdir(struct vop_readdir_args *va) struct uio *uio; struct pfsentry *pfsent, *pfsent2; struct pfsdirentlist lst; - off_t offset; + off_t coffset, offset; int error, i, resid; STAILQ_INIT(&lst); @@ -860,6 +860,9 @@ pfs_readdir(struct vop_readdir_args *va) PFS_TRACE(("%s pid %lu", pd->pn_name, (unsigned long)pid)); pfs_assert_not_owned(pd); + if (va->a_eofflag != NULL) + *va->a_eofflag = 0; + if (vn->v_type != VDIR) PFS_RETURN (ENOTDIR); KASSERT_PN_IS_DIR(pd); @@ -878,6 +881,10 @@ pfs_readdir(struct vop_readdir_args *va) if (pid != NO_PID && !pfs_lookup_proc(pid, &proc)) PFS_RETURN (ENOENT); + /* + * The allproc lock is required in pfs_iterate() for procdir + * directories. + */ sx_slock(&allproc_lock); pfs_lock(pd); @@ -897,23 +904,15 @@ pfs_readdir(struct vop_readdir_args *va) } } - /* skip unwanted entries */ - for (pn = NULL, p = NULL; offset > 0; offset -= PFS_DELEN) { + for (pn = NULL, p = NULL, coffset = 0; resid >= PFS_DELEN; + coffset += PFS_DELEN) { if (pfs_iterate(curthread, proc, pd, &pn, &p) == -1) { - /* nothing left... */ - if (proc != NULL) { - _PRELE(proc); - PROC_UNLOCK(proc); - } - pfs_unlock(pd); - sx_sunlock(&allproc_lock); - PFS_RETURN (0); + if (va->a_eofflag != NULL) + *va->a_eofflag = 1; + break; } - } - - /* fill in entries */ - while (pfs_iterate(curthread, proc, pd, &pn, &p) != -1 && - resid >= PFS_DELEN) { + if (coffset < offset) + continue; if ((pfsent = malloc(sizeof(struct pfsentry), M_IOV, M_NOWAIT | M_ZERO)) == NULL) { error = ENOMEM; diff --git a/sys/fs/smbfs/smbfs_io.c b/sys/fs/smbfs/smbfs_io.c index 35454998fc8e..8c484381ed59 100644 --- a/sys/fs/smbfs/smbfs_io.c +++ b/sys/fs/smbfs/smbfs_io.c @@ -71,7 +71,7 @@ SYSCTL_INT(_vfs_smbfs, OID_AUTO, fastlookup, CTLFLAG_RW, &smbfs_fastlookup, 0, " #define DE_SIZE (sizeof(struct dirent)) static int -smbfs_readvdir(struct vnode *vp, struct uio *uio, struct ucred *cred) +smbfs_readvdir(struct vnode *vp, struct uio *uio, struct ucred *cred, int *eofp) { struct dirent de; struct componentname cn; @@ -86,6 +86,8 @@ smbfs_readvdir(struct vnode *vp, struct uio *uio, struct ucred *cred) SMBVDEBUG("dirname='%s'\n", np->n_name); scred = smbfs_malloc_scred(); smb_makescred(scred, uio->uio_td, cred); + if (eofp != NULL) + *eofp = 0; offset = uio->uio_offset / DE_SIZE; /* offset in the directory */ limit = uio->uio_resid / DE_SIZE; if (uio->uio_resid < DE_SIZE || uio->uio_offset < 0) { @@ -138,8 +140,7 @@ smbfs_readvdir(struct vnode *vp, struct uio *uio, struct ucred *cred) if (error) { smbfs_findclose(np->n_dirseq, scred); np->n_dirseq = NULL; - error = ENOENT ? 
0 : error; - goto out; + goto out1; } } error = 0; @@ -170,16 +171,21 @@ smbfs_readvdir(struct vnode *vp, struct uio *uio, struct ucred *cred) if (error) break; } - if (error == ENOENT) - error = 0; uio->uio_offset = offset * DE_SIZE; +out1: + if (error == ENOENT) { + if (eofp != NULL) + *eofp = 1; + error = 0; + } out: smbfs_free_scred(scred); return error; } int -smbfs_readvnode(struct vnode *vp, struct uio *uiop, struct ucred *cred) +smbfs_readvnode(struct vnode *vp, struct uio *uiop, struct ucred *cred, + int *eofp) { struct smbmount *smp = VFSTOSMBFS(vp->v_mount); struct smbnode *np = VTOSMB(vp); @@ -209,7 +215,7 @@ smbfs_readvnode(struct vnode *vp, struct uio *uiop, struct ucred *cred) lks = LK_EXCLUSIVE; /* lockstatus(vp->v_vnlock); */ if (lks == LK_SHARED) vn_lock(vp, LK_UPGRADE | LK_RETRY); - error = smbfs_readvdir(vp, uiop, cred); + error = smbfs_readvdir(vp, uiop, cred, eofp); if (lks == LK_SHARED) vn_lock(vp, LK_DOWNGRADE | LK_RETRY); return error; diff --git a/sys/fs/smbfs/smbfs_node.h b/sys/fs/smbfs/smbfs_node.h index f28f0007100a..8c8ce038b913 100644 --- a/sys/fs/smbfs/smbfs_node.h +++ b/sys/fs/smbfs/smbfs_node.h @@ -93,7 +93,7 @@ u_int32_t smbfs_hash(const u_char *name, int nmlen); int smbfs_getpages(struct vop_getpages_args *); int smbfs_putpages(struct vop_putpages_args *); -int smbfs_readvnode(struct vnode *vp, struct uio *uiop, struct ucred *cred); +int smbfs_readvnode(struct vnode *vp, struct uio *uiop, struct ucred *cred, int *eofp); int smbfs_writevnode(struct vnode *vp, struct uio *uiop, struct ucred *cred, int ioflag); void smbfs_attr_cacheenter(struct vnode *vp, struct smbfattr *fap); int smbfs_attr_cachelookup(struct vnode *vp ,struct vattr *va); diff --git a/sys/fs/smbfs/smbfs_vnops.c b/sys/fs/smbfs/smbfs_vnops.c index 5d412cabadb8..63b249c93771 100644 --- a/sys/fs/smbfs/smbfs_vnops.c +++ b/sys/fs/smbfs/smbfs_vnops.c @@ -466,7 +466,7 @@ smbfs_read(struct vop_read_args *ap) SMBVDEBUG("\n"); if (vp->v_type != VREG && vp->v_type != VDIR) return EPERM; - return smbfs_readvnode(vp, uio, ap->a_cred); + return smbfs_readvnode(vp, uio, ap->a_cred, NULL); } static int @@ -748,7 +748,6 @@ smbfs_readdir(struct vop_readdir_args *ap) { struct vnode *vp = ap->a_vp; struct uio *uio = ap->a_uio; - int error; if (vp->v_type != VDIR) return (EPERM); @@ -758,8 +757,7 @@ smbfs_readdir(struct vop_readdir_args *ap) return (EOPNOTSUPP); } #endif - error = smbfs_readvnode(vp, uio, ap->a_cred); - return error; + return (smbfs_readvnode(vp, uio, ap->a_cred, ap->a_eofflag)); } /* ARGSUSED */ diff --git a/sys/geom/concat/g_concat.c b/sys/geom/concat/g_concat.c index 2b1cb575cac8..2173a84c7acf 100644 --- a/sys/geom/concat/g_concat.c +++ b/sys/geom/concat/g_concat.c @@ -590,6 +590,7 @@ g_concat_add_disk(struct g_concat_softc *sc, struct g_provider *pp, u_int no) strcmp(md.md_name, sc->sc_name) != 0 || md.md_id != sc->sc_id) { G_CONCAT_DEBUG(0, "Metadata on %s changed.", pp->name); + error = EINVAL; goto fail; } diff --git a/sys/geom/geom.h b/sys/geom/geom.h index dcd6f793f9f7..908ce86f03a6 100644 --- a/sys/geom/geom.h +++ b/sys/geom/geom.h @@ -282,7 +282,7 @@ void g_detach(struct g_consumer *cp); void g_error_provider(struct g_provider *pp, int error); struct g_provider *g_provider_by_name(char const *arg); int g_getattr__(const char *attr, struct g_consumer *cp, void *var, int len); -#define g_getattr(a, c, v) g_getattr__((a), (c), (v), sizeof *(v)) +#define g_getattr(a, c, v) g_getattr__((a), (c), (v), sizeof(*(v))) int g_handleattr(struct bio *bp, const char *attribute, const void *val, 
int len); int g_handleattr_int(struct bio *bp, const char *attribute, int val); diff --git a/sys/geom/geom_ccd.c b/sys/geom/geom_ccd.c index 5700399ee5d1..2140d005160e 100644 --- a/sys/geom/geom_ccd.c +++ b/sys/geom/geom_ccd.c @@ -730,17 +730,17 @@ g_ccd_create(struct gctl_req *req, struct g_class *mp) int i, error; g_topology_assert(); - unit = gctl_get_paraml(req, "unit", sizeof (*unit)); + unit = gctl_get_paraml(req, "unit", sizeof(*unit)); if (unit == NULL) { gctl_error(req, "unit parameter not given"); return; } - ileave = gctl_get_paraml(req, "ileave", sizeof (*ileave)); + ileave = gctl_get_paraml(req, "ileave", sizeof(*ileave)); if (ileave == NULL) { gctl_error(req, "ileave parameter not given"); return; } - nprovider = gctl_get_paraml(req, "nprovider", sizeof (*nprovider)); + nprovider = gctl_get_paraml(req, "nprovider", sizeof(*nprovider)); if (nprovider == NULL) { gctl_error(req, "nprovider parameter not given"); return; @@ -769,7 +769,7 @@ g_ccd_create(struct gctl_req *req, struct g_class *mp) } gp = g_new_geomf(mp, "ccd%d", *unit); - sc = g_malloc(sizeof *sc, M_WAITOK | M_ZERO); + sc = g_malloc(sizeof(*sc), M_WAITOK | M_ZERO); gp->softc = sc; sc->sc_ndisks = *nprovider; @@ -872,7 +872,7 @@ g_ccd_list(struct gctl_req *req, struct g_class *mp) struct g_geom *gp; int i, unit, *up; - up = gctl_get_paraml(req, "unit", sizeof (*up)); + up = gctl_get_paraml(req, "unit", sizeof(*up)); if (up == NULL) { gctl_error(req, "unit parameter not given"); return; diff --git a/sys/geom/geom_event.c b/sys/geom/geom_event.c index 0a76fd6c6f57..341233a6ef47 100644 --- a/sys/geom/geom_event.c +++ b/sys/geom/geom_event.c @@ -145,7 +145,7 @@ g_attr_changed(struct g_provider *pp, const char *attr, int flag) struct g_attrchanged_args *args; int error; - args = g_malloc(sizeof *args, flag); + args = g_malloc(sizeof(*args), flag); if (args == NULL) return (ENOMEM); args->pp = pp; diff --git a/sys/geom/geom_io.c b/sys/geom/geom_io.c index 8d6b9a926e1d..247a623bf1bf 100644 --- a/sys/geom/geom_io.c +++ b/sys/geom/geom_io.c @@ -278,7 +278,7 @@ g_io_init(void) g_bioq_init(&g_bio_run_down); g_bioq_init(&g_bio_run_up); - biozone = uma_zcreate("g_bio", sizeof (struct bio), + biozone = uma_zcreate("g_bio", sizeof(struct bio), NULL, NULL, NULL, NULL, 0, 0); diff --git a/sys/geom/geom_slice.c b/sys/geom/geom_slice.c index 8cfffc478849..0491b0069be4 100644 --- a/sys/geom/geom_slice.c +++ b/sys/geom/geom_slice.c @@ -57,7 +57,7 @@ g_slice_alloc(unsigned nslice, unsigned scsize) { struct g_slicer *gsp; - gsp = g_malloc(sizeof *gsp, M_WAITOK | M_ZERO); + gsp = g_malloc(sizeof(*gsp), M_WAITOK | M_ZERO); if (scsize > 0) gsp->softc = g_malloc(scsize, M_WAITOK | M_ZERO); else @@ -463,9 +463,9 @@ g_slice_conf_hot(struct g_geom *gp, u_int idx, off_t offset, off_t length, int r } gsl = gsp->hotspot; if(idx >= gsp->nhotspot) { - gsl2 = g_malloc((idx + 1) * sizeof *gsl2, M_WAITOK | M_ZERO); + gsl2 = g_malloc((idx + 1) * sizeof(*gsl2), M_WAITOK | M_ZERO); if (gsp->hotspot != NULL) - bcopy(gsp->hotspot, gsl2, gsp->nhotspot * sizeof *gsl2); + bcopy(gsp->hotspot, gsl2, gsp->nhotspot * sizeof(*gsl2)); gsp->hotspot = gsl2; if (gsp->hotspot != NULL) g_free(gsl); diff --git a/sys/geom/geom_subr.c b/sys/geom/geom_subr.c index 41cc115225f9..1429c84942ed 100644 --- a/sys/geom/geom_subr.c +++ b/sys/geom/geom_subr.c @@ -267,7 +267,7 @@ g_modevent(module_t mod, int type, void *data) switch (type) { case MOD_LOAD: g_trace(G_T_TOPOLOGY, "g_modevent(%s, LOAD)", mp->name); - hh = g_malloc(sizeof *hh, M_WAITOK | M_ZERO); + hh = 
g_malloc(sizeof(*hh), M_WAITOK | M_ZERO); hh->mp = mp; /* * Once the system is not cold, MOD_LOAD calls will be @@ -351,7 +351,7 @@ g_retaste(struct g_class *mp) if (mp->taste == NULL) return (EINVAL); - hh = g_malloc(sizeof *hh, M_WAITOK | M_ZERO); + hh = g_malloc(sizeof(*hh), M_WAITOK | M_ZERO); hh->mp = mp; if (cold) { @@ -381,8 +381,8 @@ g_new_geomf(struct g_class *mp, const char *fmt, ...) sbuf_vprintf(sb, fmt, ap); va_end(ap); sbuf_finish(sb); - gp = g_malloc(sizeof *gp, M_WAITOK | M_ZERO); - gp->name = g_malloc(sbuf_len(sb) + 1, M_WAITOK | M_ZERO); + gp = g_malloc(sizeof(*gp) + sbuf_len(sb) + 1, M_WAITOK | M_ZERO); + gp->name = (char *)(gp + 1); gp->class = mp; gp->rank = 1; LIST_INIT(&gp->consumer); @@ -420,7 +420,6 @@ g_destroy_geom(struct g_geom *gp) g_cancel_event(gp); LIST_REMOVE(gp, geom); TAILQ_REMOVE(&geoms, gp, geoms); - g_free(gp->name); g_free(gp); } @@ -528,7 +527,7 @@ g_new_consumer(struct g_geom *gp) ("g_new_consumer on geom(%s) (class %s) without orphan", gp->name, gp->class->name)); - cp = g_malloc(sizeof *cp, M_WAITOK | M_ZERO); + cp = g_malloc(sizeof(*cp), M_WAITOK | M_ZERO); cp->geom = gp; cp->stat = devstat_new_entry(cp, -1, 0, DEVSTAT_ALL_SUPPORTED, DEVSTAT_TYPE_DIRECT, DEVSTAT_PRIORITY_MAX); @@ -617,7 +616,7 @@ g_new_providerf(struct g_geom *gp, const char *fmt, ...) sbuf_vprintf(sb, fmt, ap); va_end(ap); sbuf_finish(sb); - pp = g_malloc(sizeof *pp + sbuf_len(sb) + 1, M_WAITOK | M_ZERO); + pp = g_malloc(sizeof(*pp) + sbuf_len(sb) + 1, M_WAITOK | M_ZERO); pp->name = (char *)(pp + 1); strcpy(pp->name, sbuf_data(sb)); sbuf_delete(sb); @@ -749,7 +748,7 @@ g_resize_provider(struct g_provider *pp, off_t size) if (size == pp->mediasize) return; - hh = g_malloc(sizeof *hh, M_WAITOK | M_ZERO); + hh = g_malloc(sizeof(*hh), M_WAITOK | M_ZERO); hh->pp = pp; hh->size = size; g_post_event(g_resize_provider_event, hh, M_WAITOK, NULL); @@ -1083,21 +1082,21 @@ int g_handleattr_int(struct bio *bp, const char *attribute, int val) { - return (g_handleattr(bp, attribute, &val, sizeof val)); + return (g_handleattr(bp, attribute, &val, sizeof(val))); } int g_handleattr_uint16_t(struct bio *bp, const char *attribute, uint16_t val) { - return (g_handleattr(bp, attribute, &val, sizeof val)); + return (g_handleattr(bp, attribute, &val, sizeof(val))); } int g_handleattr_off_t(struct bio *bp, const char *attribute, off_t val) { - return (g_handleattr(bp, attribute, &val, sizeof val)); + return (g_handleattr(bp, attribute, &val, sizeof(val))); } int diff --git a/sys/geom/multipath/g_multipath.c b/sys/geom/multipath/g_multipath.c index 23088c895541..a4935df7eaa1 100644 --- a/sys/geom/multipath/g_multipath.c +++ b/sys/geom/multipath/g_multipath.c @@ -321,7 +321,7 @@ g_multipath_resize(struct g_consumer *cp) if (sc->sc_uuid[0] != 0) { pp = cp->provider; strlcpy(md.md_magic, G_MULTIPATH_MAGIC, sizeof(md.md_magic)); - memcpy(md.md_uuid, sc->sc_uuid, sizeof (sc->sc_uuid)); + memcpy(md.md_uuid, sc->sc_uuid, sizeof(sc->sc_uuid)); strlcpy(md.md_name, sc->sc_name, sizeof(md.md_name)); md.md_version = G_MULTIPATH_VERSION; md.md_size = size; @@ -552,8 +552,8 @@ g_multipath_create(struct g_class *mp, struct g_multipath_metadata *md) gp = g_new_geomf(mp, "%s", md->md_name); sc = g_malloc(sizeof(*sc), M_WAITOK | M_ZERO); mtx_init(&sc->sc_mtx, "multipath", NULL, MTX_DEF); - memcpy(sc->sc_uuid, md->md_uuid, sizeof (sc->sc_uuid)); - memcpy(sc->sc_name, md->md_name, sizeof (sc->sc_name)); + memcpy(sc->sc_uuid, md->md_uuid, sizeof(sc->sc_uuid)); + memcpy(sc->sc_name, md->md_name, sizeof(sc->sc_name)); 
sc->sc_active_active = md->md_active_active; sc->sc_size = md->md_size; gp->softc = sc; @@ -906,7 +906,7 @@ g_multipath_taste(struct g_class *mp, struct g_provider *pp, int flags __unused) char buf[16]; u_long rand = random(); - snprintf(buf, sizeof (buf), "%s-%lu", md.md_name, rand); + snprintf(buf, sizeof(buf), "%s-%lu", md.md_name, rand); printf("GEOM_MULTIPATH: geom %s/%s exists already\n", sc->sc_name, sc->sc_uuid); printf("GEOM_MULTIPATH: %s will be (temporarily) %s\n", @@ -1200,7 +1200,7 @@ g_multipath_ctl_configure(struct gctl_req *req, struct g_class *mp) cp = sc->sc_active; pp = cp->provider; strlcpy(md.md_magic, G_MULTIPATH_MAGIC, sizeof(md.md_magic)); - memcpy(md.md_uuid, sc->sc_uuid, sizeof (sc->sc_uuid)); + memcpy(md.md_uuid, sc->sc_uuid, sizeof(sc->sc_uuid)); strlcpy(md.md_name, name, sizeof(md.md_name)); md.md_version = G_MULTIPATH_VERSION; md.md_size = pp->mediasize; diff --git a/sys/geom/virstor/g_virstor.c b/sys/geom/virstor/g_virstor.c index b8cf32875660..c7d737493f11 100644 --- a/sys/geom/virstor/g_virstor.c +++ b/sys/geom/virstor/g_virstor.c @@ -202,7 +202,7 @@ virstor_ctl_stop(struct gctl_req *req, struct g_class *cp) int *force, *nargs; int i; - nargs = gctl_get_paraml(req, "nargs", sizeof *nargs); + nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); if (nargs == NULL) { gctl_error(req, "Error fetching argument '%s'", "nargs"); return; @@ -211,7 +211,7 @@ virstor_ctl_stop(struct gctl_req *req, struct g_class *cp) gctl_error(req, "Invalid number of arguments"); return; } - force = gctl_get_paraml(req, "force", sizeof *force); + force = gctl_get_paraml(req, "force", sizeof(*force)); if (force == NULL) { gctl_error(req, "Error fetching argument '%s'", "force"); return; @@ -315,7 +315,7 @@ virstor_ctl_add(struct gctl_req *req, struct g_class *cp) u_int nc; u_int j; - snprintf(aname, sizeof aname, "arg%d", i); + snprintf(aname, sizeof(aname), "arg%d", i); pp = gctl_get_provider(req, aname); if (pp == NULL) { /* This is the most common error so be verbose about it */ @@ -487,12 +487,12 @@ fill_metadata(struct g_virstor_softc *sc, struct g_virstor_metadata *md, { struct g_virstor_component *c; - bzero(md, sizeof *md); + bzero(md, sizeof(*md)); c = &sc->components[nc]; - strncpy(md->md_magic, G_VIRSTOR_MAGIC, sizeof md->md_magic); + strncpy(md->md_magic, G_VIRSTOR_MAGIC, sizeof(md->md_magic)); md->md_version = G_VIRSTOR_VERSION; - strncpy(md->md_name, sc->geom->name, sizeof md->md_name); + strncpy(md->md_name, sc->geom->name, sizeof(md->md_name)); md->md_id = sc->id; md->md_virsize = sc->virsize; md->md_chunk_size = sc->chunk_size; @@ -500,7 +500,7 @@ fill_metadata(struct g_virstor_softc *sc, struct g_virstor_metadata *md, if (hardcode) { strncpy(md->provider, c->gcons->provider->name, - sizeof md->provider); + sizeof(md->provider)); } md->no = nc; md->provsize = c->gcons->provider->mediasize; @@ -589,7 +589,7 @@ virstor_ctl_remove(struct gctl_req *req, struct g_class *cp) M_GVIRSTOR, M_WAITOK | M_ZERO); bcopy(sc->components, newcomp, found * sizeof(*sc->components)); bcopy(&sc->components[found + 1], newcomp + found, - found * sizeof(*sc->components)); + (sc->n_components - (found + 1)) * sizeof(*sc->components)); if ((sc->components[j].flags & VIRSTOR_PROVIDER_ALLOCATED) != 0) { LOG_MSG(LVL_ERROR, "Allocated provider %s cannot be " "removed from %s", @@ -959,7 +959,7 @@ virstor_geom_destroy(struct g_virstor_softc *sc, boolean_t force, free(sc->map, M_GVIRSTOR); free(sc->components, M_GVIRSTOR); - bzero(sc, sizeof *sc); + bzero(sc, sizeof(*sc)); free(sc, M_GVIRSTOR); 
pp = LIST_FIRST(&gp->provider); /* We only offer one provider */ @@ -1213,7 +1213,7 @@ virstor_check_and_run(struct g_virstor_softc *sc) sc->provider->name, sc->chunk_count * (off_t)sc->chunk_size); } - sc->map_size = sc->chunk_count * sizeof *(sc->map); + sc->map_size = sc->chunk_count * sizeof(*(sc->map)); /* The following allocation is in order of 4MB - 8MB */ sc->map = malloc(sc->map_size, M_GVIRSTOR, M_WAITOK); KASSERT(sc->map != NULL, ("%s: Memory allocation error (%zu bytes) for %s", @@ -1267,7 +1267,7 @@ virstor_check_and_run(struct g_virstor_softc *sc) bcopy(mapbuf, &sc->map[n], bs); off += bs; count += bs; - n += bs / sizeof *(sc->map); + n += bs / sizeof(*(sc->map)); g_free(mapbuf); } g_access(sc->components[0].gcons, -1, 0, 0); @@ -1306,8 +1306,8 @@ virstor_check_and_run(struct g_virstor_softc *sc) sc->components[index].chunk_next); } - sc->me_per_sector = sc->sectorsize / sizeof *(sc->map); - if (sc->sectorsize % sizeof *(sc->map) != 0) { + sc->me_per_sector = sc->sectorsize / sizeof(*(sc->map)); + if (sc->sectorsize % sizeof(*(sc->map)) != 0) { LOG_MSG(LVL_ERROR, "%s: Map entries don't fit exactly in a sector (%s)", __func__, sc->geom->name); @@ -1653,7 +1653,7 @@ g_virstor_start(struct bio *b) * XXX: this will prevent the fs from * being umounted! */ struct g_virstor_bio_q *biq; - biq = malloc(sizeof *biq, M_GVIRSTOR, + biq = malloc(sizeof(*biq), M_GVIRSTOR, M_NOWAIT); if (biq == NULL) { bioq_dismantle(&bq); @@ -1703,7 +1703,7 @@ g_virstor_start(struct bio *b) * map array. * sc_offset will end up pointing to the drive * sector. */ - s_offset = chunk_index * sizeof *me; + s_offset = chunk_index * sizeof(*me); s_offset = rounddown(s_offset, sc->sectorsize); /* data_me points to map entry sector diff --git a/sys/kern/coredump_vnode.c b/sys/kern/coredump_vnode.c new file mode 100644 index 000000000000..8b857e9aa4a2 --- /dev/null +++ b/sys/kern/coredump_vnode.c @@ -0,0 +1,562 @@ +/* + * SPDX-License-Identifier: BSD-3-Clause AND BSD-2-Clause + * + * Copyright (c) 1982, 1986, 1989, 1991, 1993 + * The Regents of the University of California. All rights reserved. + * (c) UNIX System Laboratories, Inc. + * All or some portions of this file are derived from material licensed + * to the University of California by American Telephone and Telegraph + * Co. or Unix System Laboratories, Inc. and are reproduced herein with + * the permission of UNIX System Laboratories, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * - kern_sig.c + */ +/* + * Copyright (c) 1993, David Greenman + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * -kern_exec.c + */ + +#include <sys/systm.h> +#include <sys/acct.h> +#include <sys/compressor.h> +#include <sys/devctl.h> +#include <sys/fcntl.h> +#include <sys/jail.h> +#include <sys/limits.h> +#include <sys/namei.h> +#include <sys/proc.h> +#include <sys/sbuf.h> +#include <sys/stat.h> +#include <sys/sysctl.h> +#include <sys/sysent.h> +#include <sys/syslog.h> +#include <sys/ucoredump.h> +#include <sys/unistd.h> +#include <sys/vnode.h> + +#include <security/audit/audit.h> + +#define GZIP_SUFFIX ".gz" +#define ZSTD_SUFFIX ".zst" + +#define MAX_NUM_CORE_FILES 100000 +#ifndef NUM_CORE_FILES +#define NUM_CORE_FILES 5 +#endif + +static coredumper_handle_fn coredump_vnode; +static struct coredumper vnode_coredumper = { + .cd_name = "vnode_coredumper", + .cd_handle = coredump_vnode, +}; + +SYSINIT(vnode_coredumper_register, SI_SUB_EXEC, SI_ORDER_ANY, + coredumper_register, &vnode_coredumper); + +_Static_assert(NUM_CORE_FILES >= 0 && NUM_CORE_FILES <= MAX_NUM_CORE_FILES, + "NUM_CORE_FILES is out of range (0 to " __STRING(MAX_NUM_CORE_FILES) ")"); +static int num_cores = NUM_CORE_FILES; + +static int capmode_coredump; +SYSCTL_INT(_kern, OID_AUTO, capmode_coredump, CTLFLAG_RWTUN, + &capmode_coredump, 0, "Allow processes in capability mode to dump core"); + +static int set_core_nodump_flag = 0; +SYSCTL_INT(_kern, OID_AUTO, nodump_coredump, CTLFLAG_RW, &set_core_nodump_flag, + 0, "Enable setting the NODUMP flag on coredump files"); + +static int coredump_devctl = 0; +SYSCTL_INT(_kern, OID_AUTO, coredump_devctl, CTLFLAG_RW, &coredump_devctl, + 0, "Generate a devctl notification when processes coredump"); + +/* + * corefilename[] is protected by the allproc_lock. + */ +static char corefilename[MAXPATHLEN] = { "%N.core" }; +TUNABLE_STR("kern.corefile", corefilename, sizeof(corefilename)); + +static int +sysctl_kern_corefile(SYSCTL_HANDLER_ARGS) +{ + int error; + + sx_xlock(&allproc_lock); + error = sysctl_handle_string(oidp, corefilename, sizeof(corefilename), + req); + sx_xunlock(&allproc_lock); + + return (error); +} +SYSCTL_PROC(_kern, OID_AUTO, corefile, CTLTYPE_STRING | CTLFLAG_RW | + CTLFLAG_MPSAFE, 0, 0, sysctl_kern_corefile, "A", + "Process corefile name format string"); + +static int +sysctl_debug_num_cores_check (SYSCTL_HANDLER_ARGS) +{ + int error; + int new_val; + + new_val = num_cores; + error = sysctl_handle_int(oidp, &new_val, 0, req); + if (error != 0 || req->newptr == NULL) + return (error); + if (new_val > MAX_NUM_CORE_FILES) + new_val = MAX_NUM_CORE_FILES; + if (new_val < 0) + new_val = 0; + num_cores = new_val; + return (0); +} +SYSCTL_PROC(_debug, OID_AUTO, ncores, + CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, sizeof(int), + sysctl_debug_num_cores_check, "I", + "Maximum number of generated process corefiles while using index format"); + +static void +vnode_close_locked(struct thread *td, struct vnode *vp) +{ + + VOP_UNLOCK(vp); + vn_close(vp, FWRITE, td->td_ucred, td); +} + +int +core_vn_write(const struct coredump_writer *cdw, const void *base, size_t len, + off_t offset, enum uio_seg seg, struct ucred *cred, size_t *resid, + struct thread *td) +{ + struct coredump_vnode_ctx *ctx = cdw->ctx; + + return (vn_rdwr_inchunks(UIO_WRITE, ctx->vp, __DECONST(void *, base), + len, offset, seg, IO_UNIT | IO_DIRECT | IO_RANGELOCKED, + cred, ctx->fcred, resid, td)); +} + +int +core_vn_extend(const struct coredump_writer *cdw, off_t newsz, + struct ucred *cred) +{ + struct coredump_vnode_ctx *ctx = cdw->ctx; + struct mount *mp; + int error; + + error = vn_start_write(ctx->vp, &mp, V_WAIT); + 
if (error != 0)
+		return (error);
+	vn_lock(ctx->vp, LK_EXCLUSIVE | LK_RETRY);
+	error = vn_truncate_locked(ctx->vp, newsz, false, cred);
+	VOP_UNLOCK(ctx->vp);
+	vn_finished_write(mp);
+	return (error);
+}
+
+/*
+ * If the core format has a %I in it, then we need to check
+ * for existing corefiles before defining a name.
+ * To do this we iterate over 0..ncores to find a
+ * non-existing core file name to use. If all core files are
+ * already used we choose the oldest one.
+ */
+static int
+corefile_open_last(struct thread *td, char *name, int indexpos,
+    int indexlen, int ncores, struct vnode **vpp)
+{
+	struct vnode *oldvp, *nextvp, *vp;
+	struct vattr vattr;
+	struct nameidata nd;
+	int error, i, flags, oflags, cmode;
+	char ch;
+	struct timespec lasttime;
+
+	nextvp = oldvp = NULL;
+	cmode = S_IRUSR | S_IWUSR;
+	oflags = VN_OPEN_NOAUDIT | VN_OPEN_NAMECACHE |
+	    (capmode_coredump ? VN_OPEN_NOCAPCHECK : 0);
+
+	for (i = 0; i < ncores; i++) {
+		flags = O_CREAT | FWRITE | O_NOFOLLOW;
+
+		ch = name[indexpos + indexlen];
+		(void)snprintf(name + indexpos, indexlen + 1, "%.*u", indexlen,
+		    i);
+		name[indexpos + indexlen] = ch;
+
+		NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, name);
+		error = vn_open_cred(&nd, &flags, cmode, oflags, td->td_ucred,
+		    NULL);
+		if (error != 0)
+			break;
+
+		vp = nd.ni_vp;
+		NDFREE_PNBUF(&nd);
+		if ((flags & O_CREAT) == O_CREAT) {
+			nextvp = vp;
+			break;
+		}
+
+		error = VOP_GETATTR(vp, &vattr, td->td_ucred);
+		if (error != 0) {
+			vnode_close_locked(td, vp);
+			break;
+		}
+
+		if (oldvp == NULL ||
+		    lasttime.tv_sec > vattr.va_mtime.tv_sec ||
+		    (lasttime.tv_sec == vattr.va_mtime.tv_sec &&
+		    lasttime.tv_nsec >= vattr.va_mtime.tv_nsec)) {
+			if (oldvp != NULL)
+				vn_close(oldvp, FWRITE, td->td_ucred, td);
+			oldvp = vp;
+			VOP_UNLOCK(oldvp);
+			lasttime = vattr.va_mtime;
+		} else {
+			vnode_close_locked(td, vp);
+		}
+	}
+
+	if (oldvp != NULL) {
+		if (nextvp == NULL) {
+			if ((td->td_proc->p_flag & P_SUGID) != 0) {
+				error = EFAULT;
+				vn_close(oldvp, FWRITE, td->td_ucred, td);
+			} else {
+				nextvp = oldvp;
+				error = vn_lock(nextvp, LK_EXCLUSIVE);
+				if (error != 0) {
+					vn_close(nextvp, FWRITE, td->td_ucred,
+					    td);
+					nextvp = NULL;
+				}
+			}
+		} else {
+			vn_close(oldvp, FWRITE, td->td_ucred, td);
+		}
+	}
+	if (error != 0) {
+		if (nextvp != NULL)
+			vnode_close_locked(td, oldvp);
+	} else {
+		*vpp = nextvp;
+	}
+
+	return (error);
+}
+
+/*
+ * corefile_open(comm, uid, pid, td, compress, signum, vpp, namep)
+ * Expand the name described in corefilename, using name, uid, and pid
+ * and open/create core file.
+ * corefilename is a printf-like string, with these format specifiers:
+ *	%H	hostname of the system
+ *	%I	autoincrementing index
+ *	%N	name of process ("name")
+ *	%P	process id (pid)
+ *	%S	number of the signal that triggered the dump
+ *	%U	user id (uid)
+ * For example, "%N.core" is the default; they can be disabled completely
+ * by using "/dev/null", or all core files can be stored in "/cores/%U/%N-%P".
+ * This is controlled by the sysctl variable kern.corefile (see above).
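+ *
+ * As a purely illustrative example (values invented here), setting
+ *	kern.corefile=/var/coredumps/%U/%N.%P.core
+ * would expand, for pid 1234 of process "foo" running as uid 1001, to
+ *	/var/coredumps/1001/foo.1234.core
+ * with a ".gz" or ".zst" suffix appended whenever
+ * kern.compress_user_cores selects a compressor.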
+ */ +static int +corefile_open(const char *comm, uid_t uid, pid_t pid, struct thread *td, + int compress, int signum, struct vnode **vpp, char **namep) +{ + struct sbuf sb; + struct nameidata nd; + const char *format; + char *hostname, *name; + int cmode, error, flags, i, indexpos, indexlen, oflags, ncores; + + hostname = NULL; + format = corefilename; + name = malloc(MAXPATHLEN, M_TEMP, M_WAITOK | M_ZERO); + indexlen = 0; + indexpos = -1; + ncores = num_cores; + (void)sbuf_new(&sb, name, MAXPATHLEN, SBUF_FIXEDLEN); + sx_slock(&allproc_lock); + for (i = 0; format[i] != '\0'; i++) { + switch (format[i]) { + case '%': /* Format character */ + i++; + switch (format[i]) { + case '%': + sbuf_putc(&sb, '%'); + break; + case 'H': /* hostname */ + if (hostname == NULL) { + hostname = malloc(MAXHOSTNAMELEN, + M_TEMP, M_WAITOK); + } + getcredhostname(td->td_ucred, hostname, + MAXHOSTNAMELEN); + sbuf_cat(&sb, hostname); + break; + case 'I': /* autoincrementing index */ + if (indexpos != -1) { + sbuf_printf(&sb, "%%I"); + break; + } + + indexpos = sbuf_len(&sb); + sbuf_printf(&sb, "%u", ncores - 1); + indexlen = sbuf_len(&sb) - indexpos; + break; + case 'N': /* process name */ + sbuf_printf(&sb, "%s", comm); + break; + case 'P': /* process id */ + sbuf_printf(&sb, "%u", pid); + break; + case 'S': /* signal number */ + sbuf_printf(&sb, "%i", signum); + break; + case 'U': /* user id */ + sbuf_printf(&sb, "%u", uid); + break; + default: + log(LOG_ERR, + "Unknown format character %c in " + "corename `%s'\n", format[i], format); + break; + } + break; + default: + sbuf_putc(&sb, format[i]); + break; + } + } + sx_sunlock(&allproc_lock); + free(hostname, M_TEMP); + if (compress == COMPRESS_GZIP) + sbuf_cat(&sb, GZIP_SUFFIX); + else if (compress == COMPRESS_ZSTD) + sbuf_cat(&sb, ZSTD_SUFFIX); + if (sbuf_error(&sb) != 0) { + log(LOG_ERR, "pid %ld (%s), uid (%lu): corename is too " + "long\n", (long)pid, comm, (u_long)uid); + sbuf_delete(&sb); + free(name, M_TEMP); + return (ENOMEM); + } + sbuf_finish(&sb); + sbuf_delete(&sb); + + if (indexpos != -1) { + error = corefile_open_last(td, name, indexpos, indexlen, ncores, + vpp); + if (error != 0) { + log(LOG_ERR, + "pid %d (%s), uid (%u): Path `%s' failed " + "on initial open test, error = %d\n", + pid, comm, uid, name, error); + } + } else { + cmode = S_IRUSR | S_IWUSR; + oflags = VN_OPEN_NOAUDIT | VN_OPEN_NAMECACHE | + (capmode_coredump ? VN_OPEN_NOCAPCHECK : 0); + flags = O_CREAT | FWRITE | O_NOFOLLOW; + if ((td->td_proc->p_flag & P_SUGID) != 0) + flags |= O_EXCL; + + NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, name); + error = vn_open_cred(&nd, &flags, cmode, oflags, td->td_ucred, + NULL); + if (error == 0) { + *vpp = nd.ni_vp; + NDFREE_PNBUF(&nd); + } + } + + if (error != 0) { +#ifdef AUDIT + audit_proc_coredump(td, name, error); +#endif + free(name, M_TEMP); + return (error); + } + *namep = name; + return (0); +} + +/* + * The vnode dumper is the traditional coredump handler. Our policy and limits + * are generally checked already, so it creates the coredump name and passes on + * a vnode and a size limit to the process-specific coredump routine if there is + * one. If there _is not_ one, it returns ENOSYS; otherwise it returns the + * error from the process-specific routine. 
+ */
+static int
+coredump_vnode(struct thread *td, off_t limit)
+{
+	struct proc *p = td->td_proc;
+	struct ucred *cred = td->td_ucred;
+	struct vnode *vp;
+	struct coredump_vnode_ctx wctx;
+	struct coredump_writer cdw = { };
+	struct flock lf;
+	struct vattr vattr;
+	size_t fullpathsize;
+	int error, error1, jid, locked, ppid, sig;
+	char *name;			/* name of corefile */
+	void *rl_cookie;
+	char *fullpath, *freepath = NULL;
+	struct sbuf *sb;
+
+	PROC_LOCK_ASSERT(p, MA_OWNED);
+
+	ppid = p->p_oppid;
+	sig = p->p_sig;
+	jid = p->p_ucred->cr_prison->pr_id;
+	PROC_UNLOCK(p);
+
+	error = corefile_open(p->p_comm, cred->cr_uid, p->p_pid, td,
+	    compress_user_cores, sig, &vp, &name);
+	if (error != 0)
+		return (error);
+
+	/*
+	 * Don't dump to non-regular files or files with links.
+	 * Do not dump into system files. Effective user must own the corefile.
+	 */
+	if (vp->v_type != VREG || VOP_GETATTR(vp, &vattr, cred) != 0 ||
+	    vattr.va_nlink != 1 || (vp->v_vflag & VV_SYSTEM) != 0 ||
+	    vattr.va_uid != cred->cr_uid) {
+		VOP_UNLOCK(vp);
+		error = EFAULT;
+		goto out;
+	}
+
+	VOP_UNLOCK(vp);
+
+	/* Postpone other writers, including core dumps of other processes. */
+	rl_cookie = vn_rangelock_wlock(vp, 0, OFF_MAX);
+
+	lf.l_whence = SEEK_SET;
+	lf.l_start = 0;
+	lf.l_len = 0;
+	lf.l_type = F_WRLCK;
+	locked = (VOP_ADVLOCK(vp, (caddr_t)p, F_SETLK, &lf, F_FLOCK) == 0);
+
+	VATTR_NULL(&vattr);
+	vattr.va_size = 0;
+	if (set_core_nodump_flag)
+		vattr.va_flags = UF_NODUMP;
+	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
+	VOP_SETATTR(vp, &vattr, cred);
+	VOP_UNLOCK(vp);
+	PROC_LOCK(p);
+	p->p_acflag |= ACORE;
+	PROC_UNLOCK(p);
+
+	wctx.vp = vp;
+	wctx.fcred = NOCRED;
+
+	cdw.ctx = &wctx;
+	cdw.write_fn = core_vn_write;
+	cdw.extend_fn = core_vn_extend;
+
+	if (p->p_sysent->sv_coredump != NULL) {
+		error = p->p_sysent->sv_coredump(td, &cdw, limit, 0);
+	} else {
+		error = ENOSYS;
+	}
+
+	if (locked) {
+		lf.l_type = F_UNLCK;
+		VOP_ADVLOCK(vp, (caddr_t)p, F_UNLCK, &lf, F_FLOCK);
+	}
+	vn_rangelock_unlock(vp, rl_cookie);
+
+	/*
+	 * Notify the userland helper that a process triggered a core dump.
+	 * This allows the helper to run an automated debugging session.
+	 */
+	if (error != 0 || coredump_devctl == 0)
+		goto out;
+	sb = sbuf_new_auto();
+	if (vn_fullpath_global(p->p_textvp, &fullpath, &freepath) != 0)
+		goto out2;
+	sbuf_cat(sb, "comm=\"");
+	devctl_safe_quote_sb(sb, fullpath);
+	free(freepath, M_TEMP);
+	sbuf_cat(sb, "\" core=\"");
+
+	/*
+	 * We can't look up the core file vp directly. When we're replacing a
+	 * core, and other random times, we flush the name cache, so it will
+	 * fail. Instead, if the path of the core is relative, add the current
+	 * dir in front of it.
+ */ + if (name[0] != '/') { + fullpathsize = MAXPATHLEN; + freepath = malloc(fullpathsize, M_TEMP, M_WAITOK); + if (vn_getcwd(freepath, &fullpath, &fullpathsize) != 0) { + free(freepath, M_TEMP); + goto out2; + } + devctl_safe_quote_sb(sb, fullpath); + free(freepath, M_TEMP); + sbuf_putc(sb, '/'); + } + devctl_safe_quote_sb(sb, name); + sbuf_putc(sb, '"'); + + sbuf_printf(sb, " jid=%d pid=%d ppid=%d signo=%d", + jid, p->p_pid, ppid, sig); + if (sbuf_finish(sb) == 0) + devctl_notify("kernel", "signal", "coredump", sbuf_data(sb)); +out2: + sbuf_delete(sb); +out: + error1 = vn_close(vp, FWRITE, cred, td); + if (error == 0) + error = error1; +#ifdef AUDIT + audit_proc_coredump(td, name, error); +#endif + free(name, M_TEMP); + return (error); +} diff --git a/sys/kern/imgact_elf.c b/sys/kern/imgact_elf.c index b7ffbe68b483..2690ad3b2679 100644 --- a/sys/kern/imgact_elf.c +++ b/sys/kern/imgact_elf.c @@ -64,6 +64,7 @@ #include <sys/syscall.h> #include <sys/sysctl.h> #include <sys/sysent.h> +#include <sys/ucoredump.h> #include <sys/vnode.h> #include <sys/syslog.h> #include <sys/eventhandler.h> @@ -1562,9 +1563,6 @@ struct note_info { TAILQ_HEAD(note_info_list, note_info); -extern int compress_user_cores; -extern int compress_user_cores_level; - static void cb_put_phdr(vm_map_entry_t, void *); static void cb_size_segment(vm_map_entry_t, void *); static void each_dumpable_segment(struct thread *, segment_callback, void *, @@ -1595,7 +1593,7 @@ core_compressed_write(void *base, size_t len, off_t offset, void *arg) } int -__elfN(coredump)(struct thread *td, struct vnode *vp, off_t limit, int flags) +__elfN(coredump)(struct thread *td, struct coredump_writer *cdw, off_t limit, int flags) { struct ucred *cred = td->td_ucred; int compm, error = 0; @@ -1625,9 +1623,8 @@ __elfN(coredump)(struct thread *td, struct vnode *vp, off_t limit, int flags) /* Set up core dump parameters. */ params.offset = 0; params.active_cred = cred; - params.file_cred = NOCRED; params.td = td; - params.vp = vp; + params.cdw = cdw; params.comp = NULL; #ifdef RACCT @@ -1662,6 +1659,12 @@ __elfN(coredump)(struct thread *td, struct vnode *vp, off_t limit, int flags) tmpbuf = malloc(CORE_BUF_SIZE, M_TEMP, M_WAITOK | M_ZERO); } + if (cdw->init_fn != NULL) { + error = (*cdw->init_fn)(cdw, ¶ms); + if (error != 0) + goto done; + } + /* * Allocate memory for building the header, fill it up, * and write it out following the notes. diff --git a/sys/kern/kern_cpuset.c b/sys/kern/kern_cpuset.c index 5d9e2f2f326b..d7eb82d5f259 100644 --- a/sys/kern/kern_cpuset.c +++ b/sys/kern/kern_cpuset.c @@ -530,7 +530,7 @@ _domainset_create(struct domainset *domain, struct domainlist *freelist) * remove them and update the domainset accordingly. If only empty * domains are present, we must return failure. 
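The coredump_writer hooks wired up above decouple the ELF dumper from its output: __elfN(coredump) only ever goes through cdw->write_fn, cdw->extend_fn, and the optional cdw->init_fn. As a minimal sketch of an alternative backend, assuming only the signatures visible in this change (all names below are invented), a writer that simply discards the dump might look like:

#include <sys/param.h>
#include <sys/proc.h>
#include <sys/ucoredump.h>

static int
null_core_write(const struct coredump_writer *cdw, const void *base,
    size_t len, off_t offset, enum uio_seg seg, struct ucred *cred,
    size_t *resid, struct thread *td)
{
	/* Claim the whole range was written. */
	if (resid != NULL)
		*resid = 0;
	return (0);
}

static int
null_core_extend(const struct coredump_writer *cdw, off_t newsz,
    struct ucred *cred)
{
	/* Nothing backs the dump, so extending it costs nothing. */
	return (0);
}

static struct coredump_writer null_cdw = {
	.write_fn = null_core_write,
	.extend_fn = null_core_extend,
	/* .init_fn may stay NULL; __elfN(coredump) checks before calling. */
};

Such a writer would be handed to sv_coredump() exactly as coredump_vnode() passes its vnode-backed cdw above.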
*/ -static bool +bool domainset_empty_vm(struct domainset *domain) { domainset_t empty; @@ -2409,82 +2409,92 @@ sys_cpuset_setdomain(struct thread *td, struct cpuset_setdomain_args *uap) } int -kern_cpuset_setdomain(struct thread *td, cpulevel_t level, cpuwhich_t which, - id_t id, size_t domainsetsize, const domainset_t *maskp, int policy, - const struct cpuset_copy_cb *cb) +domainset_populate(struct domainset *domain, const domainset_t *mask, int policy, + size_t mask_size) { - struct cpuset *nset; - struct cpuset *set; - struct thread *ttd; - struct proc *p; - struct domainset domain; - domainset_t *mask; - int error; - if (domainsetsize < sizeof(domainset_t) || - domainsetsize > DOMAINSET_MAXSIZE / NBBY) - return (ERANGE); if (policy <= DOMAINSET_POLICY_INVALID || - policy > DOMAINSET_POLICY_MAX) + policy > DOMAINSET_POLICY_MAX) { return (EINVAL); - error = cpuset_check_capabilities(td, level, which, id); - if (error != 0) - return (error); - memset(&domain, 0, sizeof(domain)); - mask = malloc(domainsetsize, M_TEMP, M_WAITOK | M_ZERO); - error = cb->cpuset_copyin(maskp, mask, domainsetsize); - if (error) - goto out; + } + /* * Verify that no high bits are set. */ - if (domainsetsize > sizeof(domainset_t)) { - char *end; - char *cp; + if (mask_size > sizeof(domainset_t)) { + const char *end; + const char *cp; - end = cp = (char *)&mask->__bits; - end += domainsetsize; + end = cp = (const char *)&mask->__bits; + end += mask_size; cp += sizeof(domainset_t); - while (cp != end) + while (cp != end) { if (*cp++ != 0) { - error = EINVAL; - goto out; + return (EINVAL); } + } } if (DOMAINSET_EMPTY(mask)) { - error = EDEADLK; - goto out; + return (EDEADLK); } - DOMAINSET_COPY(mask, &domain.ds_mask); - domain.ds_policy = policy; + DOMAINSET_COPY(mask, &domain->ds_mask); + domain->ds_policy = policy; /* * Sanitize the provided mask. */ - if (!DOMAINSET_SUBSET(&all_domains, &domain.ds_mask)) { - error = EINVAL; - goto out; + if (!DOMAINSET_SUBSET(&all_domains, &domain->ds_mask)) { + return (EINVAL); } /* Translate preferred policy into a mask and fallback. */ if (policy == DOMAINSET_POLICY_PREFER) { /* Only support a single preferred domain. */ - if (DOMAINSET_COUNT(&domain.ds_mask) != 1) { - error = EINVAL; - goto out; + if (DOMAINSET_COUNT(&domain->ds_mask) != 1) { + return (EINVAL); } - domain.ds_prefer = DOMAINSET_FFS(&domain.ds_mask) - 1; + domain->ds_prefer = DOMAINSET_FFS(&domain->ds_mask) - 1; /* This will be constrained by domainset_shadow(). */ - DOMAINSET_COPY(&all_domains, &domain.ds_mask); + DOMAINSET_COPY(&all_domains, &domain->ds_mask); } + return (0); +} + +int +kern_cpuset_setdomain(struct thread *td, cpulevel_t level, cpuwhich_t which, + id_t id, size_t domainsetsize, const domainset_t *maskp, int policy, + const struct cpuset_copy_cb *cb) +{ + struct cpuset *nset; + struct cpuset *set; + struct thread *ttd; + struct proc *p; + struct domainset domain; + domainset_t *mask; + int error; + + error = cpuset_check_capabilities(td, level, which, id); + if (error != 0) + return (error); + if (domainsetsize < sizeof(domainset_t) || + domainsetsize > DOMAINSET_MAXSIZE / NBBY) + return (ERANGE); + memset(&domain, 0, sizeof(domain)); + mask = malloc(domainsetsize, M_TEMP, M_WAITOK | M_ZERO); + error = cb->cpuset_copyin(maskp, mask, domainsetsize); + if (error) + goto out; + error = domainset_populate(&domain, mask, policy, domainsetsize); + if (error) + goto out; + /* * When given an impossible policy, fall back to interleaving * across all domains. 
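For orientation, the checks factored into domainset_populate() above are what a userland cpuset_setdomain(2) request must satisfy; for instance, DOMAINSET_POLICY_PREFER is rejected unless exactly one domain is set in the mask. A sketch of a well-formed call through the documented wrapper (illustrative values only):

#include <sys/param.h>
#include <sys/cpuset.h>
#include <sys/domainset.h>
#include <err.h>

int
main(void)
{
	domainset_t mask;

	/* DOMAINSET_POLICY_PREFER requires exactly one domain in the mask. */
	DOMAINSET_ZERO(&mask);
	DOMAINSET_SET(0, &mask);
	if (cpuset_setdomain(CPU_LEVEL_WHICH, CPU_WHICH_PID, -1,
	    sizeof(mask), &mask, DOMAINSET_POLICY_PREFER) != 0)
		err(1, "cpuset_setdomain");
	return (0);
}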
*/ if (domainset_empty_vm(&domain)) domainset_copy(domainset2, &domain); - switch (level) { case CPU_LEVEL_ROOT: case CPU_LEVEL_CPUSET: diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c index 03268365891e..0fc2d0e7f1bc 100644 --- a/sys/kern/kern_exec.c +++ b/sys/kern/kern_exec.c @@ -70,6 +70,7 @@ #include <sys/sysent.h> #include <sys/sysproto.h> #include <sys/timers.h> +#include <sys/ucoredump.h> #include <sys/umtxvar.h> #include <sys/vnode.h> #include <sys/wait.h> @@ -2002,10 +2003,14 @@ int core_write(struct coredump_params *cp, const void *base, size_t len, off_t offset, enum uio_seg seg, size_t *resid) { + return ((*cp->cdw->write_fn)(cp->cdw, base, len, offset, seg, + cp->active_cred, resid, cp->td)); +} - return (vn_rdwr_inchunks(UIO_WRITE, cp->vp, __DECONST(void *, base), - len, offset, seg, IO_UNIT | IO_DIRECT | IO_RANGELOCKED, - cp->active_cred, cp->file_cred, resid, cp->td)); +static int +core_extend(struct coredump_params *cp, off_t newsz) +{ + return ((*cp->cdw->extend_fn)(cp->cdw, newsz, cp->active_cred)); } int @@ -2013,7 +2018,6 @@ core_output(char *base, size_t len, off_t offset, struct coredump_params *cp, void *tmpbuf) { vm_map_t map; - struct mount *mp; size_t resid, runlen; int error; bool success; @@ -2068,14 +2072,7 @@ core_output(char *base, size_t len, off_t offset, struct coredump_params *cp, } } if (!success) { - error = vn_start_write(cp->vp, &mp, V_WAIT); - if (error != 0) - break; - vn_lock(cp->vp, LK_EXCLUSIVE | LK_RETRY); - error = vn_truncate_locked(cp->vp, offset + runlen, - false, cp->td->td_ucred); - VOP_UNLOCK(cp->vp); - vn_finished_write(mp); + error = core_extend(cp, offset + runlen); if (error != 0) break; } diff --git a/sys/kern/kern_jail.c b/sys/kern/kern_jail.c index d4529e096929..7ef1d19f0ea8 100644 --- a/sys/kern/kern_jail.c +++ b/sys/kern/kern_jail.c @@ -3466,7 +3466,7 @@ prison_check_af(struct ucred *cred, int af) pr = cred->cr_prison; #ifdef VIMAGE /* Prisons with their own network stack are not limited. */ - if (prison_owns_vnet(cred)) + if (prison_owns_vnet(pr)) return (0); #endif @@ -3531,7 +3531,7 @@ prison_if(struct ucred *cred, const struct sockaddr *sa) KASSERT(sa != NULL, ("%s: sa is NULL", __func__)); #ifdef VIMAGE - if (prison_owns_vnet(cred)) + if (prison_owns_vnet(cred->cr_prison)) return (0); #endif @@ -3648,7 +3648,7 @@ jailed_without_vnet(struct ucred *cred) if (!jailed(cred)) return (false); #ifdef VIMAGE - if (prison_owns_vnet(cred)) + if (prison_owns_vnet(cred->cr_prison)) return (false); #endif @@ -3711,20 +3711,17 @@ getjailname(struct ucred *cred, char *name, size_t len) #ifdef VIMAGE /* - * Determine whether the prison represented by cred owns - * its vnet rather than having it inherited. - * - * Returns true in case the prison owns the vnet, false otherwise. + * Determine whether the prison owns its VNET. */ bool -prison_owns_vnet(struct ucred *cred) +prison_owns_vnet(struct prison *pr) { /* * vnets cannot be added/removed after jail creation, * so no need to lock here. 
*/ - return ((cred->cr_prison->pr_flags & PR_VNET) != 0); + return ((pr->pr_flags & PR_VNET) != 0); } #endif @@ -4425,7 +4422,7 @@ sysctl_jail_vnet(SYSCTL_HANDLER_ARGS) #ifdef VIMAGE struct ucred *cred = req->td->td_ucred; - havevnet = jailed(cred) && prison_owns_vnet(cred); + havevnet = jailed(cred) && prison_owns_vnet(cred->cr_prison); #else havevnet = 0; #endif diff --git a/sys/kern/kern_prot.c b/sys/kern/kern_prot.c index d9aeec68e620..0f0bc056cafd 100644 --- a/sys/kern/kern_prot.c +++ b/sys/kern/kern_prot.c @@ -287,7 +287,7 @@ sys_getgid(struct thread *td, struct getgid_args *uap) td->td_retval[0] = td->td_ucred->cr_rgid; #if defined(COMPAT_43) - td->td_retval[1] = td->td_ucred->cr_groups[0]; + td->td_retval[1] = td->td_ucred->cr_gid; #endif return (0); } @@ -307,7 +307,7 @@ int sys_getegid(struct thread *td, struct getegid_args *uap) { - td->td_retval[0] = td->td_ucred->cr_groups[0]; + td->td_retval[0] = td->td_ucred->cr_gid; return (0); } @@ -1080,7 +1080,7 @@ sys_setgid(struct thread *td, struct setgid_args *uap) gid != oldcred->cr_svgid && /* allow setgid(saved gid) */ #endif #ifdef POSIX_APPENDIX_B_4_2_2 /* Use BSD-compat clause from B.4.2.2 */ - gid != oldcred->cr_groups[0] && /* allow setgid(getegid()) */ + gid != oldcred->cr_gid && /* allow setgid(getegid()) */ #endif (error = priv_check_cred(oldcred, PRIV_CRED_SETGID)) != 0) goto fail; @@ -1092,7 +1092,7 @@ sys_setgid(struct thread *td, struct setgid_args *uap) */ if ( #ifdef POSIX_APPENDIX_B_4_2_2 /* use the clause from B.4.2.2 */ - gid == oldcred->cr_groups[0] || + gid == oldcred->cr_gid || #endif /* We are using privs. */ priv_check_cred(oldcred, PRIV_CRED_SETGID) == 0) @@ -1121,7 +1121,7 @@ sys_setgid(struct thread *td, struct setgid_args *uap) * In all cases permitted cases, we are changing the egid. * Copy credentials so other references do not see our changes. 
*/ - if (oldcred->cr_groups[0] != gid) { + if (oldcred->cr_gid != gid) { change_egid(newcred, gid); setsugid(p); } @@ -1167,7 +1167,7 @@ sys_setegid(struct thread *td, struct setegid_args *uap) (error = priv_check_cred(oldcred, PRIV_CRED_SETEGID)) != 0) goto fail; - if (oldcred->cr_groups[0] != egid) { + if (oldcred->cr_gid != egid) { change_egid(newcred, egid); setsugid(p); } @@ -1393,12 +1393,12 @@ sys_setregid(struct thread *td, struct setregid_args *uap) if (((rgid != (gid_t)-1 && rgid != oldcred->cr_rgid && rgid != oldcred->cr_svgid) || - (egid != (gid_t)-1 && egid != oldcred->cr_groups[0] && + (egid != (gid_t)-1 && egid != oldcred->cr_gid && egid != oldcred->cr_rgid && egid != oldcred->cr_svgid)) && (error = priv_check_cred(oldcred, PRIV_CRED_SETREGID)) != 0) goto fail; - if (egid != (gid_t)-1 && oldcred->cr_groups[0] != egid) { + if (egid != (gid_t)-1 && oldcred->cr_gid != egid) { change_egid(newcred, egid); setsugid(p); } @@ -1406,9 +1406,9 @@ sys_setregid(struct thread *td, struct setregid_args *uap) change_rgid(newcred, rgid); setsugid(p); } - if ((rgid != (gid_t)-1 || newcred->cr_groups[0] != newcred->cr_rgid) && - newcred->cr_svgid != newcred->cr_groups[0]) { - change_svgid(newcred, newcred->cr_groups[0]); + if ((rgid != (gid_t)-1 || newcred->cr_gid != newcred->cr_rgid) && + newcred->cr_svgid != newcred->cr_gid) { + change_svgid(newcred, newcred->cr_gid); setsugid(p); } proc_set_cred(p, newcred); @@ -1547,17 +1547,17 @@ sys_setresgid(struct thread *td, struct setresgid_args *uap) if (((rgid != (gid_t)-1 && rgid != oldcred->cr_rgid && rgid != oldcred->cr_svgid && - rgid != oldcred->cr_groups[0]) || + rgid != oldcred->cr_gid) || (egid != (gid_t)-1 && egid != oldcred->cr_rgid && egid != oldcred->cr_svgid && - egid != oldcred->cr_groups[0]) || + egid != oldcred->cr_gid) || (sgid != (gid_t)-1 && sgid != oldcred->cr_rgid && sgid != oldcred->cr_svgid && - sgid != oldcred->cr_groups[0])) && + sgid != oldcred->cr_gid)) && (error = priv_check_cred(oldcred, PRIV_CRED_SETRESGID)) != 0) goto fail; - if (egid != (gid_t)-1 && oldcred->cr_groups[0] != egid) { + if (egid != (gid_t)-1 && oldcred->cr_gid != egid) { change_egid(newcred, egid); setsugid(p); } @@ -1626,8 +1626,8 @@ sys_getresgid(struct thread *td, struct getresgid_args *uap) error1 = copyout(&cred->cr_rgid, uap->rgid, sizeof(cred->cr_rgid)); if (uap->egid) - error2 = copyout(&cred->cr_groups[0], - uap->egid, sizeof(cred->cr_groups[0])); + error2 = copyout(&cred->cr_gid, + uap->egid, sizeof(cred->cr_gid)); if (uap->sgid) error3 = copyout(&cred->cr_svgid, uap->sgid, sizeof(cred->cr_svgid)); @@ -1737,7 +1737,7 @@ groupmember(gid_t gid, const struct ucred *cred) groups_check_positive_len(cred->cr_ngroups); - if (gid == cred->cr_groups[0]) + if (gid == cred->cr_gid) return (true); return (group_is_supplementary(gid, cred)); @@ -3015,7 +3015,7 @@ void change_egid(struct ucred *newcred, gid_t egid) { - newcred->cr_groups[0] = egid; + newcred->cr_gid = egid; } /*- diff --git a/sys/kern/kern_sendfile.c b/sys/kern/kern_sendfile.c index 35b258e68701..8438298afc0e 100644 --- a/sys/kern/kern_sendfile.c +++ b/sys/kern/kern_sendfile.c @@ -698,10 +698,13 @@ sendfile_wait_generic(struct socket *so, off_t need, int *space) */ error = 0; SOCK_SENDBUF_LOCK(so); - if (so->so_snd.sb_lowat < so->so_snd.sb_hiwat / 2) - so->so_snd.sb_lowat = so->so_snd.sb_hiwat / 2; - if (so->so_snd.sb_lowat < PAGE_SIZE && so->so_snd.sb_hiwat >= PAGE_SIZE) - so->so_snd.sb_lowat = PAGE_SIZE; + if (so->so_snd.sb_flags & SB_AUTOLOWAT) { + if (so->so_snd.sb_lowat < 
so->so_snd.sb_hiwat / 2) + so->so_snd.sb_lowat = so->so_snd.sb_hiwat / 2; + if (so->so_snd.sb_lowat < PAGE_SIZE && + so->so_snd.sb_hiwat >= PAGE_SIZE) + so->so_snd.sb_lowat = PAGE_SIZE; + } retry_space: if (so->so_snd.sb_state & SBS_CANTSENDMORE) { error = EPIPE; diff --git a/sys/kern/kern_sig.c b/sys/kern/kern_sig.c index 5d51aa675cb7..da0efac0598d 100644 --- a/sys/kern/kern_sig.c +++ b/sys/kern/kern_sig.c @@ -45,10 +45,10 @@ #include <sys/vnode.h> #include <sys/acct.h> #include <sys/capsicum.h> -#include <sys/compressor.h> #include <sys/condvar.h> #include <sys/devctl.h> #include <sys/event.h> +#include <sys/exec.h> #include <sys/fcntl.h> #include <sys/imgact.h> #include <sys/jail.h> @@ -80,6 +80,7 @@ #include <sys/syslog.h> #include <sys/sysproto.h> #include <sys/timers.h> +#include <sys/ucoredump.h> #include <sys/unistd.h> #include <sys/vmmeter.h> #include <sys/wait.h> @@ -101,7 +102,6 @@ SDT_PROBE_DEFINE2(proc, , , signal__clear, SDT_PROBE_DEFINE3(proc, , , signal__discard, "struct thread *", "struct proc *", "int"); -static int coredump(struct thread *); static int killpg1(struct thread *td, int sig, int pgid, int all, ksiginfo_t *ksi); static int issignal(struct thread *td); @@ -126,11 +126,6 @@ const struct filterops sig_filtops = { .f_event = filt_signal, }; -static int kern_logsigexit = 1; -SYSCTL_INT(_kern, KERN_LOGSIGEXIT, logsigexit, CTLFLAG_RW, - &kern_logsigexit, 0, - "Log processes quitting on abnormal signals to syslog(3)"); - static int kern_forcesigexit = 1; SYSCTL_INT(_kern, OID_AUTO, forcesigexit, CTLFLAG_RW, &kern_forcesigexit, 0, "Force trap signal to be handled"); @@ -193,26 +188,6 @@ SYSINIT(signal, SI_SUB_P1003_1B, SI_ORDER_FIRST+3, sigqueue_start, NULL); (cr1)->cr_ruid == (cr2)->cr_uid || \ (cr1)->cr_uid == (cr2)->cr_uid) -static int sugid_coredump; -SYSCTL_INT(_kern, OID_AUTO, sugid_coredump, CTLFLAG_RWTUN, - &sugid_coredump, 0, "Allow setuid and setgid processes to dump core"); - -static int capmode_coredump; -SYSCTL_INT(_kern, OID_AUTO, capmode_coredump, CTLFLAG_RWTUN, - &capmode_coredump, 0, "Allow processes in capability mode to dump core"); - -static int do_coredump = 1; -SYSCTL_INT(_kern, OID_AUTO, coredump, CTLFLAG_RW, - &do_coredump, 0, "Enable/Disable coredumps"); - -static int set_core_nodump_flag = 0; -SYSCTL_INT(_kern, OID_AUTO, nodump_coredump, CTLFLAG_RW, &set_core_nodump_flag, - 0, "Enable setting the NODUMP flag on coredump files"); - -static int coredump_devctl = 0; -SYSCTL_INT(_kern, OID_AUTO, coredump_devctl, CTLFLAG_RW, &coredump_devctl, - 0, "Generate a devctl notification when processes coredump"); - /* * Signal properties and actions. 
* The array below categorizes the signals and their default actions @@ -784,6 +759,13 @@ sigprop(int sig) return (0); } +bool +sig_do_core(int sig) +{ + + return ((sigprop(sig) & SIGPROP_CORE) != 0); +} + static bool sigact_flag_test(const struct sigaction *act, int flag) { @@ -2665,6 +2647,8 @@ static void ptrace_coredumpreq(struct thread *td, struct proc *p, struct thr_coredump_req *tcq) { + struct coredump_vnode_ctx wctx; + struct coredump_writer cdw; void *rl_cookie; if (p->p_sysent->sv_coredump == NULL) { @@ -2672,8 +2656,15 @@ ptrace_coredumpreq(struct thread *td, struct proc *p, return; } + wctx.vp = tcq->tc_vp; + wctx.fcred = NOCRED; + + cdw.ctx = &wctx; + cdw.write_fn = core_vn_write; + cdw.extend_fn = core_vn_extend; + rl_cookie = vn_rangelock_wlock(tcq->tc_vp, 0, OFF_MAX); - tcq->tc_error = p->p_sysent->sv_coredump(td, tcq->tc_vp, + tcq->tc_error = p->p_sysent->sv_coredump(td, &cdw, tcq->tc_limit, tcq->tc_flags); vn_rangelock_unlock(tcq->tc_vp, rl_cookie); } @@ -3635,82 +3626,6 @@ killproc(struct proc *p, const char *why) } /* - * Force the current process to exit with the specified signal, dumping core - * if appropriate. We bypass the normal tests for masked and caught signals, - * allowing unrecoverable failures to terminate the process without changing - * signal state. Mark the accounting record with the signal termination. - * If dumping core, save the signal number for the debugger. Calls exit and - * does not return. - */ -void -sigexit(struct thread *td, int sig) -{ - struct proc *p = td->td_proc; - const char *coreinfo; - int rv; - bool logexit; - - PROC_LOCK_ASSERT(p, MA_OWNED); - proc_set_p2_wexit(p); - - p->p_acflag |= AXSIG; - if ((p->p_flag2 & P2_LOGSIGEXIT_CTL) == 0) - logexit = kern_logsigexit != 0; - else - logexit = (p->p_flag2 & P2_LOGSIGEXIT_ENABLE) != 0; - - /* - * We must be single-threading to generate a core dump. This - * ensures that the registers in the core file are up-to-date. - * Also, the ELF dump handler assumes that the thread list doesn't - * change out from under it. - * - * XXX If another thread attempts to single-thread before us - * (e.g. via fork()), we won't get a dump at all. - */ - if ((sigprop(sig) & SIGPROP_CORE) && - thread_single(p, SINGLE_NO_EXIT) == 0) { - p->p_sig = sig; - /* - * Log signals which would cause core dumps - * (Log as LOG_INFO to appease those who don't want - * these messages.) - * XXX : Todo, as well as euid, write out ruid too - * Note that coredump() drops proc lock. - */ - rv = coredump(td); - switch (rv) { - case 0: - sig |= WCOREFLAG; - coreinfo = " (core dumped)"; - break; - case EFAULT: - coreinfo = " (no core dump - bad address)"; - break; - case EINVAL: - coreinfo = " (no core dump - invalid argument)"; - break; - case EFBIG: - coreinfo = " (no core dump - too large)"; - break; - default: - coreinfo = " (no core dump - other error)"; - break; - } - if (logexit) - log(LOG_INFO, - "pid %d (%s), jid %d, uid %d: exited on " - "signal %d%s\n", p->p_pid, p->p_comm, - p->p_ucred->cr_prison->pr_id, - td->td_ucred->cr_uid, - sig &~ WCOREFLAG, coreinfo); - } else - PROC_UNLOCK(p); - exit1(td, 0, sig); - /* NOTREACHED */ -} - -/* * Send queued SIGCHLD to parent when child process's state * is changed. 
*/ @@ -3803,477 +3718,6 @@ childproc_exited(struct proc *p) sigparent(p, reason, status); } -#define MAX_NUM_CORE_FILES 100000 -#ifndef NUM_CORE_FILES -#define NUM_CORE_FILES 5 -#endif -CTASSERT(NUM_CORE_FILES >= 0 && NUM_CORE_FILES <= MAX_NUM_CORE_FILES); -static int num_cores = NUM_CORE_FILES; - -static int -sysctl_debug_num_cores_check (SYSCTL_HANDLER_ARGS) -{ - int error; - int new_val; - - new_val = num_cores; - error = sysctl_handle_int(oidp, &new_val, 0, req); - if (error != 0 || req->newptr == NULL) - return (error); - if (new_val > MAX_NUM_CORE_FILES) - new_val = MAX_NUM_CORE_FILES; - if (new_val < 0) - new_val = 0; - num_cores = new_val; - return (0); -} -SYSCTL_PROC(_debug, OID_AUTO, ncores, - CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, sizeof(int), - sysctl_debug_num_cores_check, "I", - "Maximum number of generated process corefiles while using index format"); - -#define GZIP_SUFFIX ".gz" -#define ZSTD_SUFFIX ".zst" - -int compress_user_cores = 0; - -static int -sysctl_compress_user_cores(SYSCTL_HANDLER_ARGS) -{ - int error, val; - - val = compress_user_cores; - error = sysctl_handle_int(oidp, &val, 0, req); - if (error != 0 || req->newptr == NULL) - return (error); - if (val != 0 && !compressor_avail(val)) - return (EINVAL); - compress_user_cores = val; - return (error); -} -SYSCTL_PROC(_kern, OID_AUTO, compress_user_cores, - CTLTYPE_INT | CTLFLAG_RWTUN | CTLFLAG_NEEDGIANT, 0, sizeof(int), - sysctl_compress_user_cores, "I", - "Enable compression of user corefiles (" - __XSTRING(COMPRESS_GZIP) " = gzip, " - __XSTRING(COMPRESS_ZSTD) " = zstd)"); - -int compress_user_cores_level = 6; -SYSCTL_INT(_kern, OID_AUTO, compress_user_cores_level, CTLFLAG_RWTUN, - &compress_user_cores_level, 0, - "Corefile compression level"); - -/* - * Protect the access to corefilename[] by allproc_lock. - */ -#define corefilename_lock allproc_lock - -static char corefilename[MAXPATHLEN] = {"%N.core"}; -TUNABLE_STR("kern.corefile", corefilename, sizeof(corefilename)); - -static int -sysctl_kern_corefile(SYSCTL_HANDLER_ARGS) -{ - int error; - - sx_xlock(&corefilename_lock); - error = sysctl_handle_string(oidp, corefilename, sizeof(corefilename), - req); - sx_xunlock(&corefilename_lock); - - return (error); -} -SYSCTL_PROC(_kern, OID_AUTO, corefile, CTLTYPE_STRING | CTLFLAG_RW | - CTLFLAG_MPSAFE, 0, 0, sysctl_kern_corefile, "A", - "Process corefile name format string"); - -static void -vnode_close_locked(struct thread *td, struct vnode *vp) -{ - - VOP_UNLOCK(vp); - vn_close(vp, FWRITE, td->td_ucred, td); -} - -/* - * If the core format has a %I in it, then we need to check - * for existing corefiles before defining a name. - * To do this we iterate over 0..ncores to find a - * non-existing core file name to use. If all core files are - * already used we choose the oldest one. - */ -static int -corefile_open_last(struct thread *td, char *name, int indexpos, - int indexlen, int ncores, struct vnode **vpp) -{ - struct vnode *oldvp, *nextvp, *vp; - struct vattr vattr; - struct nameidata nd; - int error, i, flags, oflags, cmode; - char ch; - struct timespec lasttime; - - nextvp = oldvp = NULL; - cmode = S_IRUSR | S_IWUSR; - oflags = VN_OPEN_NOAUDIT | VN_OPEN_NAMECACHE | - (capmode_coredump ? 
VN_OPEN_NOCAPCHECK : 0); - - for (i = 0; i < ncores; i++) { - flags = O_CREAT | FWRITE | O_NOFOLLOW; - - ch = name[indexpos + indexlen]; - (void)snprintf(name + indexpos, indexlen + 1, "%.*u", indexlen, - i); - name[indexpos + indexlen] = ch; - - NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, name); - error = vn_open_cred(&nd, &flags, cmode, oflags, td->td_ucred, - NULL); - if (error != 0) - break; - - vp = nd.ni_vp; - NDFREE_PNBUF(&nd); - if ((flags & O_CREAT) == O_CREAT) { - nextvp = vp; - break; - } - - error = VOP_GETATTR(vp, &vattr, td->td_ucred); - if (error != 0) { - vnode_close_locked(td, vp); - break; - } - - if (oldvp == NULL || - lasttime.tv_sec > vattr.va_mtime.tv_sec || - (lasttime.tv_sec == vattr.va_mtime.tv_sec && - lasttime.tv_nsec >= vattr.va_mtime.tv_nsec)) { - if (oldvp != NULL) - vn_close(oldvp, FWRITE, td->td_ucred, td); - oldvp = vp; - VOP_UNLOCK(oldvp); - lasttime = vattr.va_mtime; - } else { - vnode_close_locked(td, vp); - } - } - - if (oldvp != NULL) { - if (nextvp == NULL) { - if ((td->td_proc->p_flag & P_SUGID) != 0) { - error = EFAULT; - vn_close(oldvp, FWRITE, td->td_ucred, td); - } else { - nextvp = oldvp; - error = vn_lock(nextvp, LK_EXCLUSIVE); - if (error != 0) { - vn_close(nextvp, FWRITE, td->td_ucred, - td); - nextvp = NULL; - } - } - } else { - vn_close(oldvp, FWRITE, td->td_ucred, td); - } - } - if (error != 0) { - if (nextvp != NULL) - vnode_close_locked(td, oldvp); - } else { - *vpp = nextvp; - } - - return (error); -} - -/* - * corefile_open(comm, uid, pid, td, compress, vpp, namep) - * Expand the name described in corefilename, using name, uid, and pid - * and open/create core file. - * corefilename is a printf-like string, with three format specifiers: - * %N name of process ("name") - * %P process id (pid) - * %U user id (uid) - * For example, "%N.core" is the default; they can be disabled completely - * by using "/dev/null", or all core files can be stored in "/cores/%U/%N-%P". - * This is controlled by the sysctl variable kern.corefile (see above). 
- */ -static int -corefile_open(const char *comm, uid_t uid, pid_t pid, struct thread *td, - int compress, int signum, struct vnode **vpp, char **namep) -{ - struct sbuf sb; - struct nameidata nd; - const char *format; - char *hostname, *name; - int cmode, error, flags, i, indexpos, indexlen, oflags, ncores; - - hostname = NULL; - format = corefilename; - name = malloc(MAXPATHLEN, M_TEMP, M_WAITOK | M_ZERO); - indexlen = 0; - indexpos = -1; - ncores = num_cores; - (void)sbuf_new(&sb, name, MAXPATHLEN, SBUF_FIXEDLEN); - sx_slock(&corefilename_lock); - for (i = 0; format[i] != '\0'; i++) { - switch (format[i]) { - case '%': /* Format character */ - i++; - switch (format[i]) { - case '%': - sbuf_putc(&sb, '%'); - break; - case 'H': /* hostname */ - if (hostname == NULL) { - hostname = malloc(MAXHOSTNAMELEN, - M_TEMP, M_WAITOK); - } - getcredhostname(td->td_ucred, hostname, - MAXHOSTNAMELEN); - sbuf_cat(&sb, hostname); - break; - case 'I': /* autoincrementing index */ - if (indexpos != -1) { - sbuf_printf(&sb, "%%I"); - break; - } - - indexpos = sbuf_len(&sb); - sbuf_printf(&sb, "%u", ncores - 1); - indexlen = sbuf_len(&sb) - indexpos; - break; - case 'N': /* process name */ - sbuf_printf(&sb, "%s", comm); - break; - case 'P': /* process id */ - sbuf_printf(&sb, "%u", pid); - break; - case 'S': /* signal number */ - sbuf_printf(&sb, "%i", signum); - break; - case 'U': /* user id */ - sbuf_printf(&sb, "%u", uid); - break; - default: - log(LOG_ERR, - "Unknown format character %c in " - "corename `%s'\n", format[i], format); - break; - } - break; - default: - sbuf_putc(&sb, format[i]); - break; - } - } - sx_sunlock(&corefilename_lock); - free(hostname, M_TEMP); - if (compress == COMPRESS_GZIP) - sbuf_cat(&sb, GZIP_SUFFIX); - else if (compress == COMPRESS_ZSTD) - sbuf_cat(&sb, ZSTD_SUFFIX); - if (sbuf_error(&sb) != 0) { - log(LOG_ERR, "pid %ld (%s), uid (%lu): corename is too " - "long\n", (long)pid, comm, (u_long)uid); - sbuf_delete(&sb); - free(name, M_TEMP); - return (ENOMEM); - } - sbuf_finish(&sb); - sbuf_delete(&sb); - - if (indexpos != -1) { - error = corefile_open_last(td, name, indexpos, indexlen, ncores, - vpp); - if (error != 0) { - log(LOG_ERR, - "pid %d (%s), uid (%u): Path `%s' failed " - "on initial open test, error = %d\n", - pid, comm, uid, name, error); - } - } else { - cmode = S_IRUSR | S_IWUSR; - oflags = VN_OPEN_NOAUDIT | VN_OPEN_NAMECACHE | - (capmode_coredump ? VN_OPEN_NOCAPCHECK : 0); - flags = O_CREAT | FWRITE | O_NOFOLLOW; - if ((td->td_proc->p_flag & P_SUGID) != 0) - flags |= O_EXCL; - - NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, name); - error = vn_open_cred(&nd, &flags, cmode, oflags, td->td_ucred, - NULL); - if (error == 0) { - *vpp = nd.ni_vp; - NDFREE_PNBUF(&nd); - } - } - - if (error != 0) { -#ifdef AUDIT - audit_proc_coredump(td, name, error); -#endif - free(name, M_TEMP); - return (error); - } - *namep = name; - return (0); -} - -/* - * Dump a process' core. The main routine does some - * policy checking, and creates the name of the coredump; - * then it passes on a vnode and a size limit to the process-specific - * coredump routine if there is one; if there _is not_ one, it returns - * ENOSYS; otherwise it returns the error from the process-specific routine. 
- */ - -static int -coredump(struct thread *td) -{ - struct proc *p = td->td_proc; - struct ucred *cred = td->td_ucred; - struct vnode *vp; - struct flock lf; - struct vattr vattr; - size_t fullpathsize; - int error, error1, jid, locked, ppid, sig; - char *name; /* name of corefile */ - void *rl_cookie; - off_t limit; - char *fullpath, *freepath = NULL; - struct sbuf *sb; - - PROC_LOCK_ASSERT(p, MA_OWNED); - MPASS((p->p_flag & P_HADTHREADS) == 0 || p->p_singlethread == td); - - if (!do_coredump || (!sugid_coredump && (p->p_flag & P_SUGID) != 0) || - (p->p_flag2 & P2_NOTRACE) != 0) { - PROC_UNLOCK(p); - return (EFAULT); - } - - /* - * Note that the bulk of limit checking is done after - * the corefile is created. The exception is if the limit - * for corefiles is 0, in which case we don't bother - * creating the corefile at all. This layout means that - * a corefile is truncated instead of not being created, - * if it is larger than the limit. - */ - limit = (off_t)lim_cur(td, RLIMIT_CORE); - if (limit == 0 || racct_get_available(p, RACCT_CORE) == 0) { - PROC_UNLOCK(p); - return (EFBIG); - } - - ppid = p->p_oppid; - sig = p->p_sig; - jid = p->p_ucred->cr_prison->pr_id; - PROC_UNLOCK(p); - - error = corefile_open(p->p_comm, cred->cr_uid, p->p_pid, td, - compress_user_cores, p->p_sig, &vp, &name); - if (error != 0) - return (error); - - /* - * Don't dump to non-regular files or files with links. - * Do not dump into system files. Effective user must own the corefile. - */ - if (vp->v_type != VREG || VOP_GETATTR(vp, &vattr, cred) != 0 || - vattr.va_nlink != 1 || (vp->v_vflag & VV_SYSTEM) != 0 || - vattr.va_uid != cred->cr_uid) { - VOP_UNLOCK(vp); - error = EFAULT; - goto out; - } - - VOP_UNLOCK(vp); - - /* Postpone other writers, including core dumps of other processes. */ - rl_cookie = vn_rangelock_wlock(vp, 0, OFF_MAX); - - lf.l_whence = SEEK_SET; - lf.l_start = 0; - lf.l_len = 0; - lf.l_type = F_WRLCK; - locked = (VOP_ADVLOCK(vp, (caddr_t)p, F_SETLK, &lf, F_FLOCK) == 0); - - VATTR_NULL(&vattr); - vattr.va_size = 0; - if (set_core_nodump_flag) - vattr.va_flags = UF_NODUMP; - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); - VOP_SETATTR(vp, &vattr, cred); - VOP_UNLOCK(vp); - PROC_LOCK(p); - p->p_acflag |= ACORE; - PROC_UNLOCK(p); - - if (p->p_sysent->sv_coredump != NULL) { - error = p->p_sysent->sv_coredump(td, vp, limit, 0); - } else { - error = ENOSYS; - } - - if (locked) { - lf.l_type = F_UNLCK; - VOP_ADVLOCK(vp, (caddr_t)p, F_UNLCK, &lf, F_FLOCK); - } - vn_rangelock_unlock(vp, rl_cookie); - - /* - * Notify the userland helper that a process triggered a core dump. - * This allows the helper to run an automated debugging session. - */ - if (error != 0 || coredump_devctl == 0) - goto out; - sb = sbuf_new_auto(); - if (vn_fullpath_global(p->p_textvp, &fullpath, &freepath) != 0) - goto out2; - sbuf_cat(sb, "comm=\""); - devctl_safe_quote_sb(sb, fullpath); - free(freepath, M_TEMP); - sbuf_cat(sb, "\" core=\""); - - /* - * We can't lookup core file vp directly. When we're replacing a core, and - * other random times, we flush the name cache, so it will fail. Instead, - * if the path of the core is relative, add the current dir in front if it. 
- */ - if (name[0] != '/') { - fullpathsize = MAXPATHLEN; - freepath = malloc(fullpathsize, M_TEMP, M_WAITOK); - if (vn_getcwd(freepath, &fullpath, &fullpathsize) != 0) { - free(freepath, M_TEMP); - goto out2; - } - devctl_safe_quote_sb(sb, fullpath); - free(freepath, M_TEMP); - sbuf_putc(sb, '/'); - } - devctl_safe_quote_sb(sb, name); - sbuf_putc(sb, '"'); - - sbuf_printf(sb, " jid=%d pid=%d ppid=%d signo=%d", - jid, p->p_pid, ppid, sig); - if (sbuf_finish(sb) == 0) - devctl_notify("kernel", "signal", "coredump", sbuf_data(sb)); -out2: - sbuf_delete(sb); -out: - error1 = vn_close(vp, FWRITE, cred, td); - if (error == 0) - error = error1; -#ifdef AUDIT - audit_proc_coredump(td, name, error); -#endif - free(name, M_TEMP); - return (error); -} - /* * Nonexistent system call-- signal process (may want to handle it). Flag * error in case process won't see signal immediately (blocked or ignored). diff --git a/sys/kern/kern_sysctl.c b/sys/kern/kern_sysctl.c index 46226cc31980..25da134661e9 100644 --- a/sys/kern/kern_sysctl.c +++ b/sys/kern/kern_sysctl.c @@ -2368,7 +2368,7 @@ sysctl_root(SYSCTL_HANDLER_ARGS) priv = PRIV_SYSCTL_WRITEJAIL; #ifdef VIMAGE else if ((oid->oid_kind & CTLFLAG_VNET) && - prison_owns_vnet(req->td->td_ucred)) + prison_owns_vnet(req->td->td_ucred->cr_prison)) priv = PRIV_SYSCTL_WRITEJAIL; #endif else diff --git a/sys/kern/kern_thread.c b/sys/kern/kern_thread.c index f853af193016..50b040132396 100644 --- a/sys/kern/kern_thread.c +++ b/sys/kern/kern_thread.c @@ -571,7 +571,7 @@ threadinit(void) /* * Thread structures are specially aligned so that (at least) the - * 5 lower bits of a pointer to 'struct thead' must be 0. These bits + * 5 lower bits of a pointer to 'struct thread' must be 0. These bits * are used by synchronization primitives to store flags in pointers to * such structures. */ diff --git a/sys/kern/kern_ucoredump.c b/sys/kern/kern_ucoredump.c new file mode 100644 index 000000000000..d425596b5f24 --- /dev/null +++ b/sys/kern/kern_ucoredump.c @@ -0,0 +1,299 @@ +/* + * SPDX-License-Identifier: BSD-3-Clause + * + * Copyright (c) 1982, 1986, 1989, 1991, 1993 + * The Regents of the University of California. All rights reserved. + * (c) UNIX System Laboratories, Inc. + * All or some portions of this file are derived from material licensed + * to the University of California by American Telephone and Telegraph + * Co. or Unix System Laboratories, Inc. and are reproduced herein with + * the permission of UNIX System Laboratories, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/param.h> +#include <sys/acct.h> +#include <sys/compressor.h> +#include <sys/jail.h> +#include <sys/kernel.h> +#include <sys/lock.h> +#include <sys/mutex.h> +#include <sys/proc.h> +#include <sys/signalvar.h> +#include <sys/racct.h> +#include <sys/resourcevar.h> +#include <sys/rmlock.h> +#include <sys/sysctl.h> +#include <sys/syslog.h> +#include <sys/ucoredump.h> +#include <sys/wait.h> + +static int coredump(struct thread *td, const char **); + +int compress_user_cores = 0; + +static SLIST_HEAD(, coredumper) coredumpers = + SLIST_HEAD_INITIALIZER(coredumpers); +static struct rmlock coredump_rmlock; +RM_SYSINIT(coredump_lock, &coredump_rmlock, "coredump_lock"); + +static int kern_logsigexit = 1; +SYSCTL_INT(_kern, KERN_LOGSIGEXIT, logsigexit, CTLFLAG_RW, + &kern_logsigexit, 0, + "Log processes quitting on abnormal signals to syslog(3)"); + +static int sugid_coredump; +SYSCTL_INT(_kern, OID_AUTO, sugid_coredump, CTLFLAG_RWTUN, + &sugid_coredump, 0, "Allow setuid and setgid processes to dump core"); + +static int do_coredump = 1; +SYSCTL_INT(_kern, OID_AUTO, coredump, CTLFLAG_RW, + &do_coredump, 0, "Enable/Disable coredumps"); + +static int +sysctl_compress_user_cores(SYSCTL_HANDLER_ARGS) +{ + int error, val; + + val = compress_user_cores; + error = sysctl_handle_int(oidp, &val, 0, req); + if (error != 0 || req->newptr == NULL) + return (error); + if (val != 0 && !compressor_avail(val)) + return (EINVAL); + compress_user_cores = val; + return (error); +} +SYSCTL_PROC(_kern, OID_AUTO, compress_user_cores, + CTLTYPE_INT | CTLFLAG_RWTUN | CTLFLAG_NEEDGIANT, 0, sizeof(int), + sysctl_compress_user_cores, "I", + "Enable compression of user corefiles (" + __XSTRING(COMPRESS_GZIP) " = gzip, " + __XSTRING(COMPRESS_ZSTD) " = zstd)"); + +int compress_user_cores_level = 6; +SYSCTL_INT(_kern, OID_AUTO, compress_user_cores_level, CTLFLAG_RWTUN, + &compress_user_cores_level, 0, + "Corefile compression level"); + +void +coredumper_register(struct coredumper *cd) +{ + + blockcount_init(&cd->cd_refcount); + rm_wlock(&coredump_rmlock); + SLIST_INSERT_HEAD(&coredumpers, cd, cd_entry); + rm_wunlock(&coredump_rmlock); +} + +void +coredumper_unregister(struct coredumper *cd) +{ + + rm_wlock(&coredump_rmlock); + SLIST_REMOVE(&coredumpers, cd, coredumper, cd_entry); + rm_wunlock(&coredump_rmlock); + + /* + * Wait for any in-process coredumps to finish before returning. + */ + blockcount_wait(&cd->cd_refcount, NULL, "dumpwait", 0); +} + +/* + * Force the current process to exit with the specified signal, dumping core + * if appropriate. We bypass the normal tests for masked and caught signals, + * allowing unrecoverable failures to terminate the process without changing + * signal state. Mark the accounting record with the signal termination. + * If dumping core, save the signal number for the debugger. Calls exit and + * does not return. 
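coredumper_register() and coredumper_unregister() above are the whole backend-facing API. As a sketch of how an out-of-tree dumper might plug in (all names below are invented; the field layout and SYSINIT hook follow vnode_coredumper earlier in this change, and the probe contract follows the selection loop in coredump() below):

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/ucoredump.h>

static int
example_probe(struct thread *td)
{
	/*
	 * A negative return declines this dumper; anything above
	 * COREDUMPER_GENERIC outbids probe-less dumpers such as
	 * vnode_coredumper.
	 */
	return (COREDUMPER_GENERIC + 1);
}

static int
example_handle(struct thread *td, off_t limit)
{
	/*
	 * Called with the proc lock held; drop it before returning,
	 * since coredump() asserts MA_NOTOWNED afterwards.  A real
	 * handler would write out the dump, honoring 'limit'.
	 */
	PROC_UNLOCK(td->td_proc);
	return (ENOSYS);
}

static struct coredumper example_coredumper = {
	.cd_name = "example_coredumper",
	.cd_probe = example_probe,
	.cd_handle = example_handle,
};

SYSINIT(example_coredumper_register, SI_SUB_EXEC, SI_ORDER_ANY,
    coredumper_register, &example_coredumper);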
+ */
+void
+sigexit(struct thread *td, int sig)
+{
+	struct proc *p = td->td_proc;
+	int rv;
+	bool logexit;
+
+	PROC_LOCK_ASSERT(p, MA_OWNED);
+	proc_set_p2_wexit(p);
+
+	p->p_acflag |= AXSIG;
+	if ((p->p_flag2 & P2_LOGSIGEXIT_CTL) == 0)
+		logexit = kern_logsigexit != 0;
+	else
+		logexit = (p->p_flag2 & P2_LOGSIGEXIT_ENABLE) != 0;
+
+	/*
+	 * We must be single-threading to generate a core dump. This
+	 * ensures that the registers in the core file are up-to-date.
+	 * Also, the ELF dump handler assumes that the thread list doesn't
+	 * change out from under it.
+	 *
+	 * XXX If another thread attempts to single-thread before us
+	 * (e.g. via fork()), we won't get a dump at all.
+	 */
+	if (sig_do_core(sig) && thread_single(p, SINGLE_NO_EXIT) == 0) {
+		const char *err = NULL;
+
+		p->p_sig = sig;
+		/*
+		 * Log signals which would cause core dumps
+		 * (Log as LOG_INFO to appease those who don't want
+		 * these messages.)
+		 * XXX : Todo, as well as euid, write out ruid too
+		 * Note that coredump() drops proc lock.
+		 */
+		rv = coredump(td, &err);
+		if (rv == 0) {
+			MPASS(err == NULL);
+			sig |= WCOREFLAG;
+		} else if (err == NULL) {
+			switch (rv) {
+			case EFAULT:
+				err = "bad address";
+				break;
+			case EINVAL:
+				err = "invalid argument";
+				break;
+			case EFBIG:
+				err = "too large";
+				break;
+			default:
+				err = "other error";
+				break;
+			}
+		}
+		if (logexit)
+			log(LOG_INFO,
+			    "pid %d (%s), jid %d, uid %d: exited on "
+			    "signal %d (%s%s)\n", p->p_pid, p->p_comm,
+			    p->p_ucred->cr_prison->pr_id,
+			    td->td_ucred->cr_uid, sig &~ WCOREFLAG,
+			    err != NULL ? "no core dump - " : "core dumped",
+			    err != NULL ? err : "");
+	} else
+		PROC_UNLOCK(p);
+	exit1(td, 0, sig);
+	/* NOTREACHED */
+}
+
+/*
+ * Dump a process' core. This routine does the generic policy and limit
+ * checking, then selects the highest-priority registered coredumper and
+ * passes the size limit on to its handler; whatever error the chosen
+ * handler returns is returned to the caller.
+ */
+static int
+coredump(struct thread *td, const char **errmsg)
+{
+	struct coredumper *iter, *chosen;
+	struct proc *p = td->td_proc;
+	struct rm_priotracker tracker;
+	off_t limit;
+	int error, priority;
+
+	PROC_LOCK_ASSERT(p, MA_OWNED);
+	MPASS((p->p_flag & P_HADTHREADS) == 0 || p->p_singlethread == td);
+
+	if (!do_coredump || (!sugid_coredump && (p->p_flag & P_SUGID) != 0) ||
+	    (p->p_flag2 & P2_NOTRACE) != 0) {
+		PROC_UNLOCK(p);
+
+		if (!do_coredump)
+			*errmsg = "denied by kern.coredump";
+		else if ((p->p_flag2 & P2_NOTRACE) != 0)
+			*errmsg = "process has trace disabled";
+		else
+			*errmsg = "sugid process denied by kern.sugid_coredump";
+		return (EFAULT);
+	}
+
+	/*
+	 * Note that the bulk of limit checking is done after
+	 * the corefile is created. The exception is if the limit
+	 * for corefiles is 0, in which case we don't bother
+	 * creating the corefile at all. This layout means that
+	 * a corefile is truncated instead of not being created,
+	 * if it is larger than the limit.
+	 */
+	limit = (off_t)lim_cur(td, RLIMIT_CORE);
+	if (limit == 0 || racct_get_available(p, RACCT_CORE) == 0) {
+		PROC_UNLOCK(p);
+		*errmsg = "coredumpsize limit is 0";
+		return (EFBIG);
+	}
+
+	rm_rlock(&coredump_rmlock, &tracker);
+	priority = -1;
+	chosen = NULL;
+	SLIST_FOREACH(iter, &coredumpers, cd_entry) {
+		if (iter->cd_probe == NULL) {
+			/*
+			 * If we haven't found anything of a higher priority
+			 * yet, we'll call this a GENERIC. Ideally, we want
+			 * coredumper modules to include a probe function.
+ */ + if (priority < 0) { + priority = COREDUMPER_GENERIC; + chosen = iter; + } + + continue; + } + + error = (*iter->cd_probe)(td); + if (error < 0) + continue; + + /* + * Higher priority than previous options. + */ + if (error > priority) { + priority = error; + chosen = iter; + } + } + + /* + * Acquire our refcount before we drop the lock so that + * coredumper_unregister() can safely assume that the refcount will only + * go down once it's dropped the rmlock. + */ + blockcount_acquire(&chosen->cd_refcount, 1); + rm_runlock(&coredump_rmlock, &tracker); + + /* Currently, we always have the vnode dumper built in. */ + MPASS(chosen != NULL); + error = ((*chosen->cd_handle)(td, limit)); + PROC_LOCK_ASSERT(p, MA_NOTOWNED); + + blockcount_release(&chosen->cd_refcount, 1); + + return (error); +} diff --git a/sys/kern/subr_compressor.c b/sys/kern/subr_compressor.c index 280264881241..5d59622e0455 100644 --- a/sys/kern/subr_compressor.c +++ b/sys/kern/subr_compressor.c @@ -538,6 +538,12 @@ compressor_init(compressor_cb_t cb, int format, size_t maxiosize, int level, return (s); } +int +compressor_format(const struct compressor *stream) +{ + return (stream->methods->format); +} + void compressor_reset(struct compressor *stream) { diff --git a/sys/kern/sys_generic.c b/sys/kern/sys_generic.c index b472aaea89e6..5606b36f772f 100644 --- a/sys/kern/sys_generic.c +++ b/sys/kern/sys_generic.c @@ -2269,6 +2269,7 @@ exterr_copyout(struct thread *td) ue.error = 0; sz = sizeof(ue.error); } else { + ktrexterr(td); sz = sizeof(ue) - __offsetof(struct uexterror, error); } error = copyout(&ue.error, uloc, sz); @@ -2335,7 +2336,6 @@ exterr_set(int eerror, int category, const char *mmsg, uintptr_t pp1, td->td_kexterr.p1 = pp1; td->td_kexterr.p2 = pp2; td->td_kexterr.src_line = line; - ktrexterr(td); } return (eerror); } diff --git a/sys/kern/uipc_ktls.c b/sys/kern/uipc_ktls.c index ce09042abdac..66ce1b5a081d 100644 --- a/sys/kern/uipc_ktls.c +++ b/sys/kern/uipc_ktls.c @@ -1207,7 +1207,7 @@ sb_mark_notready(struct sockbuf *sb) for (; m != NULL; m = m->m_next) { KASSERT(m->m_nextpkt == NULL, ("%s: m_nextpkt != NULL", __func__)); - KASSERT((m->m_flags & M_NOTAVAIL) == 0, ("%s: mbuf not avail", + KASSERT((m->m_flags & M_NOTREADY) == 0, ("%s: mbuf not ready", __func__)); KASSERT(sb->sb_acc >= m->m_len, ("%s: sb_acc < m->m_len", __func__)); diff --git a/sys/kern/uipc_shm.c b/sys/kern/uipc_shm.c index 6f83b875a6b6..85fe48ddd466 100644 --- a/sys/kern/uipc_shm.c +++ b/sys/kern/uipc_shm.c @@ -1134,10 +1134,10 @@ shm_doremove(struct shm_mapping *map) int kern_shm_open2(struct thread *td, const char *userpath, int flags, mode_t mode, - int shmflags, struct filecaps *fcaps, const char *name __unused) + int shmflags, struct filecaps *fcaps, const char *name __unused, + struct shmfd *shmfd) { struct pwddesc *pdp; - struct shmfd *shmfd; struct file *fp; char *path; void *rl_cookie; @@ -1214,23 +1214,41 @@ kern_shm_open2(struct thread *td, const char *userpath, int flags, mode_t mode, if (error != 0) goto outnofp; - /* A SHM_ANON path pointer creates an anonymous object. */ + /* + * A SHM_ANON path pointer creates an anonymous object. We allow other + * parts of the kernel to pre-populate a shmfd and then materialize an + * fd for it here as a means to pass data back up to userland. This + * doesn't really make sense for named shm objects, but it makes plenty + * of sense for anonymous objects. + */ if (userpath == SHM_ANON) { - /* A read-only anonymous object is pointless. 
*/ - if ((flags & O_ACCMODE) == O_RDONLY) { - error = EINVAL; - goto out; - } - shmfd = shm_alloc(td->td_ucred, cmode, largepage); - if (shmfd == NULL) { - error = ENOMEM; - goto out; + if (shmfd != NULL) { + shm_hold(shmfd); + } else { + /* + * A read-only anonymous object is pointless, unless it + * was pre-populated by the kernel with the expectation + * that a shmfd would later be created for userland to + * access it through. + */ + if ((flags & O_ACCMODE) == O_RDONLY) { + error = EINVAL; + goto out; + } + shmfd = shm_alloc(td->td_ucred, cmode, largepage); + if (shmfd == NULL) { + error = ENOMEM; + goto out; + } + + shmfd->shm_seals = initial_seals; + shmfd->shm_flags = shmflags; } - shmfd->shm_seals = initial_seals; - shmfd->shm_flags = shmflags; } else { fnv = fnv_32_str(path, FNV1_32_INIT); sx_xlock(&shm_dict_lock); + + MPASS(shmfd == NULL); shmfd = shm_lookup(path, fnv); if (shmfd == NULL) { /* Object does not yet exist, create it if requested. */ @@ -2173,7 +2191,7 @@ kern_shm_open(struct thread *td, const char *path, int flags, mode_t mode, struct filecaps *caps) { - return (kern_shm_open2(td, path, flags, mode, 0, caps, NULL)); + return (kern_shm_open2(td, path, flags, mode, 0, caps, NULL, NULL)); } /* @@ -2191,7 +2209,7 @@ sys_shm_open2(struct thread *td, struct shm_open2_args *uap) { return (kern_shm_open2(td, uap->path, uap->flags, uap->mode, - uap->shmflags, NULL, uap->name)); + uap->shmflags, NULL, uap->name, NULL)); } int diff --git a/sys/kern/uipc_sockbuf.c b/sys/kern/uipc_sockbuf.c index ec00878cd9a5..745702bd4a4f 100644 --- a/sys/kern/uipc_sockbuf.c +++ b/sys/kern/uipc_sockbuf.c @@ -195,14 +195,14 @@ int sbready(struct sockbuf *sb, struct mbuf *m0, int count) { struct mbuf *m; - u_int blocker; + bool blocker; SOCKBUF_LOCK_ASSERT(sb); KASSERT(sb->sb_fnrdy != NULL, ("%s: sb %p NULL fnrdy", __func__, sb)); KASSERT(count > 0, ("%s: invalid count %d", __func__, count)); m = m0; - blocker = (sb->sb_fnrdy == m) ? M_BLOCKED : 0; + blocker = (sb->sb_fnrdy == m); while (count > 0) { KASSERT(m->m_flags & M_NOTREADY, @@ -217,8 +217,7 @@ sbready(struct sockbuf *sb, struct mbuf *m0, int count) m->m_epg_nrdy = 0; } else count--; - - m->m_flags &= ~(M_NOTREADY | blocker); + m->m_flags &= ~M_NOTREADY; if (blocker) sb->sb_acc += m->m_len; m = m->m_next; @@ -240,12 +239,8 @@ sbready(struct sockbuf *sb, struct mbuf *m0, int count) } /* This one was blocking all the queue. */ - for (; m && (m->m_flags & M_NOTREADY) == 0; m = m->m_next) { - KASSERT(m->m_flags & M_BLOCKED, - ("%s: m %p !M_BLOCKED", __func__, m)); - m->m_flags &= ~M_BLOCKED; + for (; m && (m->m_flags & M_NOTREADY) == 0; m = m->m_next) sb->sb_acc += m->m_len; - } sb->sb_fnrdy = m; sbready_compress(sb, m0, m); @@ -269,8 +264,7 @@ sballoc(struct sockbuf *sb, struct mbuf *m) sb->sb_fnrdy = m; else sb->sb_acc += m->m_len; - } else - m->m_flags |= M_BLOCKED; + } if (m->m_type != MT_DATA && m->m_type != MT_OOBDATA) sb->sb_ctl += m->m_len; @@ -287,29 +281,29 @@ sballoc(struct sockbuf *sb, struct mbuf *m) void sbfree(struct sockbuf *sb, struct mbuf *m) { + struct mbuf *n; #if 0 /* XXX: not yet: soclose() call path comes here w/o lock. 
*/ SOCKBUF_LOCK_ASSERT(sb); #endif - sb->sb_ccc -= m->m_len; - if (!(m->m_flags & M_NOTAVAIL)) - sb->sb_acc -= m->m_len; - if (m == sb->sb_fnrdy) { - struct mbuf *n; - KASSERT(m->m_flags & M_NOTREADY, ("%s: m %p !M_NOTREADY", __func__, m)); n = m->m_next; while (n != NULL && !(n->m_flags & M_NOTREADY)) { - n->m_flags &= ~M_BLOCKED; sb->sb_acc += n->m_len; n = n->m_next; } sb->sb_fnrdy = n; + } else { + /* Assert that mbuf is not behind sb_fnrdy. */ + for (n = sb->sb_fnrdy; n != NULL; n = n->m_next) + KASSERT(n != m, ("%s: sb %p freeing %p behind sb_fnrdy", + __func__, sb, m)); + sb->sb_acc -= m->m_len; } if (m->m_type != MT_DATA && m->m_type != MT_OOBDATA) @@ -779,6 +773,7 @@ sbsetopt(struct socket *so, struct sockopt *sopt) * high-water. */ *lowat = (cc > *hiwat) ? *hiwat : cc; + *flags &= ~SB_AUTOLOWAT; break; } @@ -1128,13 +1123,7 @@ sbcheck(struct sockbuf *sb, const char *file, int line) } fnrdy = m; } - if (fnrdy) { - if (!(m->m_flags & M_NOTAVAIL)) { - printf("sb %p: fnrdy %p, m %p is avail\n", - sb, sb->sb_fnrdy, m); - goto fail; - } - } else + if (fnrdy == NULL) acc += m->m_len; ccc += m->m_len; mbcnt += MSIZE; @@ -1601,8 +1590,8 @@ sbcut_internal(struct sockbuf *sb, int len) next = m->m_nextpkt; } if (m->m_len > len) { - KASSERT(!(m->m_flags & M_NOTAVAIL), - ("%s: m %p M_NOTAVAIL", __func__, m)); + KASSERT(!(m->m_flags & M_NOTREADY), + ("%s: m %p M_NOTREADY", __func__, m)); m->m_len -= len; m->m_data += len; sb->sb_ccc -= len; diff --git a/sys/kern/uipc_socket.c b/sys/kern/uipc_socket.c index 6c9eb7139cd1..fe2d8d056062 100644 --- a/sys/kern/uipc_socket.c +++ b/sys/kern/uipc_socket.c @@ -1211,7 +1211,8 @@ solisten_clone(struct socket *head) so->so_rcv.sb_timeo = head->sol_sbrcv_timeo; so->so_snd.sb_timeo = head->sol_sbsnd_timeo; so->so_rcv.sb_flags = head->sol_sbrcv_flags & SB_AUTOSIZE; - so->so_snd.sb_flags = head->sol_sbsnd_flags & SB_AUTOSIZE; + so->so_snd.sb_flags = head->sol_sbsnd_flags & + (SB_AUTOSIZE | SB_AUTOLOWAT); if ((so->so_proto->pr_flags & PR_SOCKBUF) == 0) { so->so_snd.sb_mtx = &so->so_snd_mtx; so->so_rcv.sb_mtx = &so->so_rcv_mtx; @@ -2988,8 +2989,8 @@ dontblock: */ moff = 0; offset = 0; - while (m != NULL && !(m->m_flags & M_NOTAVAIL) && uio->uio_resid > 0 - && error == 0) { + while (m != NULL && !(m->m_flags & M_NOTREADY) && uio->uio_resid > 0 && + error == 0) { /* * If the type of mbuf has changed since the last mbuf * examined ('type'), end the receive operation. 
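+		 *
+		 * Note (illustrative summary of this change): with M_BLOCKED
+		 * gone, the socket buffer keeps a single invariant instead of
+		 * per-mbuf blocked flags: sb_fnrdy points at the first
+		 * M_NOTREADY mbuf, everything before it is ready and counted
+		 * in sb_acc, and everything at or after it stays uncounted
+		 * even once its own M_NOTREADY has been cleared, e.g.:
+		 *
+		 *	sb_mb -> [rdy] -> [rdy] -> [NOTREADY] -> [rdy]
+		 *	                           ^ sb_fnrdy
+		 *
+		 * Stopping this loop at the first M_NOTREADY mbuf is thus
+		 * equivalent to the old M_NOTAVAIL (M_NOTREADY | M_BLOCKED)
+		 * test.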
@@ -3341,7 +3342,7 @@ deliver: for (m = sb->sb_mb; m != NULL && m->m_len <= len; m = m->m_next) { - KASSERT(!(m->m_flags & M_NOTAVAIL), + KASSERT(!(m->m_flags & M_NOTREADY), ("%s: m %p not available", __func__, m)); len -= m->m_len; uio->uio_resid -= m->m_len; @@ -4514,6 +4515,9 @@ sokqfilter_generic(struct socket *so, struct knote *kn) SOCK_BUF_LOCK(so, which); knlist_add(knl, kn, 1); sb->sb_flags |= SB_KNOTE; + if ((kn->kn_sfflags & NOTE_LOWAT) && + (sb->sb_flags & SB_AUTOLOWAT)) + sb->sb_flags &= ~SB_AUTOLOWAT; SOCK_BUF_UNLOCK(so, which); } SOCK_UNLOCK(so); diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c index 918b256e6c59..29774cf87393 100644 --- a/sys/kern/vfs_subr.c +++ b/sys/kern/vfs_subr.c @@ -6533,17 +6533,6 @@ vop_read_pgcache_post(void *ap, int rc) VFS_KNOTE_UNLOCKED(a->a_vp, NOTE_READ); } -void -vop_readdir_post(void *ap, int rc) -{ - struct vop_readdir_args *a = ap; - - if (!rc) { - VFS_KNOTE_LOCKED(a->a_vp, NOTE_READ); - INOTIFY(a->a_vp, IN_ACCESS); - } -} - static struct knlist fs_knlist; static void diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c index c71e0d9ee569..25d40a9806cb 100644 --- a/sys/kern/vfs_syscalls.c +++ b/sys/kern/vfs_syscalls.c @@ -2253,10 +2253,10 @@ kern_accessat(struct thread *td, int fd, const char *path, cred = td->td_ucred; if ((flag & AT_EACCESS) == 0 && ((cred->cr_uid != cred->cr_ruid || - cred->cr_rgid != cred->cr_groups[0]))) { + cred->cr_rgid != cred->cr_gid))) { usecred = crdup(cred); usecred->cr_uid = cred->cr_ruid; - usecred->cr_groups[0] = cred->cr_rgid; + usecred->cr_gid = cred->cr_rgid; td->td_ucred = usecred; } else usecred = cred; diff --git a/sys/kern/vnode_if.src b/sys/kern/vnode_if.src index 38138a4af921..2e63215b2f97 100644 --- a/sys/kern/vnode_if.src +++ b/sys/kern/vnode_if.src @@ -242,8 +242,8 @@ vop_read_pgcache { %% write vp L L L -%! write pre VOP_WRITE_PRE -%! write post VOP_WRITE_POST +%! write pre vop_write_pre +%! write post vop_write_post vop_write { IN struct vnode *vp; @@ -380,6 +380,7 @@ vop_symlink { %% readdir vp L L L +%! readdir pre vop_readdir_pre %! readdir post vop_readdir_post vop_readdir { diff --git a/sys/modules/Makefile b/sys/modules/Makefile index 7cb6e2124326..99c9ec9dcd01 100644 --- a/sys/modules/Makefile +++ b/sys/modules/Makefile @@ -34,6 +34,7 @@ SUBDIR= \ alq \ ${_amd_ecc_inject} \ ${_amdgpio} \ + ${_amdsmu} \ ${_amdsbwd} \ ${_amdsmn} \ ${_amdtemp} \ @@ -772,6 +773,7 @@ _acpi= acpi _aesni= aesni .endif _amd_ecc_inject=amd_ecc_inject +_amdsmu= amdsmu _amdsbwd= amdsbwd _amdsmn= amdsmn _amdtemp= amdtemp diff --git a/sys/modules/amdsmu/Makefile b/sys/modules/amdsmu/Makefile new file mode 100644 index 000000000000..752f57173d61 --- /dev/null +++ b/sys/modules/amdsmu/Makefile @@ -0,0 +1,14 @@ +# SPDX-License-Identifier: BSD-2-Clause +# +# Copyright (c) 2025 The FreeBSD Foundation +# +# This software was developed by Aymeric Wibo <obiwac@freebsd.org> +# under sponsorship from the FreeBSD Foundation. 
+ +.PATH: ${SRCTOP}/sys/dev/amdsmu + +KMOD= amdsmu +SRCS= amdsmu.c +SRCS+= bus_if.h device_if.h pci_if.h + +.include <bsd.kmod.mk> diff --git a/sys/modules/efirt/Makefile b/sys/modules/efirt/Makefile index 4738996fd4e6..c46484465b68 100644 --- a/sys/modules/efirt/Makefile +++ b/sys/modules/efirt/Makefile @@ -9,7 +9,7 @@ SRCS+= device_if.h bus_if.h clock_if.h DPSRCS+= assym.inc .if ${MACHINE_CPUARCH} == "amd64" -SRCS+= opt_hwpmc_hooks.h opt_kstack_pages.h +SRCS+= opt_acpi.h opt_hwpmc_hooks.h opt_kstack_pages.h .endif efirt_support.o: efirt_support.S assym.inc diff --git a/sys/net/if_bridge.c b/sys/net/if_bridge.c index 5b3ee740d75e..0a35fb4095fb 100644 --- a/sys/net/if_bridge.c +++ b/sys/net/if_bridge.c @@ -76,31 +76,34 @@ * heterogeneous bridges). */ -#include <sys/cdefs.h> #include "opt_inet.h" #include "opt_inet6.h" +#define EXTERR_CATEGORY EXTERR_CAT_BRIDGE + #include <sys/param.h> +#include <sys/ctype.h> /* string functions */ #include <sys/eventhandler.h> -#include <sys/mbuf.h> +#include <sys/exterrvar.h> +#include <sys/jail.h> +#include <sys/kernel.h> +#include <sys/lock.h> #include <sys/malloc.h> +#include <sys/mbuf.h> +#include <sys/module.h> +#include <sys/mutex.h> +#include <sys/priv.h> +#include <sys/proc.h> #include <sys/protosw.h> +#include <sys/random.h> #include <sys/systm.h> -#include <sys/jail.h> -#include <sys/time.h> #include <sys/socket.h> /* for net/if.h */ #include <sys/sockio.h> -#include <sys/ctype.h> /* string functions */ -#include <sys/kernel.h> -#include <sys/random.h> #include <sys/syslog.h> #include <sys/sysctl.h> +#include <sys/time.h> + #include <vm/uma.h> -#include <sys/module.h> -#include <sys/priv.h> -#include <sys/proc.h> -#include <sys/lock.h> -#include <sys/mutex.h> #include <net/bpf.h> #include <net/if.h> @@ -254,8 +257,8 @@ struct bridge_iflist { uint32_t bif_addrcnt; /* cur. 
# of addresses */ uint32_t bif_addrexceeded;/* # of address violations */ struct epoch_context bif_epoch_ctx; - ether_vlanid_t bif_untagged; /* untagged vlan id */ - ifbvlan_set_t bif_vlan_set; /* allowed tagged vlans */ + ether_vlanid_t bif_pvid; /* port vlan id */ + ifbvlan_set_t bif_vlan_set; /* if allowed tagged vlans */ }; /* @@ -404,7 +407,7 @@ static int bridge_ioctl_sma(struct bridge_softc *, void *); static int bridge_ioctl_sifprio(struct bridge_softc *, void *); static int bridge_ioctl_sifcost(struct bridge_softc *, void *); static int bridge_ioctl_sifmaxaddr(struct bridge_softc *, void *); -static int bridge_ioctl_sifuntagged(struct bridge_softc *, void *); +static int bridge_ioctl_sifpvid(struct bridge_softc *, void *); static int bridge_ioctl_sifvlanset(struct bridge_softc *, void *); static int bridge_ioctl_gifvlanset(struct bridge_softc *, void *); static int bridge_ioctl_addspan(struct bridge_softc *, void *); @@ -625,7 +628,7 @@ static const struct bridge_control bridge_control_table[] = { { bridge_ioctl_sifmaxaddr, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_SUSER }, - { bridge_ioctl_sifuntagged, sizeof(struct ifbreq), + { bridge_ioctl_sifpvid, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_SUSER }, { bridge_ioctl_sifvlanset, sizeof(struct ifbif_vlan_req), @@ -986,31 +989,37 @@ bridge_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) case SIOCGDRVSPEC: case SIOCSDRVSPEC: if (ifd->ifd_cmd >= bridge_control_table_size) { - error = EINVAL; + error = EXTERROR(EINVAL, "Invalid control command"); break; } bc = &bridge_control_table[ifd->ifd_cmd]; if (cmd == SIOCGDRVSPEC && (bc->bc_flags & BC_F_COPYOUT) == 0) { - error = EINVAL; + error = EXTERROR(EINVAL, + "Inappropriate ioctl for command " + "(expected SIOCSDRVSPEC)"); break; } else if (cmd == SIOCSDRVSPEC && (bc->bc_flags & BC_F_COPYOUT) != 0) { - error = EINVAL; + error = EXTERROR(EINVAL, + "Inappropriate ioctl for command " + "(expected SIOCGDRVSPEC)"); break; } if (bc->bc_flags & BC_F_SUSER) { error = priv_check(td, PRIV_NET_BRIDGE); - if (error) + if (error) { + EXTERROR(error, "PRIV_NET_BRIDGE required"); break; + } } if (ifd->ifd_len != bc->bc_argsize || ifd->ifd_len > sizeof(args)) { - error = EINVAL; + error = EXTERROR(EINVAL, "Invalid argument size"); break; } @@ -1062,7 +1071,8 @@ bridge_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) oldmtu = sc->sc_ifp->if_mtu; if (ifr->ifr_mtu < IF_MINMTU) { - error = EINVAL; + error = EXTERROR(EINVAL, + "Requested MTU is lower than IF_MINMTU"); break; } if (CK_LIST_EMPTY(&sc->sc_iflist)) { @@ -1088,6 +1098,8 @@ bridge_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) (*bif->bif_ifp->if_ioctl)(bif->bif_ifp, SIOCSIFMTU, (caddr_t)ifr); } + EXTERROR(error, + "Failed to set MTU on member interface"); } else { sc->sc_ifp->if_mtu = ifr->ifr_mtu; } @@ -1125,14 +1137,14 @@ bridge_mutecaps(struct bridge_softc *sc) mask = BRIDGE_IFCAPS_MASK; CK_LIST_FOREACH(bif, &sc->sc_iflist, bif_next) { - /* Every member must support it or its disabled */ + /* Every member must support it or it's disabled */ mask &= bif->bif_savedcaps; } CK_LIST_FOREACH(bif, &sc->sc_iflist, bif_next) { enabled = bif->bif_ifp->if_capenable; enabled &= ~BRIDGE_IFCAPS_STRIP; - /* strip off mask bits and enable them again if allowed */ + /* Strip off mask bits and enable them again if allowed */ enabled &= ~BRIDGE_IFCAPS_MASK; enabled |= mask; bridge_set_ifcap(sc, bif, enabled); @@ -1282,7 +1294,7 @@ bridge_delete_member(struct bridge_softc *sc, struct bridge_iflist *bif, #endif break; } - /* reneable any interface capabilities 
*/ + /* Re-enable any interface capabilities */ bridge_set_ifcap(sc, bif, bif->bif_savedcaps); } bstp_destroy(&bif->bif_stp); /* prepare to free */ @@ -1318,21 +1330,48 @@ bridge_ioctl_add(struct bridge_softc *sc, void *arg) ifs = ifunit(req->ifbr_ifsname); if (ifs == NULL) - return (ENOENT); + return (EXTERROR(ENOENT, "No such interface", + req->ifbr_ifsname)); if (ifs->if_ioctl == NULL) /* must be supported */ - return (EINVAL); + return (EXTERROR(EINVAL, "Interface must support ioctl(2)")); + + /* + * If the new interface is a vlan(4), it could be a bridge SVI. + * Don't allow such things to be added to bridges. + */ + if (ifs->if_type == IFT_L2VLAN) { + struct ifnet *parent; + struct epoch_tracker et; + bool is_bridge; + + /* + * Entering NET_EPOCH with BRIDGE_LOCK held, but this is okay + * since we don't sleep here. + */ + NET_EPOCH_ENTER(et); + parent = VLAN_TRUNKDEV(ifs); + is_bridge = (parent != NULL && parent->if_type == IFT_BRIDGE); + NET_EPOCH_EXIT(et); + + if (is_bridge) + return (EXTERROR(EINVAL, + "Bridge SVI cannot be added to a bridge")); + } /* If it's in the span list, it can't be a member. */ CK_LIST_FOREACH(bif, &sc->sc_spanlist, bif_next) if (ifs == bif->bif_ifp) - return (EBUSY); + return (EXTERROR(EBUSY, + "Span interface cannot be a member")); if (ifs->if_bridge) { struct bridge_iflist *sbif = ifs->if_bridge; if (sbif->bif_sc == sc) - return (EEXIST); + return (EXTERROR(EEXIST, + "Interface is already a member of this bridge")); - return (EBUSY); + return (EXTERROR(EBUSY, + "Interface is already a member of another bridge")); } switch (ifs->if_type) { @@ -1342,7 +1381,7 @@ bridge_ioctl_add(struct bridge_softc *sc, void *arg) /* permitted interface types */ break; default: - return (EINVAL); + return (EXTERROR(EINVAL, "Unsupported interface type")); } #ifdef INET6 @@ -1394,11 +1433,15 @@ bridge_ioctl_add(struct bridge_softc *sc, void *arg) CK_STAILQ_FOREACH(ifa, &ifs->if_addrhead, ifa_link) { #ifdef INET if (ifa->ifa_addr->sa_family == AF_INET) - return (EINVAL); + return (EXTERROR(EINVAL, + "Member interface may not have " + "an IPv4 address configured")); #endif #ifdef INET6 if (ifa->ifa_addr->sa_family == AF_INET6) - return (EINVAL); + return (EXTERROR(EINVAL, + "Member interface may not have " + "an IPv6 address configured")); #endif } } @@ -1420,7 +1463,8 @@ bridge_ioctl_add(struct bridge_softc *sc, void *arg) " new member %s\n", sc->sc_ifp->if_xname, ifr.ifr_mtu, ifs->if_xname); - return (EINVAL); + return (EXTERROR(EINVAL, + "Failed to set MTU on new member")); } } @@ -1482,7 +1526,7 @@ bridge_ioctl_del(struct bridge_softc *sc, void *arg) bif = bridge_lookup_member(sc, req->ifbr_ifsname); if (bif == NULL) - return (ENOENT); + return (EXTERROR(ENOENT, "Interface is not a bridge member")); bridge_delete_member(sc, bif, 0); @@ -1498,7 +1542,7 @@ bridge_ioctl_gifflags(struct bridge_softc *sc, void *arg) bif = bridge_lookup_member(sc, req->ifbr_ifsname); if (bif == NULL) - return (ENOENT); + return (EXTERROR(ENOENT, "Interface is not a bridge member")); bp = &bif->bif_stp; req->ifbr_ifsflags = bif->bif_flags; @@ -1512,7 +1556,7 @@ bridge_ioctl_gifflags(struct bridge_softc *sc, void *arg) req->ifbr_addrcnt = bif->bif_addrcnt; req->ifbr_addrmax = bif->bif_addrmax; req->ifbr_addrexceeded = bif->bif_addrexceeded; - req->ifbr_untagged = bif->bif_untagged; + req->ifbr_pvid = bif->bif_pvid; /* Copy STP state options as flags */ if (bp->bp_operedge) @@ -1541,12 +1585,12 @@ bridge_ioctl_sifflags(struct bridge_softc *sc, void *arg) bif = bridge_lookup_member(sc, 
req->ifbr_ifsname); if (bif == NULL) - return (ENOENT); + return (EXTERROR(ENOENT, "Interface is not a bridge member")); bp = &bif->bif_stp; if (req->ifbr_ifsflags & IFBIF_SPAN) /* SPAN is readonly */ - return (EINVAL); + return (EXTERROR(EINVAL, "Span interface cannot be modified")); NET_EPOCH_ENTER(et); @@ -1555,7 +1599,8 @@ bridge_ioctl_sifflags(struct bridge_softc *sc, void *arg) error = bstp_enable(&bif->bif_stp); if (error) { NET_EPOCH_EXIT(et); - return (error); + return (EXTERROR(error, + "Failed to enable STP")); } } } else { @@ -1724,7 +1769,7 @@ bridge_ioctl_saddr(struct bridge_softc *sc, void *arg) bif = bridge_lookup_member(sc, req->ifba_ifsname); if (bif == NULL) { NET_EPOCH_EXIT(et); - return (ENOENT); + return (EXTERROR(ENOENT, "Interface is not a bridge member")); } /* bridge_rtupdate() may acquire the lock. */ @@ -1858,7 +1903,7 @@ bridge_ioctl_sifprio(struct bridge_softc *sc, void *arg) bif = bridge_lookup_member(sc, req->ifbr_ifsname); if (bif == NULL) - return (ENOENT); + return (EXTERROR(ENOENT, "Interface is not a bridge member")); return (bstp_set_port_priority(&bif->bif_stp, req->ifbr_priority)); } @@ -1871,7 +1916,7 @@ bridge_ioctl_sifcost(struct bridge_softc *sc, void *arg) bif = bridge_lookup_member(sc, req->ifbr_ifsname); if (bif == NULL) - return (ENOENT); + return (EXTERROR(ENOENT, "Interface is not a bridge member")); return (bstp_set_path_cost(&bif->bif_stp, req->ifbr_path_cost)); } @@ -1884,28 +1929,28 @@ bridge_ioctl_sifmaxaddr(struct bridge_softc *sc, void *arg) bif = bridge_lookup_member(sc, req->ifbr_ifsname); if (bif == NULL) - return (ENOENT); + return (EXTERROR(ENOENT, "Interface is not a bridge member")); bif->bif_addrmax = req->ifbr_addrmax; return (0); } static int -bridge_ioctl_sifuntagged(struct bridge_softc *sc, void *arg) +bridge_ioctl_sifpvid(struct bridge_softc *sc, void *arg) { struct ifbreq *req = arg; struct bridge_iflist *bif; bif = bridge_lookup_member(sc, req->ifbr_ifsname); if (bif == NULL) - return (ENOENT); + return (EXTERROR(ENOENT, "Interface is not a bridge member")); - if (req->ifbr_untagged > DOT1Q_VID_MAX) - return (EINVAL); + if (req->ifbr_pvid > DOT1Q_VID_MAX) + return (EXTERROR(EINVAL, "Invalid VLAN ID")); - if (req->ifbr_untagged != DOT1Q_VID_NULL) + if (req->ifbr_pvid != DOT1Q_VID_NULL) bif->bif_flags |= IFBIF_VLANFILTER; - bif->bif_untagged = req->ifbr_untagged; + bif->bif_pvid = req->ifbr_pvid; return (0); } @@ -1917,12 +1962,12 @@ bridge_ioctl_sifvlanset(struct bridge_softc *sc, void *arg) bif = bridge_lookup_member(sc, req->bv_ifname); if (bif == NULL) - return (ENOENT); + return (EXTERROR(ENOENT, "Interface is not a bridge member")); /* Reject invalid VIDs. 
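+	 * (Assumed values, for context: DOT1Q_VID_NULL is 0 and
+	 * DOT1Q_VID_RSVD_IMPL is 4095, the two VIDs that IEEE 802.1Q
+	 * reserves, so neither may appear in a member set.)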
*/ if (BRVLAN_TEST(&req->bv_set, DOT1Q_VID_NULL) || BRVLAN_TEST(&req->bv_set, DOT1Q_VID_RSVD_IMPL)) - return (EINVAL); + return (EXTERROR(EINVAL, "Invalid VLAN ID in set")); switch (req->bv_op) { /* Replace the existing vlan set with the new set */ @@ -1942,7 +1987,8 @@ bridge_ioctl_sifvlanset(struct bridge_softc *sc, void *arg) /* Invalid or unknown operation */ default: - return (EINVAL); + return (EXTERROR(EINVAL, + "Unsupported BRDGSIFVLANSET operation")); } /* @@ -1962,7 +2008,7 @@ bridge_ioctl_gifvlanset(struct bridge_softc *sc, void *arg) bif = bridge_lookup_member(sc, req->bv_ifname); if (bif == NULL) - return (ENOENT); + return (EXTERROR(ENOENT, "Interface is not a bridge member")); BIT_COPY(BRVLAN_SETSIZE, &bif->bif_vlan_set, &req->bv_set); return (0); @@ -1977,14 +2023,16 @@ bridge_ioctl_addspan(struct bridge_softc *sc, void *arg) ifs = ifunit(req->ifbr_ifsname); if (ifs == NULL) - return (ENOENT); + return (EXTERROR(ENOENT, "No such interface")); CK_LIST_FOREACH(bif, &sc->sc_spanlist, bif_next) if (ifs == bif->bif_ifp) - return (EBUSY); + return (EXTERROR(EBUSY, + "Interface is already a span port")); if (ifs->if_bridge != NULL) - return (EBUSY); + return (EXTERROR(EEXIST, + "Interface is already a bridge member")); switch (ifs->if_type) { case IFT_ETHER: @@ -1992,7 +2040,7 @@ bridge_ioctl_addspan(struct bridge_softc *sc, void *arg) case IFT_L2VLAN: break; default: - return (EINVAL); + return (EXTERROR(EINVAL, "Unsupported interface type")); } bif = malloc(sizeof(*bif), M_DEVBUF, M_NOWAIT|M_ZERO); @@ -2016,14 +2064,14 @@ bridge_ioctl_delspan(struct bridge_softc *sc, void *arg) ifs = ifunit(req->ifbr_ifsname); if (ifs == NULL) - return (ENOENT); + return (EXTERROR(ENOENT, "No such interface")); CK_LIST_FOREACH(bif, &sc->sc_spanlist, bif_next) if (ifs == bif->bif_ifp) break; if (bif == NULL) - return (ENOENT); + return (EXTERROR(ENOENT, "Interface is not a span port")); bridge_delete_span(sc, bif); @@ -2278,8 +2326,8 @@ bridge_enqueue(struct bridge_softc *sc, struct ifnet *dst_ifp, struct mbuf *m, * the VLAN header. */ if ((bif->bif_flags & IFBIF_VLANFILTER) && - bif->bif_untagged != DOT1Q_VID_NULL && - VLANTAGOF(m) == bif->bif_untagged) { + bif->bif_pvid != DOT1Q_VID_NULL && + VLANTAGOF(m) == bif->bif_pvid) { m->m_flags &= ~M_VLANTAG; m->m_pkthdr.ether_vtag = 0; } @@ -3145,14 +3193,14 @@ bridge_vfilter_in(const struct bridge_iflist *sbif, struct mbuf *m) * The frame doesn't have a tag. If the interface does not * have an untagged vlan configured, drop the frame. */ - if (sbif->bif_untagged == DOT1Q_VID_NULL) + if (sbif->bif_pvid == DOT1Q_VID_NULL) return (false); /* * Otherwise, insert a new tag based on the interface's * untagged vlan id. */ - m->m_pkthdr.ether_vtag = sbif->bif_untagged; + m->m_pkthdr.ether_vtag = sbif->bif_pvid; m->m_flags |= M_VLANTAG; } else { /* @@ -3213,7 +3261,7 @@ bridge_vfilter_out(const struct bridge_iflist *dbif, const struct mbuf *m) * If the frame's vlan matches the interfaces's untagged vlan, * allow it. */ - if (vlan == dbif->bif_untagged) + if (vlan == dbif->bif_pvid) return (true); /* @@ -3244,10 +3292,11 @@ bridge_rtupdate(struct bridge_softc *sc, const uint8_t *dst, BRIDGE_LOCK_OR_NET_EPOCH_ASSERT(sc); /* Check the source address is valid and not multicast. 
*/ - if (ETHER_IS_MULTICAST(dst) || - (dst[0] == 0 && dst[1] == 0 && dst[2] == 0 && - dst[3] == 0 && dst[4] == 0 && dst[5] == 0) != 0) - return (EINVAL); + if (ETHER_IS_MULTICAST(dst)) + return (EXTERROR(EINVAL, "Multicast address not permitted")); + if (dst[0] == 0 && dst[1] == 0 && dst[2] == 0 && + dst[3] == 0 && dst[4] == 0 && dst[5] == 0) + return (EXTERROR(EINVAL, "Zero address not permitted")); /* * A route for this destination might already exist. If so, @@ -3266,13 +3315,14 @@ bridge_rtupdate(struct bridge_softc *sc, const uint8_t *dst, if (sc->sc_brtcnt >= sc->sc_brtmax) { sc->sc_brtexceeded++; BRIDGE_RT_UNLOCK(sc); - return (ENOSPC); + return (EXTERROR(ENOSPC, "Address table is full")); } /* Check per interface address limits (if enabled) */ if (bif->bif_addrmax && bif->bif_addrcnt >= bif->bif_addrmax) { bif->bif_addrexceeded++; BRIDGE_RT_UNLOCK(sc); - return (ENOSPC); + return (EXTERROR(ENOSPC, + "Interface address limit exceeded")); } /* @@ -3283,7 +3333,8 @@ bridge_rtupdate(struct bridge_softc *sc, const uint8_t *dst, brt = uma_zalloc(V_bridge_rtnode_zone, M_NOWAIT | M_ZERO); if (brt == NULL) { BRIDGE_RT_UNLOCK(sc); - return (ENOMEM); + return (EXTERROR(ENOMEM, + "Cannot allocate address node")); } brt->brt_vnet = curvnet; @@ -3631,7 +3682,7 @@ bridge_rtnode_insert(struct bridge_softc *sc, struct bridge_rtnode *brt) do { dir = bridge_rtnode_addr_cmp(brt->brt_addr, lbrt->brt_addr); if (dir == 0 && brt->brt_vlan == lbrt->brt_vlan) - return (EEXIST); + return (EXTERROR(EEXIST, "Address already exists")); if (dir > 0) { CK_LIST_INSERT_BEFORE(lbrt, brt, brt_hash); goto out; diff --git a/sys/net/if_bridgevar.h b/sys/net/if_bridgevar.h index 97b63e3d4416..c458dcc152a0 100644 --- a/sys/net/if_bridgevar.h +++ b/sys/net/if_bridgevar.h @@ -124,7 +124,7 @@ #define BRDGSPROTO 28 /* set protocol (ifbrparam) */ #define BRDGSTXHC 29 /* set tx hold count (ifbrparam) */ #define BRDGSIFAMAX 30 /* set max interface addrs (ifbreq) */ -#define BRDGSIFUNTAGGED 31 /* set if untagged vlan */ +#define BRDGSIFPVID 31 /* set if PVID */ #define BRDGSIFVLANSET 32 /* set if vlan set */ #define BRDGGIFVLANSET 33 /* get if vlan set */ @@ -144,7 +144,7 @@ struct ifbreq { uint32_t ifbr_addrcnt; /* member if addr number */ uint32_t ifbr_addrmax; /* member if addr max */ uint32_t ifbr_addrexceeded; /* member if addr violations */ - ether_vlanid_t ifbr_untagged; /* member if untagged vlan */ + ether_vlanid_t ifbr_pvid; /* member if PVID */ uint8_t pad[32]; }; diff --git a/sys/net/if_ovpn.c b/sys/net/if_ovpn.c index 7bdbc565f4ca..fe3e7bbd7fff 100644 --- a/sys/net/if_ovpn.c +++ b/sys/net/if_ovpn.c @@ -34,11 +34,13 @@ #include <sys/epoch.h> #include <sys/file.h> #include <sys/filedesc.h> +#include <sys/jail.h> #include <sys/kernel.h> #include <sys/malloc.h> #include <sys/mbuf.h> #include <sys/module.h> #include <sys/nv.h> +#include <sys/osd.h> #include <sys/priv.h> #include <sys/protosw.h> #include <sys/rmlock.h> @@ -79,7 +81,6 @@ #include "if_ovpn.h" struct ovpn_kkey_dir { - int refcount; uint8_t key[32]; uint8_t keylen; uint8_t nonce[8]; @@ -132,6 +133,9 @@ struct ovpn_notification { /* Delete notification */ enum ovpn_del_reason del_reason; struct ovpn_peer_counters counters; + + /* Float notification */ + struct sockaddr_storage address; }; struct ovpn_softc; @@ -196,6 +200,10 @@ struct ovpn_softc { struct epoch_context epoch_ctx; }; +struct ovpn_mtag { + struct sockaddr_storage addr; +}; + static struct ovpn_kpeer *ovpn_find_peer(struct ovpn_softc *, uint32_t); static bool ovpn_udp_input(struct mbuf *, int, 
struct inpcb *, const struct sockaddr *, void *); @@ -205,7 +213,10 @@ static int ovpn_encap(struct ovpn_softc *, uint32_t, struct mbuf *); static int ovpn_get_af(struct mbuf *); static void ovpn_free_kkey_dir(struct ovpn_kkey_dir *); static bool ovpn_check_replay(struct ovpn_kkey_dir *, uint32_t); -static int ovpn_peer_compare(struct ovpn_kpeer *, struct ovpn_kpeer *); +static int ovpn_peer_compare(const struct ovpn_kpeer *, + const struct ovpn_kpeer *); +static bool ovpn_sockaddr_compare(const struct sockaddr *, + const struct sockaddr *); static RB_PROTOTYPE(ovpn_kpeers, ovpn_kpeer, tree, ovpn_peer_compare); static RB_GENERATE(ovpn_kpeers, ovpn_kpeer, tree, ovpn_peer_compare); @@ -278,11 +289,48 @@ SYSCTL_INT(_net_link_openvpn, OID_AUTO, netisr_queue, "Use netisr_queue() rather than netisr_dispatch()."); static int -ovpn_peer_compare(struct ovpn_kpeer *a, struct ovpn_kpeer *b) +ovpn_peer_compare(const struct ovpn_kpeer *a, const struct ovpn_kpeer *b) { return (a->peerid - b->peerid); } +static bool +ovpn_sockaddr_compare(const struct sockaddr *a, + const struct sockaddr *b) +{ + if (a->sa_family != b->sa_family) + return (false); + MPASS(a->sa_len == b->sa_len); + + switch (a->sa_family) { + case AF_INET: { + const struct sockaddr_in *a4, *b4; + + a4 = (const struct sockaddr_in *)a; + b4 = (const struct sockaddr_in *)b; + + if (a4->sin_port != b4->sin_port) + return (false); + + return (a4->sin_addr.s_addr == b4->sin_addr.s_addr); + } + case AF_INET6: { + const struct sockaddr_in6 *a6, *b6; + + a6 = (const struct sockaddr_in6 *)a; + b6 = (const struct sockaddr_in6 *)b; + + if (a6->sin6_port != b6->sin6_port) + return (false); + + return (memcmp(&a6->sin6_addr, &b6->sin6_addr, + sizeof(a6->sin6_addr)) == 0); + } + default: + panic("Unknown address family %d", a->sa_family); + } +} + static struct ovpn_kpeer * ovpn_find_peer(struct ovpn_softc *sc, uint32_t peerid) { @@ -304,15 +352,15 @@ ovpn_find_only_peer(struct ovpn_softc *sc) } static uint16_t -ovpn_get_port(struct sockaddr_storage *s) +ovpn_get_port(const struct sockaddr_storage *s) { switch (s->ss_family) { case AF_INET: { - struct sockaddr_in *in = (struct sockaddr_in *)s; + const struct sockaddr_in *in = (const struct sockaddr_in *)s; return (in->sin_port); } case AF_INET6: { - struct sockaddr_in6 *in6 = (struct sockaddr_in6 *)s; + const struct sockaddr_in6 *in6 = (const struct sockaddr_in6 *)s; return (in6->sin6_port); } default: @@ -320,6 +368,25 @@ ovpn_get_port(struct sockaddr_storage *s) } } +static void +ovpn_set_port(struct sockaddr_storage *s, unsigned short port) +{ + switch (s->ss_family) { + case AF_INET: { + struct sockaddr_in *in = (struct sockaddr_in *)s; + in->sin_port = port; + break; + } + case AF_INET6: { + struct sockaddr_in6 *in6 = (struct sockaddr_in6 *)s; + in6->sin6_port = port; + break; + } + default: + panic("Unsupported address family %d", s->ss_family); + } +} + static int ovpn_nvlist_to_sockaddr(const nvlist_t *nvl, struct sockaddr_storage *sa) { @@ -333,14 +400,16 @@ ovpn_nvlist_to_sockaddr(const nvlist_t *nvl, struct sockaddr_storage *sa) return (EINVAL); af = nvlist_get_number(nvl, "af"); - switch (af) { #ifdef INET case AF_INET: { struct sockaddr_in *in = (struct sockaddr_in *)sa; size_t len; const void *addr = nvlist_get_binary(nvl, "address", &len); + + memset(in, 0, sizeof(*in)); in->sin_family = af; + in->sin_len = sizeof(*in); if (len != sizeof(in->sin_addr)) return (EINVAL); @@ -354,7 +423,10 @@ ovpn_nvlist_to_sockaddr(const nvlist_t *nvl, struct sockaddr_storage *sa) struct sockaddr_in6 *in6 
= (struct sockaddr_in6 *)sa; size_t len; const void *addr = nvlist_get_binary(nvl, "address", &len); + + memset(in6, 0, sizeof(*in6)); in6->sin6_family = af; + in6->sin6_len = sizeof(*in6); if (len != sizeof(in6->sin6_addr)) return (EINVAL); @@ -370,31 +442,42 @@ ovpn_nvlist_to_sockaddr(const nvlist_t *nvl, struct sockaddr_storage *sa) return (0); } -static bool -ovpn_has_peers(struct ovpn_softc *sc) +static int +ovpn_add_sockaddr(nvlist_t *parent, const char *name, const struct sockaddr *s) { - OVPN_ASSERT(sc); - - return (sc->peercount > 0); -} + nvlist_t *nvl; -static void -ovpn_rele_so(struct ovpn_softc *sc) -{ - bool has_peers; + nvl = nvlist_create(0); + if (nvl == NULL) + return (ENOMEM); - OVPN_WASSERT(sc); + nvlist_add_number(nvl, "af", s->sa_family); - if (sc->so == NULL) - return; + switch (s->sa_family) { + case AF_INET: { + const struct sockaddr_in *s4 = (const struct sockaddr_in *)s; - has_peers = ovpn_has_peers(sc); + nvlist_add_number(nvl, "port", s4->sin_port); + nvlist_add_binary(nvl, "address", &s4->sin_addr, + sizeof(s4->sin_addr)); + break; + } + case AF_INET6: { + const struct sockaddr_in6 *s6 = (const struct sockaddr_in6 *)s; - if (! has_peers) { - MPASS(sc->peercount == 0); - } else { - MPASS(sc->peercount > 0); + nvlist_add_number(nvl, "port", s6->sin6_port); + nvlist_add_binary(nvl, "address", &s6->sin6_addr, + sizeof(s6->sin6_addr)); + break; + } + default: + nvlist_destroy(nvl); + return (EINVAL); } + + nvlist_move_nvlist(parent, name, nvl); + + return (0); } static void @@ -449,6 +532,33 @@ ovpn_notify_key_rotation(struct ovpn_softc *sc, struct ovpn_kpeer *peer) } } +static int +ovpn_notify_float(struct ovpn_softc *sc, uint32_t peerid, + const struct sockaddr_storage *remote) +{ + struct ovpn_notification *n; + + n = malloc(sizeof(*n), M_OVPN, M_NOWAIT | M_ZERO); + if (n == NULL) + return (ENOMEM); + + n->peerid = peerid; + n->type = OVPN_NOTIF_FLOAT; + memcpy(&n->address, remote, sizeof(n->address)); + + if (buf_ring_enqueue(sc->notifring, n) != 0) { + free(n, M_OVPN); + return (ENOMEM); + } else if (sc->so != NULL) { + /* Wake up userspace */ + sc->so->so_error = EAGAIN; + sorwakeup(sc->so); + sowwakeup(sc->so); + } + + return (0); +} + static void ovpn_peer_release_ref(struct ovpn_kpeer *peer, bool locked) { @@ -485,8 +595,6 @@ ovpn_peer_release_ref(struct ovpn_kpeer *peer, bool locked) ovpn_free_kkey_dir(peer->keys[i].decrypt); } - ovpn_rele_so(sc); - callout_stop(&peer->ping_send); callout_stop(&peer->ping_rcv); uma_zfree_pcpu(pcpu_zone_4, peer->last_active); @@ -502,7 +610,7 @@ ovpn_new_peer(struct ifnet *ifp, const nvlist_t *nvl) #ifdef INET6 struct epoch_tracker et; #endif - struct sockaddr_storage remote; + struct sockaddr_storage local, remote; struct ovpn_kpeer *peer = NULL; struct file *fp = NULL; struct ovpn_softc *sc = ifp->if_softc; @@ -571,20 +679,37 @@ ovpn_new_peer(struct ifnet *ifp, const nvlist_t *nvl) callout_init_rm(&peer->ping_send, &sc->lock, CALLOUT_SHAREDLOCK); callout_init_rm(&peer->ping_rcv, &sc->lock, 0); - peer->local.ss_len = sizeof(peer->local); - ret = sosockaddr(so, (struct sockaddr *)&peer->local); - if (ret) + memset(&local, 0, sizeof(local)); + local.ss_len = sizeof(local); + ret = sosockaddr(so, (struct sockaddr *)&local); + if (ret != 0) goto error; + if (nvlist_exists_nvlist(nvl, "local")) { + struct sockaddr_storage local1; + + ret = ovpn_nvlist_to_sockaddr(nvlist_get_nvlist(nvl, "local"), + &local1); + if (ret != 0) + goto error; - if (ovpn_get_port(&peer->local) == 0) { + /* + * openvpn doesn't provide a port here when 
in multihome mode,
+		 * just steal the one the socket is bound to.
+		 */
+		if (ovpn_get_port(&local1) == 0)
+			ovpn_set_port(&local1, ovpn_get_port(&local));
+		memcpy(&local, &local1, sizeof(local1));
+	}
+	if (ovpn_get_port(&local) == 0) {
 		ret = EINVAL;
 		goto error;
 	}
-	if (peer->local.ss_family != remote.ss_family) {
+	if (local.ss_family != remote.ss_family) {
 		ret = EINVAL;
 		goto error;
 	}
+	memcpy(&peer->local, &local, sizeof(local));
 	memcpy(&peer->remote, &remote, sizeof(remote));
 
 #ifdef INET6
@@ -633,6 +758,7 @@ ovpn_new_peer(struct ifnet *ifp, const nvlist_t *nvl)
 	 * a new one.
 	 */
 	ret = udp_set_kernel_tunneling(sc->so, NULL, NULL, NULL);
+	MPASS(ret == 0);
 	sorele(sc->so);
 	sc->so = NULL;
 }
@@ -1364,12 +1490,36 @@ opvn_get_pkt(struct ovpn_softc *sc, nvlist_t **onvl)
 	}
 	nvlist_add_number(nvl, "peerid", n->peerid);
 	nvlist_add_number(nvl, "notification", n->type);
-	if (n->type == OVPN_NOTIF_DEL_PEER) {
+	switch (n->type) {
+	case OVPN_NOTIF_DEL_PEER: {
 		nvlist_add_number(nvl, "del_reason", n->del_reason);
 
 		/* No error handling, because we want to send the notification
 		 * even if we can't attach the counters. */
 		ovpn_notif_add_counters(nvl, n);
+		break;
+	}
+	case OVPN_NOTIF_FLOAT: {
+		int ret;
+
+		ret = ovpn_add_sockaddr(nvl, "address",
+		    (struct sockaddr *)&n->address);
+
+		if (ret) {
+			/*
+			 * Try to re-enqueue the notification.  Maybe we'll
+			 * have better luck next time.  No error handling,
+			 * because if we fail to re-enqueue there's nothing
+			 * we can do.
+			 */
+			(void)ovpn_notify_float(sc, n->peerid, &n->address);
+			nvlist_destroy(nvl);
+			free(n, M_OVPN);
+			return (ret);
+		}
+		break;
+	}
+	default:
+		break;
 	}
 
 	free(n, M_OVPN);
@@ -1525,6 +1675,7 @@ ovpn_finish_rx(struct ovpn_softc *sc, struct mbuf *m,
     struct rm_priotracker *_ovpn_lock_trackerp)
 {
 	uint32_t af;
+	struct m_tag *mtag;
 
 	OVPN_RASSERT(sc);
 	NET_EPOCH_ASSERT();
@@ -1543,6 +1694,38 @@ ovpn_finish_rx(struct ovpn_softc *sc, struct mbuf *m,
 
 	OVPN_RUNLOCK(sc);
 
+	/* Check if the peer changed to a new source address. */
+	mtag = m_tag_find(m, PACKET_TAG_OVPN, NULL);
+	if (mtag != NULL) {
+		struct ovpn_mtag *ot = (struct ovpn_mtag *)(mtag + 1);
+
+		OVPN_WLOCK(sc);
+
+		/*
+		 * Check the address against the peer's remote again,
+		 * because we may race against ourselves (i.e. we may have
+		 * tagged multiple packets to indicate we floated).
+		 */
+		if (ovpn_sockaddr_compare((struct sockaddr *)&ot->addr,
+		    (struct sockaddr *)&peer->remote)) {
+			OVPN_WUNLOCK(sc);
+			goto skip_float;
+		}
+
+		/* And notify userspace. */
+		if (ovpn_notify_float(sc, peer->peerid, &ot->addr) == 0) {
+			/*
+			 * Update the 'remote' for this peer, but only if
+			 * we've actually enqueued the notification.
+			 * Otherwise we can try again later.
+			 */
+			memcpy(&peer->remote, &ot->addr, sizeof(peer->remote));
+		}
+
+		OVPN_WUNLOCK(sc);
+	}
+
+skip_float:
 	OVPN_COUNTER_ADD(sc, received_data_pkts, 1);
 	OVPN_COUNTER_ADD(sc, tunnel_bytes_received, m->m_pkthdr.len);
 	OVPN_PEER_COUNTER_ADD(peer, pkt_in, 1);
@@ -2305,6 +2488,29 @@ ovpn_udp_input(struct mbuf *m, int off, struct inpcb *inp,
 		return (true);
 	}
 
+	/*
+	 * If we got this from a different address than we expected,
+	 * tag the packet.  We'll deal with notifying userspace later,
+	 * after we've decrypted and verified.
+	 */
+	if (!ovpn_sockaddr_compare((struct sockaddr *)&peer->remote, sa)) {
+		struct m_tag *mt;
+		struct ovpn_mtag *ot;
+
+		MPASS(sa->sa_len <= sizeof(ot->addr));
+		mt = m_tag_get(PACKET_TAG_OVPN, sizeof(*ot), M_NOWAIT);
+		/*
+		 * If we fail to allocate here we'll just try again on the
+		 * next packet.
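+		 * The tag only records the new source address; it is
+		 * consumed in ovpn_finish_rx() after the packet has been
+		 * decrypted and verified, so a spoofed datagram cannot
+		 * move a peer's remote on its own.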
+ */ + if (mt != NULL) { + ot = (struct ovpn_mtag *)(mt + 1); + memcpy(&ot->addr, sa, sa->sa_len); + + m_tag_prepend(m, mt); + } + } + if (key->decrypt->cipher == OVPN_CIPHER_ALG_NONE) { /* Now remove the outer headers */ m_adj_decap(m, sizeof(struct udphdr) + ohdrlen); @@ -2519,6 +2725,7 @@ ovpn_clone_destroy_cb(struct epoch_context *ctx) COUNTER_ARRAY_FREE(sc->counters, OVPN_COUNTER_SIZE); + rm_destroy(&sc->lock); if_free(sc->ifp); free(sc, M_OVPN); } @@ -2579,23 +2786,53 @@ vnet_ovpn_init(const void *unused __unused) VNET_SYSINIT(vnet_ovpn_init, SI_SUB_PSEUDO, SI_ORDER_ANY, vnet_ovpn_init, NULL); -static void -vnet_ovpn_uninit(const void *unused __unused) +static int +ovpn_prison_remove(void *obj, void *data __unused) { - if_clone_detach(V_ovpn_cloner); +#ifdef VIMAGE + struct prison *pr; + + pr = obj; + if (prison_owns_vnet(pr)) { + CURVNET_SET(pr->pr_vnet); + if (V_ovpn_cloner != NULL) { + ifc_detach_cloner(V_ovpn_cloner); + V_ovpn_cloner = NULL; + } + CURVNET_RESTORE(); + } +#endif + return (0); } -VNET_SYSUNINIT(vnet_ovpn_uninit, SI_SUB_PSEUDO, SI_ORDER_ANY, - vnet_ovpn_uninit, NULL); static int ovpnmodevent(module_t mod, int type, void *data) { + static int ovpn_osd_jail_slot; + switch (type) { - case MOD_LOAD: - /* Done in vnet_ovpn_init() */ + case MOD_LOAD: { + /* + * Registration is handled in vnet_ovpn_init(), but cloned + * interfaces must be destroyed via PR_METHOD_REMOVE since they + * hold a reference to the prison via the UDP socket, which + * prevents the prison from being destroyed. + */ + osd_method_t methods[PR_MAXMETHOD] = { + [PR_METHOD_REMOVE] = ovpn_prison_remove, + }; + ovpn_osd_jail_slot = osd_jail_register(NULL, methods); break; + } case MOD_UNLOAD: - /* Done in vnet_ovpn_uninit() */ + if (ovpn_osd_jail_slot != 0) + osd_jail_deregister(ovpn_osd_jail_slot); + CURVNET_SET(vnet0); + if (V_ovpn_cloner != NULL) { + ifc_detach_cloner(V_ovpn_cloner); + V_ovpn_cloner = NULL; + } + CURVNET_RESTORE(); break; default: return (EOPNOTSUPP); diff --git a/sys/net/if_ovpn.h b/sys/net/if_ovpn.h index 2d6b8c1e7eff..2a24c35788a9 100644 --- a/sys/net/if_ovpn.h +++ b/sys/net/if_ovpn.h @@ -37,6 +37,7 @@ enum ovpn_notif_type { OVPN_NOTIF_DEL_PEER, OVPN_NOTIF_ROTATE_KEY, + OVPN_NOTIF_FLOAT, }; enum ovpn_del_reason { diff --git a/sys/net/if_tuntap.c b/sys/net/if_tuntap.c index 3bab04aa4d38..5e6f65c04b2f 100644 --- a/sys/net/if_tuntap.c +++ b/sys/net/if_tuntap.c @@ -74,6 +74,7 @@ #include <sys/malloc.h> #include <sys/random.h> #include <sys/ctype.h> +#include <sys/osd.h> #include <net/ethernet.h> #include <net/if.h> @@ -178,6 +179,7 @@ struct tuntap_softc { static struct mtx tunmtx; static eventhandler_tag arrival_tag; static eventhandler_tag clone_tag; +static int tuntap_osd_jail_slot; static const char tunname[] = "tun"; static const char tapname[] = "tap"; static const char vmnetname[] = "vmnet"; @@ -497,6 +499,10 @@ vmnet_clone_match(struct if_clone *ifc, const char *name) return (0); } +/* + * Create a clone via the ifnet cloning mechanism. Note that this is invoked + * indirectly by tunclone() below. + */ static int tun_clone_create(struct if_clone *ifc, char *name, size_t len, struct ifc_data *ifd, struct ifnet **ifpp) @@ -532,15 +538,19 @@ tun_clone_create(struct if_clone *ifc, char *name, size_t len, if (i != 0) i = tun_create_device(drv, unit, NULL, &dev, name); if (i == 0) { - dev_ref(dev); + struct tuntap_softc *tp; + tuncreate(dev); - struct tuntap_softc *tp = dev->si_drv1; + tp = dev->si_drv1; *ifpp = tp->tun_ifp; } return (i); } +/* + * Create a clone via devfs access. 
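+ * (For example, the first open("/dev/tun0") arrives here via the
+ * dev_clone event handler registered below; the cdev is created on
+ * demand and if_clone_create() then instantiates the matching ifnet.)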
+ */ static void tunclone(void *arg, struct ucred *cred, char *name, int namelen, struct cdev **dev) @@ -595,11 +605,12 @@ tunclone(void *arg, struct ucred *cred, char *name, int namelen, } i = tun_create_device(drv, u, cred, dev, name); - } - if (i == 0) { + } else { + /* Consumed by the dev_clone invoker. */ dev_ref(*dev); - if_clone_create(name, namelen, NULL); } + if (i == 0) + if_clone_create(name, namelen, NULL); out: CURVNET_RESTORE(); } @@ -670,16 +681,6 @@ VNET_SYSINIT(vnet_tun_init, SI_SUB_PROTO_IF, SI_ORDER_ANY, vnet_tun_init, NULL); static void -vnet_tun_uninit(const void *unused __unused) -{ - - for (u_int i = 0; i < NDRV; ++i) - if_clone_detach(V_tuntap_driver_cloners[i]); -} -VNET_SYSUNINIT(vnet_tun_uninit, SI_SUB_PROTO_IF, SI_ORDER_ANY, - vnet_tun_uninit, NULL); - -static void tun_uninit(const void *unused __unused) { struct tuntap_driver *drv; @@ -689,6 +690,16 @@ tun_uninit(const void *unused __unused) EVENTHANDLER_DEREGISTER(ifnet_arrival_event, arrival_tag); EVENTHANDLER_DEREGISTER(dev_clone, clone_tag); + CURVNET_SET(vnet0); + for (u_int i = 0; i < NDRV; i++) { + if_clone_detach(V_tuntap_driver_cloners[i]); + V_tuntap_driver_cloners[i] = NULL; + } + CURVNET_RESTORE(); + + if (tuntap_osd_jail_slot != 0) + osd_jail_deregister(tuntap_osd_jail_slot); + mtx_lock(&tunmtx); while ((tp = TAILQ_FIRST(&tunhead)) != NULL) { TAILQ_REMOVE(&tunhead, tp, tun_list); @@ -724,6 +735,30 @@ tuntap_driver_from_ifnet(const struct ifnet *ifp) return (NULL); } +/* + * Remove devices that were created by devfs cloning, as they hold references + * which prevent the prison from collapsing, in which state VNET sysuninits will + * not be invoked. + */ +static int +tuntap_prison_remove(void *obj, void *data __unused) +{ +#ifdef VIMAGE + struct prison *pr; + + pr = obj; + if (prison_owns_vnet(pr)) { + CURVNET_SET(pr->pr_vnet); + for (u_int i = 0; i < NDRV; i++) { + if_clone_detach(V_tuntap_driver_cloners[i]); + V_tuntap_driver_cloners[i] = NULL; + } + CURVNET_RESTORE(); + } +#endif + return (0); +} + static int tuntapmodevent(module_t mod, int type, void *data) { @@ -738,8 +773,12 @@ tuntapmodevent(module_t mod, int type, void *data) clone_setup(&drv->clones); drv->unrhdr = new_unrhdr(0, IF_MAXUNIT, &tunmtx); } + osd_method_t methods[PR_MAXMETHOD] = { + [PR_METHOD_REMOVE] = tuntap_prison_remove, + }; + tuntap_osd_jail_slot = osd_jail_register(NULL, methods); arrival_tag = EVENTHANDLER_REGISTER(ifnet_arrival_event, - tunrename, 0, 1000); + tunrename, 0, 1000); if (arrival_tag == NULL) return (ENOMEM); clone_tag = EVENTHANDLER_REGISTER(dev_clone, tunclone, 0, 1000); @@ -747,7 +786,7 @@ tuntapmodevent(module_t mod, int type, void *data) return (ENOMEM); break; case MOD_UNLOAD: - /* See tun_uninit, so it's done after the vnet_sysuninit() */ + /* See tun_uninit(). */ break; default: return EOPNOTSUPP; @@ -798,6 +837,8 @@ tun_create_device(struct tuntap_driver *drv, int unit, struct ucred *cr, args.mda_si_drv1 = tp; error = make_dev_s(&args, dev, "%s", name); if (error != 0) { + mtx_destroy(&tp->tun_mtx); + cv_destroy(&tp->tun_cv); free(tp, M_TUN); return (error); } @@ -914,7 +955,6 @@ tap_transmit(struct ifnet *ifp, struct mbuf *m) return (error); } -/* XXX: should return an error code so it can fail. 
*/ static void tuncreate(struct cdev *dev) { diff --git a/sys/net/if_vlan.c b/sys/net/if_vlan.c index 22fcb7bf7c64..61000018e5a4 100644 --- a/sys/net/if_vlan.c +++ b/sys/net/if_vlan.c @@ -2336,6 +2336,18 @@ vlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) error = ENOENT; break; } + + /* + * If the ifp is in a bridge, do not allow setting the device + * to a bridge; this prevents having a bridge SVI as a bridge + * member (which is not permitted). + */ + if (ifp->if_bridge != NULL && p->if_type == IFT_BRIDGE) { + if_rele(p); + error = EINVAL; + break; + } + if (vlr.vlr_proto == 0) vlr.vlr_proto = ETHERTYPE_VLAN; oldmtu = ifp->if_mtu; diff --git a/sys/net/pfvar.h b/sys/net/pfvar.h index 452a8eb4024b..d55afe750869 100644 --- a/sys/net/pfvar.h +++ b/sys/net/pfvar.h @@ -331,6 +331,14 @@ MALLOC_DECLARE(M_PF_RULE_ITEM); SDT_PROVIDER_DECLARE(pf); SDT_PROBE_DECLARE(pf, , test, reason_set); +SDT_PROBE_DECLARE(pf, , log, log); + +#define DPFPRINTF(n, fmt, x...) \ + do { \ + SDT_PROBE2(pf, , log, log, (n), fmt); \ + if (V_pf_status.debug >= (n)) \ + printf(fmt "\n", ##x); \ + } while (0) struct pfi_dynaddr { TAILQ_ENTRY(pfi_dynaddr) entry; @@ -1676,6 +1684,9 @@ struct pf_pdesc { u_int32_t fragoff; /* fragment header offset */ u_int32_t jumbolen; /* length from v6 jumbo header */ u_int32_t badopts; /* v4 options or v6 routing headers */ +#define PF_OPT_OTHER 0x0001 +#define PF_OPT_JUMBO 0x0002 +#define PF_OPT_ROUTER_ALERT 0x0004 u_int16_t *ip_sum; u_int16_t flags; /* Let SCRUB trigger behavior in diff --git a/sys/netinet/in_fib_dxr.c b/sys/netinet/in_fib_dxr.c index b889131b544b..538cd43a88a3 100644 --- a/sys/netinet/in_fib_dxr.c +++ b/sys/netinet/in_fib_dxr.c @@ -345,7 +345,7 @@ initheap(struct dxr_aux *da, uint32_t dst_u32, uint32_t chunk) struct heap_entry *fhp = &da->heap[0]; struct rtentry *rt; struct route_nhop_data rnd; - + da->heap_index = 0; da->dst.sin_addr.s_addr = htonl(dst_u32); rt = fib4_lookup_rt(da->fibnum, da->dst.sin_addr, 0, NHR_UNLOCKED, @@ -1143,7 +1143,7 @@ dxr_destroy(void *data) free(da, M_DXRAUX); } -static void +static void epoch_dxr_destroy(epoch_context_t ctx) { struct dxr *dxr = __containerof(ctx, struct dxr, epoch_ctx); @@ -1202,7 +1202,7 @@ dxr_dump_end(void *data, struct fib_dp *dp) static enum flm_op_result dxr_dump_rib_item(struct rtentry *rt, void *data) { - + return (FLM_SUCCESS); } diff --git a/sys/netinet/sctp_timer.c b/sys/netinet/sctp_timer.c index 66af716eea52..7d8cb965ab09 100644 --- a/sys/netinet/sctp_timer.c +++ b/sys/netinet/sctp_timer.c @@ -35,7 +35,6 @@ #define _IP_VHL #include <netinet/sctp_os.h> #include <netinet/sctp_pcb.h> - #include <netinet/sctp_var.h> #include <netinet/sctp_sysctl.h> #include <netinet/sctp_timer.h> diff --git a/sys/netinet/tcp_log_buf.h b/sys/netinet/tcp_log_buf.h index 3e7eef8a1cda..f8c064b6a104 100644 --- a/sys/netinet/tcp_log_buf.h +++ b/sys/netinet/tcp_log_buf.h @@ -377,12 +377,12 @@ extern int32_t tcp_trace_point_count; /* * Returns true if any sort of BB logging is enabled, - * commonly used throughout the codebase. + * commonly used throughout the codebase. 
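+ * A typical (illustrative) caller guards log construction with it so
+ * the setup cost is only paid when logging is enabled:
+ *
+ *	if (tcp_bblogging_on(tp)) {
+ *		union tcp_log_stackspecific log;
+ *
+ *		memset(&log, 0, sizeof(log));
+ *		...
+ *	}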
*/ static inline int tcp_bblogging_on(struct tcpcb *tp) { - if (tp->_t_logstate <= TCP_LOG_STATE_OFF) + if (tp->_t_logstate <= TCP_LOG_STATE_OFF) return (0); if (tp->_t_logstate == TCP_LOG_VIA_BBPOINTS) return (0); @@ -427,7 +427,7 @@ tcp_set_bblog_state(struct tcpcb *tp, uint8_t ls, uint8_t bbpoint) } } -static inline uint32_t +static inline uint32_t tcp_get_bblog_state(struct tcpcb *tp) { return (tp->_t_logstate); diff --git a/sys/netinet/tcp_lro.c b/sys/netinet/tcp_lro.c index 10afed17bf3b..7512679bd4e9 100644 --- a/sys/netinet/tcp_lro.c +++ b/sys/netinet/tcp_lro.c @@ -1301,9 +1301,9 @@ tcp_lro_rx_common(struct lro_ctrl *lc, struct mbuf *m, uint32_t csum, bool use_h return (TCP_LRO_CANNOT); #endif if (((m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) != - ((CSUM_DATA_VALID | CSUM_PSEUDO_HDR))) || + ((CSUM_DATA_VALID | CSUM_PSEUDO_HDR))) || (m->m_pkthdr.csum_data != 0xffff)) { - /* + /* * The checksum either did not have hardware offload * or it was a bad checksum. We can't LRO such * a packet. @@ -1334,7 +1334,7 @@ tcp_lro_rx_common(struct lro_ctrl *lc, struct mbuf *m, uint32_t csum, bool use_h #endif /* If no hardware or arrival stamp on the packet add timestamp */ if ((m->m_flags & (M_TSTMP_LRO | M_TSTMP)) == 0) { - m->m_pkthdr.rcv_tstmp = bintime2ns(&lc->lro_last_queue_time); + m->m_pkthdr.rcv_tstmp = bintime2ns(&lc->lro_last_queue_time); m->m_flags |= M_TSTMP_LRO; } @@ -1429,9 +1429,9 @@ tcp_lro_rx(struct lro_ctrl *lc, struct mbuf *m, uint32_t csum) int error; if (((m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) != - ((CSUM_DATA_VALID | CSUM_PSEUDO_HDR))) || + ((CSUM_DATA_VALID | CSUM_PSEUDO_HDR))) || (m->m_pkthdr.csum_data != 0xffff)) { - /* + /* * The checksum either did not have hardware offload * or it was a bad checksum. We can't LRO such * a packet. @@ -1481,7 +1481,7 @@ tcp_lro_queue_mbuf(struct lro_ctrl *lc, struct mbuf *mb) ((mb->m_flags & M_TSTMP) == 0)) { /* Add in an LRO time since no hardware */ binuptime(&lc->lro_last_queue_time); - mb->m_pkthdr.rcv_tstmp = bintime2ns(&lc->lro_last_queue_time); + mb->m_pkthdr.rcv_tstmp = bintime2ns(&lc->lro_last_queue_time); mb->m_flags |= M_TSTMP_LRO; } diff --git a/sys/netinet/tcp_sack.c b/sys/netinet/tcp_sack.c index 90d789f0e224..4405098a8620 100644 --- a/sys/netinet/tcp_sack.c +++ b/sys/netinet/tcp_sack.c @@ -744,7 +744,7 @@ tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack) while (cur != NULL) { if (!(sblkp >= sack_blocks)) { if (((loss_sblks >= tcprexmtthresh) || - (loss_thresh > (tcprexmtthresh-1)*tp->t_maxseg))) + (loss_thresh > (tcprexmtthresh-1)*tp->t_maxseg))) break; loss_thresh += loss_hiack - cur->end; loss_hiack = cur->start; diff --git a/sys/netinet/tcp_stacks/bbr.c b/sys/netinet/tcp_stacks/bbr.c index d2636f01714e..b232d3f08fe6 100644 --- a/sys/netinet/tcp_stacks/bbr.c +++ b/sys/netinet/tcp_stacks/bbr.c @@ -5126,8 +5126,8 @@ bbr_timeout_rxt(struct tcpcb *tp, struct tcp_bbr *bbr, uint32_t cts) tp->t_maxseg = tp->t_pmtud_saved_maxseg; if (tp->t_maxseg < V_tcp_mssdflt) { /* - * The MSS is so small we should not - * process incoming SACK's since we are + * The MSS is so small we should not + * process incoming SACK's since we are * subject to attack in such a case. */ tp->t_flags2 |= TF2_PROC_SACK_PROHIBIT; @@ -10141,7 +10141,7 @@ bbr_init(struct tcpcb *tp, void **ptr) * flags. */ bbr_stop_all_timers(tp, bbr); - /* + /* * Validate the timers are not in usec, if they are convert. * BBR should in theory move to USEC and get rid of a * lot of the TICKS_2 calls.. 
but for now we stay @@ -11544,7 +11544,7 @@ bbr_do_segment_nounlock(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th, bbr_check_bbr_for_state(bbr, cts, __LINE__, (bbr->r_ctl.rc_lost - lost)); if (nxt_pkt == 0) { if ((bbr->r_wanted_output != 0) || - (tp->t_flags & TF_ACKNOW)) { + (tp->t_flags & TF_ACKNOW)) { bbr->rc_output_starts_timer = 0; did_out = 1; @@ -13172,11 +13172,7 @@ send: mb, moff, &len, if_hw_tsomaxsegcount, if_hw_tsomaxsegsize, msb, - ((rsm == NULL) ? hw_tls : 0) -#ifdef NETFLIX_COPY_ARGS - , NULL, NULL -#endif - ); + ((rsm == NULL) ? hw_tls : 0)); if (len <= maxseg) { /* * Must have ran out of mbufs for the copy @@ -13806,8 +13802,8 @@ nomore: tp->t_maxseg = old_maxseg - 40; if (tp->t_maxseg < V_tcp_mssdflt) { /* - * The MSS is so small we should not - * process incoming SACK's since we are + * The MSS is so small we should not + * process incoming SACK's since we are * subject to attack in such a case. */ tp->t_flags2 |= TF2_PROC_SACK_PROHIBIT; diff --git a/sys/netinet/tcp_stacks/rack.c b/sys/netinet/tcp_stacks/rack.c index 834e1347a152..940a4024bb73 100644 --- a/sys/netinet/tcp_stacks/rack.c +++ b/sys/netinet/tcp_stacks/rack.c @@ -7888,8 +7888,8 @@ drop_it: tp->t_maxseg = tp->t_pmtud_saved_maxseg; if (tp->t_maxseg < V_tcp_mssdflt) { /* - * The MSS is so small we should not - * process incoming SACK's since we are + * The MSS is so small we should not + * process incoming SACK's since we are * subject to attack in such a case. */ tp->t_flags2 |= TF2_PROC_SACK_PROHIBIT; @@ -14638,9 +14638,6 @@ rack_init(struct tcpcb *tp, void **ptr) if (rack->r_ctl.pcm_s == NULL) { rack->r_ctl.pcm_i.cnt_alloc = 0; } -#ifdef NETFLIX_STATS - rack->r_ctl.side_chan_dis_mask = tcp_sidechannel_disable_mask; -#endif rack->r_ctl.rack_per_upper_bound_ss = (uint8_t)rack_per_upper_bound_ss; rack->r_ctl.rack_per_upper_bound_ca = (uint8_t)rack_per_upper_bound_ca; if (rack_enable_shared_cwnd) @@ -15564,7 +15561,7 @@ rack_log_pcm(struct tcp_rack *rack, uint8_t mod, uint32_t flex1, uint32_t flex2, if (tcp_bblogging_on(rack->rc_tp)) { union tcp_log_stackspecific log; struct timeval tv; - + (void)tcp_get_usecs(&tv); memset(&log, 0, sizeof(log)); log.u_bbr.timeStamp = tcp_tv_to_usectick(&tv); @@ -19915,7 +19912,7 @@ rack_output(struct tcpcb *tp) goto nomore; } else { /* Return == 0, if there is more we can send tot_len wise fall through and send */ - if (tot_len_this_send >= pace_max_seg) + if (tot_len_this_send >= pace_max_seg) return (ret); #ifdef TCP_ACCOUNTING /* We need to re-pin since fast_output un-pined */ @@ -21556,11 +21553,7 @@ send: m->m_next = tcp_m_copym( mb, moff, &len, if_hw_tsomaxsegcount, if_hw_tsomaxsegsize, msb, - ((rsm == NULL) ? hw_tls : 0) -#ifdef NETFLIX_COPY_ARGS - , &s_mb, &s_moff -#endif - ); + ((rsm == NULL) ? hw_tls : 0)); if (len <= (tp->t_maxseg - optlen)) { /* * Must have ran out of mbufs for the copy diff --git a/sys/netinet/tcp_stacks/rack_pcm.c b/sys/netinet/tcp_stacks/rack_pcm.c index b0e300847c4a..101e6826536c 100644 --- a/sys/netinet/tcp_stacks/rack_pcm.c +++ b/sys/netinet/tcp_stacks/rack_pcm.c @@ -172,7 +172,7 @@ rack_update_pcm_ack(struct tcp_rack *rack, int was_cumack, uint32_t start, uint3 goto skip_ack_accounting; } /* - * Record ACK data. + * Record ACK data. 
*/ ack_arrival = tcp_tv_to_lusectick(&rack->r_ctl.act_rcv_time); if (SEQ_GT(end, rack->r_ctl.pcm_i.eseq)) { @@ -305,7 +305,7 @@ skip_ack_accounting: 0, &log, false, NULL, NULL, 0, &tv); } } - /* + /* * Here we need a lot to be added including: * 1) Some form of measurement, where if we think the measurement * is valid we iterate over the PCM data and come up with a path diff --git a/sys/netinet/tcp_stacks/sack_filter.c b/sys/netinet/tcp_stacks/sack_filter.c index fc9ee8454a1e..2b70548f3cc6 100644 --- a/sys/netinet/tcp_stacks/sack_filter.c +++ b/sys/netinet/tcp_stacks/sack_filter.c @@ -400,7 +400,7 @@ sack_filter_run(struct sack_filter *sf, struct sackblk *in, int numblks, tcp_seq break; } /* Copy it out to the outbound */ - memcpy(&in[at], &blkboard[i], sizeof(struct sackblk)); + memcpy(&in[at], &blkboard[i], sizeof(struct sackblk)); at++; room--; /* now lets add it to our sack-board */ @@ -588,7 +588,7 @@ sack_filter_blks(struct tcpcb *tp, struct sack_filter *sf, struct sackblk *in, i sf->sf_ack = th_ack; for(i=0, sf->sf_cur=0; i<numblks; i++) { - if ((in[i].end != tp->snd_max) && + if ((in[i].end != tp->snd_max) && ((in[i].end - in[i].start) < segmax)) { /* * We do not accept blocks less than a MSS minus all @@ -707,7 +707,7 @@ main(int argc, char **argv) out = stdout; memset(&tp, 0, sizeof(tp)); tp.t_maxseg = 1460; - + while ((i = getopt(argc, argv, "dIi:o:?hS:")) != -1) { switch (i) { case 'S': @@ -883,7 +883,7 @@ main(int argc, char **argv) } else { printf("can't open sack_setup.bin -- sorry no load\n"); } - + } else if (strncmp(buffer, "help", 4) == 0) { help: fprintf(out, "You can input:\n"); diff --git a/sys/netinet/tcp_stacks/sack_filter.h b/sys/netinet/tcp_stacks/sack_filter.h index b12fcf84567c..a1c0684a4359 100644 --- a/sys/netinet/tcp_stacks/sack_filter.h +++ b/sys/netinet/tcp_stacks/sack_filter.h @@ -42,7 +42,7 @@ * previously processed sack information. * * The second thing that the sack filter does is help protect against malicious - * attackers that are trying to attack any linked lists (or other data structures) + * attackers that are trying to attack any linked lists (or other data structures) * that are used in sack processing. Consider an attacker sending in sacks for * every other byte of data outstanding. This could in theory drastically split * up any scoreboard you are maintaining and make you search through a very large diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c index db415f6bdf03..26e7e53d540c 100644 --- a/sys/netinet/tcp_subr.c +++ b/sys/netinet/tcp_subr.c @@ -4537,7 +4537,7 @@ tcp_change_time_units(struct tcpcb *tp, int granularity) panic("Unknown granularity:%d tp:%p", granularity, tp); } -#endif +#endif } void diff --git a/sys/netinet/tcp_timer.c b/sys/netinet/tcp_timer.c index 32ce3001929c..3b9fe7a317b0 100644 --- a/sys/netinet/tcp_timer.c +++ b/sys/netinet/tcp_timer.c @@ -757,8 +757,8 @@ tcp_timer_rexmt(struct tcpcb *tp) tp->t_maxseg = tp->t_pmtud_saved_maxseg; if (tp->t_maxseg < V_tcp_mssdflt) { /* - * The MSS is so small we should not - * process incoming SACK's since we are + * The MSS is so small we should not + * process incoming SACK's since we are * subject to attack in such a case. 
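+				 * (For instance, a malicious receiver could
+				 * SACK every other byte of the window,
+				 * shattering the scoreboard into thousands
+				 * of tiny blocks; see the discussion in
+				 * sack_filter.h.)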
*/ tp->t_flags2 |= TF2_PROC_SACK_PROHIBIT; diff --git a/sys/netinet/tcp_usrreq.c b/sys/netinet/tcp_usrreq.c index 687b0d538666..98c934955121 100644 --- a/sys/netinet/tcp_usrreq.c +++ b/sys/netinet/tcp_usrreq.c @@ -164,7 +164,7 @@ tcp_usr_attach(struct socket *so, int proto, struct thread *td) goto out; so->so_rcv.sb_flags |= SB_AUTOSIZE; - so->so_snd.sb_flags |= SB_AUTOSIZE; + so->so_snd.sb_flags |= (SB_AUTOLOWAT | SB_AUTOSIZE); error = in_pcballoc(so, &V_tcbinfo); if (error) goto out; @@ -1768,9 +1768,9 @@ tcp_ctloutput_set(struct inpcb *inp, struct sockopt *sopt) /* * Release the ref count the lookup * acquired. - */ + */ refcount_release(&blk->tfb_refcnt); - /* + /* * Now there is a chance that the * init() function mucked with some * things before it failed, such as @@ -1800,7 +1800,7 @@ tcp_ctloutput_set(struct inpcb *inp, struct sockopt *sopt) * new one already. */ refcount_release(&tp->t_fb->tfb_refcnt); - /* + /* * Set in the new stack. */ tp->t_fb = blk; @@ -1934,7 +1934,7 @@ tcp_set_cc_mod(struct inpcb *inp, struct sockopt *sopt) CC_LIST_RUNLOCK(); return(ESRCH); } - /* + /* * With a reference the algorithm cannot be removed * so we hold a reference through the change process. */ diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h index 059b2aff689d..b90f65e83cb1 100644 --- a/sys/netinet/tcp_var.h +++ b/sys/netinet/tcp_var.h @@ -182,7 +182,7 @@ struct tcp_sendfile_track { * snd_una). When the response comes back indicating * that there was data (return value 1), then the caller * can build a sendmap entry based on the range and the - * times. The next query would then be done at the + * times. The next query would then be done at the * newly created sendmap_end. Repeated until sendmap_end == snd_max. * * Flags in sendmap_flags are defined below as well. @@ -197,7 +197,7 @@ struct tcp_sendfile_track { * The rack_times are a misc collection of information that * the old stack might possibly fill in. Of course its possible * that an old stack may not have a piece of information. If so - * then setting that value to zero is advised. Setting any + * then setting that value to zero is advised. Setting any * timestamp passed should only place a zero in it when it * is unfilled. This may mean that a time is off by a micro-second * but this is ok in the grand scheme of things. @@ -205,13 +205,13 @@ struct tcp_sendfile_track { * When switching stacks it is desireable to get as much information * from the old stack to the new stack as possible. Though not always * will the stack be compatible in the types of information. The - * init() function needs to take care when it begins changing + * init() function needs to take care when it begins changing * things such as inp_flags2 and the timer units to position these * changes at a point where it is unlikely they will fail after * making such changes. A stack optionally can have an "undo" - * function + * function * - * To transfer information to the old stack from the new in + * To transfer information to the old stack from the new in * respect to LRO and the inp_flags2, the new stack should set * the inp_flags2 to what it supports. The old stack in its * fini() function should call the tcp_handle_orphaned_packets() @@ -544,13 +544,13 @@ typedef enum { * do is: * a) Make sure that the inp_flags2 is setup correctly * for LRO. There are two flags that the previous - * stack may have set INP_MBUF_ACKCMP and + * stack may have set INP_MBUF_ACKCMP and * INP_SUPPORTS_MBUFQ. 
If the new stack does not * support these it *should* clear the flags. * b) Make sure that the timers are in the proper * granularity that the stack wants. The stack * should check the t_tmr_granularity field. Currently - * there are two values that it may hold + * there are two values that it may hold * TCP_TMR_GRANULARITY_TICKS and TCP_TMR_GRANULARITY_USEC. * Use the functions tcp_timer_convert(tp, granularity); * to move the timers to the correct format for your stack. @@ -558,14 +558,14 @@ typedef enum { * The new stack may also optionally query the tfb_chg_query * function if the old stack has one. The new stack may ask * for one of three entries and can also state to the old - * stack its support for the INP_MBUF_ACKCMP and + * stack its support for the INP_MBUF_ACKCMP and * INP_SUPPORTS_MBUFQ. This is important since if there are * queued ack's without that statement the old stack will * be forced to discard the queued acks. The requests that * can be made for information by the new stacks are: * * Note also that the tfb_tcp_fb_init() when called can - * determine if a query is needed by looking at the + * determine if a query is needed by looking at the * value passed in the ptr. The ptr is designed to be * set in with any allocated memory, but the address * of the condtion (ptr == &tp->t_fb_ptr) will be @@ -573,17 +573,17 @@ typedef enum { * setup of a tcb (which means no query would be needed). * If, however, the value is not t_fb_ptr, then the caller * is in the middle of a stack switch and is the new stack. - * A query would be appropriate (if the new stack support + * A query would be appropriate (if the new stack support * the query mechanism). * * TCP_QUERY_SENDMAP - Query of outstanding data. * TCP_QUERY_TIMERS_UP - Query about running timers. - * TCP_SUPPORTED_LRO - Declaration in req_param of - * the inp_flags2 supported by + * TCP_SUPPORTED_LRO - Declaration in req_param of + * the inp_flags2 supported by * the new stack. * TCP_QUERY_RACK_TIMES - Enquire about various timestamps * and states the old stack may be in. - * + * * tfb_tcp_fb_fini is changed to add a flag to tell * the old stack if the tcb is being destroyed or * not. A one in the flag means the TCB is being diff --git a/sys/netinet/udp_usrreq.c b/sys/netinet/udp_usrreq.c index dafbaf6dc672..42cfb919e263 100644 --- a/sys/netinet/udp_usrreq.c +++ b/sys/netinet/udp_usrreq.c @@ -243,7 +243,6 @@ udp_append(struct inpcb *inp, struct ip *ip, struct mbuf *n, int off, struct sockaddr_in6 udp_in6; #endif struct udpcb *up; - bool filtered; INP_LOCK_ASSERT(inp); @@ -252,13 +251,19 @@ udp_append(struct inpcb *inp, struct ip *ip, struct mbuf *n, int off, */ up = intoudpcb(inp); if (up->u_tun_func != NULL) { + bool filtered; + in_pcbref(inp); INP_RUNLOCK(inp); filtered = (*up->u_tun_func)(n, off, inp, (struct sockaddr *)&udp_in[0], up->u_tun_ctx); INP_RLOCK(inp); - if (filtered) - return (in_pcbrele_rlocked(inp)); + if (in_pcbrele_rlocked(inp)) + return (1); + if (filtered) { + INP_RUNLOCK(inp); + return (1); + } } off += sizeof(struct udphdr); diff --git a/sys/netinet6/scope6.c b/sys/netinet6/scope6.c index 0987ea7e99ad..08702a2e81ab 100644 --- a/sys/netinet6/scope6.c +++ b/sys/netinet6/scope6.c @@ -505,8 +505,23 @@ in6_set_unicast_scopeid(struct in6_addr *in6, uint32_t scopeid) struct ifnet* in6_getlinkifnet(uint32_t zoneid) { + struct ifnet *ifp; - return (ifnet_byindex((u_short)zoneid)); + ifp = ifnet_byindex((u_short)zoneid); + + if (ifp == NULL) + return (NULL); + + /* An interface might not be IPv6 capable. 
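in6_getlinkifnet() now folds two failure modes into a single NULL return, so callers resolving an embedded scope id need only one check. A toy model of that contract (reduced types, invented names):

    #include <stddef.h>
    #include <stdint.h>

    struct ifnet_lite {
            const char *if_name;
            void *if_afdata_inet6;  /* NULL => no IPv6 on this ifnet */
    };

    /*
     * One NULL return covers both "no interface at that index" and
     * "interface has no IPv6 AF data", as in the change below.
     */
    static struct ifnet_lite *
    getlinkifnet_lite(struct ifnet_lite **tbl, size_t n, uint32_t zoneid)
    {
            if (zoneid >= n || tbl[zoneid] == NULL)
                    return (NULL);
            if (tbl[zoneid]->if_afdata_inet6 == NULL)
                    return (NULL);
            return (tbl[zoneid]);
    }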
*/ + if (ifp->if_afdata[AF_INET6] == NULL) { + log(LOG_NOTICE, + "%s: embedded scope points to an interface without " + "IPv6: %s%%%d.\n", __func__, + if_name(ifp), zoneid); + return (NULL); + } + + return (ifp); } /* diff --git a/sys/netinet6/udp6_usrreq.c b/sys/netinet6/udp6_usrreq.c index 304effa26e01..b3ed16fda713 100644 --- a/sys/netinet6/udp6_usrreq.c +++ b/sys/netinet6/udp6_usrreq.c @@ -142,7 +142,6 @@ udp6_append(struct inpcb *inp, struct mbuf *n, int off, struct socket *so; struct mbuf *opts = NULL, *tmp_opts; struct udpcb *up; - bool filtered; INP_LOCK_ASSERT(inp); @@ -151,13 +150,19 @@ udp6_append(struct inpcb *inp, struct mbuf *n, int off, */ up = intoudpcb(inp); if (up->u_tun_func != NULL) { + bool filtered; + in_pcbref(inp); INP_RUNLOCK(inp); filtered = (*up->u_tun_func)(n, off, inp, (struct sockaddr *)&fromsa[0], up->u_tun_ctx); INP_RLOCK(inp); - if (filtered) - return (in_pcbrele_rlocked(inp)); + if (in_pcbrele_rlocked(inp)) + return (1); + if (filtered) { + INP_RUNLOCK(inp); + return (1); + } } off += sizeof(struct udphdr); diff --git a/sys/netpfil/ipfw/ip_fw2.c b/sys/netpfil/ipfw/ip_fw2.c index 923633d76df7..c129c8c49921 100644 --- a/sys/netpfil/ipfw/ip_fw2.c +++ b/sys/netpfil/ipfw/ip_fw2.c @@ -196,7 +196,7 @@ SYSCTL_NODE(_net_inet_ip, OID_AUTO, fw, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "Firewall"); SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, one_pass, CTLFLAG_VNET | CTLFLAG_RW | CTLFLAG_SECURE3, &VNET_NAME(fw_one_pass), 0, - "Only do a single pass through ipfw when using dummynet(4)"); + "Only do a single pass through ipfw when using dummynet(4), ipfw_nat or other divert(4)-like interfaces"); SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, autoinc_step, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(autoinc_step), 0, "Rule number auto-increment step"); diff --git a/sys/netpfil/pf/if_pfsync.c b/sys/netpfil/pf/if_pfsync.c index 4e03584b8f85..ee10a997c977 100644 --- a/sys/netpfil/pf/if_pfsync.c +++ b/sys/netpfil/pf/if_pfsync.c @@ -110,8 +110,6 @@ #include <netpfil/pf/pfsync_nv.h> -#define DPFPRINTF(n, x) if (V_pf_status.debug >= (n)) printf x - struct pfsync_bucket; struct pfsync_softc; @@ -597,9 +595,9 @@ pfsync_state_import(union pfsync_state_union *sp, int flags, int msg_version) if ((rpool_first == NULL) || (TAILQ_NEXT(rpool_first, entries) != NULL)) { DPFPRINTF(PF_DEBUG_MISC, - ("%s: can't recover routing information " - "because of empty or bad redirection pool\n", - __func__)); + "%s: can't recover routing information " + "because of empty or bad redirection pool", + __func__); return ((flags & PFSYNC_SI_IOCTL) ? EINVAL : 0); } rt = r->rt; @@ -610,8 +608,8 @@ pfsync_state_import(union pfsync_state_union *sp, int flags, int msg_version) * give up on recovering. */ DPFPRINTF(PF_DEBUG_MISC, - ("%s: can't recover routing information " - "because of different ruleset\n", __func__)); + "%s: can't recover routing information " + "because of different ruleset", __func__); return ((flags & PFSYNC_SI_IOCTL) ? EINVAL : 0); } break; @@ -624,8 +622,8 @@ pfsync_state_import(union pfsync_state_union *sp, int flags, int msg_version) rt_kif = pfi_kkif_find(sp->pfs_1400.rt_ifname); if (rt_kif == NULL) { DPFPRINTF(PF_DEBUG_MISC, - ("%s: unknown route interface: %s\n", - __func__, sp->pfs_1400.rt_ifname)); + "%s: unknown route interface: %s", + __func__, sp->pfs_1400.rt_ifname); return ((flags & PFSYNC_SI_IOCTL) ? 
EINVAL : 0); } rt = sp->pfs_1400.rt; diff --git a/sys/netpfil/pf/pf.c b/sys/netpfil/pf/pf.c index 009f7e4d78b1..79c298c18b46 100644 --- a/sys/netpfil/pf/pf.c +++ b/sys/netpfil/pf/pf.c @@ -119,8 +119,6 @@ #include <machine/in_cksum.h> #include <security/mac/mac_framework.h> -#define DPFPRINTF(n, x) if (V_pf_status.debug >= (n)) printf x - SDT_PROVIDER_DEFINE(pf); SDT_PROBE_DEFINE2(pf, , test, reason_set, "int", "int"); SDT_PROBE_DEFINE4(pf, ip, test, done, "int", "int", "struct pf_krule *", @@ -161,6 +159,7 @@ SDT_PROBE_DEFINE2(pf, eth, test_rule, match, "int", "struct pf_keth_rule *"); SDT_PROBE_DEFINE2(pf, eth, test_rule, final_match, "int", "struct pf_keth_rule *"); SDT_PROBE_DEFINE2(pf, purge, state, rowcount, "int", "size_t"); +SDT_PROBE_DEFINE2(pf, , log, log, "int", "const char *"); /* * Global variables @@ -375,6 +374,8 @@ static u_int16_t pf_calc_mss(struct pf_addr *, sa_family_t, int, u_int16_t); static int pf_check_proto_cksum(struct mbuf *, int, int, u_int8_t, sa_family_t); +static int pf_walk_option(struct pf_pdesc *, struct ip *, + int, int, u_short *); static int pf_walk_header(struct pf_pdesc *, struct ip *, u_short *); #ifdef INET6 static int pf_walk_option6(struct pf_pdesc *, struct ip6_hdr *, @@ -4615,8 +4616,8 @@ pf_match_rcvif(struct mbuf *m, struct pf_krule *r) if (kif == NULL) { DPFPRINTF(PF_DEBUG_URGENT, - ("%s: kif == NULL, @%d via %s\n", __func__, r->nr, - r->rcv_ifname)); + "%s: kif == NULL, @%d via %s", __func__, r->nr, + r->rcv_ifname); return (0); } @@ -4975,7 +4976,7 @@ pf_socket_lookup(struct pf_pdesc *pd) } INP_RLOCK_ASSERT(inp); pd->lookup.uid = inp->inp_cred->cr_uid; - pd->lookup.gid = inp->inp_cred->cr_groups[0]; + pd->lookup.gid = inp->inp_cred->cr_gid; INP_RUNLOCK(inp); return (1); @@ -5242,8 +5243,8 @@ pf_test_eth_rule(int dir, struct pfi_kkif *kif, struct mbuf **m0) if (__predict_false(m->m_len < sizeof(struct ether_header)) && (m = *m0 = m_pullup(*m0, sizeof(struct ether_header))) == NULL) { DPFPRINTF(PF_DEBUG_URGENT, - ("%s: m_len < sizeof(struct ether_header)" - ", pullup failed\n", __func__)); + "%s: m_len < sizeof(struct ether_header)" + ", pullup failed", __func__); return (PF_DROP); } e = mtod(m, struct ether_header *); @@ -5759,7 +5760,7 @@ pf_test_rule(struct pf_krule **rm, struct pf_kstate **sm, if (inp != NULL) { INP_LOCK_ASSERT(inp); pd->lookup.uid = inp->inp_cred->cr_uid; - pd->lookup.gid = inp->inp_cred->cr_groups[0]; + pd->lookup.gid = inp->inp_cred->cr_gid; pd->lookup.done = 1; } @@ -6168,8 +6169,8 @@ pf_create_state(struct pf_krule *r, struct pf_test_ctx *ctx, &s->src, &s->dst, &ctx->rewrite)) { /* This really shouldn't happen!!! 
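Throughout pf, the double-parenthesized DPFPRINTF((...)) form is being replaced by a true variadic macro, which is why every converted call site drops the inner parentheses and the trailing \n. The new definition is not part of this diff (presumably it appends the newline itself and can feed the new pf:log SDT probe); the mechanical difference looks like this (##__VA_ARGS__ is the GNU/C2x extension the kernel already uses):

    #include <stdio.h>

    static int debug_level = 1;

    /* Old style: the argument list must be double-parenthesized. */
    #define DPFPRINTF_OLD(n, x)     if (debug_level >= (n)) printf x

    /* New style: true variadic macro, newline appended by the macro. */
    #define DPFPRINTF_NEW(n, fmt, ...)                              \
            do {                                                    \
                    if (debug_level >= (n))                         \
                            printf(fmt "\n", ##__VA_ARGS__);        \
            } while (0)

    int
    main(void)
    {
            DPFPRINTF_OLD(1, ("old: %d\n", 42));
            DPFPRINTF_NEW(1, "new: %d", 42);
            return (0);
    }

The do/while(0) wrapper also makes the new form safe in unbraced if/else bodies, which the old expansion was not.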
*/ DPFPRINTF(PF_DEBUG_URGENT, - ("%s: tcp normalize failed on first " - "pkt\n", __func__)); + "%s: tcp normalize failed on first " + "pkt", __func__); goto csfailed; } } else if (pd->proto == IPPROTO_SCTP) { @@ -7398,7 +7399,7 @@ pf_sctp_multihome_delayed(struct pf_pdesc *pd, struct pfi_kkif *kif, { struct pf_sctp_multihome_job *j, *tmp; struct pf_sctp_source *i; - int ret __unused; + int ret; struct pf_kstate *sm = NULL; struct pf_krule *ra = NULL; struct pf_krule *r = &V_pf_default_rule; @@ -7965,8 +7966,8 @@ pf_test_state_icmp(struct pf_kstate **state, struct pf_pdesc *pd, if (!pf_pull_hdr(pd->m, ipoff2, &h2, sizeof(h2), NULL, reason, pd2.af)) { DPFPRINTF(PF_DEBUG_MISC, - ("pf: ICMP error message too short " - "(ip)\n")); + "pf: ICMP error message too short " + "(ip)"); return (PF_DROP); } /* @@ -7996,8 +7997,8 @@ pf_test_state_icmp(struct pf_kstate **state, struct pf_pdesc *pd, if (!pf_pull_hdr(pd->m, ipoff2, &h2_6, sizeof(h2_6), NULL, reason, pd2.af)) { DPFPRINTF(PF_DEBUG_MISC, - ("pf: ICMP error message too short " - "(ip6)\n")); + "pf: ICMP error message too short " + "(ip6)"); return (PF_DROP); } pd2.off = ipoff2; @@ -8049,8 +8050,8 @@ pf_test_state_icmp(struct pf_kstate **state, struct pf_pdesc *pd, if (!pf_pull_hdr(pd->m, pd2.off, th, 8, NULL, reason, pd2.af)) { DPFPRINTF(PF_DEBUG_MISC, - ("pf: ICMP error message too short " - "(tcp)\n")); + "pf: ICMP error message too short " + "(tcp)"); return (PF_DROP); } pd2.pcksum = &pd2.hdr.tcp.th_sum; @@ -8244,8 +8245,8 @@ pf_test_state_icmp(struct pf_kstate **state, struct pf_pdesc *pd, if (!pf_pull_hdr(pd->m, pd2.off, uh, sizeof(*uh), NULL, reason, pd2.af)) { DPFPRINTF(PF_DEBUG_MISC, - ("pf: ICMP error message too short " - "(udp)\n")); + "pf: ICMP error message too short " + "(udp)"); return (PF_DROP); } pd2.pcksum = &pd2.hdr.udp.uh_sum; @@ -8376,8 +8377,8 @@ pf_test_state_icmp(struct pf_kstate **state, struct pf_pdesc *pd, if (! 
pf_pull_hdr(pd->m, pd2.off, sh, sizeof(*sh), NULL, reason, pd2.af)) { DPFPRINTF(PF_DEBUG_MISC, - ("pf: ICMP error message too short " - "(sctp)\n")); + "pf: ICMP error message too short " + "(sctp)"); return (PF_DROP); } pd2.pcksum = &pd2.sctp_dummy_sum; @@ -8407,8 +8408,8 @@ pf_test_state_icmp(struct pf_kstate **state, struct pf_pdesc *pd, if (src->scrub->pfss_v_tag != sh->v_tag) { DPFPRINTF(PF_DEBUG_MISC, - ("pf: ICMP error message has incorrect " - "SCTP v_tag\n")); + "pf: ICMP error message has incorrect " + "SCTP v_tag"); return (PF_DROP); } @@ -8531,8 +8532,8 @@ pf_test_state_icmp(struct pf_kstate **state, struct pf_pdesc *pd, if (!pf_pull_hdr(pd->m, pd2.off, iih, ICMP_MINLEN, NULL, reason, pd2.af)) { DPFPRINTF(PF_DEBUG_MISC, - ("pf: ICMP error message too short i" - "(icmp)\n")); + "pf: ICMP error message too short i" + "(icmp)"); return (PF_DROP); } pd2.pcksum = &pd2.hdr.icmp.icmp_cksum; @@ -8651,8 +8652,8 @@ pf_test_state_icmp(struct pf_kstate **state, struct pf_pdesc *pd, if (!pf_pull_hdr(pd->m, pd2.off, iih, sizeof(struct icmp6_hdr), NULL, reason, pd2.af)) { DPFPRINTF(PF_DEBUG_MISC, - ("pf: ICMP error message too short " - "(icmp6)\n")); + "pf: ICMP error message too short " + "(icmp6)"); return (PF_DROP); } pd2.pcksum = &pd2.hdr.icmp6.icmp6_cksum; @@ -9082,7 +9083,7 @@ pf_route(struct pf_krule *r, struct ifnet *oifp, } if (m0->m_len < sizeof(struct ip)) { DPFPRINTF(PF_DEBUG_URGENT, - ("%s: m0->m_len < sizeof(struct ip)\n", __func__)); + "%s: m0->m_len < sizeof(struct ip)", __func__); SDT_PROBE1(pf, ip, route_to, drop, __LINE__); goto bad; } @@ -9387,8 +9388,8 @@ pf_route6(struct pf_krule *r, struct ifnet *oifp, } if (m0->m_len < sizeof(struct ip6_hdr)) { DPFPRINTF(PF_DEBUG_URGENT, - ("%s: m0->m_len < sizeof(struct ip6_hdr)\n", - __func__)); + "%s: m0->m_len < sizeof(struct ip6_hdr)", + __func__); SDT_PROBE1(pf, ip6, route_to, drop, __LINE__); goto bad; } @@ -9683,7 +9684,7 @@ pf_test_eth(int dir, int pflags, struct ifnet *ifp, struct mbuf **m0, if (kif == NULL) { DPFPRINTF(PF_DEBUG_URGENT, - ("%s: kif == NULL, if_xname %s\n", __func__, ifp->if_xname)); + "%s: kif == NULL, if_xname %s", __func__, ifp->if_xname); return (PF_DROP); } if (kif->pfik_flags & PFI_IFLAG_SKIP) @@ -9798,6 +9799,62 @@ pf_dummynet_route(struct pf_pdesc *pd, struct pf_kstate *s, } static int +pf_walk_option(struct pf_pdesc *pd, struct ip *h, int off, int end, + u_short *reason) +{ + uint8_t type, length, opts[15 * 4 - sizeof(struct ip)]; + + /* IP header in payload of ICMP packet may be too short */ + if (pd->m->m_pkthdr.len < end) { + DPFPRINTF(PF_DEBUG_MISC, "IP option too short"); + REASON_SET(reason, PFRES_SHORT); + return (PF_DROP); + } + + MPASS(end - off <= sizeof(opts)); + m_copydata(pd->m, off, end - off, opts); + end -= off; + off = 0; + + while (off < end) { + type = opts[off]; + if (type == IPOPT_EOL) + break; + if (type == IPOPT_NOP) { + off++; + continue; + } + if (off + 2 > end) { + DPFPRINTF(PF_DEBUG_MISC, "IP length opt"); + REASON_SET(reason, PFRES_IPOPTIONS); + return (PF_DROP); + } + length = opts[off + 1]; + if (length < 2) { + DPFPRINTF(PF_DEBUG_MISC, "IP short opt"); + REASON_SET(reason, PFRES_IPOPTIONS); + return (PF_DROP); + } + if (off + length > end) { + DPFPRINTF(PF_DEBUG_MISC, "IP long opt"); + REASON_SET(reason, PFRES_IPOPTIONS); + return (PF_DROP); + } + switch (type) { + case IPOPT_RA: + pd->badopts |= PF_OPT_ROUTER_ALERT; + break; + default: + pd->badopts |= PF_OPT_OTHER; + break; + } + off += length; + } + + return (PF_PASS); +} + +static int pf_walk_header(struct pf_pdesc 
*pd, struct ip *h, u_short *reason) { struct ah ext; @@ -9809,11 +9866,28 @@ pf_walk_header(struct pf_pdesc *pd, struct ip *h, u_short *reason) REASON_SET(reason, PFRES_SHORT); return (PF_DROP); } - if (hlen != sizeof(struct ip)) - pd->badopts++; + if (hlen != sizeof(struct ip)) { + if (pf_walk_option(pd, h, pd->off + sizeof(struct ip), + pd->off + hlen, reason) != PF_PASS) + return (PF_DROP); + /* header options which contain only padding is fishy */ + if (pd->badopts == 0) + pd->badopts |= PF_OPT_OTHER; + } end = pd->off + ntohs(h->ip_len); pd->off += hlen; pd->proto = h->ip_p; + /* IGMP packets have router alert options, allow them */ + if (pd->proto == IPPROTO_IGMP) { + /* According to RFC 1112 ttl must be set to 1. */ + if ((h->ip_ttl != 1) || + !IN_MULTICAST(ntohl(h->ip_dst.s_addr))) { + DPFPRINTF(PF_DEBUG_MISC, "Invalid IGMP"); + REASON_SET(reason, PFRES_IPOPTIONS); + return (PF_DROP); + } + pd->badopts &= ~PF_OPT_ROUTER_ALERT; + } /* stop walking over non initial fragments */ if ((h->ip_off & htons(IP_OFFMASK)) != 0) return (PF_PASS); @@ -9826,7 +9900,7 @@ pf_walk_header(struct pf_pdesc *pd, struct ip *h, u_short *reason) return (PF_PASS); if (!pf_pull_hdr(pd->m, pd->off, &ext, sizeof(ext), NULL, reason, AF_INET)) { - DPFPRINTF(PF_DEBUG_MISC, ("IP short exthdr")); + DPFPRINTF(PF_DEBUG_MISC, "IP short exthdr"); return (PF_DROP); } pd->off += (ext.ah_len + 2) * 4; @@ -9836,7 +9910,7 @@ pf_walk_header(struct pf_pdesc *pd, struct ip *h, u_short *reason) return (PF_PASS); } } - DPFPRINTF(PF_DEBUG_MISC, ("IPv4 nested authentication header limit")); + DPFPRINTF(PF_DEBUG_MISC, "IPv4 nested authentication header limit"); REASON_SET(reason, PFRES_IPOPTIONS); return (PF_DROP); } @@ -9852,7 +9926,7 @@ pf_walk_option6(struct pf_pdesc *pd, struct ip6_hdr *h, int off, int end, while (off < end) { if (!pf_pull_hdr(pd->m, off, &opt.ip6o_type, sizeof(opt.ip6o_type), NULL, reason, AF_INET6)) { - DPFPRINTF(PF_DEBUG_MISC, ("IPv6 short opt type")); + DPFPRINTF(PF_DEBUG_MISC, "IPv6 short opt type"); return (PF_DROP); } if (opt.ip6o_type == IP6OPT_PAD1) { @@ -9861,41 +9935,48 @@ pf_walk_option6(struct pf_pdesc *pd, struct ip6_hdr *h, int off, int end, } if (!pf_pull_hdr(pd->m, off, &opt, sizeof(opt), NULL, reason, AF_INET6)) { - DPFPRINTF(PF_DEBUG_MISC, ("IPv6 short opt")); + DPFPRINTF(PF_DEBUG_MISC, "IPv6 short opt"); return (PF_DROP); } if (off + sizeof(opt) + opt.ip6o_len > end) { - DPFPRINTF(PF_DEBUG_MISC, ("IPv6 long opt")); + DPFPRINTF(PF_DEBUG_MISC, "IPv6 long opt"); REASON_SET(reason, PFRES_IPOPTIONS); return (PF_DROP); } switch (opt.ip6o_type) { + case IP6OPT_PADN: + break; case IP6OPT_JUMBO: + pd->badopts |= PF_OPT_JUMBO; if (pd->jumbolen != 0) { - DPFPRINTF(PF_DEBUG_MISC, ("IPv6 multiple jumbo")); + DPFPRINTF(PF_DEBUG_MISC, "IPv6 multiple jumbo"); REASON_SET(reason, PFRES_IPOPTIONS); return (PF_DROP); } if (ntohs(h->ip6_plen) != 0) { - DPFPRINTF(PF_DEBUG_MISC, ("IPv6 bad jumbo plen")); + DPFPRINTF(PF_DEBUG_MISC, "IPv6 bad jumbo plen"); REASON_SET(reason, PFRES_IPOPTIONS); return (PF_DROP); } if (!pf_pull_hdr(pd->m, off, &jumbo, sizeof(jumbo), NULL, reason, AF_INET6)) { - DPFPRINTF(PF_DEBUG_MISC, ("IPv6 short jumbo")); + DPFPRINTF(PF_DEBUG_MISC, "IPv6 short jumbo"); return (PF_DROP); } memcpy(&pd->jumbolen, jumbo.ip6oj_jumbo_len, sizeof(pd->jumbolen)); pd->jumbolen = ntohl(pd->jumbolen); if (pd->jumbolen < IPV6_MAXPACKET) { - DPFPRINTF(PF_DEBUG_MISC, ("IPv6 short jumbolen")); + DPFPRINTF(PF_DEBUG_MISC, "IPv6 short jumbolen"); REASON_SET(reason, PFRES_IPOPTIONS); return (PF_DROP); } break; + case 
IP6OPT_ROUTER_ALERT: + pd->badopts |= PF_OPT_ROUTER_ALERT; + break; default: + pd->badopts |= PF_OPT_OTHER; break; } off += sizeof(opt) + opt.ip6o_len; @@ -9909,6 +9990,7 @@ pf_walk_header6(struct pf_pdesc *pd, struct ip6_hdr *h, u_short *reason) { struct ip6_frag frag; struct ip6_ext ext; + struct icmp6_hdr icmp6; struct ip6_rthdr rthdr; uint32_t end; int hdr_cnt, fraghdr_cnt = 0, rthdr_cnt = 0; @@ -9920,27 +10002,40 @@ pf_walk_header6(struct pf_pdesc *pd, struct ip6_hdr *h, u_short *reason) for (hdr_cnt = 0; hdr_cnt < PF_HDR_LIMIT; hdr_cnt++) { switch (pd->proto) { case IPPROTO_ROUTING: - case IPPROTO_HOPOPTS: case IPPROTO_DSTOPTS: - pd->badopts++; + pd->badopts |= PF_OPT_OTHER; + break; + case IPPROTO_HOPOPTS: + if (!pf_pull_hdr(pd->m, pd->off, &ext, sizeof(ext), + NULL, reason, AF_INET6)) { + DPFPRINTF(PF_DEBUG_MISC, "IPv6 short exthdr"); + return (PF_DROP); + } + if (pf_walk_option6(pd, h, pd->off + sizeof(ext), + pd->off + (ext.ip6e_len + 1) * 8, + reason) != PF_PASS) + return (PF_DROP); + /* option header which contains only padding is fishy */ + if (pd->badopts == 0) + pd->badopts |= PF_OPT_OTHER; break; } switch (pd->proto) { case IPPROTO_FRAGMENT: if (fraghdr_cnt++) { - DPFPRINTF(PF_DEBUG_MISC, ("IPv6 multiple fragment")); + DPFPRINTF(PF_DEBUG_MISC, "IPv6 multiple fragment"); REASON_SET(reason, PFRES_FRAG); return (PF_DROP); } /* jumbo payload packets cannot be fragmented */ if (pd->jumbolen != 0) { - DPFPRINTF(PF_DEBUG_MISC, ("IPv6 fragmented jumbo")); + DPFPRINTF(PF_DEBUG_MISC, "IPv6 fragmented jumbo"); REASON_SET(reason, PFRES_FRAG); return (PF_DROP); } if (!pf_pull_hdr(pd->m, pd->off, &frag, sizeof(frag), NULL, reason, AF_INET6)) { - DPFPRINTF(PF_DEBUG_MISC, ("IPv6 short fragment")); + DPFPRINTF(PF_DEBUG_MISC, "IPv6 short fragment"); return (PF_DROP); } /* stop walking over non initial fragments */ @@ -9956,7 +10051,7 @@ pf_walk_header6(struct pf_pdesc *pd, struct ip6_hdr *h, u_short *reason) break; case IPPROTO_ROUTING: if (rthdr_cnt++) { - DPFPRINTF(PF_DEBUG_MISC, ("IPv6 multiple rthdr")); + DPFPRINTF(PF_DEBUG_MISC, "IPv6 multiple rthdr"); REASON_SET(reason, PFRES_IPOPTIONS); return (PF_DROP); } @@ -9968,11 +10063,11 @@ pf_walk_header6(struct pf_pdesc *pd, struct ip6_hdr *h, u_short *reason) } if (!pf_pull_hdr(pd->m, pd->off, &rthdr, sizeof(rthdr), NULL, reason, AF_INET6)) { - DPFPRINTF(PF_DEBUG_MISC, ("IPv6 short rthdr")); + DPFPRINTF(PF_DEBUG_MISC, "IPv6 short rthdr"); return (PF_DROP); } if (rthdr.ip6r_type == IPV6_RTHDR_TYPE_0) { - DPFPRINTF(PF_DEBUG_MISC, ("IPv6 rthdr0")); + DPFPRINTF(PF_DEBUG_MISC, "IPv6 rthdr0"); REASON_SET(reason, PFRES_IPOPTIONS); return (PF_DROP); } @@ -9980,7 +10075,7 @@ pf_walk_header6(struct pf_pdesc *pd, struct ip6_hdr *h, u_short *reason) case IPPROTO_HOPOPTS: /* RFC2460 4.1: Hop-by-Hop only after IPv6 header */ if (pd->proto == IPPROTO_HOPOPTS && hdr_cnt > 0) { - DPFPRINTF(PF_DEBUG_MISC, ("IPv6 hopopts not first")); + DPFPRINTF(PF_DEBUG_MISC, "IPv6 hopopts not first"); REASON_SET(reason, PFRES_IPOPTIONS); return (PF_DROP); } @@ -9989,7 +10084,7 @@ pf_walk_header6(struct pf_pdesc *pd, struct ip6_hdr *h, u_short *reason) case IPPROTO_DSTOPTS: if (!pf_pull_hdr(pd->m, pd->off, &ext, sizeof(ext), NULL, reason, AF_INET6)) { - DPFPRINTF(PF_DEBUG_MISC, ("IPv6 short exthdr")); + DPFPRINTF(PF_DEBUG_MISC, "IPv6 short exthdr"); return (PF_DROP); } /* fragments may be short */ @@ -10001,18 +10096,11 @@ pf_walk_header6(struct pf_pdesc *pd, struct ip6_hdr *h, u_short *reason) /* reassembly needs the ext header before the frag */ if (pd->fragoff == 0) 
pd->extoff = pd->off; - if (pd->proto == IPPROTO_HOPOPTS && pd->fragoff == 0) { - if (pf_walk_option6(pd, h, - pd->off + sizeof(ext), - pd->off + (ext.ip6e_len + 1) * 8, reason) - != PF_PASS) - return (PF_DROP); - if (ntohs(h->ip6_plen) == 0 && pd->jumbolen != 0) { - DPFPRINTF(PF_DEBUG_MISC, - ("IPv6 missing jumbo")); - REASON_SET(reason, PFRES_IPOPTIONS); - return (PF_DROP); - } + if (pd->proto == IPPROTO_HOPOPTS && pd->fragoff == 0 && + ntohs(h->ip6_plen) == 0 && pd->jumbolen != 0) { + DPFPRINTF(PF_DEBUG_MISC, "IPv6 missing jumbo"); + REASON_SET(reason, PFRES_IPOPTIONS); + return (PF_DROP); } if (pd->proto == IPPROTO_AH) pd->off += (ext.ip6e_len + 2) * 4; @@ -10020,10 +10108,45 @@ pf_walk_header6(struct pf_pdesc *pd, struct ip6_hdr *h, u_short *reason) pd->off += (ext.ip6e_len + 1) * 8; pd->proto = ext.ip6e_nxt; break; + case IPPROTO_ICMPV6: + /* fragments may be short, ignore inner header then */ + if (pd->fragoff != 0 && end < pd->off + sizeof(icmp6)) { + pd->off = pd->fragoff; + pd->proto = IPPROTO_FRAGMENT; + return (PF_PASS); + } + if (!pf_pull_hdr(pd->m, pd->off, &icmp6, sizeof(icmp6), + NULL, reason, AF_INET6)) { + DPFPRINTF(PF_DEBUG_MISC, + "IPv6 short icmp6hdr"); + return (PF_DROP); + } + /* ICMP multicast packets have router alert options */ + switch (icmp6.icmp6_type) { + case MLD_LISTENER_QUERY: + case MLD_LISTENER_REPORT: + case MLD_LISTENER_DONE: + case MLDV2_LISTENER_REPORT: + /* + * According to RFC 2710 all MLD messages are + * sent with hop-limit (ttl) set to 1, and link + * local source address. If either one is + * missing then MLD message is invalid and + * should be discarded. + */ + if ((h->ip6_hlim != 1) || + !IN6_IS_ADDR_LINKLOCAL(&h->ip6_src)) { + DPFPRINTF(PF_DEBUG_MISC, "Invalid MLD"); + REASON_SET(reason, PFRES_IPOPTIONS); + return (PF_DROP); + } + pd->badopts &= ~PF_OPT_ROUTER_ALERT; + break; + } + return (PF_PASS); case IPPROTO_TCP: case IPPROTO_UDP: case IPPROTO_SCTP: - case IPPROTO_ICMPV6: /* fragments may be short, ignore inner header then */ if (pd->fragoff != 0 && end < pd->off + (pd->proto == IPPROTO_TCP ? 
sizeof(struct tcphdr) : @@ -10038,7 +10161,7 @@ pf_walk_header6(struct pf_pdesc *pd, struct ip6_hdr *h, u_short *reason) return (PF_PASS); } } - DPFPRINTF(PF_DEBUG_MISC, ("IPv6 nested extension header limit")); + DPFPRINTF(PF_DEBUG_MISC, "IPv6 nested extension header limit"); REASON_SET(reason, PFRES_IPOPTIONS); return (PF_DROP); } @@ -10083,8 +10206,15 @@ pf_setup_pdesc(sa_family_t af, int dir, struct pf_pdesc *pd, struct mbuf **m0, if (__predict_false((*m0)->m_len < sizeof(struct ip)) && (pd->m = *m0 = m_pullup(*m0, sizeof(struct ip))) == NULL) { DPFPRINTF(PF_DEBUG_URGENT, - ("%s: m_len < sizeof(struct ip), pullup failed\n", - __func__)); + "%s: m_len < sizeof(struct ip), pullup failed", + __func__); + *action = PF_DROP; + REASON_SET(reason, PFRES_SHORT); + return (-1); + } + + h = mtod(pd->m, struct ip *); + if (pd->m->m_pkthdr.len < ntohs(h->ip_len)) { *action = PF_DROP; REASON_SET(reason, PFRES_SHORT); return (-1); @@ -10097,13 +10227,7 @@ pf_setup_pdesc(sa_family_t af, int dir, struct pf_pdesc *pd, struct mbuf **m0, return (-1); } *m0 = pd->m; - h = mtod(pd->m, struct ip *); - if (pd->m->m_pkthdr.len < ntohs(h->ip_len)) { - *action = PF_DROP; - REASON_SET(reason, PFRES_SHORT); - return (-1); - } if (pf_walk_header(pd, h, reason) != PF_PASS) { *action = PF_DROP; @@ -10133,8 +10257,8 @@ pf_setup_pdesc(sa_family_t af, int dir, struct pf_pdesc *pd, struct mbuf **m0, if (__predict_false((*m0)->m_len < sizeof(struct ip6_hdr)) && (pd->m = *m0 = m_pullup(*m0, sizeof(struct ip6_hdr))) == NULL) { DPFPRINTF(PF_DEBUG_URGENT, - ("%s: m_len < sizeof(struct ip6_hdr)" - ", pullup failed\n", __func__)); + "%s: m_len < sizeof(struct ip6_hdr)" + ", pullup failed", __func__); *action = PF_DROP; REASON_SET(reason, PFRES_SHORT); return (-1); @@ -10148,6 +10272,15 @@ pf_setup_pdesc(sa_family_t af, int dir, struct pf_pdesc *pd, struct mbuf **m0, return (-1); } + /* + * we do not support jumbogram. if we keep going, zero ip6_plen + * will do something bad, so drop the packet for now. + */ + if (htons(h->ip6_plen) == 0) { + *action = PF_DROP; + return (-1); + } + if (pf_walk_header6(pd, h, reason) != PF_PASS) { *action = PF_DROP; return (-1); @@ -10167,15 +10300,6 @@ pf_setup_pdesc(sa_family_t af, int dir, struct pf_pdesc *pd, struct mbuf **m0, pd->virtual_proto = (pd->fragoff != 0) ? PF_VPROTO_FRAGMENT : pd->proto; - /* - * we do not support jumbogram. if we keep going, zero ip6_plen - * will do something bad, so drop the packet for now. 
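pf_walk_header(), as extended above, defers IPv4 options to pf_walk_option(): a defensive TLV walk in which EOL terminates, NOP advances one byte, and every other option must carry a length byte of at least 2 that stays inside the option area. The loop in isolation (userspace rendition over a flat buffer):

    #include <stdint.h>
    #include <stddef.h>

    #define IPOPT_EOL 0
    #define IPOPT_NOP 1

    /* Returns 0 if the option area parses cleanly, -1 otherwise. */
    static int
    walk_ip_options(const uint8_t *opts, size_t len)
    {
            size_t off = 0;

            while (off < len) {
                    uint8_t type = opts[off];

                    if (type == IPOPT_EOL)
                            break;
                    if (type == IPOPT_NOP) {
                            off++;
                            continue;
                    }
                    if (off + 2 > len)      /* no room for a length byte */
                            return (-1);
                    uint8_t olen = opts[off + 1];
                    if (olen < 2 || off + olen > len)
                            return (-1);    /* short or overlong option */
                    off += olen;
            }
            return (0);
    }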
- */ - if (htons(h->ip6_plen) == 0) { - *action = PF_DROP; - return (-1); - } - /* We do IP header normalization and packet reassembly here */ if (pf_normalize_ip6(pd->fragoff, reason, pd) != PF_PASS) { @@ -10494,8 +10618,8 @@ pf_test(sa_family_t af, int dir, int pflags, struct ifnet *ifp, struct mbuf **m0 if (__predict_false(kif == NULL)) { DPFPRINTF(PF_DEBUG_URGENT, - ("%s: kif == NULL, if_xname %s\n", - __func__, ifp->if_xname)); + "%s: kif == NULL, if_xname %s", + __func__, ifp->if_xname); return (PF_DROP); } if (kif->pfik_flags & PFI_IFLAG_SKIP) { @@ -10699,14 +10823,14 @@ pf_test(sa_family_t af, int dir, int pflags, struct ifnet *ifp, struct mbuf **m0 action = PF_DROP; REASON_SET(&reason, PFRES_NORM); DPFPRINTF(PF_DEBUG_MISC, - ("dropping IPv6 packet with ICMPv4 payload")); + "dropping IPv6 packet with ICMPv4 payload"); break; } if (pd.virtual_proto == IPPROTO_ICMPV6 && af != AF_INET6) { action = PF_DROP; REASON_SET(&reason, PFRES_NORM); DPFPRINTF(PF_DEBUG_MISC, - ("pf: dropping IPv4 packet with ICMPv6 payload\n")); + "pf: dropping IPv4 packet with ICMPv6 payload"); break; } action = pf_test_state_icmp(&s, &pd, &reason); @@ -10732,12 +10856,12 @@ done: if (s) memcpy(&pd.act, &s->act, sizeof(s->act)); - if (action == PF_PASS && pd.badopts && !pd.act.allow_opts) { + if (action == PF_PASS && pd.badopts != 0 && !pd.act.allow_opts) { action = PF_DROP; REASON_SET(&reason, PFRES_IPOPTIONS); pd.act.log = PF_LOG_FORCE; DPFPRINTF(PF_DEBUG_MISC, - ("pf: dropping packet with dangerous headers\n")); + "pf: dropping packet with dangerous headers"); } if (pd.act.max_pkt_size && pd.act.max_pkt_size && @@ -10746,7 +10870,7 @@ done: REASON_SET(&reason, PFRES_NORM); pd.act.log = PF_LOG_FORCE; DPFPRINTF(PF_DEBUG_MISC, - ("pf: dropping overly long packet\n")); + "pf: dropping overly long packet"); } if (s) { @@ -10778,7 +10902,7 @@ done: REASON_SET(&reason, PFRES_MEMORY); pd.act.log = PF_LOG_FORCE; DPFPRINTF(PF_DEBUG_MISC, - ("pf: failed to allocate 802.1q mtag\n")); + "pf: failed to allocate 802.1q mtag"); } } @@ -10835,7 +10959,7 @@ done: REASON_SET(&reason, PFRES_MEMORY); pd.act.log = PF_LOG_FORCE; DPFPRINTF(PF_DEBUG_MISC, - ("pf: failed to allocate tag\n")); + "pf: failed to allocate tag"); } else { pd.pf_mtag->flags |= PF_MTAG_FLAG_FASTFWD_OURS_PRESENT; @@ -10852,7 +10976,7 @@ done: REASON_SET(&reason, PFRES_MEMORY); pd.act.log = PF_LOG_FORCE; DPFPRINTF(PF_DEBUG_MISC, - ("pf: failed to allocate divert tag\n")); + "pf: failed to allocate divert tag"); } } /* XXX: Anybody working on it?! 
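pd->badopts is now a bitmask instead of a counter, which is what lets IGMP and MLD clear only the router-alert bit while any other suspicious option still triggers the dangerous-headers drop above unless allow-opts is set. A distilled model (the PF_OPT_* values here are illustrative; the real ones live in pfvar.h):

    #include <stdint.h>
    #include <stdbool.h>

    #define PF_OPT_ROUTER_ALERT     0x01    /* values illustrative only */
    #define PF_OPT_JUMBO            0x02
    #define PF_OPT_OTHER            0x04

    /* IGMP/MLD legitimately carry router alert: clear just that bit. */
    static bool
    drop_for_options(uint8_t badopts, bool is_mld_or_igmp, bool allow_opts)
    {
            if (is_mld_or_igmp)
                    badopts &= (uint8_t)~PF_OPT_ROUTER_ALERT;
            return (badopts != 0 && !allow_opts);
    }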
*/ diff --git a/sys/netpfil/pf/pf_ioctl.c b/sys/netpfil/pf/pf_ioctl.c index 5c69c395c5fc..9abc07c36788 100644 --- a/sys/netpfil/pf/pf_ioctl.c +++ b/sys/netpfil/pf/pf_ioctl.c @@ -217,8 +217,6 @@ static u_int16_t tagname2tag(struct pf_tagset *, const char *); static u_int16_t pf_tagname2tag(const char *); static void tag_unref(struct pf_tagset *, u_int16_t); -#define DPFPRINTF(n, x) if (V_pf_status.debug >= (n)) printf x - struct cdev *pf_dev; /* @@ -2094,19 +2092,18 @@ pf_ioctl_addrule(struct pf_krule *rule, uint32_t ticket, int rs_num; int error = 0; - if ((rule->return_icmp >> 8) > ICMP_MAXTYPE) { - error = EINVAL; - goto errout_unlocked; - } +#define ERROUT(x) ERROUT_FUNCTION(errout, x) +#define ERROUT_UNLOCKED(x) ERROUT_FUNCTION(errout_unlocked, x) -#define ERROUT(x) ERROUT_FUNCTION(errout, x) + if ((rule->return_icmp >> 8) > ICMP_MAXTYPE) + ERROUT_UNLOCKED(EINVAL); if ((error = pf_rule_checkaf(rule))) - ERROUT(error); + ERROUT_UNLOCKED(error); if (pf_validate_range(rule->src.port_op, rule->src.port)) - ERROUT(EINVAL); + ERROUT_UNLOCKED(EINVAL); if (pf_validate_range(rule->dst.port_op, rule->dst.port)) - ERROUT(EINVAL); + ERROUT_UNLOCKED(EINVAL); if (rule->ifname[0]) kif = pf_kkif_create(M_WAITOK); @@ -2143,14 +2140,14 @@ pf_ioctl_addrule(struct pf_krule *rule, uint32_t ticket, ERROUT(EINVAL); if (ticket != ruleset->rules[rs_num].inactive.ticket) { DPFPRINTF(PF_DEBUG_MISC, - ("ticket: %d != [%d]%d\n", ticket, rs_num, - ruleset->rules[rs_num].inactive.ticket)); + "ticket: %d != [%d]%d", ticket, rs_num, + ruleset->rules[rs_num].inactive.ticket); ERROUT(EBUSY); } if (pool_ticket != V_ticket_pabuf) { DPFPRINTF(PF_DEBUG_MISC, - ("pool_ticket: %d != %d\n", pool_ticket, - V_ticket_pabuf)); + "pool_ticket: %d != %d", pool_ticket, + V_ticket_pabuf); ERROUT(EBUSY); } /* @@ -2296,6 +2293,7 @@ pf_ioctl_addrule(struct pf_krule *rule, uint32_t ticket, return (0); #undef ERROUT +#undef ERROUT_UNLOCKED errout: PF_RULES_WUNLOCK(); PF_CONFIG_UNLOCK(); @@ -2469,7 +2467,7 @@ pf_start(void) V_pf_status.since = time_uptime; new_unrhdr64(&V_pf_stateid, time_second); - DPFPRINTF(PF_DEBUG_MISC, ("pf: started\n")); + DPFPRINTF(PF_DEBUG_MISC, "pf: started"); } sx_xunlock(&V_pf_ioctl_lock); @@ -2489,7 +2487,7 @@ pf_stop(void) dehook_pf(); dehook_pf_eth(); V_pf_status.since = time_uptime; - DPFPRINTF(PF_DEBUG_MISC, ("pf: stopped\n")); + DPFPRINTF(PF_DEBUG_MISC, "pf: stopped"); } sx_xunlock(&V_pf_ioctl_lock); @@ -3264,9 +3262,9 @@ DIOCGETETHRULE_error: if (nvlist_get_number(nvl, "ticket") != ruleset->inactive.ticket) { DPFPRINTF(PF_DEBUG_MISC, - ("ticket: %d != %d\n", + "ticket: %d != %d", (u_int32_t)nvlist_get_number(nvl, "ticket"), - ruleset->inactive.ticket)); + ruleset->inactive.ticket); ERROUT(EBUSY); } @@ -4340,7 +4338,7 @@ DIOCGETSTATESV2_full: if (error == 0) V_pf_altq_running = 1; PF_RULES_WUNLOCK(); - DPFPRINTF(PF_DEBUG_MISC, ("altq: started\n")); + DPFPRINTF(PF_DEBUG_MISC, "altq: started"); break; } @@ -4359,7 +4357,7 @@ DIOCGETSTATESV2_full: if (error == 0) V_pf_altq_running = 0; PF_RULES_WUNLOCK(); - DPFPRINTF(PF_DEBUG_MISC, ("altq: stopped\n")); + DPFPRINTF(PF_DEBUG_MISC, "altq: stopped"); break; } @@ -6457,9 +6455,9 @@ shutdown_pf(void) for (rs_num = 0; rs_num < PF_RULESET_MAX; ++rs_num) { if ((error = pf_begin_rules(&t[rs_num], rs_num, anchor->path)) != 0) { - DPFPRINTF(PF_DEBUG_MISC, ("%s: " - "anchor.path=%s rs_num=%d\n", - __func__, anchor->path, rs_num)); + DPFPRINTF(PF_DEBUG_MISC, "%s: " + "anchor.path=%s rs_num=%d", + __func__, anchor->path, rs_num); goto error; /* XXX: rollback? 
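The ERROUT/ERROUT_UNLOCKED pair introduced above is the usual pf error-exit idiom built on ERROUT_FUNCTION(); the new _UNLOCKED variant exists so argument validation can bail out before the rules lock is taken. Spelled out so the sketch stands alone:

    /* Same shape as pf's ERROUT_FUNCTION(): record and jump. */
    #define ERROUT_FUNCTION(target, x)      do { error = (x); goto target; } while (0)
    #define ERROUT(x)               ERROUT_FUNCTION(errout, x)
    #define ERROUT_UNLOCKED(x)      ERROUT_FUNCTION(errout_unlocked, x)

    static int
    demo(int validated)
    {
            int error = 0;

            if (!validated)
                    ERROUT_UNLOCKED(22);    /* EINVAL-style, no lock yet */
            /* the lock would be taken here */
            ERROUT(16);                     /* EBUSY-style, lock held */
    errout:
            /* the unlock would go here */
    errout_unlocked:
            return (error);
    }

    int
    main(void)
    {
            return (demo(0) == 22 && demo(1) == 16 ? 0 : 1);
    }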
*/ } } @@ -6481,9 +6479,9 @@ shutdown_pf(void) eth_anchor->refcnt = 1; if ((error = pf_begin_eth(&t[0], eth_anchor->path)) != 0) { - DPFPRINTF(PF_DEBUG_MISC, ("%s: eth " - "anchor.path=%s\n", __func__, - eth_anchor->path)); + DPFPRINTF(PF_DEBUG_MISC, "%s: eth " + "anchor.path=%s", __func__, + eth_anchor->path); goto error; } error = pf_commit_eth(t[0], eth_anchor->path); @@ -6492,27 +6490,27 @@ shutdown_pf(void) if ((error = pf_begin_rules(&t[0], PF_RULESET_SCRUB, &nn)) != 0) { - DPFPRINTF(PF_DEBUG_MISC, ("%s: SCRUB\n", __func__)); + DPFPRINTF(PF_DEBUG_MISC, "%s: SCRUB", __func__); break; } if ((error = pf_begin_rules(&t[1], PF_RULESET_FILTER, &nn)) != 0) { - DPFPRINTF(PF_DEBUG_MISC, ("%s: FILTER\n", __func__)); + DPFPRINTF(PF_DEBUG_MISC, "%s: FILTER", __func__); break; /* XXX: rollback? */ } if ((error = pf_begin_rules(&t[2], PF_RULESET_NAT, &nn)) != 0) { - DPFPRINTF(PF_DEBUG_MISC, ("%s: NAT\n", __func__)); + DPFPRINTF(PF_DEBUG_MISC, "%s: NAT", __func__); break; /* XXX: rollback? */ } if ((error = pf_begin_rules(&t[3], PF_RULESET_BINAT, &nn)) != 0) { - DPFPRINTF(PF_DEBUG_MISC, ("%s: BINAT\n", __func__)); + DPFPRINTF(PF_DEBUG_MISC, "%s: BINAT", __func__); break; /* XXX: rollback? */ } if ((error = pf_begin_rules(&t[4], PF_RULESET_RDR, &nn)) != 0) { - DPFPRINTF(PF_DEBUG_MISC, ("%s: RDR\n", __func__)); + DPFPRINTF(PF_DEBUG_MISC, "%s: RDR", __func__); break; /* XXX: rollback? */ } @@ -6531,7 +6529,7 @@ shutdown_pf(void) break; if ((error = pf_begin_eth(&t[0], &nn)) != 0) { - DPFPRINTF(PF_DEBUG_MISC, ("%s: eth\n", __func__)); + DPFPRINTF(PF_DEBUG_MISC, "%s: eth", __func__); break; } error = pf_commit_eth(t[0], &nn); @@ -6539,7 +6537,7 @@ shutdown_pf(void) #ifdef ALTQ if ((error = pf_begin_altq(&t[0])) != 0) { - DPFPRINTF(PF_DEBUG_MISC, ("%s: ALTQ\n", __func__)); + DPFPRINTF(PF_DEBUG_MISC, "%s: ALTQ", __func__); break; } pf_commit_altq(t[0]); diff --git a/sys/netpfil/pf/pf_lb.c b/sys/netpfil/pf/pf_lb.c index 9c7863bb301e..ea0d6facf695 100644 --- a/sys/netpfil/pf/pf_lb.c +++ b/sys/netpfil/pf/pf_lb.c @@ -71,8 +71,6 @@ #define V_pf_rdr_srcport_rewrite_tries VNET(pf_rdr_srcport_rewrite_tries) VNET_DEFINE_STATIC(int, pf_rdr_srcport_rewrite_tries) = 16; -#define DPFPRINTF(n, x) if (V_pf_status.debug >= (n)) printf x - static uint64_t pf_hash(struct pf_addr *, struct pf_addr *, struct pf_poolhashkey *, sa_family_t); struct pf_krule *pf_match_translation(int, struct pf_test_ctx *); @@ -904,19 +902,19 @@ pf_get_transaddr(struct pf_test_ctx *ctx, struct pf_krule *r, if (pf_get_mape_sport(pd, r, naddr, nportp, &ctx->udp_mapping, rpool)) { DPFPRINTF(PF_DEBUG_MISC, - ("pf: MAP-E port allocation (%u/%u/%u)" - " failed\n", + "pf: MAP-E port allocation (%u/%u/%u)" + " failed", rpool->mape.offset, rpool->mape.psidlen, - rpool->mape.psid)); + rpool->mape.psid); reason = PFRES_MAPFAILED; goto notrans; } } else if (pf_get_sport(pd, r, naddr, nportp, low, high, rpool, &ctx->udp_mapping, PF_SN_NAT)) { DPFPRINTF(PF_DEBUG_MISC, - ("pf: NAT proxy port allocation (%u-%u) failed\n", - rpool->proxy_port[0], rpool->proxy_port[1])); + "pf: NAT proxy port allocation (%u-%u) failed", + rpool->proxy_port[0], rpool->proxy_port[1]); reason = PFRES_MAPFAILED; goto notrans; } @@ -1085,13 +1083,13 @@ pf_get_transaddr(struct pf_test_ctx *ctx, struct pf_krule *r, * the state may be reused if the TCP state is terminal. 
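The MAP-E allocation failure logged above comes from pf_get_mape_sport(), which may only pick source ports whose embedded PSID bits match the pool's offset/psidlen/psid configuration. A sketch of the RFC 7597 port-set membership test (helper name invented; assumes a_offset + k_psidlen <= 16):

    #include <stdint.h>
    #include <stdbool.h>

    /*
     * RFC 7597 splits a 16-bit port into A (a bits), PSID (k bits)
     * and M (remaining bits); a port belongs to the set iff its
     * PSID field equals the configured value.
     */
    static bool
    port_in_psid(uint16_t port, unsigned a_offset, unsigned k_psidlen,
        uint16_t psid)
    {
            if (k_psidlen == 0)
                    return (true);
            return (((port >> (16 - a_offset - k_psidlen)) &
                ((1u << k_psidlen) - 1)) == psid);
    }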
*/ DPFPRINTF(PF_DEBUG_MISC, - ("pf: RDR source port allocation failed\n")); + "pf: RDR source port allocation failed"); break; out: DPFPRINTF(PF_DEBUG_MISC, - ("pf: RDR source port allocation %u->%u\n", - ntohs(pd->nsport), ntohs(ctx->nk->port[0]))); + "pf: RDR source port allocation %u->%u", + ntohs(pd->nsport), ntohs(ctx->nk->port[0])); break; } default: @@ -1140,8 +1138,8 @@ pf_get_transaddr_af(struct pf_krule *r, struct pf_pdesc *pd) if (pf_get_sport(pd, r, &nsaddr, &nport, r->nat.proxy_port[0], r->nat.proxy_port[1], &r->nat, NULL, PF_SN_NAT)) { DPFPRINTF(PF_DEBUG_MISC, - ("pf: af-to NAT proxy port allocation (%u-%u) failed", - r->nat.proxy_port[0], r->nat.proxy_port[1])); + "pf: af-to NAT proxy port allocation (%u-%u) failed", + r->nat.proxy_port[0], r->nat.proxy_port[1]); return (-1); } diff --git a/sys/netpfil/pf/pf_norm.c b/sys/netpfil/pf/pf_norm.c index 369292ca365e..8cea9557633c 100644 --- a/sys/netpfil/pf/pf_norm.c +++ b/sys/netpfil/pf/pf_norm.c @@ -160,13 +160,6 @@ static int pf_reassemble6(struct mbuf **, struct ip6_frag *, uint16_t, uint16_t, u_short *); #endif /* INET6 */ -#define DPFPRINTF(x) do { \ - if (V_pf_status.debug >= PF_DEBUG_MISC) { \ - printf("%s: ", __func__); \ - printf x ; \ - } \ -} while(0) - #ifdef INET static void pf_ip2key(struct ip *ip, struct pf_frnode *key) @@ -262,7 +255,8 @@ pf_purge_fragments(uint32_t expire) if (frag->fr_timeout > expire) break; - DPFPRINTF(("expiring %d(%p)\n", frag->fr_id, frag)); + DPFPRINTF(PF_DEBUG_MISC, "expiring %d(%p)", + frag->fr_id, frag); pf_free_fragment(frag); } @@ -281,7 +275,7 @@ pf_flush_fragments(void) PF_FRAG_ASSERT(); goal = uma_zone_get_cur(V_pf_frent_z) * 9 / 10; - DPFPRINTF(("trying to free %d frag entriess\n", goal)); + DPFPRINTF(PF_DEBUG_MISC, "trying to free %d frag entriess", goal); while (goal < uma_zone_get_cur(V_pf_frent_z)) { frag = TAILQ_LAST(&V_pf_fragqueue, pf_fragqueue); if (frag) @@ -573,26 +567,30 @@ pf_fillup_fragment(struct pf_frnode *key, uint32_t id, /* No empty fragments. */ if (frent->fe_len == 0) { - DPFPRINTF(("bad fragment: len 0\n")); + DPFPRINTF(PF_DEBUG_MISC, "bad fragment: len 0"); goto bad_fragment; } /* All fragments are 8 byte aligned. */ if (frent->fe_mff && (frent->fe_len & 0x7)) { - DPFPRINTF(("bad fragment: mff and len %d\n", frent->fe_len)); + DPFPRINTF(PF_DEBUG_MISC, "bad fragment: mff and len %d", + frent->fe_len); goto bad_fragment; } /* Respect maximum length, IP_MAXPACKET == IPV6_MAXPACKET. */ if (frent->fe_off + frent->fe_len > IP_MAXPACKET) { - DPFPRINTF(("bad fragment: max packet %d\n", - frent->fe_off + frent->fe_len)); + DPFPRINTF(PF_DEBUG_MISC, "bad fragment: max packet %d", + frent->fe_off + frent->fe_len); goto bad_fragment; } - DPFPRINTF((key->fn_af == AF_INET ? - "reass frag %d @ %d-%d\n" : "reass frag %#08x @ %d-%d\n", - id, frent->fe_off, frent->fe_off + frent->fe_len)); + if (key->fn_af == AF_INET) + DPFPRINTF(PF_DEBUG_MISC, "reass frag %d @ %d-%d\n", + id, frent->fe_off, frent->fe_off + frent->fe_len); + else + DPFPRINTF(PF_DEBUG_MISC, "reass frag %#08x @ %d-%d", + id, frent->fe_off, frent->fe_off + frent->fe_len); /* Fully buffer all of the fragments in this fragment queue. 
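The alignment and length checks above are the classic reassembly invariants: a fragment with more-fragments set must be a multiple of 8 bytes (the offset field counts 8-byte units), and no fragment may extend past IP_MAXPACKET. In isolation:

    #include <stdint.h>
    #include <stdbool.h>

    #define IP_MAXPACKET 65535

    /* mff = "more fragments flag"; mirrors pf_fillup_fragment(). */
    static bool
    fragment_sane(uint16_t off, uint16_t len, bool mff)
    {
            if (len == 0)
                    return (false);         /* no empty fragments */
            if (mff && (len & 0x7))
                    return (false);         /* non-final must be 8-aligned */
            if ((uint32_t)off + len > IP_MAXPACKET)
                    return (false);         /* total length bound */
            return (true);
    }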
*/ frag = pf_find_fragment(key, id); @@ -690,10 +688,10 @@ pf_fillup_fragment(struct pf_frnode *key, uint32_t id, precut = prev->fe_off + prev->fe_len - frent->fe_off; if (precut >= frent->fe_len) { - DPFPRINTF(("new frag overlapped\n")); + DPFPRINTF(PF_DEBUG_MISC, "new frag overlapped"); goto drop_fragment; } - DPFPRINTF(("frag head overlap %d\n", precut)); + DPFPRINTF(PF_DEBUG_MISC, "frag head overlap %d", precut); m_adj(frent->fe_m, precut); frent->fe_off += precut; frent->fe_len -= precut; @@ -705,7 +703,8 @@ pf_fillup_fragment(struct pf_frnode *key, uint32_t id, aftercut = frent->fe_off + frent->fe_len - after->fe_off; if (aftercut < after->fe_len) { - DPFPRINTF(("frag tail overlap %d", aftercut)); + DPFPRINTF(PF_DEBUG_MISC, "frag tail overlap %d", + aftercut); m_adj(after->fe_m, aftercut); /* Fragment may switch queue as fe_off changes */ pf_frent_remove(frag, after); @@ -713,7 +712,8 @@ pf_fillup_fragment(struct pf_frnode *key, uint32_t id, after->fe_len -= aftercut; /* Insert into correct queue */ if (pf_frent_insert(frag, after, prev)) { - DPFPRINTF(("fragment requeue limit exceeded")); + DPFPRINTF(PF_DEBUG_MISC, + "fragment requeue limit exceeded"); m_freem(after->fe_m); uma_zfree(V_pf_frent_z, after); /* There is not way to recover */ @@ -723,7 +723,7 @@ pf_fillup_fragment(struct pf_frnode *key, uint32_t id, } /* This fragment is completely overlapped, lose it. */ - DPFPRINTF(("old frag overlapped\n")); + DPFPRINTF(PF_DEBUG_MISC, "old frag overlapped"); next = TAILQ_NEXT(after, fr_next); pf_frent_remove(frag, after); m_freem(after->fe_m); @@ -732,7 +732,7 @@ pf_fillup_fragment(struct pf_frnode *key, uint32_t id, /* If part of the queue gets too long, there is not way to recover. */ if (pf_frent_insert(frag, frent, prev)) { - DPFPRINTF(("fragment queue limit exceeded\n")); + DPFPRINTF(PF_DEBUG_MISC, "fragment queue limit exceeded"); goto bad_fragment; } @@ -748,7 +748,7 @@ free_fragment: * fragment, the entire datagram (and any constituent fragments) MUST * be silently discarded. */ - DPFPRINTF(("flush overlapping fragments\n")); + DPFPRINTF(PF_DEBUG_MISC, "flush overlapping fragments"); pf_free_fragment(frag); bad_fragment: @@ -826,7 +826,8 @@ pf_reassemble(struct mbuf **m0, u_short *reason) m = *m0 = NULL; if (frag->fr_holes) { - DPFPRINTF(("frag %d, holes %d\n", frag->fr_id, frag->fr_holes)); + DPFPRINTF(PF_DEBUG_MISC, "frag %d, holes %d", + frag->fr_id, frag->fr_holes); return (PF_PASS); /* drop because *m0 is NULL, no error */ } @@ -872,14 +873,14 @@ pf_reassemble(struct mbuf **m0, u_short *reason) ip->ip_off &= ~(IP_MF|IP_OFFMASK); if (hdrlen + total > IP_MAXPACKET) { - DPFPRINTF(("drop: too big: %d\n", total)); + DPFPRINTF(PF_DEBUG_MISC, "drop: too big: %d", total); ip->ip_len = 0; REASON_SET(reason, PFRES_SHORT); /* PF_DROP requires a valid mbuf *m0 in pf_test() */ return (PF_DROP); } - DPFPRINTF(("complete: %p(%d)\n", m, ntohs(ip->ip_len))); + DPFPRINTF(PF_DEBUG_MISC, "complete: %p(%d)", m, ntohs(ip->ip_len)); return (PF_PASS); } #endif /* INET */ @@ -931,8 +932,8 @@ pf_reassemble6(struct mbuf **m0, struct ip6_frag *fraghdr, m = *m0 = NULL; if (frag->fr_holes) { - DPFPRINTF(("frag %d, holes %d\n", frag->fr_id, - frag->fr_holes)); + DPFPRINTF(PF_DEBUG_MISC, "frag %d, holes %d", frag->fr_id, + frag->fr_holes); PF_FRAG_UNLOCK(); return (PF_PASS); /* Drop because *m0 is NULL, no error. 
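The precut/aftercut arithmetic above trims partial overlaps instead of discarding the datagram; only a fully duplicated fragment, or an overlap the flush policy forbids, tears down the queue. A compact model of the head-overlap case, with m_adj() replaced by pointer arithmetic (invented types):

    #include <stdint.h>

    struct frag_lite {
            uint32_t off;
            uint32_t len;
            const uint8_t *data;
    };

    /*
     * Head overlap: the previous fragment ends past our start, so
     * drop the overlapped prefix, as the kernel does with m_adj().
     * Returns -1 when the new fragment is fully covered.
     */
    static int
    trim_head_overlap(const struct frag_lite *prev, struct frag_lite *cur)
    {
            uint32_t prev_end = prev->off + prev->len;

            if (prev_end <= cur->off)
                    return (0);             /* no overlap */
            uint32_t precut = prev_end - cur->off;
            if (precut >= cur->len)
                    return (-1);            /* fully overlapped: drop */
            cur->data += precut;
            cur->off += precut;
            cur->len -= precut;
            return (0);
    }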
*/ } @@ -993,14 +994,15 @@ pf_reassemble6(struct mbuf **m0, struct ip6_frag *fraghdr, ip6->ip6_nxt = proto; if (hdrlen - sizeof(struct ip6_hdr) + total > IPV6_MAXPACKET) { - DPFPRINTF(("drop: too big: %d\n", total)); + DPFPRINTF(PF_DEBUG_MISC, "drop: too big: %d", total); ip6->ip6_plen = 0; REASON_SET(reason, PFRES_SHORT); /* PF_DROP requires a valid mbuf *m0 in pf_test6(). */ return (PF_DROP); } - DPFPRINTF(("complete: %p(%d)\n", m, ntohs(ip6->ip6_plen))); + DPFPRINTF(PF_DEBUG_MISC, "complete: %p(%d)", m, + ntohs(ip6->ip6_plen)); return (PF_PASS); fail: @@ -1090,7 +1092,7 @@ pf_refragment6(struct ifnet *ifp, struct mbuf **m0, struct m_tag *mtag, action = PF_PASS; } else { /* Drop expects an mbuf to free. */ - DPFPRINTF(("refragment error %d\n", error)); + DPFPRINTF(PF_DEBUG_MISC, "refragment error %d", error); action = PF_DROP; } for (; m; m = t) { @@ -1230,7 +1232,7 @@ pf_normalize_ip(u_short *reason, struct pf_pdesc *pd) * no-df above, fine. Otherwise drop it. */ if (h->ip_off & htons(IP_DF)) { - DPFPRINTF(("IP_DF\n")); + DPFPRINTF(PF_DEBUG_MISC, "IP_DF"); goto bad; } @@ -1238,13 +1240,13 @@ pf_normalize_ip(u_short *reason, struct pf_pdesc *pd) /* All fragments are 8 byte aligned */ if (mff && (ip_len & 0x7)) { - DPFPRINTF(("mff and %d\n", ip_len)); + DPFPRINTF(PF_DEBUG_MISC, "mff and %d", ip_len); goto bad; } /* Respect maximum length */ if (fragoff + ip_len > IP_MAXPACKET) { - DPFPRINTF(("max packet %d\n", fragoff + ip_len)); + DPFPRINTF(PF_DEBUG_MISC, "max packet %d", fragoff + ip_len); goto bad; } @@ -1256,7 +1258,8 @@ pf_normalize_ip(u_short *reason, struct pf_pdesc *pd) /* Fully buffer all of the fragments * Might return a completely reassembled mbuf, or NULL */ PF_FRAG_LOCK(); - DPFPRINTF(("reass frag %d @ %d-%d\n", h->ip_id, fragoff, max)); + DPFPRINTF(PF_DEBUG_MISC, "reass frag %d @ %d-%d", + h->ip_id, fragoff, max); verdict = pf_reassemble(&pd->m, reason); PF_FRAG_UNLOCK(); @@ -1282,7 +1285,7 @@ pf_normalize_ip(u_short *reason, struct pf_pdesc *pd) return (PF_PASS); bad: - DPFPRINTF(("dropping bad fragment\n")); + DPFPRINTF(PF_DEBUG_MISC, "dropping bad fragment"); REASON_SET(reason, PFRES_FRAG); drop: if (r != NULL && r->log) @@ -1711,7 +1714,7 @@ pf_normalize_tcp_stateful(struct pf_pdesc *pd, (uptime.tv_sec - src->scrub->pfss_last.tv_sec > TS_MAX_IDLE || time_uptime - (state->creation / 1000) > TS_MAX_CONN)) { if (V_pf_status.debug >= PF_DEBUG_MISC) { - DPFPRINTF(("src idled out of PAWS\n")); + DPFPRINTF(PF_DEBUG_MISC, "src idled out of PAWS"); pf_print_state(state); printf("\n"); } @@ -1721,7 +1724,7 @@ pf_normalize_tcp_stateful(struct pf_pdesc *pd, if (dst->scrub && (dst->scrub->pfss_flags & PFSS_PAWS) && uptime.tv_sec - dst->scrub->pfss_last.tv_sec > TS_MAX_IDLE) { if (V_pf_status.debug >= PF_DEBUG_MISC) { - DPFPRINTF(("dst idled out of PAWS\n")); + DPFPRINTF(PF_DEBUG_MISC, "dst idled out of PAWS"); pf_print_state(state); printf("\n"); } @@ -1826,22 +1829,22 @@ pf_normalize_tcp_stateful(struct pf_pdesc *pd, * an old timestamp. */ - DPFPRINTF(("Timestamp failed %c%c%c%c\n", + DPFPRINTF(PF_DEBUG_MISC, "Timestamp failed %c%c%c%c", SEQ_LT(tsval, dst->scrub->pfss_tsecr) ? '0' : ' ', SEQ_GT(tsval, src->scrub->pfss_tsval + tsval_from_last) ? '1' : ' ', SEQ_GT(tsecr, dst->scrub->pfss_tsval) ? '2' : ' ', - SEQ_LT(tsecr, dst->scrub->pfss_tsval0)? '3' : ' ')); - DPFPRINTF((" tsval: %u tsecr: %u +ticks: %u " - "idle: %jus %lums\n", + SEQ_LT(tsecr, dst->scrub->pfss_tsval0)? 
'3' : ' '); + DPFPRINTF(PF_DEBUG_MISC, " tsval: %u tsecr: %u +ticks: " + "%u idle: %jus %lums", tsval, tsecr, tsval_from_last, (uintmax_t)delta_ts.tv_sec, - delta_ts.tv_usec / 1000)); - DPFPRINTF((" src->tsval: %u tsecr: %u\n", - src->scrub->pfss_tsval, src->scrub->pfss_tsecr)); - DPFPRINTF((" dst->tsval: %u tsecr: %u tsval0: %u" - "\n", dst->scrub->pfss_tsval, - dst->scrub->pfss_tsecr, dst->scrub->pfss_tsval0)); + delta_ts.tv_usec / 1000); + DPFPRINTF(PF_DEBUG_MISC, " src->tsval: %u tsecr: %u", + src->scrub->pfss_tsval, src->scrub->pfss_tsecr); + DPFPRINTF(PF_DEBUG_MISC, " dst->tsval: %u tsecr: %u " + "tsval0: %u", dst->scrub->pfss_tsval, + dst->scrub->pfss_tsecr, dst->scrub->pfss_tsval0); if (V_pf_status.debug >= PF_DEBUG_MISC) { pf_print_state(state); pf_print_flags(tcp_get_flags(th)); @@ -1891,8 +1894,8 @@ pf_normalize_tcp_stateful(struct pf_pdesc *pd, * stack changed its RFC1323 behavior?!?! */ if (V_pf_status.debug >= PF_DEBUG_MISC) { - DPFPRINTF(("Did not receive expected RFC1323 " - "timestamp\n")); + DPFPRINTF(PF_DEBUG_MISC, "Did not receive expected " + "RFC1323 timestamp"); pf_print_state(state); pf_print_flags(tcp_get_flags(th)); printf("\n"); @@ -1919,9 +1922,9 @@ pf_normalize_tcp_stateful(struct pf_pdesc *pd, if (V_pf_status.debug >= PF_DEBUG_MISC && dst->scrub && (dst->scrub->pfss_flags & PFSS_TIMESTAMP)) { /* Don't warn if other host rejected RFC1323 */ - DPFPRINTF(("Broken RFC1323 stack did not " - "timestamp data packet. Disabled PAWS " - "security.\n")); + DPFPRINTF(PF_DEBUG_MISC, "Broken RFC1323 stack did " + "not timestamp data packet. Disabled PAWS " + "security."); pf_print_state(state); pf_print_flags(tcp_get_flags(th)); printf("\n"); diff --git a/sys/netpfil/pf/pf_osfp.c b/sys/netpfil/pf/pf_osfp.c index 3e00cc7c80a2..150626c5f3fb 100644 --- a/sys/netpfil/pf/pf_osfp.c +++ b/sys/netpfil/pf/pf_osfp.c @@ -40,9 +40,6 @@ #endif static MALLOC_DEFINE(M_PFOSFP, "pf_osfp", "pf(4) operating system fingerprints"); -#define DPFPRINTF(format, x...) 
\ - if (V_pf_status.debug >= PF_DEBUG_NOISY) \ - printf(format , ##x) SLIST_HEAD(pf_osfp_list, pf_os_fingerprint); VNET_DEFINE_STATIC(struct pf_osfp_list, pf_osfp_list) = @@ -189,8 +186,8 @@ pf_osfp_fingerprint_hdr(const struct ip *ip, const struct ip6_hdr *ip6, const st optlen = MAX(optlen, 1); /* paranoia */ } - DPFPRINTF("fingerprinted %s:%d %d:%d:%d:%d:%llx (%d) " - "(TS=%s,M=%s%d,W=%s%d)\n", + DPFPRINTF(PF_DEBUG_NOISY, "fingerprinted %s:%d %d:%d:%d:%d:%llx (%d) " + "(TS=%s,M=%s%d,W=%s%d)", srcname, ntohs(tcp->th_sport), fp.fp_wsize, fp.fp_ttl, (fp.fp_flags & PF_OSFP_DF) != 0, fp.fp_psize, (long long int)fp.fp_tcpopts, fp.fp_optcnt, @@ -219,7 +216,7 @@ pf_osfp_match(struct pf_osfp_enlist *list, pf_osfp_t os) if (os == PF_OSFP_ANY) return (1); if (list == NULL) { - DPFPRINTF("osfp no match against %x\n", os); + DPFPRINTF(PF_DEBUG_NOISY, "osfp no match against %x", os); return (os == PF_OSFP_UNKNOWN); } PF_OSFP_UNPACK(os, os_class, os_version, os_subtype); @@ -228,13 +225,13 @@ pf_osfp_match(struct pf_osfp_enlist *list, pf_osfp_t os) if ((os_class == PF_OSFP_ANY || en_class == os_class) && (os_version == PF_OSFP_ANY || en_version == os_version) && (os_subtype == PF_OSFP_ANY || en_subtype == os_subtype)) { - DPFPRINTF("osfp matched %s %s %s %x==%x\n", + DPFPRINTF(PF_DEBUG_NOISY, "osfp matched %s %s %s %x==%x", entry->fp_class_nm, entry->fp_version_nm, entry->fp_subtype_nm, os, entry->fp_os); return (1); } } - DPFPRINTF("fingerprint 0x%x didn't match\n", os); + DPFPRINTF(PF_DEBUG_NOISY, "fingerprint 0x%x didn't match", os); return (0); } @@ -275,8 +272,8 @@ pf_osfp_add(struct pf_osfp_ioctl *fpioc) fpadd.fp_ttl = fpioc->fp_ttl; #if 0 /* XXX RYAN wants to fix logging */ - DPFPRINTF("adding osfp %s %s %s = %s%d:%d:%d:%s%d:0x%llx %d " - "(TS=%s,M=%s%d,W=%s%d) %x\n", + DPFPRINTF(PF_DEBUG_NOISY, "adding osfp %s %s %s =" + " %s%d:%d:%d:%s%d:0x%llx %d (TS=%s,M=%s%d,W=%s%d) %x", fpioc->fp_os.fp_class_nm, fpioc->fp_os.fp_version_nm, fpioc->fp_os.fp_subtype_nm, (fpadd.fp_flags & PF_OSFP_WSIZE_MOD) ? "%" : diff --git a/sys/netpfil/pf/pf_ruleset.c b/sys/netpfil/pf/pf_ruleset.c index 2e5165a9900c..43b51f2933f4 100644 --- a/sys/netpfil/pf/pf_ruleset.c +++ b/sys/netpfil/pf/pf_ruleset.c @@ -59,9 +59,6 @@ #error "Kernel only file. Please use sbin/pfctl/pf_ruleset.c instead." #endif -#define DPFPRINTF(format, x...) \ - if (V_pf_status.debug >= PF_DEBUG_NOISY) \ - printf(format , ##x) #define rs_malloc(x) malloc(x, M_TEMP, M_NOWAIT|M_ZERO) #define rs_free(x) free(x, M_TEMP) @@ -386,7 +383,8 @@ pf_kanchor_setup(struct pf_krule *r, const struct pf_kruleset *s, strlcpy(path, s->anchor->path, MAXPATHLEN); while (name[0] == '.' && name[1] == '.' && name[2] == '/') { if (!path[0]) { - DPFPRINTF("%s: .. beyond root\n", __func__); + DPFPRINTF(PF_DEBUG_NOISY, "%s: .. beyond root", + __func__); rs_free(path); return (1); } @@ -408,7 +406,7 @@ pf_kanchor_setup(struct pf_krule *r, const struct pf_kruleset *s, ruleset = pf_find_or_create_kruleset(path); rs_free(path); if (ruleset == NULL || ruleset == &pf_main_ruleset) { - DPFPRINTF("%s: ruleset\n", __func__); + DPFPRINTF(PF_DEBUG_NOISY, "%s: ruleset", __func__); return (1); } r->anchor = ruleset->anchor; @@ -690,7 +688,8 @@ pf_keth_anchor_setup(struct pf_keth_rule *r, const struct pf_keth_ruleset *s, strlcpy(path, s->anchor->path, MAXPATHLEN); while (name[0] == '.' && name[1] == '.' && name[2] == '/') { if (!path[0]) { - DPFPRINTF("%s: .. beyond root\n", __func__); + DPFPRINTF(PF_DEBUG_NOISY, "%s: .. 
beyond root", + __func__); rs_free(path); return (1); } @@ -712,7 +711,7 @@ pf_keth_anchor_setup(struct pf_keth_rule *r, const struct pf_keth_ruleset *s, ruleset = pf_find_or_create_keth_ruleset(path); rs_free(path); if (ruleset == NULL || ruleset->anchor == NULL) { - DPFPRINTF("%s: ruleset\n", __func__); + DPFPRINTF(PF_DEBUG_NOISY, "%s: ruleset", __func__); return (1); } r->anchor = ruleset->anchor; diff --git a/sys/netpfil/pf/pf_syncookies.c b/sys/netpfil/pf/pf_syncookies.c index 66757fa4b756..4a935bc65767 100644 --- a/sys/netpfil/pf/pf_syncookies.c +++ b/sys/netpfil/pf/pf_syncookies.c @@ -88,8 +88,6 @@ #include <net/pfvar.h> #include <netpfil/pf/pf_nv.h> -#define DPFPRINTF(n, x) if (V_pf_status.debug >= (n)) printf x - union pf_syncookie { uint8_t cookie; struct { @@ -281,7 +279,7 @@ pf_synflood_check(struct pf_pdesc *pd) pf_syncookie_rotate, curvnet); V_pf_status.syncookies_active = true; DPFPRINTF(LOG_WARNING, - ("synflood detected, enabling syncookies\n")); + "synflood detected, enabling syncookies"); // XXXTODO V_pf_status.lcounters[LCNT_SYNFLOODS]++; } @@ -367,7 +365,7 @@ pf_syncookie_rotate(void *arg) V_pf_status.syncookies_mode == PF_SYNCOOKIES_NEVER) ) { V_pf_status.syncookies_active = false; - DPFPRINTF(PF_DEBUG_MISC, ("syncookies disabled\n")); + DPFPRINTF(PF_DEBUG_MISC, "syncookies disabled"); } /* nothing in flight any more? delete keys and return */ diff --git a/sys/netpfil/pf/pf_table.c b/sys/netpfil/pf/pf_table.c index 9c0151b7da2b..ecc185f89ad7 100644 --- a/sys/netpfil/pf/pf_table.c +++ b/sys/netpfil/pf/pf_table.c @@ -49,8 +49,6 @@ #include <net/vnet.h> #include <net/pfvar.h> -#define DPFPRINTF(n, x) if (V_pf_status.debug >= (n)) printf x - #define ACCEPT_FLAGS(flags, oklist) \ do { \ if ((flags & ~(oklist)) & \ @@ -2189,7 +2187,7 @@ pfr_update_stats(struct pfr_ktable *kt, struct pf_addr *a, sa_family_t af, if ((ke == NULL || ke->pfrke_not) != notrule) { if (op_pass != PFR_OP_PASS) DPFPRINTF(PF_DEBUG_URGENT, - ("pfr_update_stats: assertion failed.\n")); + "pfr_update_stats: assertion failed."); op_pass = PFR_OP_XPASS; } pfr_kstate_counter_add(&kt->pfrkt_packets[dir_out][op_pass], 1); diff --git a/sys/netsmb/smb_conn.c b/sys/netsmb/smb_conn.c index 259635e2d8d5..ab6cd130a057 100644 --- a/sys/netsmb/smb_conn.c +++ b/sys/netsmb/smb_conn.c @@ -422,7 +422,7 @@ smb_vc_create(struct smb_vcspec *vcspec, if (uid == SMBM_ANY_OWNER) uid = realuid; if (gid == SMBM_ANY_GROUP) - gid = cred->cr_groups[0]; + gid = cred->cr_gid; vcp->vc_uid = uid; vcp->vc_grp = gid; @@ -765,7 +765,7 @@ smb_share_create(struct smb_vc *vcp, struct smb_sharespec *shspec, if (uid == SMBM_ANY_OWNER) uid = realuid; if (gid == SMBM_ANY_GROUP) - gid = cred->cr_groups[0]; + gid = cred->cr_gid; ssp = smb_zmalloc(sizeof(*ssp), M_SMBCONN, M_WAITOK); smb_co_init(SSTOCP(ssp), SMBL_SHARE, "smbss ilock", "smbss"); ssp->obj.co_free = smb_share_free; diff --git a/sys/riscv/allwinner/files.allwinner b/sys/riscv/allwinner/files.allwinner index 73fa9660e2d2..7a4ff6b9c62e 100644 --- a/sys/riscv/allwinner/files.allwinner +++ b/sys/riscv/allwinner/files.allwinner @@ -1,5 +1,6 @@ arm/allwinner/aw_gpio.c optional gpio aw_gpio fdt +arm/allwinner/aw_mmc.c optional mmc aw_mmc fdt | mmccam aw_mmc fdt arm/allwinner/aw_rtc.c optional aw_rtc fdt arm/allwinner/aw_syscon.c optional syscon arm/allwinner/aw_sid.c optional aw_sid nvmem diff --git a/sys/riscv/conf/std.allwinner b/sys/riscv/conf/std.allwinner index 2b1e0d4e09dc..34fe195b01ba 100644 --- a/sys/riscv/conf/std.allwinner +++ b/sys/riscv/conf/std.allwinner @@ -7,6 +7,7 @@ options 
diff --git a/sys/netsmb/smb_conn.c b/sys/netsmb/smb_conn.c index 259635e2d8d5..ab6cd130a057 100644 --- a/sys/netsmb/smb_conn.c +++ b/sys/netsmb/smb_conn.c @@ -422,7 +422,7 @@ smb_vc_create(struct smb_vcspec *vcspec, if (uid == SMBM_ANY_OWNER) uid = realuid; if (gid == SMBM_ANY_GROUP) - gid = cred->cr_groups[0]; + gid = cred->cr_gid; vcp->vc_uid = uid; vcp->vc_grp = gid; @@ -765,7 +765,7 @@ smb_share_create(struct smb_vc *vcp, struct smb_sharespec *shspec, if (uid == SMBM_ANY_OWNER) uid = realuid; if (gid == SMBM_ANY_GROUP) - gid = cred->cr_groups[0]; + gid = cred->cr_gid; ssp = smb_zmalloc(sizeof(*ssp), M_SMBCONN, M_WAITOK); smb_co_init(SSTOCP(ssp), SMBL_SHARE, "smbss ilock", "smbss"); ssp->obj.co_free = smb_share_free; diff --git a/sys/riscv/allwinner/files.allwinner b/sys/riscv/allwinner/files.allwinner index 73fa9660e2d2..7a4ff6b9c62e 100644 --- a/sys/riscv/allwinner/files.allwinner +++ b/sys/riscv/allwinner/files.allwinner @@ -1,5 +1,6 @@ arm/allwinner/aw_gpio.c optional gpio aw_gpio fdt +arm/allwinner/aw_mmc.c optional mmc aw_mmc fdt | mmccam aw_mmc fdt arm/allwinner/aw_rtc.c optional aw_rtc fdt arm/allwinner/aw_syscon.c optional syscon arm/allwinner/aw_sid.c optional aw_sid nvmem diff --git a/sys/riscv/conf/std.allwinner b/sys/riscv/conf/std.allwinner index 2b1e0d4e09dc..34fe195b01ba 100644 --- a/sys/riscv/conf/std.allwinner +++ b/sys/riscv/conf/std.allwinner @@ -7,6 +7,7 @@ options SOC_ALLWINNER_D1 device aw_ccu # Allwinner clock controller device aw_gpio # Allwinner GPIO controller +device aw_mmc # Allwinner SD/MMC controller device aw_rtc # Allwinner Real-time Clock device aw_sid # Allwinner Secure ID EFUSE device aw_timer # Allwinner Timer diff --git a/sys/riscv/include/vmm_dev.h b/sys/riscv/include/vmm_dev.h index 856ff0778b95..4d30d5a1c35b 100644 --- a/sys/riscv/include/vmm_dev.h +++ b/sys/riscv/include/vmm_dev.h @@ -34,6 +34,8 @@ #ifndef _VMM_DEV_H_ #define _VMM_DEV_H_ +#include <sys/domainset.h> + #include <machine/vmm.h> struct vm_memmap { @@ -56,6 +58,9 @@ struct vm_memseg { int segid; size_t len; char name[VM_MAX_SUFFIXLEN + 1]; + domainset_t *ds_mask; + size_t ds_mask_size; + int ds_policy; }; struct vm_register { diff --git a/sys/rpc/authunix_prot.c b/sys/rpc/authunix_prot.c index 91fb96f44397..7b531946488a 100644 --- a/sys/rpc/authunix_prot.c +++ b/sys/rpc/authunix_prot.c @@ -93,9 +93,10 @@ xdr_authunix_parms(XDR *xdrs, uint32_t *time, struct xucred *cred) if (!xdr_uint32_t(xdrs, &cred->cr_uid)) return (FALSE); - if (!xdr_uint32_t(xdrs, &cred->cr_groups[0])) + if (!xdr_uint32_t(xdrs, &cred->cr_gid)) return (FALSE); + /* XXXKE Fix this if cr_gid gets separated out. */ if (xdrs->x_op == XDR_ENCODE) { ngroups = cred->cr_ngroups - 1; if (ngroups > NGRPS) @@ -105,7 +106,7 @@ xdr_authunix_parms(XDR *xdrs, uint32_t *time, struct xucred *cred) if (!xdr_uint32_t(xdrs, &ngroups)) return (FALSE); for (i = 0; i < ngroups; i++) { - if (i + 1 < ngroups_max + 1) { + if (i < ngroups_max) { if (!xdr_uint32_t(xdrs, &cred->cr_groups[i + 1])) return (FALSE); } else { @@ -115,7 +116,7 @@ xdr_authunix_parms(XDR *xdrs, uint32_t *time, struct xucred *cred) if (xdrs->x_op == XDR_DECODE) { - if (ngroups + 1 > ngroups_max + 1) + if (ngroups > ngroups_max) cred->cr_ngroups = ngroups_max + 1; else cred->cr_ngroups = ngroups + 1;
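For orientation, the authunix_prot.c change above and the svc_auth_unix.c counterpart below both walk the standard AUTH_UNIX credential body (RFC 5531, Appendix A), now mapped onto the split cr_gid/cr_groups[] layout. A sketch of the wire format and the mapping, as comments only:

/*
 * AUTH_UNIX credential body on the wire:
 *	uint32 stamp;
 *	string machinename<255>;
 *	uint32 uid;		-> cr_uid
 *	uint32 gid;		-> cr_gid (previously cr_groups[0])
 *	uint32 gids<16>;	-> cr_groups[1..], capped to what the
 *				   credential can hold (ngroups_max here,
 *				   XU_NGROUPS for the xucred variant);
 *				   the wire allows at most NGRPS == 16
 */

The rewritten bounds are arithmetically identical to the old ones (i + 1 < ngroups_max + 1 is exactly i < ngroups_max); the change just makes the capping legible.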
diff --git a/sys/rpc/svc_auth_unix.c b/sys/rpc/svc_auth_unix.c index 5d6402a05006..b10ef33be704 100644 --- a/sys/rpc/svc_auth_unix.c +++ b/sys/rpc/svc_auth_unix.c @@ -83,12 +83,13 @@ _svcauth_unix(struct svc_req *rqst, struct rpc_msg *msg) str_len = RNDUP(str_len); buf += str_len / sizeof (int32_t); xcr->cr_uid = IXDR_GET_UINT32(buf); - xcr->cr_groups[0] = IXDR_GET_UINT32(buf); + xcr->cr_gid = IXDR_GET_UINT32(buf); gid_len = (size_t)IXDR_GET_UINT32(buf); if (gid_len > NGRPS) { stat = AUTH_BADCRED; goto done; } + /* XXXKE Fix this if cr_gid gets separated out. */ for (i = 0; i < gid_len; i++) { if (i + 1 < XU_NGROUPS) xcr->cr_groups[i + 1] = IXDR_GET_INT32(buf); diff --git a/sys/security/audit/audit.c b/sys/security/audit/audit.c index 05928f1c33e8..7ec50d990d4e 100644 --- a/sys/security/audit/audit.c +++ b/sys/security/audit/audit.c @@ -279,7 +279,7 @@ audit_record_ctor(void *mem, int size, void *arg, int flags) cru2x(cred, &ar->k_ar.ar_subj_cred); ar->k_ar.ar_subj_ruid = cred->cr_ruid; ar->k_ar.ar_subj_rgid = cred->cr_rgid; - ar->k_ar.ar_subj_egid = cred->cr_groups[0]; + ar->k_ar.ar_subj_egid = cred->cr_gid; ar->k_ar.ar_subj_auid = cred->cr_audit.ai_auid; ar->k_ar.ar_subj_asid = cred->cr_audit.ai_asid; ar->k_ar.ar_subj_pid = td->td_proc->p_pid; diff --git a/sys/security/audit/audit_arg.c b/sys/security/audit/audit_arg.c index c667d3968817..3ea645373dbe 100644 --- a/sys/security/audit/audit_arg.c +++ b/sys/security/audit/audit_arg.c @@ -408,7 +408,7 @@ audit_arg_process(struct proc *p) cred = p->p_ucred; ar->k_ar.ar_arg_auid = cred->cr_audit.ai_auid; ar->k_ar.ar_arg_euid = cred->cr_uid; - ar->k_ar.ar_arg_egid = cred->cr_groups[0]; + ar->k_ar.ar_arg_egid = cred->cr_gid; ar->k_ar.ar_arg_ruid = cred->cr_ruid; ar->k_ar.ar_arg_rgid = cred->cr_rgid; ar->k_ar.ar_arg_asid = cred->cr_audit.ai_asid; diff --git a/sys/sys/compressor.h b/sys/sys/compressor.h index cad9080b46ff..e59eeabec2cd 100644 --- a/sys/sys/compressor.h +++ b/sys/sys/compressor.h @@ -42,6 +42,7 @@ struct compressor; bool compressor_avail(int format); struct compressor *compressor_init(compressor_cb_t cb, int format, size_t maxiosize, int level, void *arg); +int compressor_format(const struct compressor *stream); void compressor_reset(struct compressor *stream); int compressor_write(struct compressor *stream, void *data, size_t len); diff --git a/sys/sys/domainset.h b/sys/sys/domainset.h index f98b175e9bc8..f3dc92ec6383 100644 --- a/sys/sys/domainset.h +++ b/sys/sys/domainset.h @@ -113,6 +113,20 @@ void domainset_zero(void); * returned value will not match the key pointer. */ struct domainset *domainset_create(const struct domainset *); + +/* + * Remove empty domains from a given domainset. + * Returns 'false' if the domainset consists entirely of empty domains. + */ +bool domainset_empty_vm(struct domainset *domain); + +/* + * Validate and populate a domainset structure according to the specified + * policy and mask. 
+ */ +int domainset_populate(struct domainset *domain, const domainset_t *mask, int policy, + size_t mask_size); + #ifdef _SYS_SYSCTL_H_ int sysctl_handle_domainset(SYSCTL_HANDLER_ARGS); #endif
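The two new interfaces above pair naturally with the ds_mask/ds_policy fields added to struct vm_memseg earlier in this diff: userland supplies a raw mask and policy, and the kernel validates, trims, and canonicalizes them before use. A hedged sketch of the expected call pattern (function and variable names invented, error handling abbreviated):

static int
example_set_policy(const domainset_t *mask, size_t mask_size, int policy)
{
	struct domainset dset;
	int error;

	/* Validate the user-supplied policy/mask pair. */
	error = domainset_populate(&dset, mask, policy, mask_size);
	if (error != 0)
		return (error);
	/* Per the header comment, false means every domain was empty. */
	if (!domainset_empty_vm(&dset))
		return (EINVAL);
	/* dset may now be interned via domainset_create(). */
	return (0);
}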
diff --git a/sys/sys/efi.h b/sys/sys/efi.h index 95a433a950db..89c8b15519de 100644 --- a/sys/sys/efi.h +++ b/sys/sys/efi.h @@ -42,6 +42,8 @@ {0xb122a263,0x3661,0x4f68,{0x99,0x29,0x78,0xf8,0xb0,0xd6,0x21,0x80}} #define EFI_PROPERTIES_TABLE \ {0x880aaca3,0x4adc,0x4a04,{0x90,0x79,0xb7,0x47,0x34,0x08,0x25,0xe5}} +#define EFI_MEMORY_ATTRIBUTES_TABLE \ + {0xdcfa911d,0x26eb,0x469f,{0xa2,0x20,0x38,0xb7,0xdc,0x46,0x12,0x20}} #define LINUX_EFI_MEMRESERVE_TABLE \ {0x888eb0c6,0x8ede,0x4ff5,{0xa8,0xf0,0x9a,0xee,0x5c,0xb9,0x77,0xc2}} @@ -166,6 +168,22 @@ struct efi_prop_table { uint64_t memory_protection_attribute; }; +struct efi_memory_descriptor { + uint32_t type; + caddr_t phy_addr; + caddr_t virt_addr; + uint64_t pages; + uint64_t attrs; +}; + +struct efi_memory_attribute_table { + uint32_t version; + uint32_t num_ents; + uint32_t descriptor_size; + uint32_t flags; + struct efi_memory_descriptor tables[]; +}; + #ifdef _KERNEL #ifdef EFIABI_ATTR diff --git a/sys/sys/exec.h b/sys/sys/exec.h index 4bf114a7c698..580a5372c4db 100644 --- a/sys/sys/exec.h +++ b/sys/sys/exec.h @@ -57,16 +57,6 @@ struct ps_strings { unsigned int ps_nenvstr; /* the number of environment strings */ }; -/* Coredump output parameters. */ -struct coredump_params { - off_t offset; - struct ucred *active_cred; - struct ucred *file_cred; - struct thread *td; - struct vnode *vp; - struct compressor *comp; -}; - struct image_params; struct execsw { @@ -105,16 +95,6 @@ int exec_unregister(const struct execsw *); enum uio_seg; -#define CORE_BUF_SIZE (16 * 1024) - -int core_write(struct coredump_params *, const void *, size_t, off_t, - enum uio_seg, size_t *); -int core_output(char *, size_t, off_t, struct coredump_params *, void *); -int sbuf_drain_core_output(void *, const char *, int); - -extern int coredump_pack_fileinfo; -extern int coredump_pack_vmmapinfo; - /* * note: name##_mod cannot be const storage because the * linker_file_sysinit() function modifies _file in the diff --git a/sys/sys/exterr_cat.h b/sys/sys/exterr_cat.h index cab94ac511a5..80cff53b3576 100644 --- a/sys/sys/exterr_cat.h +++ b/sys/sys/exterr_cat.h @@ -18,6 +18,8 @@ #define EXTERR_CAT_FUSE 4 #define EXTERR_CAT_INOTIFY 5 #define EXTERR_CAT_GENIO 6 +#define EXTERR_CAT_BRIDGE 7 +#define EXTERR_CAT_SWAP 8 #endif diff --git a/sys/sys/imgact_elf.h b/sys/sys/imgact_elf.h index c9444e5aec41..2845a9dbc1e2 100644 --- a/sys/sys/imgact_elf.h +++ b/sys/sys/imgact_elf.h @@ -45,6 +45,7 @@ {(pos)->a_type = (id); (pos)->a_un.a_ptr = (ptr); (pos)++;} #endif +struct coredump_writer; struct image_params; struct thread; struct vnode; @@ -114,7 +115,7 @@ bool __elfN(brand_inuse)(Elf_Brandinfo *entry); int __elfN(insert_brand_entry)(Elf_Brandinfo *entry); int __elfN(remove_brand_entry)(Elf_Brandinfo *entry); int __elfN(freebsd_fixup)(uintptr_t *, struct image_params *); -int __elfN(coredump)(struct thread *, struct vnode *, off_t, int); +int __elfN(coredump)(struct thread *, struct coredump_writer *, off_t, int); size_t __elfN(populate_note)(int, void *, void *, size_t, void **); int __elfN(freebsd_copyout_auxargs)(struct image_params *, uintptr_t); void __elfN(puthdr)(struct thread *, void *, size_t, int, size_t, int); diff --git a/sys/sys/jail.h b/sys/sys/jail.h index 08caa9f49270..24c420e2c976 100644 --- a/sys/sys/jail.h +++ b/sys/sys/jail.h @@ -435,7 +435,7 @@ void prison0_init(void); bool prison_allow(struct ucred *, unsigned); int prison_check(struct ucred *cred1, struct ucred *cred2); bool prison_check_nfsd(struct ucred *cred); -bool prison_owns_vnet(struct ucred *); +bool prison_owns_vnet(struct prison *pr); int prison_canseemount(struct ucred *cred, struct mount *mp); void prison_enforce_statfs(struct ucred *cred, struct mount *mp, struct statfs *sp); diff --git a/sys/sys/mbuf.h b/sys/sys/mbuf.h index c75094aea450..304bd019c9fc 100644 --- a/sys/sys/mbuf.h +++ b/sys/sys/mbuf.h @@ -1391,6 +1391,7 @@ extern bool mb_use_ext_pgs; /* Use ext_pgs for sendfile */ #define PACKET_TAG_PF_REASSEMBLED 31 #define PACKET_TAG_IPSEC_ACCEL_OUT 32 /* IPSEC accel out */ #define PACKET_TAG_IPSEC_ACCEL_IN 33 /* IPSEC accel in */ +#define PACKET_TAG_OVPN 34 /* if_ovpn */ /* Specific cookies and tags. */ diff --git a/sys/sys/param.h b/sys/sys/param.h index f941f021a423..33d61e8a1619 100644 --- a/sys/sys/param.h +++ b/sys/sys/param.h @@ -74,7 +74,7 @@ * cannot include sys/param.h and should only be updated here. */ #undef __FreeBSD_version -#define __FreeBSD_version 1500054 +#define __FreeBSD_version 1500055 /* * __FreeBSD_kernel__ indicates that this system uses the kernel of FreeBSD, diff --git a/sys/sys/signalvar.h b/sys/sys/signalvar.h index 23e8426b26ee..8f181b7beee6 100644 --- a/sys/sys/signalvar.h +++ b/sys/sys/signalvar.h @@ -403,6 +403,7 @@ int sigev_findtd(struct proc *p, struct sigevent *sigev, struct thread **); void sigfastblock_clear(struct thread *td); void sigfastblock_fetch(struct thread *td); int sig_intr(void); +bool sig_do_core(int); void siginit(struct proc *p); void signotify(struct thread *td); void sigqueue_delete(struct sigqueue *queue, int sig); diff --git a/sys/sys/sockbuf.h b/sys/sys/sockbuf.h index 7f6234ade6f4..b4593f38f592 100644 --- a/sys/sys/sockbuf.h +++ b/sys/sys/sockbuf.h @@ -40,7 +40,7 @@ #define SB_SEL 0x08 /* someone is selecting */ #define SB_ASYNC 0x10 /* ASYNC I/O, need signals */ #define SB_UPCALL 0x20 /* someone wants an upcall */ -/* was SB_NOINTR 0x40 */ +#define SB_AUTOLOWAT 0x40 /* sendfile(2) may autotune sb_lowat */ #define SB_AIO 0x80 /* AIO operations queued */ #define SB_KNOTE 0x100 /* kernel note attached */ #define SB_NOCOALESCE 0x200 /* don't coalesce new data into existing mbufs */ @@ -210,8 +210,6 @@ typedef enum { SO_RCV, SO_SND } sb_which; * Socket buffer private mbuf(9) flags. 
*/ #define M_NOTREADY M_PROTO1 /* m_data not populated yet */ -#define M_BLOCKED M_PROTO2 /* M_NOTREADY in front of m */ -#define M_NOTAVAIL (M_NOTREADY | M_BLOCKED) void sbappend(struct sockbuf *sb, struct mbuf *m, int flags); void sbappend_locked(struct sockbuf *sb, struct mbuf *m, int flags); diff --git a/sys/sys/syscallsubr.h b/sys/sys/syscallsubr.h index fd183ffbc7a4..8237165b84ce 100644 --- a/sys/sys/syscallsubr.h +++ b/sys/sys/syscallsubr.h @@ -60,6 +60,7 @@ struct rusage; struct sched_param; struct sembuf; union semun; +struct shmfd; struct sockaddr; struct spacectl_range; struct stat; @@ -337,7 +338,7 @@ int kern_shm_open(struct thread *td, const char *userpath, int flags, mode_t mode, struct filecaps *fcaps); int kern_shm_open2(struct thread *td, const char *path, int flags, mode_t mode, int shmflags, struct filecaps *fcaps, - const char *name); + const char *name, struct shmfd *shmfd); int kern_shmat(struct thread *td, int shmid, const void *shmaddr, int shmflg); int kern_shmctl(struct thread *td, int shmid, int cmd, void *buf, diff --git a/sys/sys/sysent.h b/sys/sys/sysent.h index 4ddfc8516053..1714fa5a7416 100644 --- a/sys/sys/sysent.h +++ b/sys/sys/sysent.h @@ -90,6 +90,7 @@ struct sysent { /* system call table */ #define SY_THR_STATIC_KLD SY_THR_STATIC #endif +struct coredump_writer; struct image_params; struct proc; struct __sigset; @@ -108,7 +109,8 @@ struct sysentvec { int *sv_szsigcode; /* size of sigtramp code */ int sv_sigcodeoff; char *sv_name; /* name of binary type */ - int (*sv_coredump)(struct thread *, struct vnode *, off_t, int); + int (*sv_coredump)(struct thread *, struct coredump_writer *, + off_t, int); /* function to dump core, or NULL */ int sv_elf_core_osabi; const char *sv_elf_core_abi_vendor; diff --git a/sys/sys/ucoredump.h b/sys/sys/ucoredump.h new file mode 100644 index 000000000000..0a51ee7f50c8 --- /dev/null +++ b/sys/sys/ucoredump.h @@ -0,0 +1,99 @@ +/* + * + * Copyright (c) 2015 Mark Johnston <markj@FreeBSD.org> + * Copyright (c) 2025 Kyle Evans <kevans@FreeBSD.org> + * + * SPDX-License-Identifier: BSD-2-Clause + * + */ + +#ifndef _SYS_UCOREDUMP_H_ +#define _SYS_UCOREDUMP_H_ + +#ifdef _KERNEL + +#include <sys/_uio.h> +#include <sys/blockcount.h> +#include <sys/queue.h> + +/* Coredump output parameters. 
*/ +struct coredump_params; +struct coredump_writer; +struct thread; +struct ucred; + +typedef int coredump_init_fn(const struct coredump_writer *, + const struct coredump_params *); +typedef int coredump_write_fn(const struct coredump_writer *, const void *, size_t, + off_t, enum uio_seg, struct ucred *, size_t *, struct thread *); +typedef int coredump_extend_fn(const struct coredump_writer *, off_t, + struct ucred *); + +struct coredump_vnode_ctx { + struct vnode *vp; + struct ucred *fcred; +}; + +coredump_write_fn core_vn_write; +coredump_extend_fn core_vn_extend; + +struct coredump_writer { + void *ctx; + coredump_init_fn *init_fn; + coredump_write_fn *write_fn; + coredump_extend_fn *extend_fn; +}; + +struct coredump_params { + off_t offset; + struct ucred *active_cred; + struct thread *td; + const struct coredump_writer *cdw; + struct compressor *comp; +}; + +#define CORE_BUF_SIZE (16 * 1024) + +int core_write(struct coredump_params *, const void *, size_t, off_t, + enum uio_seg, size_t *); +int core_output(char *, size_t, off_t, struct coredump_params *, void *); +int sbuf_drain_core_output(void *, const char *, int); + +extern int coredump_pack_fileinfo; +extern int coredump_pack_vmmapinfo; + +extern int compress_user_cores; +extern int compress_user_cores_level; + +typedef int coredumper_probe_fn(struct thread *); + +/* + * Some arbitrary values for coredumper probes to return. The highest priority + * we can find wins. It's somewhat expected that a coredumper may want to bid + * differently based on the process in question. Note that probe functions will + * be called with the proc lock held, so they must not sleep. + */ +#define COREDUMPER_NOMATCH (-1) /* Decline to touch it */ +#define COREDUMPER_GENERIC (0) /* I handle coredumps */ +#define COREDUMPER_SPECIAL (50) /* Special handler */ +#define COREDUMPER_HIGH_PRIORITY (100) /* High-priority handler */ + +/* + * The handle functions will be called with the proc lock held, and should + * return with the proc lock dropped. 
+ */ +typedef int coredumper_handle_fn(struct thread *, off_t); + +struct coredumper { + SLIST_ENTRY(coredumper) cd_entry; + const char *cd_name; + coredumper_probe_fn *cd_probe; + coredumper_handle_fn *cd_handle; + blockcount_t cd_refcount; +}; + +void coredumper_register(struct coredumper *); +void coredumper_unregister(struct coredumper *); + +#endif /* _KERNEL */ +#endif /* _SYS_UCOREDUMP_H_ */
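The new ucoredump.h interface above admits pluggable core-dump sinks: each registrant bids via its probe function and the highest bid wins. A minimal sketch of a registrant under the stated locking rules (all names invented; a real handler would produce the dump before unlocking, and registration would typically happen from a MOD_LOAD event):

#include <sys/param.h>
#include <sys/proc.h>
#include <sys/ucoredump.h>

static int
exampledumper_probe(struct thread *td)
{
	/* Proc lock held; must not sleep. */
	return (COREDUMPER_GENERIC);
}

static int
exampledumper_handle(struct thread *td, off_t limit)
{
	/* Proc lock held on entry; must return with it dropped. */
	PROC_UNLOCK(td->td_proc);
	return (EOPNOTSUPP);	/* placeholder: no dump written */
}

static struct coredumper exampledumper = {
	.cd_name = "exampledumper",
	.cd_probe = exampledumper_probe,
	.cd_handle = exampledumper_handle,
};

/* e.g., coredumper_register(&exampledumper) at MOD_LOAD time; */
/* coredumper_unregister() before unload, which presumably waits */
/* on cd_refcount for in-flight dumps to drain. */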
diff --git a/sys/sys/unistd.h b/sys/sys/unistd.h index c291c1dc2b95..85ed93fd359d 100644 --- a/sys/sys/unistd.h +++ b/sys/sys/unistd.h @@ -156,6 +156,8 @@ #define _PC_DEALLOC_PRESENT 65 #define _PC_NAMEDATTR_ENABLED 66 #define _PC_HAS_NAMEDATTR 67 +#define _PC_XATTR_ENABLED _PC_NAMEDATTR_ENABLED /* Solaris Compatible */ +#define _PC_XATTR_EXISTS _PC_HAS_NAMEDATTR /* Solaris Compatible */ #define _PC_HAS_HIDDENSYSTEM 68 #endif diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h index 2c6947103c94..a416fddcddc3 100644 --- a/sys/sys/vnode.h +++ b/sys/sys/vnode.h @@ -939,7 +939,6 @@ void vop_mknod_post(void *a, int rc); void vop_open_post(void *a, int rc); void vop_read_post(void *a, int rc); void vop_read_pgcache_post(void *ap, int rc); -void vop_readdir_post(void *a, int rc); void vop_reclaim_post(void *a, int rc); void vop_remove_pre(void *a); void vop_remove_post(void *a, int rc); @@ -1015,7 +1014,36 @@ void vop_rename_fail(struct vop_rename_args *ap); _error; \ }) -#define VOP_WRITE_PRE(ap) \ +#ifdef INVARIANTS +#define vop_readdir_pre_assert(ap) \ + ssize_t nresid, oresid; \ + \ + oresid = (ap)->a_uio->uio_resid; +#define vop_readdir_post_assert(ap, ret) \ + nresid = (ap)->a_uio->uio_resid; \ + if ((ret) == 0 && (ap)->a_eofflag != NULL) { \ + VNASSERT(oresid == 0 || nresid != oresid || \ + *(ap)->a_eofflag == 1, \ + (ap)->a_vp, ("VOP_READDIR: eofflag not set")); \ + } +#else +#define vop_readdir_pre_assert(ap) +#define vop_readdir_post_assert(ap, ret) +#endif + +#define vop_readdir_pre(ap) do { \ + vop_readdir_pre_assert(ap) + +#define vop_readdir_post(ap, ret) \ + vop_readdir_post_assert(ap, ret); \ + if ((ret) == 0) { \ + VFS_KNOTE_LOCKED((ap)->a_vp, NOTE_READ); \ + INOTIFY((ap)->a_vp, IN_ACCESS); \ + } \ +} while (0) + +#define vop_write_pre(ap) \ struct vattr va; \ int error; \ off_t osize, ooffset, noffset; \ @@ -1029,7 +1057,7 @@ void vop_rename_fail(struct vop_rename_args *ap); osize = (off_t)va.va_size; \ } -#define VOP_WRITE_POST(ap, ret) \ +#define vop_write_post(ap, ret) \ noffset = (ap)->a_uio->uio_offset; \ if (noffset > ooffset) { \ if (!VN_KNLIST_EMPTY((ap)->a_vp)) { \ diff --git a/sys/ufs/ufs/ufs_vnops.c b/sys/ufs/ufs/ufs_vnops.c index 53fac4b0665e..b7453db9013c 100644 --- a/sys/ufs/ufs/ufs_vnops.c +++ b/sys/ufs/ufs/ufs_vnops.c @@ -1268,7 +1268,8 @@ ufs_rename( struct inode *fip, *tip, *tdp, *fdp; struct direct newdir; off_t endoff; - int doingdirectory, newparent; + int doingdirectory; + u_int newparent; int error = 0; struct mount *mp; ino_t ino; @@ -1475,7 +1476,7 @@ relock: * the user must have write permission in the source so * as to be able to change "..". */ - if (doingdirectory && newparent) { + if (doingdirectory && newparent != 0) { error = VOP_ACCESS(fvp, VWRITE, tcnp->cn_cred, curthread); if (error) goto unlockout; @@ -1538,7 +1539,7 @@ relock: if (tip == NULL) { if (ITODEV(tdp) != ITODEV(fip)) panic("ufs_rename: EXDEV"); - if (doingdirectory && newparent) { + if (doingdirectory && newparent != 0) { /* * Account for ".." in new directory. * When source and destination have the same @@ -1631,7 +1632,7 @@ relock: goto bad; } if (doingdirectory) { - if (!newparent) { + if (newparent == 0) { tdp->i_effnlink--; if (DOINGSOFTDEP(tdvp)) softdep_change_linkcnt(tdp); @@ -1641,11 +1642,11 @@ relock: softdep_change_linkcnt(tip); } error = ufs_dirrewrite(tdp, tip, fip->i_number, - IFTODT(fip->i_mode), - (doingdirectory && newparent) ? newparent : doingdirectory); + IFTODT(fip->i_mode), (doingdirectory && newparent != 0) ? + newparent : doingdirectory); if (error) { if (doingdirectory) { - if (!newparent) { + if (newparent == 0) { tdp->i_effnlink++; if (DOINGSOFTDEP(tdvp)) softdep_change_linkcnt(tdp); @@ -1668,7 +1669,7 @@ relock: * disk, so when running with that code we avoid doing * them now. */ - if (!newparent) { + if (newparent == 0) { tdp->i_nlink--; DIP_SET_NLINK(tdp, tdp->i_nlink); UFS_INODE_SET_FLAG(tdp, IN_CHANGE); @@ -1697,7 +1698,7 @@ relock: * parent directory must be decremented * and ".." set to point to the new parent. */ - if (doingdirectory && newparent) { + if (doingdirectory && newparent != 0) { /* * Set the directory depth based on its new parent. */ @@ -2064,9 +2065,13 @@ ufs_mkdir( */ ucred.cr_ref = 1; ucred.cr_uid = ip->i_uid; + + /* + * XXXKE Fix this if cr_gid gets separated out + */ ucred.cr_ngroups = 1; ucred.cr_groups = &ucred_group; - ucred.cr_groups[0] = dp->i_gid; + ucred.cr_gid = ucred_group = dp->i_gid; ucp = &ucred; } #endif @@ -2823,9 +2828,13 @@ ufs_makeinode(int mode, struct vnode *dvp, struct vnode **vpp, */ ucred.cr_ref = 1; ucred.cr_uid = ip->i_uid; + + /* + * XXXKE Fix this if cr_gid gets separated out + */ ucred.cr_ngroups = 1; ucred.cr_groups = &ucred_group; - ucred.cr_groups[0] = pdir->i_gid; + ucred.cr_gid = ucred_group = pdir->i_gid; ucp = &ucred; #endif } else {
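The cr_groups[0]-to-cr_gid substitutions running through this diff (netsmb, RPC, audit, UFS) all encode the same rule: the effective GID now has a dedicated field, and cr_groups[0] carries it only as an alias until the XXXKE follow-ups separate the two for good. A sketch of the resulting reader-side convention (an illustrative membership check, not the kernel's groupmember()):

static bool
cred_in_group_example(const struct ucred *cred, gid_t gid)
{
	int i;

	if (cred->cr_gid == gid)	/* effective GID; was cr_groups[0] */
		return (true);
	/* Supplementary groups follow at cr_groups[1..cr_ngroups-1]. */
	for (i = 1; i < cred->cr_ngroups; i++)
		if (cred->cr_groups[i] == gid)
			return (true);
	return (false);
}

This is also why the UFS quota stubs above assign ucred.cr_gid = ucred_group = dp->i_gid: while the alias persists, both views of the effective GID must stay coherent.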
diff --git a/sys/vm/swap_pager.c b/sys/vm/swap_pager.c index d6bd06226d04..327cac661044 100644 --- a/sys/vm/swap_pager.c +++ b/sys/vm/swap_pager.c @@ -65,9 +65,9 @@ * from: Utah $Hdr: swap_pager.c 1.4 91/04/30$ */ -#include <sys/cdefs.h> #include "opt_vm.h" +#define EXTERR_CATEGORY EXTERR_CAT_SWAP #include <sys/param.h> #include <sys/bio.h> #include <sys/blist.h> @@ -76,6 +76,7 @@ #include <sys/disk.h> #include <sys/disklabel.h> #include <sys/eventhandler.h> +#include <sys/exterrvar.h> #include <sys/fcntl.h> #include <sys/limits.h> #include <sys/lock.h> @@ -2686,7 +2687,7 @@ swapon_check_swzone(void) } } -static void +static int swaponsomething(struct vnode *vp, void *id, u_long nblks, sw_strategy_t *strategy, sw_close_t *close, dev_t dev, int flags) { @@ -2701,6 +2702,8 @@ swaponsomething(struct vnode *vp, void *id, u_long nblks, */ nblks &= ~(ctodb(1) - 1); nblks = dbtoc(nblks); + if (nblks == 0) + return (EXTERROR(EINVAL, "swap device too small")); sp = malloc(sizeof *sp, M_VMPGDATA, M_WAITOK | M_ZERO); sp->sw_blist = blist_create(nblks, M_WAITOK); @@ -2742,6 +2745,8 @@ swaponsomething(struct vnode *vp, void *id, u_long nblks, swp_sizecheck(); mtx_unlock(&sw_dev_mtx); EVENTHANDLER_INVOKE(swapon, sp); + + return (0); } /* @@ -3286,10 +3291,10 @@ swapongeom_locked(struct cdev *dev, struct vnode *vp) return (error); } nblks = pp->mediasize / DEV_BSIZE; - swaponsomething(vp, cp, nblks, swapgeom_strategy, + error = swaponsomething(vp, cp, nblks, swapgeom_strategy, swapgeom_close, dev2udev(dev), (pp->flags & G_PF_ACCEPT_UNMAPPED) != 0 ? SW_UNMAPPED : 0); - return (0); + return (error); } static int @@ -3378,9 +3383,9 @@ swaponvp(struct thread *td, struct vnode *vp, u_long nblks) if (error != 0) return (error); - swaponsomething(vp, vp, nblks, swapdev_strategy, swapdev_close, + error = swaponsomething(vp, vp, nblks, swapdev_strategy, swapdev_close, NODEV, 0); - return (0); + return (error); } static int diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c index bbae55895c2c..b239a6ffb4ce 100644 --- a/sys/vm/vm_page.c +++ b/sys/vm/vm_page.c @@ -396,7 +396,7 @@ vm_page_blacklist_load(char **list, char **end) } *list = ptr; if (ptr != NULL) - *end = ptr + len - 1; else *end = NULL; return;
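The swap_pager.c hunks above are also the first consumer of the new EXTERR_CAT_SWAP category from exterr_cat.h: defining EXTERR_CATEGORY before including <sys/exterrvar.h> lets EXTERROR() attach a category and human-readable message to the errno, which the swaponsomething() callers now propagate instead of discarding. The pattern, as a sketch for any file adopting one of the new categories (swap_size_check() is an invented wrapper; in the diff the check sits inline in swaponsomething(), right after the rounding that can reduce a tiny device to zero blocks):

#define	EXTERR_CATEGORY	EXTERR_CAT_SWAP	/* must precede the include */
#include <sys/exterrvar.h>

static int
swap_size_check(u_long nblks)
{
	/* nblks has already been rounded down to a page boundary. */
	if (nblks == 0)
		return (EXTERROR(EINVAL, "swap device too small"));
	return (0);
}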