Diffstat (limited to 'sys')
85 files changed, 1886 insertions, 508 deletions
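
The core of this commit is mechanical but far-reaching: the amd64 compile-time KVA constants (DMAP_MIN_ADDRESS, VM_MIN_KERNEL_ADDRESS, LARGEMAP_MIN_ADDRESS, and friends) become fields of a runtime struct kva_layout_s kva_layout, fixed once during boot, so a single kernel image can run with either the 4-level (LA48) or 5-level (LA57) paging layout. A condensed, userspace-runnable sketch of the pattern; the bounds come from the vmparam.h address-space maps in the diff below, with km_high rounded up for brevity:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct kva_layout_s {
	uint64_t dmap_low;	/* was DMAP_MIN_ADDRESS */
	uint64_t dmap_high;	/* was DMAP_MAX_ADDRESS */
	uint64_t km_low;	/* was VM_MIN_KERNEL_ADDRESS */
	uint64_t km_high;	/* was VM_MAX_KERNEL_ADDRESS */
};

static const struct kva_layout_s layout_la48 = {
	.dmap_low  = 0xfffff80000000000ULL,	/* 4 TB direct map */
	.dmap_high = 0xfffffc0000000000ULL,
	.km_low    = 0xfffffe0000000000ULL,	/* 2 TB kernel map */
	.km_high   = 0xffffffffffffffffULL,
};

static const struct kva_layout_s layout_la57 = {
	.dmap_low  = 0xff01000000000000ULL,	/* 32 PML5 slots */
	.dmap_high = 0xff21000000000000ULL,
	.km_low    = 0xfffffe0000000000ULL,	/* unchanged on LA57 */
	.km_high   = 0xffffffffffffffffULL,
};

static struct kva_layout_s kva_layout;		/* set once, early at boot */

/* Range-test macros become comparisons against the runtime struct. */
static bool
va_in_dmap(uint64_t va)
{
	return (va >= kva_layout.dmap_low && va < kva_layout.dmap_high);
}

/*
 * PHYS_TO_DMAP()'s old "pa | DMAP_MIN_ADDRESS" becomes an addition: OR
 * only equals ADD for a suitably aligned compile-time base, which a
 * runtime-selected base no longer guarantees in general.
 */
static uint64_t
phys_to_dmap(uint64_t pa)
{
	return (pa + kva_layout.dmap_low);
}

int
main(void)
{
	bool la57 = false;	/* the kernel keys this off CPU support */

	kva_layout = la57 ? layout_la57 : layout_la48;
	printf("%d\n", va_in_dmap(phys_to_dmap(0x1000)));	/* 1 */
	return (0);
}
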
diff --git a/sys/amd64/amd64/apic_vector.S b/sys/amd64/amd64/apic_vector.S index 6e51ebff298a..5bb877a174f7 100644 --- a/sys/amd64/amd64/apic_vector.S +++ b/sys/amd64/amd64/apic_vector.S @@ -49,12 +49,6 @@ #include <machine/specialreg.h> #include <x86/apicreg.h> -#ifdef SMP -#define LK lock ; -#else -#define LK -#endif - .text SUPERALIGN_TEXT /* End Of Interrupt to APIC */ diff --git a/sys/amd64/amd64/mem.c b/sys/amd64/amd64/mem.c index 413b7c74890e..851f2df0e6e1 100644 --- a/sys/amd64/amd64/mem.c +++ b/sys/amd64/amd64/mem.c @@ -105,8 +105,8 @@ memrw(struct cdev *dev, struct uio *uio, int flags) * PAGE_SIZE, the uiomove() call does not * access past the end of the direct map. */ - if (v >= DMAP_MIN_ADDRESS && - v < DMAP_MIN_ADDRESS + dmaplimit) { + if (v >= kva_layout.dmap_low && + v < kva_layout.dmap_high) { error = uiomove((void *)v, c, uio); break; } diff --git a/sys/amd64/amd64/minidump_machdep.c b/sys/amd64/amd64/minidump_machdep.c index 6d0917e16099..43bf81a991bf 100644 --- a/sys/amd64/amd64/minidump_machdep.c +++ b/sys/amd64/amd64/minidump_machdep.c @@ -186,7 +186,7 @@ cpu_minidumpsys(struct dumperinfo *di, const struct minidumpstate *state) * tables, so care must be taken to read each entry only once. */ pmapsize = 0; - for (va = VM_MIN_KERNEL_ADDRESS; va < kva_end; ) { + for (va = kva_layout.km_low; va < kva_end; ) { /* * We always write a page, even if it is zero. Each * page written corresponds to 1GB of space @@ -279,9 +279,9 @@ cpu_minidumpsys(struct dumperinfo *di, const struct minidumpstate *state) mdhdr.msgbufsize = mbp->msg_size; mdhdr.bitmapsize = round_page(BITSET_SIZE(vm_page_dump_pages)); mdhdr.pmapsize = pmapsize; - mdhdr.kernbase = VM_MIN_KERNEL_ADDRESS; - mdhdr.dmapbase = DMAP_MIN_ADDRESS; - mdhdr.dmapend = DMAP_MAX_ADDRESS; + mdhdr.kernbase = kva_layout.km_low; + mdhdr.dmapbase = kva_layout.dmap_low; + mdhdr.dmapend = kva_layout.dmap_high; mdhdr.dumpavailsize = round_page(sizeof(dump_avail)); dump_init_header(di, &kdh, KERNELDUMPMAGIC, KERNELDUMP_AMD64_VERSION, @@ -323,7 +323,7 @@ cpu_minidumpsys(struct dumperinfo *di, const struct minidumpstate *state) /* Dump kernel page directory pages */ bzero(fakepd, sizeof(fakepd)); - for (va = VM_MIN_KERNEL_ADDRESS; va < kva_end; va += NBPDP) { + for (va = kva_layout.km_low; va < kva_end; va += NBPDP) { ii = pmap_pml4e_index(va); pml4 = (uint64_t *)PHYS_TO_DMAP(KPML4phys) + ii; pdp = (uint64_t *)PHYS_TO_DMAP(*pml4 & PG_FRAME); diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c index 9c985df13ddf..2c7777e608b9 100644 --- a/sys/amd64/amd64/pmap.c +++ b/sys/amd64/amd64/pmap.c @@ -415,7 +415,7 @@ SYSCTL_INT(_machdep, OID_AUTO, nkpt, CTLFLAG_RD, &nkpt, 0, static int ndmpdp; vm_paddr_t dmaplimit; -vm_offset_t kernel_vm_end = VM_MIN_KERNEL_ADDRESS; +vm_offset_t kernel_vm_end = VM_MIN_KERNEL_ADDRESS_LA48; pt_entry_t pg_nx; static SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, @@ -475,11 +475,36 @@ _Static_assert(DMPML4I + NDMPML4E <= KMSANSHADPML4I, "direct map overflow"); static pml4_entry_t *kernel_pml4; static u_int64_t DMPDphys; /* phys addr of direct mapped level 2 */ static u_int64_t DMPDPphys; /* phys addr of direct mapped level 3 */ +static u_int64_t DMPML4phys; /* ... 
level 4, for la57 */ static int ndmpdpphys; /* number of DMPDPphys pages */ vm_paddr_t kernphys; /* phys addr of start of bootstrap data */ vm_paddr_t KERNend; /* and the end */ +struct kva_layout_s kva_layout = { + .kva_min = KV4ADDR(PML4PML4I, 0, 0, 0), + .dmap_low = KV4ADDR(DMPML4I, 0, 0, 0), + .dmap_high = KV4ADDR(DMPML4I + NDMPML4E, 0, 0, 0), + .lm_low = KV4ADDR(LMSPML4I, 0, 0, 0), + .lm_high = KV4ADDR(LMEPML4I + 1, 0, 0, 0), + .km_low = KV4ADDR(KPML4BASE, 0, 0, 0), + .km_high = KV4ADDR(KPML4BASE + NKPML4E - 1, NPDPEPG - 1, + NPDEPG - 1, NPTEPG - 1), + .rec_pt = KV4ADDR(PML4PML4I, 0, 0, 0), +}; + +struct kva_layout_s kva_layout_la57 = { + .kva_min = KV5ADDR(NPML5EPG / 2, 0, 0, 0, 0), /* == rec_pt */ + .dmap_low = KV5ADDR(DMPML5I, 0, 0, 0, 0), + .dmap_high = KV5ADDR(DMPML5I + NDMPML5E, 0, 0, 0, 0), + .lm_low = KV4ADDR(LMSPML4I, 0, 0, 0), + .lm_high = KV4ADDR(LMEPML4I + 1, 0, 0, 0), + .km_low = KV4ADDR(KPML4BASE, 0, 0, 0), + .km_high = KV4ADDR(KPML4BASE + NKPML4E - 1, NPDPEPG - 1, + NPDEPG - 1, NPTEPG - 1), + .rec_pt = KV5ADDR(PML5PML5I, 0, 0, 0, 0), +}; + /* * pmap_mapdev support pre initialization (i.e. console) */ @@ -549,8 +574,8 @@ static int pmap_flags = PMAP_PDE_SUPERPAGE; /* flags for x86 pmaps */ static vmem_t *large_vmem; static u_int lm_ents; -#define PMAP_ADDRESS_IN_LARGEMAP(va) ((va) >= LARGEMAP_MIN_ADDRESS && \ - (va) < LARGEMAP_MIN_ADDRESS + NBPML4 * (u_long)lm_ents) +#define PMAP_ADDRESS_IN_LARGEMAP(va) ((va) >= kva_layout.lm_low && \ + (va) < kva_layout.lm_high) int pmap_pcid_enabled = 1; SYSCTL_INT(_vm_pmap, OID_AUTO, pcid_enabled, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, @@ -1336,7 +1361,7 @@ static pdp_entry_t *pmap_pti_pdpe(vm_offset_t va); static pd_entry_t *pmap_pti_pde(vm_offset_t va); static void pmap_pti_wire_pte(void *pte); static int pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva, - bool remove_pt, struct spglist *free, struct rwlock **lockp); + bool demote_kpde, struct spglist *free, struct rwlock **lockp); static int pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t sva, pd_entry_t ptepde, struct spglist *free, struct rwlock **lockp); static vm_page_t pmap_remove_pt_page(pmap_t pmap, vm_offset_t va); @@ -1722,7 +1747,7 @@ create_pagetables(vm_paddr_t *firstaddr) { pd_entry_t *pd_p; pdp_entry_t *pdp_p; - pml4_entry_t *p4_p; + pml4_entry_t *p4_p, *p4d_p; pml5_entry_t *p5_p; uint64_t DMPDkernphys; vm_paddr_t pax; @@ -1732,7 +1757,7 @@ create_pagetables(vm_paddr_t *firstaddr) vm_offset_t kasankernbase; int kasankpdpi, kasankpdi, nkasanpte; #endif - int i, j, ndm1g, nkpdpe, nkdmpde; + int i, j, ndm1g, nkpdpe, nkdmpde, ndmpml4phys; TSENTER(); /* Allocate page table pages for the direct map */ @@ -1740,15 +1765,30 @@ create_pagetables(vm_paddr_t *firstaddr) if (ndmpdp < 4) /* Minimum 4GB of dirmap */ ndmpdp = 4; ndmpdpphys = howmany(ndmpdp, NPDPEPG); - if (ndmpdpphys > NDMPML4E) { - /* - * Each NDMPML4E allows 512 GB, so limit to that, - * and then readjust ndmpdp and ndmpdpphys. 
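
The LA57 branch above adds one more level of bookkeeping to the direct-map sizing: the ndmpdp 1 GB PDP entries are grouped into PDP pages (ndmpdpphys), which on LA57 are in turn grouped into PML4 pages (ndmpml4phys) and clamped against the NDMPML5E PML5 slots. A runnable rendition of the arithmetic, using an illustrative 16 TB machine, shows why the LA48 clamp of NDMPML4E x 512 GB = 4 TB triggers while the LA57 clamp of NDMPML5E x 256 TB = 8 PB does not:

#include <stdio.h>

#define howmany(x, y)	(((x) + ((y) - 1)) / (y))
#define NPDPEPG		512
#define NPML4EPG	512
#define NDMPML4E	8
#define NDMPML5E	32

int
main(void)
{
	long gb = 16384;	/* example: 16 TB of RAM */
	long ndmpdp, ndmpdpphys, ndmpml4phys;

	ndmpdp = gb;				/* one 1 GB PDP entry per GB */
	if (ndmpdp < 4)
		ndmpdp = 4;			/* minimum 4 GB of dirmap */
	ndmpdpphys = howmany(ndmpdp, NPDPEPG);	/* PDP pages needed */
	ndmpml4phys = howmany(ndmpdpphys, NPML4EPG); /* LA57: PML4 pages */
	printf("%ld GB -> %ld PDP pages, %ld PML4 pages\n",
	    gb, ndmpdpphys, ndmpml4phys);
	printf("LA48 fits: %s, LA57 fits: %s\n",
	    ndmpdpphys <= NDMPML4E ? "yes" : "no",
	    ndmpml4phys <= NDMPML5E ? "yes" : "no");
	return (0);
}
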
- */ - printf("NDMPML4E limits system to %d GB\n", NDMPML4E * 512); - Maxmem = atop(NDMPML4E * NBPML4); - ndmpdpphys = NDMPML4E; - ndmpdp = NDMPML4E * NPDEPG; + if (la57) { + ndmpml4phys = howmany(ndmpdpphys, NPML4EPG); + if (ndmpml4phys > NDMPML5E) { + printf("NDMPML5E limits system to %ld GB\n", + (u_long)NDMPML5E * NBPML5 / 1024 / 1024 / 1024); + Maxmem = atop(NDMPML5E * NBPML5); + ndmpml4phys = NDMPML5E; + ndmpdpphys = ndmpml4phys * NPML4EPG; + ndmpdp = ndmpdpphys * NPDEPG; + } + DMPML4phys = allocpages(firstaddr, ndmpml4phys); + } else { + if (ndmpdpphys > NDMPML4E) { + /* + * Each NDMPML4E allows 512 GB, so limit to + * that, and then readjust ndmpdp and + * ndmpdpphys. + */ + printf("NDMPML4E limits system to %d GB\n", + NDMPML4E * 512); + Maxmem = atop(NDMPML4E * NBPML4); + ndmpdpphys = NDMPML4E; + ndmpdp = NDMPML4E * NPDEPG; + } } DMPDPphys = allocpages(firstaddr, ndmpdpphys); ndm1g = 0; @@ -1773,7 +1813,13 @@ create_pagetables(vm_paddr_t *firstaddr) dmaplimit = (vm_paddr_t)ndmpdp << PDPSHIFT; /* Allocate pages. */ + if (la57) { + KPML5phys = allocpages(firstaddr, 1); + p5_p = (pml5_entry_t *)KPML5phys; + } KPML4phys = allocpages(firstaddr, 1); + p4_p = (pml4_entry_t *)KPML4phys; + KPDPphys = allocpages(firstaddr, NKPML4E); #ifdef KASAN KASANPDPphys = allocpages(firstaddr, NKASANPML4E); @@ -1893,6 +1939,16 @@ create_pagetables(vm_paddr_t *firstaddr) } /* + * Connect the Direct Map slots up to the PML4. + * pml5 entries for DMAP are handled below in global pml5 loop. + */ + p4d_p = la57 ? (pml4_entry_t *)DMPML4phys : &p4_p[DMPML4I]; + for (i = 0; i < ndmpdpphys; i++) { + p4d_p[i] = (DMPDPphys + ptoa(i)) | X86_PG_RW | X86_PG_V | + pg_nx; + } + + /* * Instead of using a 1G page for the memory containing the kernel, * use 2M pages with read-only and no-execute permissions. (If using 1G * pages, this will partially overwrite the PDPEs above.) @@ -1911,11 +1967,6 @@ create_pagetables(vm_paddr_t *firstaddr) } } - /* And recursively map PML4 to itself in order to get PTmap */ - p4_p = (pml4_entry_t *)KPML4phys; - p4_p[PML4PML4I] = KPML4phys; - p4_p[PML4PML4I] |= X86_PG_RW | X86_PG_V | pg_nx; - #ifdef KASAN /* Connect the KASAN shadow map slots up to the PML4. */ for (i = 0; i < NKASANPML4E; i++) { @@ -1938,25 +1989,15 @@ create_pagetables(vm_paddr_t *firstaddr) } #endif - /* Connect the Direct Map slots up to the PML4. */ - for (i = 0; i < ndmpdpphys; i++) { - p4_p[DMPML4I + i] = DMPDPphys + ptoa(i); - p4_p[DMPML4I + i] |= X86_PG_RW | X86_PG_V | pg_nx; - } - /* Connect the KVA slots up to the PML4 */ for (i = 0; i < NKPML4E; i++) { p4_p[KPML4BASE + i] = KPDPphys + ptoa(i); p4_p[KPML4BASE + i] |= X86_PG_RW | X86_PG_V; } - kernel_pml4 = (pml4_entry_t *)PHYS_TO_DMAP(KPML4phys); - if (la57) { /* XXXKIB bootstrap KPML5phys page is lost */ - KPML5phys = allocpages(firstaddr, 1); - for (i = 0, p5_p = (pml5_entry_t *)KPML5phys; i < NPML5EPG; - i++) { + for (i = 0; i < NPML5EPG; i++) { if (i == PML5PML5I) { /* * Recursively map PML5 to itself in @@ -1964,6 +2005,10 @@ create_pagetables(vm_paddr_t *firstaddr) */ p5_p[i] = KPML5phys | X86_PG_RW | X86_PG_A | X86_PG_M | X86_PG_V | pg_nx; + } else if (i >= DMPML5I && i < DMPML5I + NDMPML5E) { + /* Connect DMAP pml4 pages to PML5. 
*/ + p5_p[i] = (DMPML4phys + ptoa(i - DMPML5I)) | + X86_PG_RW | X86_PG_V | pg_nx; } else if (i == pmap_pml5e_index(UPT_MAX_ADDRESS)) { p5_p[i] = KPML4phys | X86_PG_RW | X86_PG_A | X86_PG_M | X86_PG_V; @@ -1971,6 +2016,10 @@ create_pagetables(vm_paddr_t *firstaddr) p5_p[i] = 0; } } + } else { + /* Recursively map PML4 to itself in order to get PTmap */ + p4_p[PML4PML4I] = KPML4phys; + p4_p[PML4PML4I] |= X86_PG_RW | X86_PG_V | pg_nx; } TSEXIT(); } @@ -2024,7 +2073,7 @@ pmap_bootstrap(vm_paddr_t *firstaddr) */ virtual_avail = (vm_offset_t)KERNSTART + round_2mpage(KERNend - (vm_paddr_t)kernphys); - virtual_end = VM_MAX_KERNEL_ADDRESS; + virtual_end = kva_layout.km_high; /* * Enable PG_G global pages, then switch to the kernel page @@ -2046,9 +2095,13 @@ pmap_bootstrap(vm_paddr_t *firstaddr) * Initialize the kernel pmap (which is statically allocated). * Count bootstrap data as being resident in case any of this data is * later unmapped (using pmap_remove()) and freed. + * + * DMAP_TO_PHYS()/PHYS_TO_DMAP() are functional only after + * kva_layout is fixed. */ PMAP_LOCK_INIT(kernel_pmap); if (la57) { + kva_layout = kva_layout_la57; vtoptem = ((1ul << (NPTEPGSHIFT + NPDEPGSHIFT + NPDPEPGSHIFT + NPML4EPGSHIFT + NPML5EPGSHIFT)) - 1) << 3; PTmap = (vm_offset_t)P5Tmap; @@ -2059,6 +2112,7 @@ pmap_bootstrap(vm_paddr_t *firstaddr) kernel_pmap->pm_cr3 = KPML5phys; pmap_pt_page_count_adj(kernel_pmap, 1); /* top-level page */ } else { + kernel_pml4 = (pml4_entry_t *)PHYS_TO_DMAP(KPML4phys); kernel_pmap->pm_pmltop = kernel_pml4; kernel_pmap->pm_cr3 = KPML4phys; } @@ -2420,6 +2474,7 @@ pmap_init(void) { struct pmap_preinit_mapping *ppim; vm_page_t m, mpte; + pml4_entry_t *pml4e; int error, i, ret, skz63; /* L1TF, reserve page @0 unconditionally */ @@ -2559,18 +2614,19 @@ pmap_init(void) printf("pmap: large map %u PML4 slots (%lu GB)\n", lm_ents, (u_long)lm_ents * (NBPML4 / 1024 / 1024 / 1024)); if (lm_ents != 0) { - large_vmem = vmem_create("large", LARGEMAP_MIN_ADDRESS, - (vmem_size_t)lm_ents * NBPML4, PAGE_SIZE, 0, M_WAITOK); + large_vmem = vmem_create("large", kva_layout.lm_low, + (vmem_size_t)kva_layout.lm_high - kva_layout.lm_low, + PAGE_SIZE, 0, M_WAITOK); if (large_vmem == NULL) { printf("pmap: cannot create large map\n"); lm_ents = 0; } for (i = 0; i < lm_ents; i++) { m = pmap_large_map_getptp_unlocked(); - /* XXXKIB la57 */ - kernel_pml4[LMSPML4I + i] = X86_PG_V | - X86_PG_RW | X86_PG_A | X86_PG_M | pg_nx | - VM_PAGE_TO_PHYS(m); + pml4e = pmap_pml4e(kernel_pmap, kva_layout.lm_low + + (u_long)i * NBPML4); + *pml4e = X86_PG_V | X86_PG_RW | X86_PG_A | X86_PG_M | + pg_nx | VM_PAGE_TO_PHYS(m); } } } @@ -3899,7 +3955,7 @@ pmap_kextract(vm_offset_t va) pd_entry_t pde; vm_paddr_t pa; - if (va >= DMAP_MIN_ADDRESS && va < DMAP_MAX_ADDRESS) { + if (va >= kva_layout.dmap_low && va < kva_layout.dmap_high) { pa = DMAP_TO_PHYS(va); } else if (PMAP_ADDRESS_IN_LARGEMAP(va)) { pa = pmap_large_map_kextract(va); @@ -4040,7 +4096,7 @@ pmap_qremove(vm_offset_t sva, int count) * enough to one of those pmap_enter() calls for it to * be caught up in a promotion. 
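
Both branches end by installing a self-referential entry (PML4PML4I on LA48, PML5PML5I on LA57), which is what makes PTmap/P5Tmap work: with the top-level table mapping itself, the PTE for any VA sits at a fixed arithmetic offset inside one top-level slot, per the vtoptem/PTmap setup in pmap_bootstrap() below. A minimal userspace rendition of the LA48 arithmetic, assuming PML4PML4I = 256 as in pmap.h; canon48(), ptmap_base() and vtopte48() are illustrative helper names, not source identifiers:

#include <stdint.h>
#include <stdio.h>

#define PML4PML4I	256UL	/* recursive slot index, as in pmap.h */
#define PAGE_SHIFT	12

/* Sign-extend a 48-bit canonical address. */
static uint64_t
canon48(uint64_t va)
{
	return ((uint64_t)(((int64_t)(va << 16)) >> 16));
}

/* Base of the recursive PTE window: slot 256 -> 0xffff800000000000. */
static uint64_t
ptmap_base(void)
{
	return (canon48(PML4PML4I << 39));
}

/*
 * vtopte() arithmetic for LA48: one right shift and a mask yield the VA
 * of the PTE mapping 'va'.  vtoptem is 4 x 9 index bits, shifted left 3
 * because each PTE is 8 bytes; the la57 path in the hunk above simply
 * adds NPML5EPGSHIFT (one more 9) and rebases onto P5Tmap.
 */
static uint64_t
vtopte48(uint64_t va)
{
	uint64_t vtoptem = ((1UL << (9 + 9 + 9 + 9)) - 1) << 3;

	return (ptmap_base() + ((va >> (PAGE_SHIFT - 3)) & vtoptem));
}

int
main(void)
{
	printf("PTmap = %#llx\n", (unsigned long long)ptmap_base());
	printf("vtopte(KERNBASE) = %#llx\n",
	    (unsigned long long)vtopte48(0xffffffff80000000ULL));
	return (0);
}
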
*/ - KASSERT(va >= VM_MIN_KERNEL_ADDRESS, ("usermode va %lx", va)); + KASSERT(va >= kva_layout.km_low, ("usermode va %lx", va)); KASSERT((*vtopde(va) & X86_PG_PS) == 0, ("pmap_qremove on promoted va %#lx", va)); @@ -4328,21 +4384,13 @@ void pmap_pinit_pml5(vm_page_t pml5pg) { pml5_entry_t *pm_pml5; + int i; pm_pml5 = (pml5_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(pml5pg)); - - /* - * Add pml5 entry at top of KVA pointing to existing pml4 table, - * entering all existing kernel mappings into level 5 table. - */ - pm_pml5[pmap_pml5e_index(UPT_MAX_ADDRESS)] = KPML4phys | X86_PG_V | - X86_PG_RW | X86_PG_A | X86_PG_M; - - /* - * Install self-referential address mapping entry. - */ - pm_pml5[PML5PML5I] = VM_PAGE_TO_PHYS(pml5pg) | - X86_PG_RW | X86_PG_V | X86_PG_M | X86_PG_A; + for (i = 0; i < NPML5EPG / 2; i++) + pm_pml5[i] = 0; + for (; i < NPML5EPG; i++) + pm_pml5[i] = kernel_pmap->pm_pmltop[i]; } static void @@ -4899,8 +4947,8 @@ pmap_release(pmap_t pmap) m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pmap->pm_pmltop)); if (pmap_is_la57(pmap)) { - pmap->pm_pmltop[pmap_pml5e_index(UPT_MAX_ADDRESS)] = 0; - pmap->pm_pmltop[PML5PML5I] = 0; + for (i = NPML5EPG / 2; i < NPML5EPG; i++) + pmap->pm_pmltop[i] = 0; } else { for (i = 0; i < NKPML4E; i++) /* KVA */ pmap->pm_pmltop[KPML4BASE + i] = 0; @@ -4942,7 +4990,7 @@ pmap_release(pmap_t pmap) static int kvm_size(SYSCTL_HANDLER_ARGS) { - unsigned long ksize = VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS; + unsigned long ksize = kva_layout.km_high - kva_layout.km_low; return sysctl_handle_long(oidp, &ksize, 0, req); } @@ -4953,7 +5001,7 @@ SYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_LONG | CTLFLAG_RD | CTLFLAG_MPSAFE, static int kvm_free(SYSCTL_HANDLER_ARGS) { - unsigned long kfree = VM_MAX_KERNEL_ADDRESS - kernel_vm_end; + unsigned long kfree = kva_layout.km_high - kernel_vm_end; return sysctl_handle_long(oidp, &kfree, 0, req); } @@ -5031,7 +5079,7 @@ pmap_page_array_startup(long pages) vm_page_array_size = pages; - start = VM_MIN_KERNEL_ADDRESS; + start = kva_layout.km_low; end = start + pages * sizeof(struct vm_page); for (va = start; va < end; va += NBPDR) { pfn = first_page + (va - start) / sizeof(struct vm_page); @@ -6067,8 +6115,8 @@ pmap_demote_pde_mpte(pmap_t pmap, pd_entry_t *pde, vm_offset_t va, * so the direct map region is the only part of the * kernel address space that must be handled here. */ - KASSERT(!in_kernel || (va >= DMAP_MIN_ADDRESS && - va < DMAP_MAX_ADDRESS), + KASSERT(!in_kernel || (va >= kva_layout.dmap_low && + va < kva_layout.dmap_high), ("pmap_demote_pde: No saved mpte for va %#lx", va)); /* @@ -6165,8 +6213,7 @@ pmap_demote_pde_mpte(pmap_t pmap, pd_entry_t *pde, vm_offset_t va, * pmap_remove_kernel_pde: Remove a kernel superpage mapping. 
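
The pmap_pinit_pml5() rewrite just below stops hand-installing two entries (the KPML4phys alias and the self-map): because every kernel region on LA57, including the new DMAP PML5 slots and the recursive slot, now lives in the upper half of the PML5, a new pmap can zero the user half and copy the kernel half from kernel_pmap wholesale. Copied top-level entries alias the same lower-level tables, so kernel mappings are shared rather than duplicated. A toy model of that sharing, with NTOP standing in for NPML5EPG:

#include <stdint.h>
#include <stdio.h>

#define NTOP 8	/* stand-in for NPML5EPG (512) */

/* Kernel top-level table; nonzero entries mark kernel regions. */
static uint64_t kernel_top[NTOP] = { 0, 0, 0, 0, 111, 222, 333, 444 };

/*
 * Mirrors the new pmap_pinit_pml5(): zero the low (user) half, copy the
 * high (kernel) half from the kernel pmap's top-level table.
 */
static void
pinit_top(uint64_t *utop)
{
	int i;

	for (i = 0; i < NTOP / 2; i++)
		utop[i] = 0;
	for (; i < NTOP; i++)
		utop[i] = kernel_top[i];
}

int
main(void)
{
	uint64_t utop[NTOP];

	pinit_top(utop);
	printf("slot 5 shared: %llu\n", (unsigned long long)utop[5]);
	return (0);
}
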
*/ static void -pmap_remove_kernel_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va, - bool remove_pt) +pmap_remove_kernel_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va) { pd_entry_t newpde; vm_paddr_t mptepa; @@ -6174,12 +6221,8 @@ pmap_remove_kernel_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va, KASSERT(pmap == kernel_pmap, ("pmap %p is not kernel_pmap", pmap)); PMAP_LOCK_ASSERT(pmap, MA_OWNED); - if (remove_pt) - mpte = pmap_remove_pt_page(pmap, va); - else - mpte = vm_radix_lookup(&pmap->pm_root, pmap_pde_pindex(va)); - if (mpte == NULL) - panic("pmap_remove_kernel_pde: Missing pt page."); + mpte = pmap_remove_pt_page(pmap, va); + KASSERT(mpte != NULL, ("pmap_remove_kernel_pde: missing pt page")); mptepa = VM_PAGE_TO_PHYS(mpte); newpde = mptepa | X86_PG_M | X86_PG_A | X86_PG_RW | X86_PG_V; @@ -6209,7 +6252,7 @@ pmap_remove_kernel_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va, * pmap_remove_pde: do the things to unmap a superpage in a process */ static int -pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva, bool remove_pt, +pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva, bool demote_kpde, struct spglist *free, struct rwlock **lockp) { struct md_page *pvh; @@ -6249,9 +6292,7 @@ pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva, bool remove_pt, pmap_delayed_invl_page(m); } } - if (pmap == kernel_pmap) { - pmap_remove_kernel_pde(pmap, pdq, sva, remove_pt); - } else { + if (pmap != kernel_pmap) { mpte = pmap_remove_pt_page(pmap, sva); if (mpte != NULL) { KASSERT(vm_page_any_valid(mpte), @@ -6262,6 +6303,14 @@ pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva, bool remove_pt, mpte->ref_count = 0; pmap_add_delayed_free_list(mpte, free, false); } + } else if (demote_kpde) { + pmap_remove_kernel_pde(pmap, pdq, sva); + } else { + mpte = vm_radix_lookup(&pmap->pm_root, pmap_pde_pindex(sva)); + if (vm_page_any_valid(mpte)) { + mpte->valid = 0; + pmap_zero_page(mpte); + } } return (pmap_unuse_pt(pmap, sva, *pmap_pdpe(pmap, sva), free)); } @@ -7183,7 +7232,7 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, PG_RW = pmap_rw_bit(pmap); va = trunc_page(va); - KASSERT(va <= VM_MAX_KERNEL_ADDRESS, ("pmap_enter: toobig")); + KASSERT(va <= kva_layout.km_high, ("pmap_enter: toobig")); KASSERT(va < UPT_MIN_ADDRESS || va >= UPT_MAX_ADDRESS, ("pmap_enter: invalid to pmap_enter page table pages (va: 0x%lx)", va)); @@ -7573,8 +7622,8 @@ pmap_enter_pde(pmap_t pmap, vm_offset_t va, pd_entry_t newpde, u_int flags, * the mapping is not from kernel_pmap, then * a reserved PT page could be freed. */ - (void)pmap_remove_pde(pmap, pde, va, - pmap != kernel_pmap, &free, lockp); + (void)pmap_remove_pde(pmap, pde, va, false, &free, + lockp); if ((oldpde & PG_G) == 0) pmap_invalidate_pde_page(pmap, va, oldpde); } else { @@ -7584,10 +7633,9 @@ pmap_enter_pde(pmap_t pmap, vm_offset_t va, pd_entry_t newpde, u_int flags, * before any changes to mappings are * made. Abort on failure. 
*/ - mt = PHYS_TO_VM_PAGE(*pde & PG_FRAME); - if (pmap_insert_pt_page(pmap, mt, false, false)) { - if (pdpg != NULL) - pdpg->ref_count--; + mt = PHYS_TO_VM_PAGE(oldpde & PG_FRAME); + if (pmap_insert_pt_page(pmap, mt, false, + false)) { CTR1(KTR_PMAP, "pmap_enter_pde: cannot ins kern ptp va %#lx", va); @@ -9550,7 +9598,7 @@ pmap_unmapdev(void *p, vm_size_t size) va = (vm_offset_t)p; /* If we gave a direct map region in pmap_mapdev, do nothing */ - if (va >= DMAP_MIN_ADDRESS && va < DMAP_MAX_ADDRESS) + if (va >= kva_layout.dmap_low && va < kva_layout.dmap_high) return; offset = va & PAGE_MASK; size = round_page(offset + size); @@ -9649,6 +9697,8 @@ pmap_demote_pdpe(pmap_t pmap, pdp_entry_t *pdpe, vm_offset_t va, vm_page_t m) void pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma) { + if (m->md.pat_mode == ma) + return; m->md.pat_mode = ma; @@ -9668,6 +9718,9 @@ pmap_page_set_memattr_noflush(vm_page_t m, vm_memattr_t ma) { int error; + if (m->md.pat_mode == ma) + return; + m->md.pat_mode = ma; if ((m->flags & PG_FICTITIOUS) != 0) @@ -9724,7 +9777,7 @@ pmap_change_prot(vm_offset_t va, vm_size_t size, vm_prot_t prot) int error; /* Only supported within the kernel map. */ - if (va < VM_MIN_KERNEL_ADDRESS) + if (va < kva_layout.km_low) return (EINVAL); PMAP_LOCK(kernel_pmap); @@ -9755,7 +9808,7 @@ pmap_change_props_locked(vm_offset_t va, vm_size_t size, vm_prot_t prot, * Only supported on kernel virtual addresses, including the direct * map but excluding the recursive map. */ - if (base < DMAP_MIN_ADDRESS) + if (base < kva_layout.dmap_low) return (EINVAL); /* @@ -9778,7 +9831,7 @@ pmap_change_props_locked(vm_offset_t va, vm_size_t size, vm_prot_t prot, pte_bits |= X86_PG_RW; } if ((prot & VM_PROT_EXECUTE) == 0 || - va < VM_MIN_KERNEL_ADDRESS) { + va < kva_layout.km_low) { pde_bits |= pg_nx; pte_bits |= pg_nx; } @@ -9874,7 +9927,7 @@ pmap_change_props_locked(vm_offset_t va, vm_size_t size, vm_prot_t prot, pmap_pte_props(pdpe, pde_bits, pde_mask); changed = true; } - if (tmpva >= VM_MIN_KERNEL_ADDRESS && + if (tmpva >= kva_layout.km_low && (*pdpe & PG_PS_FRAME) < dmaplimit) { if (pa_start == pa_end) { /* Start physical address run. */ @@ -9904,7 +9957,7 @@ pmap_change_props_locked(vm_offset_t va, vm_size_t size, vm_prot_t prot, pmap_pte_props(pde, pde_bits, pde_mask); changed = true; } - if (tmpva >= VM_MIN_KERNEL_ADDRESS && + if (tmpva >= kva_layout.km_low && (*pde & PG_PS_FRAME) < dmaplimit) { if (pa_start == pa_end) { /* Start physical address run. */ @@ -9932,7 +9985,7 @@ pmap_change_props_locked(vm_offset_t va, vm_size_t size, vm_prot_t prot, pmap_pte_props(pte, pte_bits, pte_mask); changed = true; } - if (tmpva >= VM_MIN_KERNEL_ADDRESS && + if (tmpva >= kva_layout.km_low && (*pte & PG_FRAME) < dmaplimit) { if (pa_start == pa_end) { /* Start physical address run. 
*/ @@ -10904,8 +10957,8 @@ pmap_large_unmap(void *svaa, vm_size_t len) struct spglist spgf; sva = (vm_offset_t)svaa; - if (len == 0 || sva + len < sva || (sva >= DMAP_MIN_ADDRESS && - sva + len <= DMAP_MIN_ADDRESS + dmaplimit)) + if (len == 0 || sva + len < sva || (sva >= kva_layout.dmap_low && + sva + len < kva_layout.dmap_high)) return; SLIST_INIT(&spgf); @@ -11151,11 +11204,10 @@ pmap_large_map_wb(void *svap, vm_size_t len) sva = (vm_offset_t)svap; eva = sva + len; pmap_large_map_wb_fence(); - if (sva >= DMAP_MIN_ADDRESS && eva <= DMAP_MIN_ADDRESS + dmaplimit) { + if (sva >= kva_layout.dmap_low && eva < kva_layout.dmap_high) { pmap_large_map_flush_range(sva, len); } else { - KASSERT(sva >= LARGEMAP_MIN_ADDRESS && - eva <= LARGEMAP_MIN_ADDRESS + lm_ents * NBPML4, + KASSERT(sva >= kva_layout.lm_low && eva < kva_layout.lm_high, ("pmap_large_map_wb: not largemap %#lx %#lx", sva, len)); pmap_large_map_wb_large(sva, eva); } @@ -11196,8 +11248,8 @@ pmap_pti_init(void) VM_OBJECT_WLOCK(pti_obj); pml4_pg = pmap_pti_alloc_page(); pti_pml4 = (pml4_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(pml4_pg)); - for (va = VM_MIN_KERNEL_ADDRESS; va <= VM_MAX_KERNEL_ADDRESS && - va >= VM_MIN_KERNEL_ADDRESS && va > NBPML4; va += NBPML4) { + for (va = kva_layout.km_low; va <= kva_layout.km_high && + va >= kva_layout.km_low && va > NBPML4; va += NBPML4) { pdpe = pmap_pti_pdpe(va); pmap_pti_wire_pte(pdpe); } diff --git a/sys/amd64/amd64/trap.c b/sys/amd64/amd64/trap.c index 09ac0a67dbef..eefddad2f142 100644 --- a/sys/amd64/amd64/trap.c +++ b/sys/amd64/amd64/trap.c @@ -769,7 +769,7 @@ trap_pfault(struct trapframe *frame, bool usermode, int *signo, int *ucode) return (-1); } } - if (eva >= VM_MIN_KERNEL_ADDRESS) { + if (eva >= kva_layout.km_low) { /* * Don't allow user-mode faults in kernel address space. */ diff --git a/sys/amd64/include/param.h b/sys/amd64/include/param.h index 8db314fa034d..1bbb302259d6 100644 --- a/sys/amd64/include/param.h +++ b/sys/amd64/include/param.h @@ -146,8 +146,9 @@ #define amd64_btop(x) ((unsigned long)(x) >> PAGE_SHIFT) #define amd64_ptob(x) ((unsigned long)(x) << PAGE_SHIFT) -#define INKERNEL(va) (((va) >= DMAP_MIN_ADDRESS && (va) < DMAP_MAX_ADDRESS) \ - || ((va) >= VM_MIN_KERNEL_ADDRESS && (va) < VM_MAX_KERNEL_ADDRESS)) +#define INKERNEL(va) \ + (((va) >= kva_layout.dmap_low && (va) < kva_layout.dmap_high) || \ + ((va) >= kva_layout.km_low && (va) < kva_layout.km_high)) #ifdef SMP #define SC_TABLESIZE 1024 /* Must be power of 2. */ diff --git a/sys/amd64/include/pmap.h b/sys/amd64/include/pmap.h index 7d3e91bcd9b9..08e96027a5ed 100644 --- a/sys/amd64/include/pmap.h +++ b/sys/amd64/include/pmap.h @@ -169,11 +169,12 @@ * the recursive page table map. */ #define NDMPML4E 8 +#define NDMPML5E 32 /* - * These values control the layout of virtual memory. The starting address - * of the direct map, which is controlled by DMPML4I, must be a multiple of - * its size. (See the PHYS_TO_DMAP() and DMAP_TO_PHYS() macros.) + * These values control the layout of virtual memory. The starting + * address of the direct map is controlled by DMPML4I on LA48 and + * DMPML5I on LA57. 
* * Note: KPML4I is the index of the (single) level 4 page that maps * the KVA that holds KERNBASE, while KPML4BASE is the index of the @@ -191,6 +192,7 @@ #define KPML4BASE (NPML4EPG-NKPML4E) /* KVM at highest addresses */ #define DMPML4I rounddown(KPML4BASE-NDMPML4E, NDMPML4E) /* Below KVM */ +#define DMPML5I (NPML5EPG / 2 + 1) #define KPML4I (NPML4EPG-1) #define KPDPI (NPDPEPG-2) /* kernbase at -2GB */ @@ -548,6 +550,18 @@ pmap_pml5e_index(vm_offset_t va) return ((va >> PML5SHIFT) & ((1ul << NPML5EPGSHIFT) - 1)); } +struct kva_layout_s { + vm_offset_t kva_min; + vm_offset_t dmap_low; /* DMAP_MIN_ADDRESS */ + vm_offset_t dmap_high; /* DMAP_MAX_ADDRESS */ + vm_offset_t lm_low; /* LARGEMAP_MIN_ADDRESS */ + vm_offset_t lm_high; /* LARGEMAP_MAX_ADDRESS */ + vm_offset_t km_low; /* VM_MIN_KERNEL_ADDRESS */ + vm_offset_t km_high; /* VM_MAX_KERNEL_ADDRESS */ + vm_offset_t rec_pt; +}; +extern struct kva_layout_s kva_layout; + #endif /* !LOCORE */ #endif /* !_MACHINE_PMAP_H_ */ diff --git a/sys/amd64/include/vmparam.h b/sys/amd64/include/vmparam.h index 0cd9bb4fa7a4..e4cc05cbb889 100644 --- a/sys/amd64/include/vmparam.h +++ b/sys/amd64/include/vmparam.h @@ -163,6 +163,7 @@ * Virtual addresses of things. Derived from the page directory and * page table indexes from pmap.h for precision. * + * LA48: * 0x0000000000000000 - 0x00007fffffffffff user map * 0x0000800000000000 - 0xffff7fffffffffff does not exist (hole) * 0xffff800000000000 - 0xffff804020100fff recursive page table (512GB slot) @@ -175,18 +176,29 @@ * 0xfffffc0000000000 - 0xfffffdffffffffff 2TB KMSAN shadow map, optional * 0xfffffe0000000000 - 0xffffffffffffffff 2TB kernel map * + * LA57: + * 0x0000000000000000 - 0x00ffffffffffffff user map + * 0x0100000000000000 - 0xf0ffffffffffffff does not exist (hole) + * 0xff00000000000000 - 0xff00ffffffffffff recursive page table (2048TB slot) + * 0xff01000000000000 - 0xff20ffffffffffff direct map (32 x 2048TB slots) + * 0xff21000000000000 - 0xffff807fffffffff unused + * 0xffff808000000000 - 0xffff847fffffffff large map (can be tuned up) + * 0xffff848000000000 - 0xfffff77fffffffff unused (large map extends there) + * 0xfffff60000000000 - 0xfffff7ffffffffff 2TB KMSAN origin map, optional + * 0xfffff78000000000 - 0xfffff7bfffffffff 512GB KASAN shadow map, optional + * 0xfffff80000000000 - 0xfffffbffffffffff 4TB unused + * 0xfffffc0000000000 - 0xfffffdffffffffff 2TB KMSAN shadow map, optional + * 0xfffffe0000000000 - 0xffffffffffffffff 2TB kernel map + * * Within the kernel map: * * 0xfffffe0000000000 vm_page_array * 0xffffffff80000000 KERNBASE */ -#define VM_MIN_KERNEL_ADDRESS KV4ADDR(KPML4BASE, 0, 0, 0) -#define VM_MAX_KERNEL_ADDRESS KV4ADDR(KPML4BASE + NKPML4E - 1, \ - NPDPEPG-1, NPDEPG-1, NPTEPG-1) - -#define DMAP_MIN_ADDRESS KV4ADDR(DMPML4I, 0, 0, 0) -#define DMAP_MAX_ADDRESS KV4ADDR(DMPML4I + NDMPML4E, 0, 0, 0) +#define VM_MIN_KERNEL_ADDRESS_LA48 KV4ADDR(KPML4BASE, 0, 0, 0) +#define VM_MIN_KERNEL_ADDRESS kva_layout.km_low +#define VM_MAX_KERNEL_ADDRESS kva_layout.km_high #define KASAN_MIN_ADDRESS KV4ADDR(KASANPML4I, 0, 0, 0) #define KASAN_MAX_ADDRESS KV4ADDR(KASANPML4I + NKASANPML4E, 0, 0, 0) @@ -199,9 +211,6 @@ #define KMSAN_ORIG_MAX_ADDRESS KV4ADDR(KMSANORIGPML4I + NKMSANORIGPML4E, \ 0, 0, 0) -#define LARGEMAP_MIN_ADDRESS KV4ADDR(LMSPML4I, 0, 0, 0) -#define LARGEMAP_MAX_ADDRESS KV4ADDR(LMEPML4I + 1, 0, 0, 0) - /* * Formally kernel mapping starts at KERNBASE, but kernel linker * script leaves first PDE reserved. 
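
The DMPML5I placement can be checked by hand: slot NPML5EPG/2 + 1 = 257 of 512, each slot spanning 2^48 bytes, lands exactly on the 0xff01000000000000 direct-map base in the LA57 map above, and DMPML5I + NDMPML5E on the 0xff21000000000000 boundary. A runnable check, where kv5addr() is a stand-in for the KV5ADDR() macro:

#include <stdint.h>
#include <stdio.h>

#define NPML5EPG	512
#define DMPML5I		(NPML5EPG / 2 + 1)	/* from the hunk above */
#define NDMPML5E	32

/* KV5ADDR(l5, 0, 0, 0, 0): index at bits 56:48, sign-extended from bit 56. */
static uint64_t
kv5addr(uint64_t l5)
{
	return ((uint64_t)(((int64_t)(l5 << 55)) >> 7));
}

int
main(void)
{
	printf("dmap_low  = %#llx\n",	/* 0xff01000000000000 */
	    (unsigned long long)kv5addr(DMPML5I));
	printf("dmap_high = %#llx\n",	/* 0xff21000000000000 */
	    (unsigned long long)kv5addr(DMPML5I + NDMPML5E));
	return (0);
}
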
For legacy BIOS boot, kernel is @@ -239,21 +248,21 @@ * vt fb startup needs to be reworked. */ #define PHYS_IN_DMAP(pa) (dmaplimit == 0 || (pa) < dmaplimit) -#define VIRT_IN_DMAP(va) ((va) >= DMAP_MIN_ADDRESS && \ - (va) < (DMAP_MIN_ADDRESS + dmaplimit)) +#define VIRT_IN_DMAP(va) \ + ((va) >= kva_layout.dmap_low && (va) < kva_layout.dmap_low + dmaplimit) #define PMAP_HAS_DMAP 1 -#define PHYS_TO_DMAP(x) ({ \ +#define PHYS_TO_DMAP(x) __extension__ ({ \ KASSERT(PHYS_IN_DMAP(x), \ ("physical address %#jx not covered by the DMAP", \ (uintmax_t)x)); \ - (x) | DMAP_MIN_ADDRESS; }) + (x) + kva_layout.dmap_low; }) -#define DMAP_TO_PHYS(x) ({ \ +#define DMAP_TO_PHYS(x) __extension__ ({ \ KASSERT(VIRT_IN_DMAP(x), \ ("virtual address %#jx not covered by the DMAP", \ (uintmax_t)x)); \ - (x) & ~DMAP_MIN_ADDRESS; }) + (x) - kva_layout.dmap_low; }) /* * amd64 maps the page array into KVA so that it can be more easily @@ -274,7 +283,7 @@ */ #ifndef VM_KMEM_SIZE_MAX #define VM_KMEM_SIZE_MAX ((VM_MAX_KERNEL_ADDRESS - \ - VM_MIN_KERNEL_ADDRESS + 1) * 3 / 5) + kva_layout.km_low + 1) * 3 / 5) #endif /* initial pagein size of beginning of executable file */ diff --git a/sys/amd64/pt/pt.c b/sys/amd64/pt/pt.c new file mode 100644 index 000000000000..c7b75767680a --- /dev/null +++ b/sys/amd64/pt/pt.c @@ -0,0 +1,978 @@ +/* + * Copyright (c) 2025 Bojan Novkoviฤ <bnovkov@freebsd.org> + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +/* + * hwt(4) Intel Processor Trace (PT) backend + * + * Driver Design Overview + * + * - Since PT is configured on a per-core basis, the driver uses + * 'smp_rendezvous' to start and disable tracing on each target core. + * - PT-specific resources are stored in a 'struct pt_ctx' context structure for + * each traced CPU core or thread. Upon initialization, a ToPA configuration + * is generated for each 'pt_ctx' structure using the HWT tracing buffers. + * The HWT tracing buffer is split into 4K ToPA entries. Currently, each + * 4K ToPA entry is configured to trigger an interrupt after it is filled. + * - The PT driver uses the XSAVE/XRSTOR PT extensions to load and save all + * relevant PT registers. Every time a traced thread is switched + * out or in, its state will be saved to or loaded from its corresponding + * 'pt_ctx' context. + * - When tracing starts, the PT hardware will start writing data into the + * tracing buffer. When a TOPA_INT entry is filled, it will trigger an + * interrupt before continuing. The interrupt handler will then fetch the + * last valid tracing buffer offset and enqueue a HWT_RECORD_BUFFER record. + * The driver is currently configured to use the NMI interrupt line. + * - The userspace PT backend waits for incoming HWT_RECORD_BUFFER records + * and uses the offsets to decode data from the tracing buffer. + * + * Future improvements and limitations + * + * - We currently configure the PT hardware to trigger an interrupt whenever + * a 4K ToPA entry is filled. While this is fine when tracing smaller + * functions or infrequent code paths, this will generate too much interrupt + * traffic when tracing hotter functions. A proper solution for this issue + * should estimate the amount of data generated by the current configuration + * and use it to determine interrupt frequency. + * + * - Support for more tracing options and PT features. 
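
The ToPA layout described in the overview above is built by pt_topa_prepare() further down; its shape is easy to see in isolation. The sketch below builds the same kind of table for npages trace pages: one 4 KiB output region per entry, each tagged TOPA_INT so filling it raises a PMI, and a final TOPA_END entry whose address field points back at the table itself, closing the ring. The entry bit values follow the Intel SDM ToPA entry format; page_pa[] and table_pa stand in for VM_PAGE_TO_PHYS()/vtophys():

#include <stdint.h>
#include <stdlib.h>

#define TOPA_END	(1ULL << 0)	/* link entry: end of table */
#define TOPA_INT	(1ULL << 2)	/* raise PMI when region fills */
#define TOPA_SIZE_4K	(0ULL << 6)	/* region size field: 4 KiB */

static uint64_t *
topa_build(const uint64_t *page_pa, int npages, uint64_t table_pa)
{
	uint64_t *topa;
	int i;

	topa = calloc(npages + 1, sizeof(uint64_t));
	if (topa == NULL)
		return (NULL);
	for (i = 0; i < npages; i++)
		topa[i] = page_pa[i] | TOPA_SIZE_4K | TOPA_INT;
	/* Last entry links back to entry 0, making the buffer circular. */
	topa[npages] = table_pa | TOPA_END;
	return (topa);
}

int
main(void)
{
	uint64_t pages[2] = { 0x100000, 0x101000 };	/* fake phys addrs */
	uint64_t *t = topa_build(pages, 2, 0x200000);

	return (t == NULL);
}
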
+ * + */ + +#include <sys/systm.h> +#include <sys/hwt.h> +#include <sys/kernel.h> +#include <sys/lock.h> +#include <sys/malloc.h> +#include <sys/module.h> +#include <sys/mutex.h> +#include <sys/sdt.h> +#include <sys/smp.h> +#include <sys/taskqueue.h> + +#include <vm/vm.h> +#include <vm/vm_page.h> + +#include <machine/atomic.h> +#include <machine/cpufunc.h> +#include <machine/fpu.h> +#include <machine/smp.h> +#include <machine/specialreg.h> + +#include <x86/apicvar.h> +#include <x86/x86_var.h> + +#include <dev/hwt/hwt_context.h> +#include <dev/hwt/hwt_vm.h> +#include <dev/hwt/hwt_backend.h> +#include <dev/hwt/hwt_config.h> +#include <dev/hwt/hwt_cpu.h> +#include <dev/hwt/hwt_record.h> +#include <dev/hwt/hwt_thread.h> + +#include <amd64/pt/pt.h> + +#ifdef PT_DEBUG +#define dprintf(fmt, ...) printf(fmt, ##__VA_ARGS__) +#else +#define dprintf(fmt, ...) +#endif +#define PT_SUPPORTED_FLAGS \ + (RTIT_CTL_MTCEN | RTIT_CTL_CR3FILTER | RTIT_CTL_DIS_TNT | \ + RTIT_CTL_USER | RTIT_CTL_OS | RTIT_CTL_BRANCHEN) +#define PT_XSAVE_MASK (XFEATURE_ENABLED_X87 | XFEATURE_ENABLED_SSE) +#define PT_XSTATE_BV (PT_XSAVE_MASK | XFEATURE_ENABLED_PT) +#define PT_MAX_IP_RANGES 2 + +#define PT_TOPA_MASK_PTRS 0x7f +#define PT_TOPA_PAGE_MASK 0xffffff80 +#define PT_TOPA_PAGE_SHIFT 7 + +#define CPUID_PT_LEAF 0x14 + +MALLOC_DEFINE(M_PT, "pt", "Intel Processor Trace"); + +SDT_PROVIDER_DEFINE(pt); +SDT_PROBE_DEFINE(pt, , , topa__intr); + +TASKQUEUE_FAST_DEFINE_THREAD(pt); + +static void pt_send_buffer_record(void *arg, int pending __unused); +static int pt_topa_intr(struct trapframe *tf); + +/* + * Intel Processor Trace XSAVE-managed state. + */ +struct pt_ext_area { + uint64_t rtit_ctl; + uint64_t rtit_output_base; + uint64_t rtit_output_mask_ptrs; + uint64_t rtit_status; + uint64_t rtit_cr3_match; + uint64_t rtit_addr0_a; + uint64_t rtit_addr0_b; + uint64_t rtit_addr1_a; + uint64_t rtit_addr1_b; +}; + +struct pt_buffer { + uint64_t *topa_hw; /* ToPA table entries. */ + size_t size; + struct mtx lock; /* Lock for fields below. */ + vm_offset_t offset; + uint64_t wrap_count; + int curpage; +}; + +struct pt_ctx { + int id; + struct pt_buffer buf; /* ToPA buffer metadata */ + struct task task; /* ToPA buffer notification task */ + struct hwt_context *hwt_ctx; + uint8_t *save_area; /* PT XSAVE area */ +}; +/* PT tracing contexts used for CPU mode. */ +static struct pt_ctx *pt_pcpu_ctx; + +enum pt_cpu_state { + PT_DISABLED = 0, + PT_STOPPED, + PT_ACTIVE +}; + +static struct pt_cpu { + struct pt_ctx *ctx; /* active PT tracing context */ + enum pt_cpu_state state; /* used as part of trace stop protocol */ +} *pt_pcpu; + +/* + * PT-related CPUID bits. 
+ */ +static struct pt_cpu_info { + uint32_t l0_eax; + uint32_t l0_ebx; + uint32_t l0_ecx; + uint32_t l1_eax; + uint32_t l1_ebx; + size_t xsave_area_size; + size_t xstate_hdr_offset; + size_t pt_xsave_offset; +} pt_info __read_mostly; + +static bool initialized = false; +static int cpu_mode_ctr = 0; + +static __inline enum pt_cpu_state +pt_cpu_get_state(int cpu_id) +{ + return (atomic_load_int(&pt_pcpu[cpu_id].state)); +} + +static __inline void +pt_cpu_set_state(int cpu_id, enum pt_cpu_state state) +{ + atomic_store_int(&pt_pcpu[cpu_id].state, state); +} + +static __inline struct xstate_hdr * +pt_ctx_get_xstate_hdr(struct pt_ctx *ctx) +{ + return ((struct xstate_hdr *)(ctx->save_area + + pt_info.xstate_hdr_offset)); +} + + +static __inline struct pt_ext_area * +pt_ctx_get_ext_area(struct pt_ctx *ctx) +{ + return ((struct pt_ext_area *)(ctx->save_area + + pt_info.pt_xsave_offset)); +} + +/* + * Updates current trace buffer offset from the + * ToPA MSRs. Records if the trace buffer wrapped. + */ +static __inline void +pt_update_buffer(struct pt_buffer *buf) +{ + uint64_t reg; + int curpage; + + /* Update buffer offset. */ + reg = rdmsr(MSR_IA32_RTIT_OUTPUT_MASK_PTRS); + curpage = (reg & PT_TOPA_PAGE_MASK) >> PT_TOPA_PAGE_SHIFT; + mtx_lock_spin(&buf->lock); + /* Check if the output wrapped. */ + if (buf->curpage > curpage) + buf->wrap_count++; + buf->curpage = curpage; + buf->offset = reg >> 32; + mtx_unlock_spin(&buf->lock); + + dprintf("%s: wrap_cnt: %lu, curpage: %d, offset: %zu\n", __func__, + buf->wrap_count, buf->curpage, buf->offset); +} + +static __inline void +pt_fill_buffer_record(int id, struct pt_buffer *buf, + struct hwt_record_entry *rec) +{ + rec->record_type = HWT_RECORD_BUFFER; + rec->buf_id = id; + rec->curpage = buf->curpage; + rec->offset = buf->offset + (buf->wrap_count * buf->size); +} + +/* + * Enables or disables tracing on curcpu + * using the XSAVE/XRSTOR PT extensions. + */ +static void +pt_cpu_toggle_local(uint8_t *save_area, bool enable) +{ + u_long xcr0, cr0; + u_long xss; + + cr0 = rcr0(); + if (cr0 & CR0_TS) + clts(); + xcr0 = rxcr(XCR0); + if ((xcr0 & PT_XSAVE_MASK) != PT_XSAVE_MASK) + load_xcr(XCR0, xcr0 | PT_XSAVE_MASK); + xss = rdmsr(MSR_IA32_XSS); + wrmsr(MSR_IA32_XSS, xss | XFEATURE_ENABLED_PT); + + if (!enable) { + KASSERT((rdmsr(MSR_IA32_RTIT_CTL) & RTIT_CTL_TRACEEN) != 0, + ("%s: PT is disabled", __func__)); + xsaves(save_area, XFEATURE_ENABLED_PT); + } else { + KASSERT((rdmsr(MSR_IA32_RTIT_CTL) & RTIT_CTL_TRACEEN) == 0, + ("%s: PT is enabled", __func__)); + xrstors(save_area, XFEATURE_ENABLED_PT); + } + wrmsr(MSR_IA32_XSS, xss); + if ((xcr0 & PT_XSAVE_MASK) != PT_XSAVE_MASK) + load_xcr(XCR0, xcr0); + if (cr0 & CR0_TS) + load_cr0(cr0); +} + +/* + * Starts PT tracing on 'curcpu'. + */ +static void +pt_cpu_start(void *dummy) +{ + struct pt_cpu *cpu; + + cpu = &pt_pcpu[curcpu]; + MPASS(cpu->ctx != NULL); + + dprintf("%s: curcpu %d\n", __func__, curcpu); + load_cr4(rcr4() | CR4_XSAVE); + wrmsr(MSR_IA32_RTIT_STATUS, 0); + pt_cpu_set_state(curcpu, PT_ACTIVE); + pt_cpu_toggle_local(cpu->ctx->save_area, true); +} + +/* + * Stops PT tracing on 'curcpu'. + * Updates trace buffer offset to ensure + * any data generated between the last interrupt + * and the trace stop gets picked up by userspace. + */ +static void +pt_cpu_stop(void *dummy) +{ + struct pt_cpu *cpu; + struct pt_ctx *ctx; + + /* Shutdown may occur before PT gets properly configured. 
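
pt_update_buffer() above decodes MSR_IA32_RTIT_OUTPUT_MASK_PTRS into a (table index, region offset) pair: bits 31:7 carry the current ToPA entry number and bits 63:32 the byte offset within that entry's output region, matching the PT_TOPA_PAGE_MASK/PT_TOPA_PAGE_SHIFT defines earlier in the file. A worked decode with a made-up register value:

#include <stdint.h>
#include <stdio.h>

#define PT_TOPA_PAGE_MASK	0xffffff80UL
#define PT_TOPA_PAGE_SHIFT	7

int
main(void)
{
	uint64_t reg = ((uint64_t)0x123 << 32) | (5UL << PT_TOPA_PAGE_SHIFT);
	int curpage = (reg & PT_TOPA_PAGE_MASK) >> PT_TOPA_PAGE_SHIFT;
	uint64_t offset = reg >> 32;

	/* prints: curpage 5 offset 0x123 */
	printf("curpage %d offset %#llx\n", curpage,
	    (unsigned long long)offset);
	return (0);
}
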
*/ + if (pt_cpu_get_state(curcpu) == PT_DISABLED) + return; + + cpu = &pt_pcpu[curcpu]; + ctx = cpu->ctx; + MPASS(ctx != NULL); + dprintf("%s: curcpu %d\n", __func__, curcpu); + + pt_cpu_set_state(curcpu, PT_STOPPED); + pt_cpu_toggle_local(cpu->ctx->save_area, false); + pt_update_buffer(&ctx->buf); +} + +/* + * Prepares the Table of Physical Addresses (ToPA) metadata for 'pt_ctx'. + * The HWT trace buffer is split into 4K ToPA table entries and used + * as a circular buffer, meaning that the last ToPA entry points to + * the first ToPA entry. Each entry is configured to raise an + * interrupt after being filled. + */ +static int +pt_topa_prepare(struct pt_ctx *ctx, struct hwt_vm *vm) +{ + struct pt_buffer *buf; + size_t topa_size; + int i; + + topa_size = TOPA_SIZE_4K; + buf = &ctx->buf; + + KASSERT(buf->topa_hw == NULL, + ("%s: ToPA info already exists", __func__)); + buf->topa_hw = mallocarray(vm->npages + 1, sizeof(uint64_t), M_PT, + M_ZERO | M_WAITOK); + dprintf("%s: ToPA virt addr %p\n", __func__, buf->topa_hw); + buf->size = vm->npages * PAGE_SIZE; + for (i = 0; i < vm->npages; i++) { + buf->topa_hw[i] = VM_PAGE_TO_PHYS(vm->pages[i]) | topa_size; + /* + * XXX: TOPA_INT should ideally be set according to + * expected amount of incoming trace data. Too few TOPA_INT + * entries will not trigger interrupts often enough when tracing + * smaller functions. + */ + buf->topa_hw[i] |= TOPA_INT; + } + buf->topa_hw[vm->npages] = (uint64_t)vtophys(buf->topa_hw) | TOPA_END; + + return (0); +} + +/* + * Configures IP filtering for trace generation. + * A maximum of 2 ranges can be specified due to + * limitations imposed by the XSAVE/XRSTOR PT extensions. + */ +static int +pt_configure_ranges(struct pt_ctx *ctx, struct pt_cpu_config *cfg) +{ + struct pt_ext_area *pt_ext; + int nranges_supp, n, error = 0; + + pt_ext = pt_ctx_get_ext_area(ctx); + if (pt_info.l0_ebx & CPUPT_IPF) { + nranges_supp = (pt_info.l1_eax & CPUPT_NADDR_M) >> + CPUPT_NADDR_S; + + if (nranges_supp > PT_IP_FILTER_MAX_RANGES) + nranges_supp = PT_IP_FILTER_MAX_RANGES; + n = cfg->nranges; + if (n > nranges_supp) { + printf("%s: %d IP filtering ranges requested, CPU " + "supports %d, truncating\n", + __func__, n, nranges_supp); + n = nranges_supp; + } + + switch (n) { + case 2: + pt_ext->rtit_ctl |= (1UL << RTIT_CTL_ADDR_CFG_S(1)); + pt_ext->rtit_addr1_a = cfg->ip_ranges[1].start; + pt_ext->rtit_addr1_b = cfg->ip_ranges[1].end; + case 1: + pt_ext->rtit_ctl |= (1UL << RTIT_CTL_ADDR_CFG_S(0)); + pt_ext->rtit_addr0_a = cfg->ip_ranges[0].start; + pt_ext->rtit_addr0_b = cfg->ip_ranges[0].end; + break; + default: + error = (EINVAL); + break; + }; + } else + error = (ENXIO); + + return (error); +} + +static int +pt_init_ctx(struct pt_ctx *pt_ctx, struct hwt_vm *vm, int ctx_id) +{ + + dprintf("%s: ctx id %d\n", __func__, ctx_id); + + KASSERT(pt_ctx->buf.topa_hw == NULL, + ("%s: active ToPA buffer in context %p\n", __func__, pt_ctx)); + + memset(pt_ctx, 0, sizeof(struct pt_ctx)); + mtx_init(&pt_ctx->buf.lock, "pttopa", NULL, MTX_SPIN); + pt_ctx->save_area = malloc_aligned(pt_info.xsave_area_size, 64, + M_PT, M_NOWAIT | M_ZERO); + if (pt_ctx->save_area == NULL) + return (ENOMEM); + dprintf("%s: preparing ToPA buffer\n", __func__); + if (pt_topa_prepare(pt_ctx, vm) != 0) { + dprintf("%s: failed to prepare ToPA buffer\n", __func__); + free(pt_ctx->save_area, M_PT); + return (ENOMEM); + } + + pt_ctx->id = ctx_id; + TASK_INIT(&pt_ctx->task, 0, pt_send_buffer_record, pt_ctx); + + return (0); +} + +static void +pt_deinit_ctx(struct pt_ctx *pt_ctx) 
+{ + + if (pt_ctx->buf.topa_hw != NULL) + free(pt_ctx->buf.topa_hw, M_PT); + if (pt_ctx->save_area != NULL) + free(pt_ctx->save_area, M_PT); + memset(pt_ctx, 0, sizeof(*pt_ctx)); + pt_ctx->buf.topa_hw = NULL; +} + +/* + * HWT backend configuration method. + * + * Checks and translates the user-defined configuration to a + * set of PT tracing features. Uses the feature set to initialize + * the tracing context for the target CPU or thread. + */ +static int +pt_backend_configure(struct hwt_context *ctx, int cpu_id, int thread_id) +{ + struct hwt_cpu *hwt_cpu; + struct hwt_thread *thr; + struct pt_ctx *pt_ctx; + struct pt_cpu_config *cfg; + struct pt_ext_area *pt_ext; + struct xstate_hdr *hdr; + int error; + + dprintf("%s\n", __func__); + + cfg = (struct pt_cpu_config *)ctx->config; + pt_ctx = NULL; + + /* Clear any flags we don't support yet. */ + cfg->rtit_ctl &= PT_SUPPORTED_FLAGS; + if (cfg->rtit_ctl & RTIT_CTL_MTCEN) { + if ((pt_info.l0_ebx & CPUPT_MTC) == 0) { + printf("%s: CPU does not support generating MTC " + "packets\n", __func__); + return (ENXIO); + } + } + + if (cfg->rtit_ctl & RTIT_CTL_CR3FILTER) { + if ((pt_info.l0_ebx & CPUPT_CR3) == 0) { + printf("%s: CPU does not support CR3 filtering\n", + __func__); + return (ENXIO); + } + } + + if (cfg->rtit_ctl & RTIT_CTL_DIS_TNT) { + if ((pt_info.l0_ebx & CPUPT_DIS_TNT) == 0) { + printf("%s: CPU does not support TNT\n", __func__); + return (ENXIO); + } + } + /* TODO: support for more config bits. */ + + if (ctx->mode == HWT_MODE_CPU) { + TAILQ_FOREACH(hwt_cpu, &ctx->cpus, next) { + if (hwt_cpu->cpu_id != cpu_id) + continue; + pt_ctx = &pt_pcpu_ctx[cpu_id]; + break; + } + } else { + TAILQ_FOREACH(thr, &ctx->threads, next) { + if (thr->thread_id != thread_id) + continue; + KASSERT(thr->private != NULL, + ("%s: hwt thread private" + " not set, thr %p", + __func__, thr)); + pt_ctx = (struct pt_ctx *)thr->private; + break; + } + } + if (pt_ctx == NULL) + return (ENOENT); + + dprintf("%s: preparing MSRs\n", __func__); + pt_ext = pt_ctx_get_ext_area(pt_ctx); + hdr = pt_ctx_get_xstate_hdr(pt_ctx); + + pt_ext->rtit_ctl |= cfg->rtit_ctl; + if (cfg->nranges != 0) { + dprintf("%s: preparing IPF ranges\n", __func__); + if ((error = pt_configure_ranges(pt_ctx, cfg)) != 0) + return (error); + } + pt_ctx->hwt_ctx = ctx; + pt_ext->rtit_ctl |= RTIT_CTL_TOPA; + pt_ext->rtit_output_base = (uint64_t)vtophys(pt_ctx->buf.topa_hw); + pt_ext->rtit_output_mask_ptrs = PT_TOPA_MASK_PTRS; + hdr->xstate_bv = XFEATURE_ENABLED_PT; + hdr->xstate_xcomp_bv = XFEATURE_ENABLED_PT | + XSTATE_XCOMP_BV_COMPACT; + pt_ext->rtit_ctl |= RTIT_CTL_TRACEEN; + pt_pcpu[cpu_id].ctx = pt_ctx; + pt_cpu_set_state(cpu_id, PT_STOPPED); + + return (0); +} + +/* + * hwt backend trace start operation. CPU affine. + */ +static void +pt_backend_enable(struct hwt_context *ctx, int cpu_id) +{ + if (ctx->mode == HWT_MODE_CPU) + return; + + KASSERT(curcpu == cpu_id, + ("%s: attempting to start PT on another cpu", __func__)); + pt_cpu_start(NULL); + CPU_SET(cpu_id, &ctx->cpu_map); +} + +/* + * hwt backend trace stop operation. CPU affine. + */ +static void +pt_backend_disable(struct hwt_context *ctx, int cpu_id) +{ + struct pt_cpu *cpu; + + if (ctx->mode == HWT_MODE_CPU) + return; + + KASSERT(curcpu == cpu_id, + ("%s: attempting to disable PT on another cpu", __func__)); + pt_cpu_stop(NULL); + CPU_CLR(cpu_id, &ctx->cpu_map); + cpu = &pt_pcpu[cpu_id]; + cpu->ctx = NULL; +} + +/* + * hwt backend trace start operation for remote CPUs. 
+ */ +static int +pt_backend_enable_smp(struct hwt_context *ctx) +{ + + dprintf("%s\n", __func__); + if (ctx->mode == HWT_MODE_CPU && + atomic_swap_32(&cpu_mode_ctr, 1) != 0) + return (-1); + + KASSERT(ctx->mode == HWT_MODE_CPU, + ("%s: should only be used for CPU mode", __func__)); + smp_rendezvous_cpus(ctx->cpu_map, NULL, pt_cpu_start, NULL, NULL); + + return (0); +} + +/* + * hwt backend trace stop operation for remote CPUs. + */ +static int +pt_backend_disable_smp(struct hwt_context *ctx) +{ + + dprintf("%s\n", __func__); + if (ctx->mode == HWT_MODE_CPU && + atomic_swap_32(&cpu_mode_ctr, 0) == 0) + return (-1); + + if (CPU_EMPTY(&ctx->cpu_map)) { + dprintf("%s: empty cpu map\n", __func__); + return (-1); + } + smp_rendezvous_cpus(ctx->cpu_map, NULL, pt_cpu_stop, NULL, NULL); + + return (0); +} + +/* + * HWT backend initialization method. + * + * Installs the ToPA interrupt handler and initializes + * the tracing contexts used for HWT_MODE_CPU. + */ +static int +pt_backend_init(struct hwt_context *ctx) +{ + struct hwt_cpu *hwt_cpu; + int error; + + dprintf("%s\n", __func__); + if (ctx->mode == HWT_MODE_CPU) { + TAILQ_FOREACH(hwt_cpu, &ctx->cpus, next) { + error = pt_init_ctx(&pt_pcpu_ctx[hwt_cpu->cpu_id], + hwt_cpu->vm, hwt_cpu->cpu_id); + if (error) + return (error); + } + } + + return (0); +} + +/* + * HWT backend teardown method. + * + * Removes the ToPA interrupt handler, stops tracing on all active CPUs, + * and releases all previously allocated ToPA metadata. + */ +static int +pt_backend_deinit(struct hwt_context *ctx) +{ + struct pt_ctx *pt_ctx; + struct hwt_thread *thr; + int cpu_id; + + dprintf("%s\n", __func__); + + pt_backend_disable_smp(ctx); + if (ctx->mode == HWT_MODE_THREAD) { + TAILQ_FOREACH(thr, &ctx->threads, next) { + KASSERT(thr->private != NULL, + ("%s: thr->private not set", __func__)); + pt_ctx = (struct pt_ctx *)thr->private; + pt_deinit_ctx(pt_ctx); + } + } else { + CPU_FOREACH(cpu_id) { + if (!CPU_ISSET(cpu_id, &ctx->cpu_map)) + continue; + if (pt_pcpu[cpu_id].ctx != NULL) { + KASSERT(pt_pcpu[cpu_id].ctx == + &pt_pcpu_ctx[cpu_id], + ("%s: CPU mode tracing with non-cpu mode PT" + "context active", + __func__)); + pt_pcpu[cpu_id].ctx = NULL; + } + pt_ctx = &pt_pcpu_ctx[cpu_id]; + pt_deinit_ctx(pt_ctx); + memset(&pt_pcpu[cpu_id], 0, sizeof(struct pt_cpu)); + } + } + + return (0); +} + +/* + * Fetches current offset into the tracing buffer. + */ +static int +pt_backend_read(struct hwt_vm *vm, int *curpage, vm_offset_t *curpage_offset, + uint64_t *data) +{ + struct pt_buffer *buf; + + if (vm->ctx->mode == HWT_MODE_THREAD) + buf = &((struct pt_ctx *)vm->thr->private)->buf; + else + buf = &pt_pcpu[vm->cpu->cpu_id].ctx->buf; + mtx_lock_spin(&buf->lock); + *curpage = buf->curpage; + *curpage_offset = buf->offset + (buf->wrap_count * vm->ctx->bufsize); + mtx_unlock_spin(&buf->lock); + + return (0); +} + +/* + * HWT thread creation hook. + * Allocates and associates a 'struct pt_ctx' for a given hwt thread. + */ +static int +pt_backend_alloc_thread(struct hwt_thread *thr) +{ + struct pt_ctx *pt_ctx; + int error; + + /* Omit M_WAITOK since this might get invoked a non-sleepable context */ + pt_ctx = malloc(sizeof(*pt_ctx), M_PT, M_NOWAIT | M_ZERO); + if (pt_ctx == NULL) + return (ENOMEM); + + error = pt_init_ctx(pt_ctx, thr->vm, thr->thread_id); + if (error) + return (error); + + thr->private = pt_ctx; + return (0); +} +/* + * HWT thread teardown hook. 
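
pt_backend_read() above, like pt_fill_buffer_record() earlier, linearizes the circular buffer position into a monotonically growing stream offset so userspace can tell how much trace data has been produced in total: the in-buffer offset plus one full buffer length per recorded wrap. A one-line worked example with illustrative values:

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint64_t bufsize = 64 * 1024;	/* trace buffer size */
	uint64_t wrap_count = 2;	/* buffer wrapped twice */
	uint64_t offset = 0x1000;	/* current offset inside buffer */

	/* 2 * 65536 + 4096 = 135168 bytes of trace data so far */
	printf("stream position: %llu\n",
	    (unsigned long long)(offset + wrap_count * bufsize));
	return (0);
}
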
+ */ +static void +pt_backend_free_thread(struct hwt_thread *thr) +{ + struct pt_ctx *ctx; + + ctx = (struct pt_ctx *)thr->private; + + pt_deinit_ctx(ctx); + free(ctx, M_PT); +} + +static void +pt_backend_dump(int cpu_id) +{ +} + +static struct hwt_backend_ops pt_ops = { + .hwt_backend_init = pt_backend_init, + .hwt_backend_deinit = pt_backend_deinit, + + .hwt_backend_configure = pt_backend_configure, + + .hwt_backend_enable = pt_backend_enable, + .hwt_backend_disable = pt_backend_disable, + +#ifdef SMP + .hwt_backend_enable_smp = pt_backend_enable_smp, + .hwt_backend_disable_smp = pt_backend_disable_smp, +#endif + + .hwt_backend_read = pt_backend_read, + .hwt_backend_dump = pt_backend_dump, + + .hwt_backend_thread_alloc = pt_backend_alloc_thread, + .hwt_backend_thread_free = pt_backend_free_thread, +}; + +static struct hwt_backend backend = { + .ops = &pt_ops, + .name = "pt", + .kva_req = 1, +}; + +/* + * Reads the latest valid trace buffer offset and enqueues + * a HWT_RECORD_BUFFER record. + * Used as a taskqueue routine from the ToPA interrupt handler. + */ +static void +pt_send_buffer_record(void *arg, int pending __unused) +{ + struct hwt_record_entry record; + struct pt_ctx *ctx = (struct pt_ctx *)arg; + + /* Prepare buffer record. */ + mtx_lock_spin(&ctx->buf.lock); + pt_fill_buffer_record(ctx->id, &ctx->buf, &record); + mtx_unlock_spin(&ctx->buf.lock); + hwt_record_ctx(ctx->hwt_ctx, &record, M_ZERO | M_NOWAIT); +} +static void +pt_topa_status_clear(void) +{ + uint64_t reg; + + reg = rdmsr(MSR_IA_GLOBAL_STATUS_RESET); + reg &= ~GLOBAL_STATUS_FLAG_TRACETOPAPMI; + reg |= GLOBAL_STATUS_FLAG_TRACETOPAPMI; + wrmsr(MSR_IA_GLOBAL_STATUS_RESET, reg); +} + +/* + * ToPA PMI handler. + * + * Invoked every time a ToPA entry marked with TOPA_INT is filled. + * Uses taskqueue to enqueue a buffer record for userspace. + * Re-enables the PC interrupt line as long as tracing is active. + */ +static int +pt_topa_intr(struct trapframe *tf) +{ + struct pt_buffer *buf; + struct pt_ctx *ctx; + uint64_t reg; + + SDT_PROBE0(pt, , , topa__intr); + + if (pt_cpu_get_state(curcpu) != PT_ACTIVE) { + return (0); + } + reg = rdmsr(MSR_IA_GLOBAL_STATUS); + if ((reg & GLOBAL_STATUS_FLAG_TRACETOPAPMI) == 0) { + /* ACK spurious or leftover interrupt. */ + pt_topa_status_clear(); + return (1); + } + + ctx = pt_pcpu[curcpu].ctx; + buf = &ctx->buf; + KASSERT(buf->topa_hw != NULL, + ("%s: ToPA PMI interrupt with invalid buffer", __func__)); + + pt_cpu_toggle_local(ctx->save_area, false); + pt_update_buffer(buf); + pt_topa_status_clear(); + taskqueue_enqueue_flags(taskqueue_pt, &ctx->task, + TASKQUEUE_FAIL_IF_PENDING); + + if (pt_cpu_get_state(curcpu) == PT_ACTIVE) { + pt_cpu_toggle_local(ctx->save_area, true); + lapic_reenable_pcint(); + } + return (1); +} + +/* + * Module initialization. + * + * Saves all PT-related cpuid info, registers itself as a HWT backend, + * and allocates metadata required to keep track of tracing operations + * on each CPU. 
+ */ +static int +pt_init(void) +{ + u_int cp[4]; + int error; + + dprintf("pt: Enumerating part 1\n"); + cpuid_count(CPUID_PT_LEAF, 0, cp); + dprintf("pt: Maximum valid sub-leaf Index: %x\n", cp[0]); + dprintf("pt: ebx %x\n", cp[1]); + dprintf("pt: ecx %x\n", cp[2]); + + pt_info.l0_eax = cp[0]; + pt_info.l0_ebx = cp[1]; + pt_info.l0_ecx = cp[2]; + + dprintf("pt: Enumerating part 2\n"); + cpuid_count(CPUID_PT_LEAF, 1, cp); + dprintf("pt: eax %x\n", cp[0]); + dprintf("pt: ebx %x\n", cp[1]); + + pt_info.l1_eax = cp[0]; + pt_info.l1_ebx = cp[1]; + + error = hwt_backend_register(&backend); + if (error != 0) { + printf("pt: unable to register hwt backend, error %d\n", error); + return (error); + } + pt_pcpu = mallocarray(mp_ncpus, sizeof(struct pt_cpu), M_PT, + M_ZERO | M_WAITOK); + pt_pcpu_ctx = mallocarray(mp_ncpus, sizeof(struct pt_ctx), M_PT, + M_ZERO | M_WAITOK); + + nmi_register_handler(pt_topa_intr); + if (!lapic_enable_pcint()) { + nmi_remove_handler(pt_topa_intr); + hwt_backend_unregister(&backend); + free(pt_pcpu, M_PT); + free(pt_pcpu_ctx, M_PT); + pt_pcpu = NULL; + pt_pcpu_ctx = NULL; + printf("pt: failed to setup interrupt line\n"); + return (error); + } + initialized = true; + + return (0); +} + +/* + * Checks whether the CPU support Intel PT and + * initializes XSAVE area info. + * + * The driver relies on XSAVE/XRSTOR PT extensions, + * Table of Physical Addresses (ToPA) support, and + * support for multiple ToPA entries. + */ +static bool +pt_supported(void) +{ + u_int cp[4]; + + if ((cpu_stdext_feature & CPUID_STDEXT_PROCTRACE) == 0) { + printf("pt: CPU does not support Intel Processor Trace\n"); + return (false); + } + if ((cpu_feature2 & CPUID2_XSAVE) == 0) { + printf("pt: XSAVE is not supported\n"); + return (false); + } + if (!xsave_extfeature_supported(XFEATURE_ENABLED_PT, true)) { + printf("pt: CPU does not support managing PT state using XSAVE\n"); + return (false); + } + if (!xsave_extension_supported(CPUID_EXTSTATE_XSAVEC)) { + printf("pt: XSAVE compaction is not supported\n"); + return (false); + } + if (!xsave_extension_supported(CPUID_EXTSTATE_XSAVES)) { + printf("pt: CPU does not support XSAVES/XRSTORS\n"); + return (false); + } + + /* Require ToPA support. 
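
The pt_supported() checks can be reproduced from userspace with raw CPUID, which is handy when debugging why the module refuses to load. A minimal probe using the compiler's cpuid.h: leaf 7 EBX bit 25 is the Intel PT feature flag behind CPUID_STDEXT_PROCTRACE, and leaf 0x14 ECX bits 0 and 1 are the ToPA bits tested as CPUPT_TOPA/CPUPT_TOPA_MULTI:

#include <cpuid.h>
#include <stdio.h>

int
main(void)
{
	unsigned a, b, c, d;

	/* CPUID.(7,0):EBX[25] == Intel PT (CPUID_STDEXT_PROCTRACE). */
	if (!__get_cpuid_count(7, 0, &a, &b, &c, &d) || !(b & (1u << 25))) {
		puts("no Intel PT");
		return (1);
	}
	/* CPUID.(0x14,0):ECX[0] == ToPA, ECX[1] == multi-entry tables. */
	__get_cpuid_count(0x14, 0, &a, &b, &c, &d);
	printf("ToPA %d, multi-entry %d\n", !!(c & 1), !!(c & 2));
	return (0);
}
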
*/ + cpuid_count(CPUID_PT_LEAF, 0, cp); + if ((cp[2] & CPUPT_TOPA) == 0) { + printf("pt: ToPA is not supported\n"); + return (false); + } + if ((cp[2] & CPUPT_TOPA_MULTI) == 0) { + printf("pt: multiple ToPA outputs are not supported\n"); + return (false); + } + + pt_info.xstate_hdr_offset = xsave_area_hdr_offset(); + pt_info.xsave_area_size = xsave_area_size(PT_XSTATE_BV, true, true); + pt_info.pt_xsave_offset = xsave_area_offset(PT_XSTATE_BV, + XFEATURE_ENABLED_PT, true, true); + + return (true); +} + +static void +pt_deinit(void) +{ + if (!initialized) + return; + nmi_remove_handler(pt_topa_intr); + lapic_disable_pcint(); + hwt_backend_unregister(&backend); + free(pt_pcpu, M_PT); + free(pt_pcpu_ctx, M_PT); + pt_pcpu = NULL; + initialized = false; +} + +static int +pt_modevent(module_t mod, int type, void *data) +{ + switch (type) { + case MOD_LOAD: + if (!pt_supported() || pt_init() != 0) { + return (ENXIO); + } + break; + case MOD_UNLOAD: + pt_deinit(); + break; + default: + break; + } + + return (0); +} + +static moduledata_t pt_mod = { "intel_pt", pt_modevent, NULL }; + +DECLARE_MODULE(intel_pt, pt_mod, SI_SUB_DRIVERS, SI_ORDER_FIRST); +MODULE_DEPEND(intel_pt, hwt, 1, 1, 1); +MODULE_VERSION(intel_pt, 1); diff --git a/sys/amd64/pt/pt.h b/sys/amd64/pt/pt.h new file mode 100644 index 000000000000..2423afdf22e9 --- /dev/null +++ b/sys/amd64/pt/pt.h @@ -0,0 +1,49 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2023 Bojan Novkoviฤ <bnovkov@freebsd.org> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef _AMD64_PT_PT_H_ +#define _AMD64_PT_PT_H_ + +#include <sys/types.h> + +#include <x86/include/specialreg.h> + +#define PT_IP_FILTER_MAX_RANGES (2) /* Intel SDM Vol. 
3C, 33-29 */ + +struct pt_cpu_config { + uint64_t rtit_ctl; + register_t cr3_filter; + int nranges; + struct ipf_range { + vm_offset_t start; + vm_offset_t end; + } ip_ranges[PT_IP_FILTER_MAX_RANGES]; + uint32_t mtc_freq; + uint32_t cyc_thresh; + uint32_t psb_freq; +}; +#endif /* !_AMD64_PT_PT_H_ */ diff --git a/sys/amd64/vmm/intel/vmx_support.S b/sys/amd64/vmm/intel/vmx_support.S index f393f160b101..130130b64541 100644 --- a/sys/amd64/vmm/intel/vmx_support.S +++ b/sys/amd64/vmm/intel/vmx_support.S @@ -32,12 +32,6 @@ #include "vmx_assym.h" -#ifdef SMP -#define LK lock ; -#else -#define LK -#endif - /* Be friendly to DTrace FBT's prologue/epilogue pattern matching */ #define VENTER push %rbp ; mov %rsp,%rbp #define VLEAVE pop %rbp diff --git a/sys/arm/arm/pmap-v6.c b/sys/arm/arm/pmap-v6.c index 92eb0589f80b..78883296c5b7 100644 --- a/sys/arm/arm/pmap-v6.c +++ b/sys/arm/arm/pmap-v6.c @@ -5767,7 +5767,7 @@ pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma) CTR5(KTR_PMAP, "%s: page %p - 0x%08X oma: %d, ma: %d", __func__, m, VM_PAGE_TO_PHYS(m), oma, ma); - if ((m->flags & PG_FICTITIOUS) != 0) + if (ma == oma || (m->flags & PG_FICTITIOUS) != 0) return; #if 0 /* @@ -5784,22 +5784,20 @@ pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma) * If page is not mapped by sf buffer, map the page * transient and do invalidation. */ - if (ma != oma) { - pa = VM_PAGE_TO_PHYS(m); - sched_pin(); - pc = get_pcpu(); - cmap2_pte2p = pc->pc_cmap2_pte2p; - mtx_lock(&pc->pc_cmap_lock); - if (pte2_load(cmap2_pte2p) != 0) - panic("%s: CMAP2 busy", __func__); - pte2_store(cmap2_pte2p, PTE2_KERN_NG(pa, PTE2_AP_KRW, - vm_memattr_to_pte2(ma))); - dcache_wbinv_poc((vm_offset_t)pc->pc_cmap2_addr, pa, PAGE_SIZE); - pte2_clear(cmap2_pte2p); - tlb_flush((vm_offset_t)pc->pc_cmap2_addr); - sched_unpin(); - mtx_unlock(&pc->pc_cmap_lock); - } + pa = VM_PAGE_TO_PHYS(m); + sched_pin(); + pc = get_pcpu(); + cmap2_pte2p = pc->pc_cmap2_pte2p; + mtx_lock(&pc->pc_cmap_lock); + if (pte2_load(cmap2_pte2p) != 0) + panic("%s: CMAP2 busy", __func__); + pte2_store(cmap2_pte2p, PTE2_KERN_NG(pa, PTE2_AP_KRW, + vm_memattr_to_pte2(ma))); + dcache_wbinv_poc((vm_offset_t)pc->pc_cmap2_addr, pa, PAGE_SIZE); + pte2_clear(cmap2_pte2p); + tlb_flush((vm_offset_t)pc->pc_cmap2_addr); + sched_unpin(); + mtx_unlock(&pc->pc_cmap_lock); } /* diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c index d2e56a270f54..a09da794e77d 100644 --- a/sys/arm64/arm64/pmap.c +++ b/sys/arm64/arm64/pmap.c @@ -497,7 +497,8 @@ static bool pmap_pv_insert_l3c(pmap_t pmap, vm_offset_t va, vm_page_t m, struct rwlock **lockp); static void pmap_remove_kernel_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t va); static int pmap_remove_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t sva, - pd_entry_t l1e, struct spglist *free, struct rwlock **lockp); + pd_entry_t l1e, bool demote_kl2e, struct spglist *free, + struct rwlock **lockp); static int pmap_remove_l3(pmap_t pmap, pt_entry_t *l3, vm_offset_t sva, pd_entry_t l2e, struct spglist *free, struct rwlock **lockp); static bool pmap_remove_l3c(pmap_t pmap, pt_entry_t *l3p, vm_offset_t va, @@ -3847,8 +3848,7 @@ pmap_remove_kernel_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t va) PMAP_LOCK_ASSERT(pmap, MA_OWNED); ml3 = pmap_remove_pt_page(pmap, va); - if (ml3 == NULL) - panic("pmap_remove_kernel_l2: Missing pt page"); + KASSERT(ml3 != NULL, ("pmap_remove_kernel_l2: missing pt page")); ml3pa = VM_PAGE_TO_PHYS(ml3); newl2 = PHYS_TO_PTE(ml3pa) | L2_TABLE; @@ -3873,8 +3873,8 @@ pmap_remove_kernel_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t va) * 
pmap_remove_l2: Do the things to unmap a level 2 superpage. */ static int -pmap_remove_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t sva, - pd_entry_t l1e, struct spglist *free, struct rwlock **lockp) +pmap_remove_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t sva, pd_entry_t l1e, + bool demote_kl2e, struct spglist *free, struct rwlock **lockp) { struct md_page *pvh; pt_entry_t old_l2; @@ -3910,9 +3910,7 @@ pmap_remove_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t sva, vm_page_aflag_clear(mt, PGA_WRITEABLE); } } - if (pmap == kernel_pmap) { - pmap_remove_kernel_l2(pmap, l2, sva); - } else { + if (pmap != kernel_pmap) { ml3 = pmap_remove_pt_page(pmap, sva); if (ml3 != NULL) { KASSERT(vm_page_any_valid(ml3), @@ -3923,6 +3921,14 @@ pmap_remove_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t sva, ml3->ref_count = 0; pmap_add_delayed_free_list(ml3, free, false); } + } else if (demote_kl2e) { + pmap_remove_kernel_l2(pmap, l2, sva); + } else { + ml3 = vm_radix_lookup(&pmap->pm_root, pmap_l2_pindex(sva)); + if (vm_page_any_valid(ml3)) { + ml3->valid = 0; + pmap_zero_page(ml3); + } } return (pmap_unuse_pt(pmap, sva, l1e, free)); } @@ -4232,7 +4238,7 @@ pmap_remove1(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, bool map_delete) if ((l3_paddr & ATTR_DESCR_MASK) == L2_BLOCK) { if (sva + L2_SIZE == va_next && eva >= va_next) { pmap_remove_l2(pmap, l2, sva, pmap_load(l1), - &free, &lock); + true, &free, &lock); continue; } else if (pmap_demote_l2_locked(pmap, l2, sva, &lock) == NULL) @@ -5747,33 +5753,51 @@ pmap_enter_l2(pmap_t pmap, vm_offset_t va, pd_entry_t new_l2, u_int flags, } } SLIST_INIT(&free); - if ((old_l2 & ATTR_DESCR_MASK) == L2_BLOCK) + if ((old_l2 & ATTR_DESCR_MASK) == L2_BLOCK) { (void)pmap_remove_l2(pmap, l2, va, - pmap_load(pmap_l1(pmap, va)), &free, lockp); - else + pmap_load(pmap_l1(pmap, va)), false, &free, lockp); + } else { + if (ADDR_IS_KERNEL(va)) { + /* + * Try to save the ptp in the trie + * before any changes to mappings are + * made. Abort on failure. + */ + mt = PTE_TO_VM_PAGE(old_l2); + if (pmap_insert_pt_page(pmap, mt, false, + false)) { + CTR1(KTR_PMAP, + "pmap_enter_l2: cannot ins kern ptp va %#lx", + va); + return (KERN_RESOURCE_SHORTAGE); + } + /* + * Both pmap_remove_l2() and + * pmap_remove_l3_range() will zero fill + * the L3 kernel page table page. + */ + } pmap_remove_l3_range(pmap, old_l2, va, va + L2_SIZE, &free, lockp); + if (ADDR_IS_KERNEL(va)) { + /* + * The TLB could have an intermediate + * entry for the L3 kernel page table + * page, so request an invalidation at + * all levels after clearing the + * L2_TABLE entry. + */ + pmap_clear(l2); + pmap_s1_invalidate_page(pmap, va, false); + } + } + KASSERT(pmap_load(l2) == 0, + ("pmap_enter_l2: non-zero L2 entry %p", l2)); if (!ADDR_IS_KERNEL(va)) { vm_page_free_pages_toq(&free, true); - KASSERT(pmap_load(l2) == 0, - ("pmap_enter_l2: non-zero L2 entry %p", l2)); } else { KASSERT(SLIST_EMPTY(&free), ("pmap_enter_l2: freed kernel page table page")); - - /* - * Both pmap_remove_l2() and pmap_remove_l3_range() - * will leave the kernel page table page zero filled. - * Nonetheless, the TLB could have an intermediate - * entry for the kernel page table page, so request - * an invalidation at all levels after clearing - * the L2_TABLE entry. 
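The arm64 hunks above preserve one invariant: the kernel pmap never loses intermediate page-table structure. A kernel L3 page-table page is either re-installed by demotion (demote_kl2e) or left in the radix trie zero-filled, while user page-table pages are freed outright. A minimal userland model of that decision follows; toy_ptp and remove_l2_model are illustrative names, not kernel code:

#include <stdbool.h>
#include <stdint.h>
#include <string.h>

struct toy_ptp {
	uint64_t pte[512];	/* stands in for the L3 page contents */
	bool	 valid;
};

static void
remove_l2_model(bool is_kernel, bool demote_kl2e, struct toy_ptp *l3)
{
	if (!is_kernel) {
		l3->valid = false;		/* freed back to the VM system */
	} else if (demote_kl2e) {
		l3->valid = true;		/* L2 entry re-points at it */
	} else {
		/* kept in the trie, but emptied and marked invalid */
		l3->valid = false;
		memset(l3->pte, 0, sizeof(l3->pte));
	}
}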
- */ - mt = PTE_TO_VM_PAGE(pmap_load(l2)); - if (pmap_insert_pt_page(pmap, mt, false, false)) - panic("pmap_enter_l2: trie insert failed"); - pmap_clear(l2); - pmap_s1_invalidate_page(pmap, va, false); } } @@ -8045,6 +8069,8 @@ pmap_unmapbios(void *p, vm_size_t size) void pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma) { + if (m->md.pv_memattr == ma) + return; m->md.pv_memattr = ma; @@ -8424,8 +8450,8 @@ pmap_demote_l2_abort(pmap_t pmap, vm_offset_t va, pt_entry_t *l2, struct spglist free; SLIST_INIT(&free); - (void)pmap_remove_l2(pmap, l2, va, pmap_load(pmap_l1(pmap, va)), &free, - lockp); + (void)pmap_remove_l2(pmap, l2, va, pmap_load(pmap_l1(pmap, va)), true, + &free, lockp); vm_page_free_pages_toq(&free, true); } diff --git a/sys/cddl/boot/zfs/zfsimpl.h b/sys/cddl/boot/zfs/zfsimpl.h index 0ce38384abbf..83d964360343 100644 --- a/sys/cddl/boot/zfs/zfsimpl.h +++ b/sys/cddl/boot/zfs/zfsimpl.h @@ -2019,6 +2019,7 @@ typedef struct vdev { vdev_list_t v_children; /* children of this vdev */ const char *v_name; /* vdev name */ uint64_t v_guid; /* vdev guid */ + uint64_t v_txg; /* most recent transaction */ uint64_t v_id; /* index in parent */ uint64_t v_psize; /* physical device capacity */ int v_ashift; /* offset to block shift */ @@ -2048,7 +2049,6 @@ typedef struct spa { STAILQ_ENTRY(spa) spa_link; /* link in global pool list */ char *spa_name; /* pool name */ uint64_t spa_guid; /* pool guid */ - uint64_t spa_txg; /* most recent transaction */ struct uberblock *spa_uberblock; /* best uberblock so far */ vdev_t *spa_root_vdev; /* toplevel vdev container */ objset_phys_t *spa_mos; /* MOS for this pool */ diff --git a/sys/compat/linuxkpi/common/include/linux/slab.h b/sys/compat/linuxkpi/common/include/linux/slab.h index f3a840d9bf4b..efa5c8cb67b3 100644 --- a/sys/compat/linuxkpi/common/include/linux/slab.h +++ b/sys/compat/linuxkpi/common/include/linux/slab.h @@ -45,7 +45,7 @@ MALLOC_DECLARE(M_KMALLOC); -#define kvzalloc(size, flags) kmalloc(size, (flags) | __GFP_ZERO) +#define kvzalloc(size, flags) kvmalloc(size, (flags) | __GFP_ZERO) #define kvcalloc(n, size, flags) kvmalloc_array(n, size, (flags) | __GFP_ZERO) #define kzalloc(size, flags) kmalloc(size, (flags) | __GFP_ZERO) #define kzalloc_node(size, flags, node) kmalloc_node(size, (flags) | __GFP_ZERO, node) diff --git a/sys/compat/linuxkpi/common/src/linux_page.c b/sys/compat/linuxkpi/common/src/linux_page.c index ebb92eacbf9a..628af17df853 100644 --- a/sys/compat/linuxkpi/common/src/linux_page.c +++ b/sys/compat/linuxkpi/common/src/linux_page.c @@ -106,6 +106,7 @@ linux_alloc_pages(gfp_t flags, unsigned int order) if ((flags & M_ZERO) != 0) req |= VM_ALLOC_ZERO; + if (order == 0 && (flags & GFP_DMA32) == 0) { page = vm_page_alloc_noobj(req); if (page == NULL) @@ -113,6 +114,10 @@ linux_alloc_pages(gfp_t flags, unsigned int order) } else { vm_paddr_t pmax = (flags & GFP_DMA32) ? 
BUS_SPACE_MAXADDR_32BIT : BUS_SPACE_MAXADDR; + + if ((flags & __GFP_NORETRY) != 0) + req |= VM_ALLOC_NORECLAIM; + retry: page = vm_page_alloc_noobj_contig(req, npages, 0, pmax, PAGE_SIZE, 0, VM_MEMATTR_DEFAULT); diff --git a/sys/conf/files b/sys/conf/files index 74d251c2b608..dd0d390962f2 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -3227,6 +3227,19 @@ dev/uart/uart_if.m optional uart dev/uart/uart_subr.c optional uart dev/uart/uart_tty.c optional uart # +# Universal Flash Storage Host Controller Interface drivers +# +dev/ufshci/ufshci.c optional ufshci +dev/ufshci/ufshci_ctrlr.c optional ufshci +dev/ufshci/ufshci_ctrlr_cmd.c optional ufshci +dev/ufshci/ufshci_dev.c optional ufshci +dev/ufshci/ufshci_pci.c optional ufshci +dev/ufshci/ufshci_req_queue.c optional ufshci +dev/ufshci/ufshci_req_sdb.c optional ufshci +dev/ufshci/ufshci_sim.c optional ufshci +dev/ufshci/ufshci_sysctl.c optional ufshci +dev/ufshci/ufshci_uic_cmd.c optional ufshci +# # USB controller drivers # dev/usb/controller/musb_otg.c optional musb diff --git a/sys/dev/gpio/acpi_gpiobus.c b/sys/dev/gpio/acpi_gpiobus.c index f9468e0deda0..94f4e5771266 100644 --- a/sys/dev/gpio/acpi_gpiobus.c +++ b/sys/dev/gpio/acpi_gpiobus.c @@ -357,7 +357,7 @@ acpi_gpiobus_attach(device_t dev) status = AcpiWalkResources(handle, "_AEI", acpi_gpiobus_enumerate_aei, &ctx); - if (ACPI_FAILURE(status)) + if (ACPI_FAILURE(status) && status != AE_NOT_FOUND) device_printf(dev, "Failed to enumerate AEI resources\n"); return (0); diff --git a/sys/dev/gpio/gpiobus.c b/sys/dev/gpio/gpiobus.c index ab7f13177969..764bcb7e6ee8 100644 --- a/sys/dev/gpio/gpiobus.c +++ b/sys/dev/gpio/gpiobus.c @@ -110,10 +110,9 @@ gpio_alloc_intr_resource(device_t consumer_dev, int *rid, u_int alloc_flags, res = bus_alloc_resource(consumer_dev, SYS_RES_IRQ, rid, irq, irq, 1, alloc_flags); if (res == NULL) { - intr_free_intr_map_data((struct intr_map_data *)gpio_data); + intr_unmap_irq(irq); return (NULL); } - rman_set_virtual(res, gpio_data); return (res); } #else @@ -866,6 +865,25 @@ gpiobus_alloc_resource(device_t bus, device_t child, int type, int *rid, end, count, flags)); } +static int +gpiobus_release_resource(device_t dev, device_t child, struct resource *r) +{ + int err; +#ifdef INTRNG + u_int irq; + + irq = rman_get_start(r); + MPASS(irq == rman_get_end(r)); +#endif + err = bus_generic_rman_release_resource(dev, child, r); + if (err != 0) + return (err); +#ifdef INTRNG + intr_unmap_irq(irq); +#endif + return (0); +} + static struct resource_list * gpiobus_get_resource_list(device_t bus __unused, device_t child) { @@ -1060,7 +1078,7 @@ static device_method_t gpiobus_methods[] = { DEVMETHOD(bus_get_resource, bus_generic_rl_get_resource), DEVMETHOD(bus_set_resource, bus_generic_rl_set_resource), DEVMETHOD(bus_alloc_resource, gpiobus_alloc_resource), - DEVMETHOD(bus_release_resource, bus_generic_rman_release_resource), + DEVMETHOD(bus_release_resource, gpiobus_release_resource), DEVMETHOD(bus_activate_resource, bus_generic_rman_activate_resource), DEVMETHOD(bus_deactivate_resource, bus_generic_rman_deactivate_resource), DEVMETHOD(bus_get_resource_list, gpiobus_get_resource_list), diff --git a/sys/dev/md/md.c b/sys/dev/md/md.c index 741a7c013f7d..29dc0c880e3a 100644 --- a/sys/dev/md/md.c +++ b/sys/dev/md/md.c @@ -11,9 +11,9 @@ */ /*- - * The following functions are based on the vn(4) driver: mdstart_swap(), - * mdstart_vnode(), mdcreate_swap(), mdcreate_vnode() and mddestroy(), - * and as such under the following copyright: + * The following functions are based on 
the historical vn(4) driver: + * mdstart_swap(), mdstart_vnode(), mdcreate_swap(), mdcreate_vnode() + * and mddestroy(), and as such under the following copyright: * * Copyright (c) 1988 University of Utah. * Copyright (c) 1990, 1993 diff --git a/sys/dev/mgb/if_mgb.c b/sys/dev/mgb/if_mgb.c index 1240d0f84415..409f34167df0 100644 --- a/sys/dev/mgb/if_mgb.c +++ b/sys/dev/mgb/if_mgb.c @@ -1435,7 +1435,7 @@ mgb_hw_teardown(struct mgb_softc *sc) /* Stop MAC */ CSR_CLEAR_REG(sc, MGB_MAC_RX, MGB_MAC_ENBL); - CSR_WRITE_REG(sc, MGB_MAC_TX, MGB_MAC_ENBL); + CSR_CLEAR_REG(sc, MGB_MAC_TX, MGB_MAC_ENBL); if ((err = mgb_wait_for_bits(sc, MGB_MAC_RX, MGB_MAC_DSBL, 0))) return (err); if ((err = mgb_wait_for_bits(sc, MGB_MAC_TX, MGB_MAC_DSBL, 0))) diff --git a/sys/dev/mlx5/mlx5_accel/ipsec.h b/sys/dev/mlx5/mlx5_accel/ipsec.h index 361b9f72d873..c3f3a2372482 100644 --- a/sys/dev/mlx5/mlx5_accel/ipsec.h +++ b/sys/dev/mlx5/mlx5_accel/ipsec.h @@ -260,8 +260,8 @@ int mlx5e_accel_ipsec_fs_rx_tables_create(struct mlx5e_priv *priv); void mlx5e_accel_ipsec_fs_rx_catchall_rules_destroy(struct mlx5e_priv *priv); int mlx5e_accel_ipsec_fs_rx_catchall_rules(struct mlx5e_priv *priv); int mlx5_accel_ipsec_rx_tag_add(if_t ifp, struct mlx5e_rq_mbuf *mr); -void mlx5e_accel_ipsec_handle_rx_cqe(struct mbuf *mb, struct mlx5_cqe64 *cqe, - struct mlx5e_rq_mbuf *mr); +void mlx5e_accel_ipsec_handle_rx_cqe(if_t ifp, struct mbuf *mb, + struct mlx5_cqe64 *cqe, struct mlx5e_rq_mbuf *mr); static inline int mlx5e_accel_ipsec_flow(struct mlx5_cqe64 *cqe) { @@ -269,12 +269,12 @@ static inline int mlx5e_accel_ipsec_flow(struct mlx5_cqe64 *cqe) } static inline void -mlx5e_accel_ipsec_handle_rx(struct mbuf *mb, struct mlx5_cqe64 *cqe, +mlx5e_accel_ipsec_handle_rx(if_t ifp, struct mbuf *mb, struct mlx5_cqe64 *cqe, struct mlx5e_rq_mbuf *mr) { u32 ipsec_meta_data = be32_to_cpu(cqe->ft_metadata); if (MLX5_IPSEC_METADATA_MARKER(ipsec_meta_data)) - mlx5e_accel_ipsec_handle_rx_cqe(mb, cqe, mr); + mlx5e_accel_ipsec_handle_rx_cqe(ifp, mb, cqe, mr); } #endif /* __MLX5_ACCEL_IPSEC_H__ */ diff --git a/sys/dev/mlx5/mlx5_accel/mlx5_ipsec_rxtx.c b/sys/dev/mlx5/mlx5_accel/mlx5_ipsec_rxtx.c index 0883cfb2d510..5dccb8bc2b87 100644 --- a/sys/dev/mlx5/mlx5_accel/mlx5_ipsec_rxtx.c +++ b/sys/dev/mlx5/mlx5_accel/mlx5_ipsec_rxtx.c @@ -24,11 +24,14 @@ * */ +#include "opt_ipsec.h" + #include <sys/mbuf.h> #include <sys/socket.h> #include <netinet/in.h> #include <netipsec/keydb.h> #include <netipsec/ipsec_offload.h> +#include <netipsec/xform.h> #include <dev/mlx5/qp.h> #include <dev/mlx5/mlx5_en/en.h> #include <dev/mlx5/mlx5_accel/ipsec.h> @@ -48,7 +51,8 @@ mlx5_accel_ipsec_rx_tag_add(if_t ifp, struct mlx5e_rq_mbuf *mr) return (0); mtag = (struct ipsec_accel_in_tag *)m_tag_get( - PACKET_TAG_IPSEC_ACCEL_IN, sizeof(*mtag), M_NOWAIT); + PACKET_TAG_IPSEC_ACCEL_IN, sizeof(struct ipsec_accel_in_tag) - + __offsetof(struct ipsec_accel_in_tag, xh), M_NOWAIT); if (mtag == NULL) return (-ENOMEM); mr->ipsec_mtag = mtag; @@ -56,8 +60,8 @@ mlx5_accel_ipsec_rx_tag_add(if_t ifp, struct mlx5e_rq_mbuf *mr) } void -mlx5e_accel_ipsec_handle_rx_cqe(struct mbuf *mb, struct mlx5_cqe64 *cqe, - struct mlx5e_rq_mbuf *mr) +mlx5e_accel_ipsec_handle_rx_cqe(if_t ifp, struct mbuf *mb, + struct mlx5_cqe64 *cqe, struct mlx5e_rq_mbuf *mr) { struct ipsec_accel_in_tag *mtag; u32 drv_spi; @@ -65,10 +69,12 @@ mlx5e_accel_ipsec_handle_rx_cqe(struct mbuf *mb, struct mlx5_cqe64 *cqe, drv_spi = MLX5_IPSEC_METADATA_HANDLE(be32_to_cpu(cqe->ft_metadata)); mtag = mr->ipsec_mtag; WARN_ON(mtag == NULL); - 
mr->ipsec_mtag = NULL; if (mtag != NULL) { mtag->drv_spi = drv_spi; - m_tag_prepend(mb, &mtag->tag); + if (ipsec_accel_fill_xh(ifp, drv_spi, &mtag->xh)) { + m_tag_prepend(mb, &mtag->tag); + mr->ipsec_mtag = NULL; + } } } diff --git a/sys/dev/mlx5/mlx5_en/mlx5_en_hw_tls_rx.c b/sys/dev/mlx5/mlx5_en/mlx5_en_hw_tls_rx.c index 4de451f1b039..89d2010656c5 100644 --- a/sys/dev/mlx5/mlx5_en/mlx5_en_hw_tls_rx.c +++ b/sys/dev/mlx5/mlx5_en/mlx5_en_hw_tls_rx.c @@ -659,7 +659,8 @@ mlx5e_tls_rx_set_params(void *ctx, struct inpcb *inp, const struct tls_session_p return (EINVAL); MLX5_SET64(sw_tls_rx_cntx, ctx, param.initial_record_number, tls_sn_he); - MLX5_SET(sw_tls_rx_cntx, ctx, param.resync_tcp_sn, tcp_sn_he); + MLX5_SET(sw_tls_rx_cntx, ctx, param.resync_tcp_sn, 0); + MLX5_SET(sw_tls_rx_cntx, ctx, progress.next_record_tcp_sn, tcp_sn_he); return (0); } diff --git a/sys/dev/mlx5/mlx5_en/mlx5_en_rx.c b/sys/dev/mlx5/mlx5_en/mlx5_en_rx.c index 6b53db6fea23..eb569488631a 100644 --- a/sys/dev/mlx5/mlx5_en/mlx5_en_rx.c +++ b/sys/dev/mlx5/mlx5_en/mlx5_en_rx.c @@ -467,7 +467,7 @@ mlx5e_build_rx_mbuf(struct mlx5_cqe64 *cqe, struct mlx5e_rq *rq, break; } - mlx5e_accel_ipsec_handle_rx(mb, cqe, mr); + mlx5e_accel_ipsec_handle_rx(ifp, mb, cqe, mr); } static inline void diff --git a/sys/dev/qlnx/qlnxe/qlnx_os.c b/sys/dev/qlnx/qlnxe/qlnx_os.c index 05ec69a70dfe..9d23d5df1d2b 100644 --- a/sys/dev/qlnx/qlnxe/qlnx_os.c +++ b/sys/dev/qlnx/qlnxe/qlnx_os.c @@ -30,6 +30,8 @@ * Author : David C Somayajulu, Cavium, Inc., San Jose, CA 95131. */ +#include "opt_inet.h" + #include <sys/cdefs.h> #include "qlnx_os.h" #include "bcm_osal.h" @@ -2778,7 +2780,7 @@ qlnx_ioctl(if_t ifp, u_long cmd, caddr_t data) if (!p_ptt) { QL_DPRINT1(ha, "ecore_ptt_acquire failed\n"); - ret = -1; + ret = ERESTART; break; } @@ -2789,7 +2791,7 @@ qlnx_ioctl(if_t ifp, u_long cmd, caddr_t data) ecore_ptt_release(p_hwfn, p_ptt); if (ret) { - ret = -1; + ret = ENODEV; break; } diff --git a/sys/dev/ufshci/ufshci_private.h b/sys/dev/ufshci/ufshci_private.h index cac743884ee6..ac58d44102a0 100644 --- a/sys/dev/ufshci/ufshci_private.h +++ b/sys/dev/ufshci/ufshci_private.h @@ -149,6 +149,8 @@ struct ufshci_hw_queue { bus_dmamap_t queuemem_map; bus_addr_t req_queue_addr; + bus_addr_t *ucd_bus_addr; + uint32_t num_entries; uint32_t num_trackers; @@ -198,8 +200,6 @@ struct ufshci_req_queue { bus_dma_tag_t dma_tag_payload; bus_dmamap_t ucdmem_map; - - bus_addr_t ucd_addr; }; struct ufshci_device { diff --git a/sys/dev/ufshci/ufshci_req_sdb.c b/sys/dev/ufshci/ufshci_req_sdb.c index 4670281d367a..b1f303afaef5 100644 --- a/sys/dev/ufshci/ufshci_req_sdb.c +++ b/sys/dev/ufshci/ufshci_req_sdb.c @@ -48,6 +48,29 @@ ufshci_req_sdb_cmd_desc_destroy(struct ufshci_req_queue *req_queue) } } +static void +ufshci_ucd_map(void *arg, bus_dma_segment_t *seg, int nseg, int error) +{ + struct ufshci_hw_queue *hwq = arg; + int i; + + if (error != 0) { + printf("ufshci: Failed to map UCD, error = %d\n", error); + return; + } + + if (hwq->num_trackers != nseg) { + printf( + "ufshci: Failed to map UCD, num_trackers = %d, nseg = %d\n", + hwq->num_trackers, nseg); + return; + } + + for (i = 0; i < nseg; i++) { + hwq->ucd_bus_addr[i] = seg[i].ds_addr; + } +} + static int ufshci_req_sdb_cmd_desc_construct(struct ufshci_req_queue *req_queue, uint32_t num_entries, struct ufshci_controller *ctrlr) @@ -55,7 +78,6 @@ ufshci_req_sdb_cmd_desc_construct(struct ufshci_req_queue *req_queue, struct ufshci_hw_queue *hwq = &req_queue->hwq[UFSHCI_SDB_Q]; struct ufshci_tracker *tr; size_t ucd_allocsz, 
payload_allocsz; - uint64_t ucdmem_phys; uint8_t *ucdmem; int i, error; @@ -71,10 +93,11 @@ ufshci_req_sdb_cmd_desc_construct(struct ufshci_req_queue *req_queue, * Allocate physical memory for UTP Command Descriptor (UCD) * Note: UFSHCI UCD format is restricted to 128-byte alignment. */ - error = bus_dma_tag_create(bus_get_dma_tag(ctrlr->dev), 128, - ctrlr->page_size, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, - ucd_allocsz, howmany(ucd_allocsz, ctrlr->page_size), - ctrlr->page_size, 0, NULL, NULL, &req_queue->dma_tag_ucd); + error = bus_dma_tag_create(bus_get_dma_tag(ctrlr->dev), 128, 0, + BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, ucd_allocsz, + howmany(ucd_allocsz, sizeof(struct ufshci_utp_cmd_desc)), + sizeof(struct ufshci_utp_cmd_desc), 0, NULL, NULL, + &req_queue->dma_tag_ucd); if (error != 0) { ufshci_printf(ctrlr, "request cmd desc tag create failed %d\n", error); @@ -88,7 +111,7 @@ ufshci_req_sdb_cmd_desc_construct(struct ufshci_req_queue *req_queue, } if (bus_dmamap_load(req_queue->dma_tag_ucd, req_queue->ucdmem_map, - ucdmem, ucd_allocsz, ufshci_single_map, &ucdmem_phys, 0) != 0) { + ucdmem, ucd_allocsz, ufshci_ucd_map, hwq, 0) != 0) { ufshci_printf(ctrlr, "failed to load cmd desc memory\n"); bus_dmamem_free(req_queue->dma_tag_ucd, req_queue->ucd, req_queue->ucdmem_map); @@ -96,7 +119,6 @@ ufshci_req_sdb_cmd_desc_construct(struct ufshci_req_queue *req_queue, } req_queue->ucd = (struct ufshci_utp_cmd_desc *)ucdmem; - req_queue->ucd_addr = ucdmem_phys; /* * Allocate physical memory for PRDT @@ -128,10 +150,9 @@ ufshci_req_sdb_cmd_desc_construct(struct ufshci_req_queue *req_queue, tr->slot_state = UFSHCI_SLOT_STATE_FREE; tr->ucd = (struct ufshci_utp_cmd_desc *)ucdmem; - tr->ucd_bus_addr = ucdmem_phys; + tr->ucd_bus_addr = hwq->ucd_bus_addr[i]; ucdmem += sizeof(struct ufshci_utp_cmd_desc); - ucdmem_phys += sizeof(struct ufshci_utp_cmd_desc); hwq->act_tr[i] = tr; } @@ -175,6 +196,11 @@ ufshci_req_sdb_construct(struct ufshci_controller *ctrlr, req_queue->hwq = malloc(sizeof(struct ufshci_hw_queue), M_UFSHCI, M_ZERO | M_NOWAIT); hwq = &req_queue->hwq[UFSHCI_SDB_Q]; + hwq->num_entries = req_queue->num_entries; + hwq->num_trackers = req_queue->num_trackers; + req_queue->hwq->ucd_bus_addr = malloc(sizeof(bus_addr_t) * + req_queue->num_trackers, + M_UFSHCI, M_ZERO | M_NOWAIT); mtx_init(&hwq->qlock, "ufshci req_queue lock", NULL, MTX_DEF); @@ -277,6 +303,7 @@ ufshci_req_sdb_destroy(struct ufshci_controller *ctrlr, if (mtx_initialized(&hwq->qlock)) mtx_destroy(&hwq->qlock); + free(req_queue->hwq->ucd_bus_addr, M_UFSHCI); free(req_queue->hwq, M_UFSHCI); } diff --git a/sys/dev/usb/controller/xhci_pci.c b/sys/dev/usb/controller/xhci_pci.c index b50e33ea36ce..d5cfd228a429 100644 --- a/sys/dev/usb/controller/xhci_pci.c +++ b/sys/dev/usb/controller/xhci_pci.c @@ -99,6 +99,11 @@ xhci_pci_match(device_t self) return ("AMD Starship USB 3.0 controller"); case 0x149c1022: return ("AMD Matisse USB 3.0 controller"); + case 0x15b61022: + case 0x15b71022: + return ("AMD Raphael/Granite Ridge USB 3.1 controller"); + case 0x15b81022: + return ("AMD Raphael/Granite Ridge USB 2.0 controller"); case 0x15e01022: case 0x15e11022: return ("AMD Raven USB 3.1 controller"); @@ -109,6 +114,8 @@ xhci_pci_match(device_t self) return ("AMD 300 Series USB 3.1 controller"); case 0x43d51022: return ("AMD 400 Series USB 3.1 controller"); + case 0x43f71022: + return ("AMD 600 Series USB 3.2 controller"); case 0x78121022: case 0x78141022: case 0x79141022: diff --git a/sys/fs/fdescfs/fdesc_vnops.c 
b/sys/fs/fdescfs/fdesc_vnops.c index 676ea5de12b8..58a22b8bdc50 100644 --- a/sys/fs/fdescfs/fdesc_vnops.c +++ b/sys/fs/fdescfs/fdesc_vnops.c @@ -547,6 +547,8 @@ fdesc_readdir(struct vop_readdir_args *ap) fmp = VFSTOFDESC(ap->a_vp->v_mount); if (ap->a_ncookies != NULL) *ap->a_ncookies = 0; + if (ap->a_eofflag != NULL) + *ap->a_eofflag = 0; off = (int)uio->uio_offset; if (off != uio->uio_offset || off < 0 || (u_int)off % UIO_MX != 0 || @@ -559,7 +561,12 @@ fdesc_readdir(struct vop_readdir_args *ap) fcnt = i - 2; /* The first two nodes are `.' and `..' */ FILEDESC_SLOCK(fdp); - while (i < fdp->fd_nfiles + 2 && uio->uio_resid >= UIO_MX) { + while (uio->uio_resid >= UIO_MX) { + if (i >= fdp->fd_nfiles + 2) { + if (ap->a_eofflag != NULL) + *ap->a_eofflag = 1; + break; + } bzero((caddr_t)dp, UIO_MX); switch (i) { case 0: /* `.' */ diff --git a/sys/fs/msdosfs/msdosfs_vnops.c b/sys/fs/msdosfs/msdosfs_vnops.c index 5db61c8951f6..33e0d94954d7 100644 --- a/sys/fs/msdosfs/msdosfs_vnops.c +++ b/sys/fs/msdosfs/msdosfs_vnops.c @@ -1521,6 +1521,9 @@ msdosfs_readdir(struct vop_readdir_args *ap) ap->a_vp, uio, ap->a_cred, ap->a_eofflag); #endif + if (ap->a_eofflag != NULL) + *ap->a_eofflag = 0; + /* * msdosfs_readdir() won't operate properly on regular files since * it does i/o only with the filesystem vnode, and hence can @@ -1614,8 +1617,11 @@ msdosfs_readdir(struct vop_readdir_args *ap) on = (offset - bias) & pmp->pm_crbomask; n = min(pmp->pm_bpcluster - on, uio->uio_resid); diff = dep->de_FileSize - (offset - bias); - if (diff <= 0) - break; + if (diff <= 0) { + if (ap->a_eofflag != NULL) + *ap->a_eofflag = 1; + goto out; + } n = min(n, diff); error = pcbmap(dep, lbn, &bn, &cn, &blsize); if (error) @@ -1646,6 +1652,8 @@ msdosfs_readdir(struct vop_readdir_args *ap) */ if (dentp->deName[0] == SLOT_EMPTY) { brelse(bp); + if (ap->a_eofflag != NULL) + *ap->a_eofflag = 1; goto out; } /* @@ -1743,15 +1751,6 @@ out: uio->uio_offset = off; - /* - * Set the eofflag (NFS uses it) - */ - if (ap->a_eofflag) { - if (dep->de_FileSize - (offset - bias) <= 0) - *ap->a_eofflag = 1; - else - *ap->a_eofflag = 0; - } return (error); } diff --git a/sys/fs/p9fs/p9fs_vnops.c b/sys/fs/p9fs/p9fs_vnops.c index 56bf766ef801..227e2b93883e 100644 --- a/sys/fs/p9fs/p9fs_vnops.c +++ b/sys/fs/p9fs/p9fs_vnops.c @@ -1784,6 +1784,9 @@ p9fs_readdir(struct vop_readdir_args *ap) return (EBADF); } + if (ap->a_eofflag != NULL) + *ap->a_eofflag = 0; + io_buffer = uma_zalloc(p9fs_io_buffer_zone, M_WAITOK); /* We haven't reached the end yet. read more. 
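The fdescfs and msdosfs readdir hunks above (and the p9fs one that follows) converge on one VOP_READDIR contract: clear *a_eofflag before scanning and set it only when enumeration actually runs past the last entry, never merely because the caller's buffer filled. A minimal sketch of that contract; readdir_model and its parameters are illustrative:

#include <stddef.h>

static int
readdir_model(size_t nentries, size_t out_slots, size_t i, int *eofflag)
{
	if (eofflag != NULL)
		*eofflag = 0;			/* cleared up front */
	while (out_slots > 0) {
		if (i >= nentries) {
			if (eofflag != NULL)
				*eofflag = 1;	/* ran past the last entry */
			break;
		}
		/* copy out entry i here */
		i++;
		out_slots--;
	}
	return (0);
}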
*/ @@ -1801,8 +1804,11 @@ p9fs_readdir(struct vop_readdir_args *ap) count = p9_client_readdir(vofid, (char *)io_buffer, diroffset, count); - if (count == 0) + if (count == 0) { + if (ap->a_eofflag != NULL) + *ap->a_eofflag = 1; break; + } if (count < 0) { error = EIO; diff --git a/sys/fs/udf/ecma167-udf.h b/sys/fs/udf/ecma167-udf.h index 839bbec08254..19e114763cac 100644 --- a/sys/fs/udf/ecma167-udf.h +++ b/sys/fs/udf/ecma167-udf.h @@ -243,7 +243,7 @@ struct part_map_spare { uint8_t n_st; /* Number of Sparing Tables */ uint8_t reserved1; uint32_t st_size; - uint32_t st_loc[1]; + uint32_t st_loc[]; } __packed; union udf_pmap { @@ -266,7 +266,7 @@ struct udf_sparing_table { uint16_t rt_l; /* Relocation Table len */ uint8_t reserved[2]; uint32_t seq_num; - struct spare_map_entry entries[1]; + struct spare_map_entry entries[]; } __packed; /* Partition Descriptor [3/10.5] */ diff --git a/sys/fs/udf/udf_vfsops.c b/sys/fs/udf/udf_vfsops.c index c7438147c0a0..c5ef1f686093 100644 --- a/sys/fs/udf/udf_vfsops.c +++ b/sys/fs/udf/udf_vfsops.c @@ -81,6 +81,7 @@ #include <sys/fcntl.h> #include <sys/iconv.h> #include <sys/kernel.h> +#include <sys/limits.h> #include <sys/malloc.h> #include <sys/mount.h> #include <sys/namei.h> @@ -729,7 +730,7 @@ udf_fhtovp(struct mount *mp, struct fid *fhp, int flags, struct vnode **vpp) struct ifid *ifhp; struct vnode *nvp; struct udf_node *np; - off_t fsize; + uint64_t fsize; int error; ifhp = (struct ifid *)fhp; @@ -741,6 +742,10 @@ udf_fhtovp(struct mount *mp, struct fid *fhp, int flags, struct vnode **vpp) np = VTON(nvp); fsize = le64toh(np->fentry->inf_len); + if (fsize > OFF_MAX) { + *vpp = NULLVP; + return (EIO); + } *vpp = nvp; vnode_create_vobject(*vpp, fsize, curthread); diff --git a/sys/fs/udf/udf_vnops.c b/sys/fs/udf/udf_vnops.c index 88bf4917a851..37889241e8c3 100644 --- a/sys/fs/udf/udf_vnops.c +++ b/sys/fs/udf/udf_vnops.c @@ -39,6 +39,7 @@ #include <sys/conf.h> #include <sys/buf.h> #include <sys/iconv.h> +#include <sys/limits.h> #include <sys/mount.h> #include <sys/vnode.h> #include <sys/dirent.h> @@ -182,11 +183,14 @@ udf_access(struct vop_access_args *a) } static int -udf_open(struct vop_open_args *ap) { +udf_open(struct vop_open_args *ap) +{ struct udf_node *np = VTON(ap->a_vp); - off_t fsize; + uint64_t fsize; fsize = le64toh(np->fentry->inf_len); + if (fsize > OFF_MAX) + return (EIO); vnode_create_vobject(ap->a_vp, fsize, ap->a_td); return 0; } @@ -314,12 +318,13 @@ udf_getattr(struct vop_getattr_args *a) * that directories consume at least one logical block, * make it appear so. 
*/ - if (fentry->logblks_rec != 0) { - vap->va_size = - le64toh(fentry->logblks_rec) * node->udfmp->bsize; - } else { + vap->va_size = le64toh(fentry->logblks_rec); + if (vap->va_size == 0) vap->va_size = node->udfmp->bsize; - } + else if (vap->va_size > UINT64_MAX / node->udfmp->bsize) + vap->va_size = UINT64_MAX; + else + vap->va_size *= node->udfmp->bsize; } else { vap->va_size = le64toh(fentry->inf_len); } @@ -446,6 +451,7 @@ udf_read(struct vop_read_args *ap) struct buf *bp; uint8_t *data; daddr_t lbn, rablock; + uint64_t len; off_t diff, fsize; ssize_t n; int error = 0; @@ -471,7 +477,12 @@ udf_read(struct vop_read_args *ap) return (error); } - fsize = le64toh(node->fentry->inf_len); + len = le64toh(node->fentry->inf_len); + if (len > OFF_MAX) { + /* too big, just cap to the requested length */ + len = uio->uio_resid; + } + fsize = len; udfmp = node->udfmp; do { lbn = lblkno(udfmp, uio->uio_offset); @@ -783,6 +794,7 @@ udf_readdir(struct vop_readdir_args *a) struct udf_uiodir uiodir; struct udf_dirstream *ds; uint64_t *cookies = NULL; + uint64_t len; int ncookies; int error = 0; @@ -811,8 +823,12 @@ udf_readdir(struct vop_readdir_args *a) * Iterate through the file id descriptors. Give the parent dir * entry special attention. */ - ds = udf_opendir(node, uio->uio_offset, le64toh(node->fentry->inf_len), - node->udfmp); + len = le64toh(node->fentry->inf_len); + if (len > INT_MAX) { + /* too big, just cap to INT_MAX */ + len = INT_MAX; + } + ds = udf_opendir(node, uio->uio_offset, len, node->udfmp); while ((fid = udf_getfid(ds)) != NULL) { /* XXX Should we return an error on a bad fid? */ @@ -904,7 +920,8 @@ udf_readlink(struct vop_readlink_args *ap) struct udf_node *node; void *buf; char *cp; - int error, len, root; + uint64_t len; + int error, root; /* * A symbolic link in UDF is a list of variable-length path @@ -914,6 +931,8 @@ udf_readlink(struct vop_readlink_args *ap) vp = ap->a_vp; node = VTON(vp); len = le64toh(node->fentry->inf_len); + if (len > MAXPATHLEN) + return (EIO); buf = malloc(len, M_DEVBUF, M_WAITOK); iov[0].iov_len = len; iov[0].iov_base = buf; @@ -1116,13 +1135,14 @@ udf_lookup(struct vop_cachedlookup_args *a) struct udf_mnt *udfmp; struct fileid_desc *fid = NULL; struct udf_dirstream *ds; + uint64_t fsize; u_long nameiop; u_long flags; char *nameptr; long namelen; ino_t id = 0; int offset, error = 0; - int fsize, lkflags, ltype, numdirpasses; + int lkflags, ltype, numdirpasses; dvp = a->a_dvp; node = VTON(dvp); @@ -1133,6 +1153,10 @@ udf_lookup(struct vop_cachedlookup_args *a) nameptr = a->a_cnp->cn_nameptr; namelen = a->a_cnp->cn_namelen; fsize = le64toh(node->fentry->inf_len); + if (fsize > INT_MAX) { + /* too big, just cap to INT_MAX */ + fsize = INT_MAX; + } /* * If this is a LOOKUP and we've already partially searched through diff --git a/sys/i386/conf/GENERIC b/sys/i386/conf/GENERIC index e7d460af21d4..f577cd07ac7c 100644 --- a/sys/i386/conf/GENERIC +++ b/sys/i386/conf/GENERIC @@ -17,6 +17,8 @@ # in NOTES. # +#NO_UNIVERSE + cpu I486_CPU cpu I586_CPU cpu I686_CPU diff --git a/sys/i386/conf/GENERIC-NODEBUG b/sys/i386/conf/GENERIC-NODEBUG index ea07613a796f..a93304481b5f 100644 --- a/sys/i386/conf/GENERIC-NODEBUG +++ b/sys/i386/conf/GENERIC-NODEBUG @@ -25,6 +25,8 @@ # in NOTES. 
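udf_getattr() above now derives va_size from logblks_rec with an explicit saturation check instead of multiplying blindly. The same guard in isolation, assuming a nonzero block size; blocks_to_bytes is an illustrative name:

#include <stdint.h>
#include <stdio.h>

static uint64_t
blocks_to_bytes(uint64_t logblks, uint64_t bsize)
{
	if (logblks == 0)
		return (bsize);		/* directories take at least one block */
	if (logblks > UINT64_MAX / bsize)
		return (UINT64_MAX);	/* saturate instead of wrapping */
	return (logblks * bsize);
}

int
main(void)
{
	printf("%ju\n", (uintmax_t)blocks_to_bytes(4, 2048));	/* 8192 */
	printf("%ju\n", (uintmax_t)blocks_to_bytes(UINT64_MAX / 1024, 2048));
	return (0);
}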
# +#NO_UNIVERSE + include GENERIC include "std.nodebug" diff --git a/sys/i386/conf/LINT b/sys/i386/conf/LINT index 41207eb63cb9..2e947202f723 100644 --- a/sys/i386/conf/LINT +++ b/sys/i386/conf/LINT @@ -1,3 +1,4 @@ +#NO_UNIVERSE include "../../conf/NOTES" include "../../x86/conf/NOTES" diff --git a/sys/i386/conf/MINIMAL b/sys/i386/conf/MINIMAL index 2a06eb84bff8..8019617ca4d4 100644 --- a/sys/i386/conf/MINIMAL +++ b/sys/i386/conf/MINIMAL @@ -31,6 +31,8 @@ # in NOTES. # +#NO_UNIVERSE + cpu I486_CPU cpu I586_CPU cpu I686_CPU diff --git a/sys/i386/conf/PAE b/sys/i386/conf/PAE index a39d32d77106..72af9e9a9eec 100644 --- a/sys/i386/conf/PAE +++ b/sys/i386/conf/PAE @@ -2,6 +2,8 @@ # PAE -- Generic kernel configuration file for FreeBSD/i386 PAE # +#NO_UNIVERSE + include GENERIC ident PAE-GENERIC diff --git a/sys/i386/i386/pmap.c b/sys/i386/i386/pmap.c index 465b4d0f365b..b44f5e08bbcf 100644 --- a/sys/i386/i386/pmap.c +++ b/sys/i386/i386/pmap.c @@ -876,14 +876,16 @@ __CONCAT(PMTYPE, init_pat)(void) #ifdef PMAP_PAE_COMP static void * -pmap_pdpt_allocf(uma_zone_t zone, vm_size_t bytes, int domain, uint8_t *flags, - int wait) +pmap_pdpt_allocf(uma_zone_t zone, vm_size_t bytes, int domain, uint8_t *sflagsp, + int flags) { /* Inform UMA that this allocator uses kernel_map/object. */ - *flags = UMA_SLAB_KERNEL; + *sflagsp = UMA_SLAB_KERNEL; + /* contig allocations cannot be NEVERFREED */ + flags &= ~M_NEVERFREED; return ((void *)kmem_alloc_contig_domainset(DOMAINSET_FIXED(domain), - bytes, wait, 0x0ULL, 0xffffffffULL, 1, 0, VM_MEMATTR_DEFAULT)); + bytes, flags, 0x0ULL, 0xffffffffULL, 1, 0, VM_MEMATTR_DEFAULT)); } #endif @@ -5617,6 +5619,8 @@ __CONCAT(PMTYPE, unmapdev)(void *p, vm_size_t size) static void __CONCAT(PMTYPE, page_set_memattr)(vm_page_t m, vm_memattr_t ma) { + if (m->md.pat_mode == ma) + return; m->md.pat_mode = ma; if ((m->flags & PG_FICTITIOUS) != 0) diff --git a/sys/kern/kern_descrip.c b/sys/kern/kern_descrip.c index 93bdd41d1515..a27ab33b34da 100644 --- a/sys/kern/kern_descrip.c +++ b/sys/kern/kern_descrip.c @@ -557,8 +557,10 @@ open_to_fde_flags(int open_flags, bool sticky_orb) { .f = O_CLOFORK, .t = UF_FOCLOSE }, { .f = O_RESOLVE_BENEATH, .t = UF_RESOLVE_BENEATH }, }; +#if defined(__clang__) && __clang_major__ >= 19 _Static_assert(open_to_fde_flags_s[nitems(open_to_fde_flags_s) - 1].f == O_RESOLVE_BENEATH, "O_RESOLVE_BENEATH must be last, for sticky_orb"); +#endif return (flags_trans(open_to_fde_flags_s, nitems(open_to_fde_flags_s) - (sticky_orb ? 0 : 1), open_flags)); diff --git a/sys/kern/subr_asan.c b/sys/kern/subr_asan.c index 0edb631d1475..464efda1e91a 100644 --- a/sys/kern/subr_asan.c +++ b/sys/kern/subr_asan.c @@ -263,8 +263,7 @@ kasan_mark(const void *addr, size_t size, size_t redzsize, uint8_t code) if (__predict_false(!kasan_enabled)) return; - if ((vm_offset_t)addr >= DMAP_MIN_ADDRESS && - (vm_offset_t)addr < DMAP_MAX_ADDRESS) + if (kasan_md_unsupported((vm_offset_t)addr)) return; KASSERT((vm_offset_t)addr >= VM_MIN_KERNEL_ADDRESS && diff --git a/sys/kern/subr_trap.c b/sys/kern/subr_trap.c index 18388ae5f232..bac7d0080c71 100644 --- a/sys/kern/subr_trap.c +++ b/sys/kern/subr_trap.c @@ -338,8 +338,9 @@ ast_handler(struct thread *td, struct trapframe *framep, bool dtor) td->td_ast = 0; } - CTR3(KTR_SYSC, "ast: thread %p (pid %d, %s)", td, td->td_proc->p_pid, - td->td_proc->p_comm); + CTR3(KTR_SYSC, "ast: thread %p (pid %d, %s)", td, + td->td_proc == NULL ? -1 : td->td_proc->p_pid, + td->td_proc == NULL ? 
"" : td->td_proc->p_comm); KASSERT(framep == NULL || TRAPF_USERMODE(framep), ("ast in kernel mode")); diff --git a/sys/kern/sys_generic.c b/sys/kern/sys_generic.c index 94e44d888181..b472aaea89e6 100644 --- a/sys/kern/sys_generic.c +++ b/sys/kern/sys_generic.c @@ -2309,6 +2309,12 @@ sys_exterrctl(struct thread *td, struct exterrctl_args *uap) return (EINVAL); td->td_pflags2 &= ~TDP2_UEXTERR; return (0); + case EXTERRCTL_UD: + /* + * Important: this code must always return EINVAL and never any + * extended error, for testing purposes. + */ + /* FALLTHROUGH */ default: return (EINVAL); } diff --git a/sys/kern/vfs_cache.c b/sys/kern/vfs_cache.c index 3d455b3874cc..89c1d779f04c 100644 --- a/sys/kern/vfs_cache.c +++ b/sys/kern/vfs_cache.c @@ -332,7 +332,8 @@ SDT_PROBE_DEFINE2(vfs, namecache, evict_negative, done, "struct vnode *", "char *"); SDT_PROBE_DEFINE1(vfs, namecache, symlink, alloc__fail, "size_t"); -SDT_PROBE_DEFINE3(vfs, fplookup, lookup, done, "struct nameidata", "int", "bool"); +SDT_PROBE_DEFINE3(vfs, fplookup, lookup, done, "struct nameidata *", "int", + "enum cache_fpl_status"); SDT_PROBE_DECLARE(vfs, namei, lookup, entry); SDT_PROBE_DECLARE(vfs, namei, lookup, return); @@ -6420,15 +6421,11 @@ out: cache_fpl_smr_assert_not_entered(&fpl); cache_fpl_assert_status(&fpl); *status = fpl.status; - if (SDT_PROBES_ENABLED()) { - SDT_PROBE3(vfs, fplookup, lookup, done, ndp, fpl.line, fpl.status); - if (fpl.status == CACHE_FPL_STATUS_HANDLED) - SDT_PROBE4(vfs, namei, lookup, return, error, ndp->ni_vp, true, - ndp); - } - + SDT_PROBE3(vfs, fplookup, lookup, done, ndp, fpl.line, fpl.status); if (__predict_true(fpl.status == CACHE_FPL_STATUS_HANDLED)) { MPASS(error != CACHE_FPL_FAILED); + SDT_PROBE4(vfs, namei, lookup, return, error, ndp->ni_vp, true, + ndp); if (error != 0) { cache_fpl_cleanup_cnp(fpl.cnp); MPASS(fpl.dvp == NULL); diff --git a/sys/modules/Makefile b/sys/modules/Makefile index 9922796f8a1d..7cb6e2124326 100644 --- a/sys/modules/Makefile +++ b/sys/modules/Makefile @@ -326,6 +326,7 @@ SUBDIR= \ proto \ pseudofs \ ${_pst} \ + ${_pt} \ pty \ puc \ pwm \ @@ -842,6 +843,7 @@ _iwx= iwx _ixl= ixl _nvdimm= nvdimm _pms= pms +_pt= pt _qat= qat .if ${MK_SOURCELESS_UCODE} != "no" _qatfw= qatfw diff --git a/sys/modules/pt/Makefile b/sys/modules/pt/Makefile new file mode 100644 index 000000000000..416b072face9 --- /dev/null +++ b/sys/modules/pt/Makefile @@ -0,0 +1,8 @@ + +.PATH: ${SRCTOP}/sys/amd64/pt + +KMOD= pt +SRCS= pt.c pt.h device_if.h bus_if.h +SRCS+= opt_hwpmc_hooks.h opt_kstack_pages.h + +.include <bsd.kmod.mk> diff --git a/sys/modules/qlnx/qlnxe/Makefile b/sys/modules/qlnx/qlnxe/Makefile index 3d8415cf0e57..2a44ae6ddde5 100644 --- a/sys/modules/qlnx/qlnxe/Makefile +++ b/sys/modules/qlnx/qlnxe/Makefile @@ -58,6 +58,7 @@ SRCS+=qlnx_rdma.c SRCS+=qlnx_ioctl.c SRCS+=qlnx_os.c +SRCS+=opt_inet.h SRCS+= ${LINUXKPI_GENSRCS} diff --git a/sys/net/ethernet.h b/sys/net/ethernet.h index cf4f75bd0b6c..01485cf26e06 100644 --- a/sys/net/ethernet.h +++ b/sys/net/ethernet.h @@ -62,6 +62,8 @@ struct ether_header { u_char ether_shost[ETHER_ADDR_LEN]; u_short ether_type; } __packed; +_Static_assert(sizeof(struct ether_header) == ETHER_HDR_LEN, + "size of struct ether_header is wrong"); /* * Structure of a 48-bit Ethernet address. 
@@ -69,6 +71,8 @@ struct ether_header { struct ether_addr { u_char octet[ETHER_ADDR_LEN]; } __packed; +_Static_assert(sizeof(struct ether_addr) == ETHER_ADDR_LEN, + "size of struct ether_addr is wrong"); #define ETHER_IS_MULTICAST(addr) (*(addr) & 0x01) /* is address mcast/bcast? */ #define ETHER_IS_IPV6_MULTICAST(addr) \ @@ -112,6 +116,8 @@ struct ether_vlan_header { uint16_t evl_tag; uint16_t evl_proto; } __packed; +_Static_assert(sizeof(struct ether_vlan_header) == ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN, + "size of struct ether_vlan_header is wrong"); #define EVL_VLID_MASK 0x0FFF #define EVL_PRI_MASK 0xE000 diff --git a/sys/net/if_ethersubr.c b/sys/net/if_ethersubr.c index 7be4dfac23e7..3ae0c01c0efc 100644 --- a/sys/net/if_ethersubr.c +++ b/sys/net/if_ethersubr.c @@ -92,11 +92,6 @@ #include <crypto/sha1.h> -#ifdef CTASSERT -CTASSERT(sizeof (struct ether_header) == ETHER_ADDR_LEN * 2 + 2); -CTASSERT(sizeof (struct ether_addr) == ETHER_ADDR_LEN); -#endif - VNET_DEFINE(pfil_head_t, link_pfil_head); /* Packet filter hooks */ /* netgraph node hooks for ng_ether(4) */ diff --git a/sys/net/if_lagg.c b/sys/net/if_lagg.c index 9867a718e148..5b52bfa80e3b 100644 --- a/sys/net/if_lagg.c +++ b/sys/net/if_lagg.c @@ -718,6 +718,7 @@ lagg_capabilities(struct lagg_softc *sc) sc->sc_ifp->if_capenable = ena; sc->sc_ifp->if_capenable2 = ena2; sc->sc_ifp->if_hwassist = hwa; + (void)if_hw_tsomax_update(sc->sc_ifp, &hw_tsomax); getmicrotime(&sc->sc_ifp->if_lastchange); if (sc->sc_ifflags & IFF_DEBUG) diff --git a/sys/net/pfvar.h b/sys/net/pfvar.h index 36fab1a03ee6..452a8eb4024b 100644 --- a/sys/net/pfvar.h +++ b/sys/net/pfvar.h @@ -1370,7 +1370,6 @@ struct pf_kruleset { struct pf_krulequeue queues[2]; struct { struct pf_krulequeue *ptr; - struct pf_krule **ptr_array; u_int32_t rcount; u_int32_t ticket; int open; @@ -2500,7 +2499,7 @@ int pfr_match_addr(struct pfr_ktable *, struct pf_addr *, sa_family_t); void pfr_update_stats(struct pfr_ktable *, struct pf_addr *, sa_family_t, u_int64_t, int, int, int); int pfr_pool_get(struct pfr_ktable *, int *, struct pf_addr *, sa_family_t, - pf_addr_filter_func_t); + pf_addr_filter_func_t, bool); void pfr_dynaddr_update(struct pfr_ktable *, struct pfi_dynaddr *); struct pfr_ktable * pfr_attach_table(struct pf_kruleset *, char *); @@ -2534,6 +2533,8 @@ int pfr_ina_rollback(struct pfr_table *, u_int32_t, int *, int); int pfr_ina_commit(struct pfr_table *, u_int32_t, int *, int *, int); int pfr_ina_define(struct pfr_table *, struct pfr_addr *, int, int *, int *, u_int32_t, int); +struct pfr_ktable + *pfr_ktable_select_active(struct pfr_ktable *); MALLOC_DECLARE(PFI_MTYPE); VNET_DECLARE(struct pfi_kkif *, pfi_all); @@ -2712,7 +2713,6 @@ u_short pf_map_addr(u_int8_t, struct pf_krule *, u_short pf_map_addr_sn(u_int8_t, struct pf_krule *, struct pf_addr *, struct pf_addr *, struct pfi_kkif **nkif, struct pf_addr *, - struct pf_ksrc_node **, struct pf_srchash **, struct pf_kpool *, pf_sn_types_t); int pf_get_transaddr_af(struct pf_krule *, struct pf_pdesc *); diff --git a/sys/net80211/ieee80211_hostap.c b/sys/net80211/ieee80211_hostap.c index c5a478533313..9074878e17e4 100644 --- a/sys/net80211/ieee80211_hostap.c +++ b/sys/net80211/ieee80211_hostap.c @@ -2214,12 +2214,9 @@ hostap_recv_mgmt(struct ieee80211_node *ni, struct mbuf *m0, /* VHT */ if (IEEE80211_IS_CHAN_VHT(ni->ni_chan) && - vhtcap != NULL && - vhtinfo != NULL) { - /* XXX TODO; see below */ - net80211_vap_printf(vap, "%s: VHT TODO!\n", __func__); + vhtcap != NULL) { ieee80211_vht_node_init(ni); - 
ieee80211_vht_update_cap(ni, vhtcap, vhtinfo); + ieee80211_vht_update_cap(ni, vhtcap); } else if (ni->ni_flags & IEEE80211_NODE_VHT) ieee80211_vht_node_cleanup(ni); diff --git a/sys/net80211/ieee80211_ht.c b/sys/net80211/ieee80211_ht.c index 5ec80e3646b8..c28f124648a1 100644 --- a/sys/net80211/ieee80211_ht.c +++ b/sys/net80211/ieee80211_ht.c @@ -1952,6 +1952,11 @@ do { \ _RETURN_CHAN_BITS(0); /* + * TODO: should we bail out if there's no htinfo? + * Or just treat it as if we can't do the HT20/HT40 check? + */ + + /* * The original code was based on * 802.11ac-2013, Table 8-183x-VHT Operation Information subfields. * 802.11-2020, Table 9-274-VHT Operation Information subfields @@ -1962,8 +1967,12 @@ do { \ */ htinfo = (const struct ieee80211_ie_htinfo *)ni->ni_ies.htinfo_ie; - ht40 = ((htinfo->hi_byte1 & IEEE80211_HTINFO_TXWIDTH) == - IEEE80211_HTINFO_TXWIDTH_2040); + if (htinfo != NULL) + ht40 = ((htinfo->hi_byte1 & IEEE80211_HTINFO_TXWIDTH) == + IEEE80211_HTINFO_TXWIDTH_2040); + else + ht40 = false; + can_vht160 = can_vht80p80 = can_vht80 = false; /* 20 Mhz */ diff --git a/sys/net80211/ieee80211_vht.c b/sys/net80211/ieee80211_vht.c index e91977f1ef98..de0b691d4d2a 100644 --- a/sys/net80211/ieee80211_vht.c +++ b/sys/net80211/ieee80211_vht.c @@ -838,12 +838,10 @@ ieee80211_add_vhtinfo(uint8_t *frm, struct ieee80211_node *ni) } void -ieee80211_vht_update_cap(struct ieee80211_node *ni, const uint8_t *vhtcap_ie, - const uint8_t *vhtop_ie) +ieee80211_vht_update_cap(struct ieee80211_node *ni, const uint8_t *vhtcap_ie) { ieee80211_parse_vhtcap(ni, vhtcap_ie); - ieee80211_parse_vhtopmode(ni, vhtop_ie); } static struct ieee80211_channel * diff --git a/sys/net80211/ieee80211_vht.h b/sys/net80211/ieee80211_vht.h index 2964de63c343..a1529df4a85b 100644 --- a/sys/net80211/ieee80211_vht.h +++ b/sys/net80211/ieee80211_vht.h @@ -52,8 +52,7 @@ uint8_t * ieee80211_add_vhtinfo(uint8_t *frm, struct ieee80211_node *); uint8_t *ieee80211_add_vhtcap_ch(uint8_t *, struct ieee80211vap *, struct ieee80211_channel *); -void ieee80211_vht_update_cap(struct ieee80211_node *, - const uint8_t *, const uint8_t *); +void ieee80211_vht_update_cap(struct ieee80211_node *, const uint8_t *); struct ieee80211_channel * ieee80211_vht_adjust_channel(struct ieee80211com *, diff --git a/sys/netinet6/raw_ip6.c b/sys/netinet6/raw_ip6.c index 0379ef7c789a..c90a1213bd66 100644 --- a/sys/netinet6/raw_ip6.c +++ b/sys/netinet6/raw_ip6.c @@ -765,8 +765,7 @@ rip6_bind(struct socket *so, struct sockaddr *nam, struct thread *td) } if (ifa != NULL && ((struct in6_ifaddr *)ifa)->ia6_flags & - (IN6_IFF_ANYCAST|IN6_IFF_NOTREADY| - IN6_IFF_DETACHED|IN6_IFF_DEPRECATED)) { + (IN6_IFF_NOTREADY|IN6_IFF_DETACHED|IN6_IFF_DEPRECATED)) { NET_EPOCH_EXIT(et); return (EADDRNOTAVAIL); } diff --git a/sys/netipsec/ipsec.c b/sys/netipsec/ipsec.c index 6bacc68b7441..92d0201b398a 100644 --- a/sys/netipsec/ipsec.c +++ b/sys/netipsec/ipsec.c @@ -636,8 +636,10 @@ ipsec4_in_reject1(const struct mbuf *m, struct ip *ip1, struct inpcb *inp) #ifdef IPSEC_OFFLOAD tag = ipsec_accel_input_tag_lookup(m); - if (tag != NULL) - return (0); + if (tag != NULL) { + tag->tag.m_tag_id = PACKET_TAG_IPSEC_IN_DONE; + __DECONST(struct mbuf *, m)->m_flags |= M_DECRYPTED; + } #endif if (ip1 == NULL) { diff --git a/sys/netipsec/ipsec_offload.c b/sys/netipsec/ipsec_offload.c index 467d5ded1d7a..8a09d5f37b4a 100644 --- a/sys/netipsec/ipsec_offload.c +++ b/sys/netipsec/ipsec_offload.c @@ -94,6 +94,7 @@ struct ifp_handle_sav { size_t hdr_ext_size; uint64_t cnt_octets; uint64_t cnt_allocs; + 
struct xform_history xfh; }; #define IFP_HS_HANDLED 0x00000001 @@ -159,6 +160,8 @@ static void ipsec_accel_drv_sa_lifetime_update_impl(struct secasvar *sav, static int ipsec_accel_drv_sa_lifetime_fetch_impl(struct secasvar *sav, if_t ifp, u_int drv_spi, uint64_t *octets, uint64_t *allocs); static void ipsec_accel_ifdetach_event(void *arg, struct ifnet *ifp); +static bool ipsec_accel_fill_xh_impl(if_t ifp, uint32_t drv_spi, + struct xform_history *xh); static void ipsec_accel_init(void *arg) @@ -185,6 +188,7 @@ ipsec_accel_init(void *arg) ipsec_accel_drv_sa_lifetime_update_impl; ipsec_accel_drv_sa_lifetime_fetch_p = ipsec_accel_drv_sa_lifetime_fetch_impl; + ipsec_accel_fill_xh_p = ipsec_accel_fill_xh_impl; pctrie_init(&drv_spi_pctrie); ipsec_accel_ifdetach_event_tag = EVENTHANDLER_REGISTER( ifnet_departure_event, ipsec_accel_ifdetach_event, NULL, @@ -209,6 +213,7 @@ ipsec_accel_fini(void *arg) ipsec_accel_on_ifdown_p = NULL; ipsec_accel_drv_sa_lifetime_update_p = NULL; ipsec_accel_drv_sa_lifetime_fetch_p = NULL; + ipsec_accel_fill_xh_p = NULL; ipsec_accel_sync_imp(); clean_unrhdr(drv_spi_unr); /* avoid panic, should go later */ clear_unrhdr(drv_spi_unr); @@ -412,6 +417,10 @@ ipsec_accel_handle_sav(struct secasvar *sav, struct ifnet *ifp, ihs->ifdata = priv; ihs->flags = flags; ihs->hdr_ext_size = esp_hdrsiz(sav); + memcpy(&ihs->xfh.dst, &sav->sah->saidx.dst, sizeof(ihs->xfh.dst)); + ihs->xfh.spi = sav->spi; + ihs->xfh.proto = sav->sah->saidx.proto; + ihs->xfh.mode = sav->sah->saidx.mode; mtx_lock(&ipsec_accel_sav_tmp); CK_LIST_FOREACH(i, &sav->accel_ifps, sav_link) { if (i->ifp == ifp) { @@ -1162,4 +1171,20 @@ ipsec_accel_key_setaccelif_impl(struct secasvar *sav) return (m); } +static bool +ipsec_accel_fill_xh_impl(if_t ifp, uint32_t drv_spi, struct xform_history *xh) +{ + struct ifp_handle_sav *i; + + if (drv_spi < IPSEC_ACCEL_DRV_SPI_MIN || + drv_spi > IPSEC_ACCEL_DRV_SPI_MAX) + return (false); + + i = DRVSPI_SA_PCTRIE_LOOKUP(&drv_spi_pctrie, drv_spi); + if (i == NULL) + return (false); + memcpy(xh, &i->xfh, sizeof(*xh)); + return (true); +} + #endif /* IPSEC_OFFLOAD */ diff --git a/sys/netipsec/ipsec_offload.h b/sys/netipsec/ipsec_offload.h index 904fe6252396..ae60eaa8ae78 100644 --- a/sys/netipsec/ipsec_offload.h +++ b/sys/netipsec/ipsec_offload.h @@ -30,6 +30,7 @@ #include <sys/errno.h> #include <net/if.h> #include <net/if_var.h> +#include <netipsec/xform.h> struct secpolicy; struct secasvar; @@ -42,6 +43,7 @@ struct ipsec_accel_out_tag { struct ipsec_accel_in_tag { struct m_tag tag; + struct xform_history xh; /* Must be first to mimic IPSEC_IN_DONE */ uint16_t drv_spi; }; @@ -66,6 +68,8 @@ extern void (*ipsec_accel_drv_sa_lifetime_update_p)(struct secasvar *sav, if_t ifp, u_int drv_spi, uint64_t octets, uint64_t allocs); extern int (*ipsec_accel_drv_sa_lifetime_fetch_p)(struct secasvar *sav, if_t ifp, u_int drv_spi, uint64_t *octets, uint64_t *allocs); +extern bool (*ipsec_accel_fill_xh_p)(if_t ifp, uint32_t drv_spi, + struct xform_history *xh); #ifdef IPSEC_OFFLOAD /* @@ -158,6 +162,16 @@ ipsec_accel_key_setaccelif(struct secasvar *sav) return (NULL); } +static inline bool +ipsec_accel_fill_xh(if_t ifp, uint32_t drv_spi, struct xform_history *xh) +{ + bool (*p)(if_t ifp, uint32_t drv_spi, struct xform_history *xh); + + p = atomic_load_ptr(&ipsec_accel_fill_xh_p); + if (p != NULL) + return (p(ifp, drv_spi, xh)); + return (false); +} #else #define ipsec_accel_sa_newkey(a) @@ -168,6 +182,7 @@ ipsec_accel_key_setaccelif(struct secasvar *sav) #define ipsec_accel_sync() #define 
ipsec_accel_is_accel_sav(a) #define ipsec_accel_key_setaccelif(a) +#define ipsec_accel_fill_xh(a, b, c) (false) #endif void ipsec_accel_forget_sav_impl(struct secasvar *sav); @@ -180,6 +195,7 @@ bool ipsec_accel_output(struct ifnet *ifp, struct mbuf *m, struct inpcb *inp, struct secpolicy *sp, struct secasvar *sav, int af, int mtu, int *hwassist); void ipsec_accel_forget_sav(struct secasvar *sav); +struct xform_history; #else #define ipsec_accel_input(a, b, c) (ENXIO) #define ipsec_accel_output(a, b, c, d, e, f, g, h) ({ \ diff --git a/sys/netipsec/key.c b/sys/netipsec/key.c index ae67d83c6d13..4ba1b49c24f0 100644 --- a/sys/netipsec/key.c +++ b/sys/netipsec/key.c @@ -114,6 +114,8 @@ void (*ipsec_accel_drv_sa_lifetime_update_p)(struct secasvar *sav, if_t ifp, u_int drv_spi, uint64_t octets, uint64_t allocs); int (*ipsec_accel_drv_sa_lifetime_fetch_p)(struct secasvar *sav, if_t ifp, u_int drv_spi, uint64_t *octets, uint64_t *allocs); +bool (*ipsec_accel_fill_xh_p)(if_t ifp, uint32_t drv_spi, + struct xform_history *xh); #endif #define FULLMASK 0xff diff --git a/sys/netlink/netlink_message_parser.h b/sys/netlink/netlink_message_parser.h index 8492ecb3021b..720317ed74f3 100644 --- a/sys/netlink/netlink_message_parser.h +++ b/sys/netlink/netlink_message_parser.h @@ -209,7 +209,8 @@ int nlattr_get_nested(struct nlattr *nla, struct nl_pstate *npt, int nlattr_get_nested_ptr(struct nlattr *nla, struct nl_pstate *npt, const void *arg, void *target); -bool nlmsg_report_err_msg(struct nl_pstate *npt, const char *fmt, ...); +bool nlmsg_report_err_msg(struct nl_pstate *npt, const char *fmt, ...) + __printflike(2, 3); #define NLMSG_REPORT_ERR_MSG(_npt, _fmt, ...) { \ nlmsg_report_err_msg(_npt, _fmt, ## __VA_ARGS__); \ diff --git a/sys/netpfil/ipfilter/netinet/ip_fil_freebsd.c b/sys/netpfil/ipfilter/netinet/ip_fil_freebsd.c index 04850549db98..6eb6cf2a7a47 100644 --- a/sys/netpfil/ipfilter/netinet/ip_fil_freebsd.c +++ b/sys/netpfil/ipfilter/netinet/ip_fil_freebsd.c @@ -463,13 +463,14 @@ ipf_send_ip(fr_info_t *fin, mb_t *m) int ipf_send_icmp_err(int type, fr_info_t *fin, int dst) { - int err, hlen, xtra, iclen, ohlen, avail, code; + int err, hlen, xtra, iclen, ohlen, avail; struct in_addr dst4; struct icmp *icmp; struct mbuf *m; i6addr_t dst6; void *ifp; #ifdef USE_INET6 + int code; ip6_t *ip6; #endif ip_t *ip, *ip2; @@ -477,8 +478,8 @@ ipf_send_icmp_err(int type, fr_info_t *fin, int dst) if ((type < 0) || (type >= ICMP_MAXTYPE)) return (-1); - code = fin->fin_icode; #ifdef USE_INET6 + code = fin->fin_icode; /* See NetBSD ip_fil_netbsd.c r1.4: */ if ((code < 0) || (code >= sizeof(icmptoicmp6unreach)/sizeof(int))) return (-1); diff --git a/sys/netpfil/pf/if_pflog.c b/sys/netpfil/pf/if_pflog.c index 0a84f9d680ac..cb96d2fcc44c 100644 --- a/sys/netpfil/pf/if_pflog.c +++ b/sys/netpfil/pf/if_pflog.c @@ -284,9 +284,9 @@ pflog_packet(uint8_t action, u_int8_t reason, * state lock, since this leads to unsafe LOR. * These conditions are very very rare, however. 
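ipsec_accel_fill_xh() above follows the stock unloadable-module hook pattern: the module publishes a function pointer at load, nulls it at unload, and callers snapshot it once with atomic_load_ptr() before testing and calling it. A portable C11 sketch of the same shape; fill_xh_fn and call_fill_xh are illustrative names:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

typedef bool (*fill_xh_fn)(uint32_t);

static _Atomic fill_xh_fn fill_xh_hook;	/* set/cleared by the module */

static bool
call_fill_xh(uint32_t drv_spi)
{
	fill_xh_fn p;

	p = atomic_load(&fill_xh_hook);	/* one snapshot, then test */
	if (p != NULL)
		return (p(drv_spi));
	return (false);			/* module not loaded */
}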
*/ - if (trigger->log & PF_LOG_SOCKET_LOOKUP && !pd->lookup.done && lookupsafe) + if (trigger->log & PF_LOG_USER && !pd->lookup.done && lookupsafe) pd->lookup.done = pf_socket_lookup(pd); - if (pd->lookup.done > 0) + if (trigger->log & PF_LOG_USER && pd->lookup.done > 0) hdr.uid = pd->lookup.uid; else hdr.uid = -1; diff --git a/sys/netpfil/pf/if_pfsync.c b/sys/netpfil/pf/if_pfsync.c index 2391edaf1a5a..4e03584b8f85 100644 --- a/sys/netpfil/pf/if_pfsync.c +++ b/sys/netpfil/pf/if_pfsync.c @@ -532,6 +532,7 @@ pfsync_state_import(union pfsync_state_union *sp, int flags, int msg_version) struct pf_kpooladdr *rpool_first; int error; uint8_t rt = 0; + int n = 0; PF_RULES_RASSERT(); @@ -557,10 +558,12 @@ pfsync_state_import(union pfsync_state_union *sp, int flags, int msg_version) */ if (sp->pfs_1301.rule != htonl(-1) && sp->pfs_1301.anchor == htonl(-1) && (flags & (PFSYNC_SI_IOCTL | PFSYNC_SI_CKSUM)) && ntohl(sp->pfs_1301.rule) < - pf_main_ruleset.rules[PF_RULESET_FILTER].active.rcount) - r = pf_main_ruleset.rules[ - PF_RULESET_FILTER].active.ptr_array[ntohl(sp->pfs_1301.rule)]; - else + pf_main_ruleset.rules[PF_RULESET_FILTER].active.rcount) { + TAILQ_FOREACH(r, pf_main_ruleset.rules[ + PF_RULESET_FILTER].active.ptr, entries) + if (ntohl(sp->pfs_1301.rule) == n++) + break; + } else r = &V_pf_default_rule; /* diff --git a/sys/netpfil/pf/pf.c b/sys/netpfil/pf/pf.c index d5f01e5c4956..009f7e4d78b1 100644 --- a/sys/netpfil/pf/pf.c +++ b/sys/netpfil/pf/pf.c @@ -5901,18 +5901,17 @@ pf_test_rule(struct pf_krule **rm, struct pf_kstate **sm, M_SETFIB(pd->m, pd->act.rtableid); if (r->rt) { - struct pf_ksrc_node *sn = NULL; - struct pf_srchash *snh = NULL; /* * Set act.rt here instead of in pf_rule_to_actions() because * it is applied only from the last pass rule. */ pd->act.rt = r->rt; - /* Don't use REASON_SET, pf_map_addr increases the reason counters */ - ctx.reason = pf_map_addr_sn(pd->af, r, pd->src, &pd->act.rt_addr, - &pd->act.rt_kif, NULL, &sn, &snh, &(r->route), PF_SN_ROUTE); - if (ctx.reason != 0) + if ((transerror = pf_map_addr_sn(pd->af, r, pd->src, + &pd->act.rt_addr, &pd->act.rt_kif, NULL, &(r->route), + PF_SN_ROUTE)) != PFRES_MATCH) { + REASON_SET(&ctx.reason, transerror); goto cleanup; + } } if (pd->virtual_proto != PF_VPROTO_FRAGMENT && @@ -6056,9 +6055,16 @@ pf_create_state(struct pf_krule *r, struct pf_test_ctx *ctx, /* src node for translation rule */ if (ctx->nr != NULL) { KASSERT(ctx->nat_pool != NULL, ("%s: nat_pool is NULL", __func__)); + /* + * The NAT addresses are chosen during ruleset parsing. + * The new afto code stores post-nat addresses in nsaddr. + * The old nat code (also used for new nat-to rules) creates + * state keys and stores addresses in them. + */ if ((ctx->nat_pool->opts & PF_POOL_STICKYADDR) && (sn_reason = pf_insert_src_node(sns, snhs, ctx->nr, - &ctx->sk->addr[pd->sidx], pd->af, &ctx->nk->addr[1], NULL, + ctx->sk ? &(ctx->sk->addr[pd->sidx]) : pd->src, pd->af, + ctx->nk ? 
&(ctx->nk->addr[1]) : &(pd->nsaddr), NULL, PF_SN_NAT)) != 0 ) { REASON_SET(&ctx->reason, sn_reason); goto csfailed; @@ -6213,7 +6219,7 @@ pf_create_state(struct pf_krule *r, struct pf_test_ctx *ctx, if (ctx->tag > 0) s->tag = ctx->tag; if (pd->proto == IPPROTO_TCP && (tcp_get_flags(th) & (TH_SYN|TH_ACK)) == - TH_SYN && r->keep_state == PF_STATE_SYNPROXY) { + TH_SYN && r->keep_state == PF_STATE_SYNPROXY && pd->dir == PF_IN) { pf_set_protostate(s, PF_PEER_SRC, PF_TCPS_PROXY_SRC); pf_undo_nat(ctx->nr, pd, bip_sum); s->src.seqhi = arc4random(); @@ -9062,6 +9068,9 @@ pf_route(struct pf_krule *r, struct ifnet *oifp, goto bad; } + if (r->rt == PF_DUPTO) + skip_test = true; + if (pd->dir == PF_IN && !skip_test) { if (pf_test(AF_INET, PF_OUT, PFIL_FWD, ifp, &m0, inp, &pd->act) != PF_PASS) { @@ -9364,6 +9373,9 @@ pf_route6(struct pf_krule *r, struct ifnet *oifp, goto bad; } + if (r->rt == PF_DUPTO) + skip_test = true; + if (pd->dir == PF_IN && !skip_test) { if (pf_test(AF_INET6, PF_OUT, PFIL_FWD | PF_PFIL_NOREFRAGMENT, ifp, &m0, inp, &pd->act) != PF_PASS) { @@ -10052,6 +10064,8 @@ pf_setup_pdesc(sa_family_t af, int dir, struct pf_pdesc *pd, struct mbuf **m0, pd->didx = (dir == PF_IN) ? 1 : 0; pd->af = pd->naf = af; + PF_RULES_ASSERT(); + TAILQ_INIT(&pd->sctp_multihome_jobs); if (default_actions != NULL) memcpy(&pd->act, default_actions, sizeof(pd->act)); @@ -10127,6 +10141,12 @@ pf_setup_pdesc(sa_family_t af, int dir, struct pf_pdesc *pd, struct mbuf **m0, } h = mtod(pd->m, struct ip6_hdr *); + if (pd->m->m_pkthdr.len < + sizeof(struct ip6_hdr) + ntohs(h->ip6_plen)) { + *action = PF_DROP; + REASON_SET(reason, PFRES_SHORT); + return (-1); + } if (pf_walk_header6(pd, h, reason) != PF_PASS) { *action = PF_DROP; @@ -10465,35 +10485,30 @@ pf_test(sa_family_t af, int dir, int pflags, struct ifnet *ifp, struct mbuf **m0 PF_RULES_RLOCK_TRACKER; KASSERT(dir == PF_IN || dir == PF_OUT, ("%s: bad direction %d\n", __func__, dir)); M_ASSERTPKTHDR(*m0); + NET_EPOCH_ASSERT(); if (!V_pf_status.running) return (PF_PASS); - PF_RULES_RLOCK(); - kif = (struct pfi_kkif *)ifp->if_pf_kif; if (__predict_false(kif == NULL)) { DPFPRINTF(PF_DEBUG_URGENT, ("%s: kif == NULL, if_xname %s\n", __func__, ifp->if_xname)); - PF_RULES_RUNLOCK(); return (PF_DROP); } if (kif->pfik_flags & PFI_IFLAG_SKIP) { - PF_RULES_RUNLOCK(); return (PF_PASS); } if ((*m0)->m_flags & M_SKIP_FIREWALL) { - PF_RULES_RUNLOCK(); return (PF_PASS); } if (__predict_false(! M_WRITABLE(*m0))) { *m0 = m_unshare(*m0, M_NOWAIT); if (*m0 == NULL) { - PF_RULES_RUNLOCK(); return (PF_DROP); } } @@ -10506,12 +10521,10 @@ pf_test(sa_family_t af, int dir, int pflags, struct ifnet *ifp, struct mbuf **m0 ifp = ifnet_byindexgen(pd.pf_mtag->if_index, pd.pf_mtag->if_idxgen); if (ifp == NULL || ifp->if_flags & IFF_DYING) { - PF_RULES_RUNLOCK(); m_freem(*m0); *m0 = NULL; return (PF_PASS); } - PF_RULES_RUNLOCK(); (ifp->if_output)(ifp, *m0, sintosa(&pd.pf_mtag->dst), NULL); *m0 = NULL; return (PF_PASS); @@ -10526,11 +10539,12 @@ pf_test(sa_family_t af, int dir, int pflags, struct ifnet *ifp, struct mbuf **m0 /* But only once. We may see the packet multiple times (e.g. * PFIL_IN/PFIL_OUT). 
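The pf_test() hunks above push PF_RULES_RLOCK() past the early-return paths, so packets that are passed or dropped without rule matching never touch the rules lock; NET_EPOCH_ASSERT() records that those paths rely on epoch protection instead. A toy sketch of that lock-scope narrowing, using a pthread rwlock in place of the kernel primitives:

#include <pthread.h>
#include <stdbool.h>

static pthread_rwlock_t rules_lock = PTHREAD_RWLOCK_INITIALIZER;

static int
test_model(bool running, bool skip_firewall)
{
	if (!running)
		return (0);		/* pass without taking the lock */
	if (skip_firewall)
		return (0);		/* ditto */

	pthread_rwlock_rdlock(&rules_lock);	/* only real work locks */
	/* ... rule matching ... */
	pthread_rwlock_unlock(&rules_lock);
	return (1);
}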
*/ pf_dummynet_flag_remove(pd.m, pd.pf_mtag); - PF_RULES_RUNLOCK(); return (PF_PASS); } + PF_RULES_RLOCK(); + if (pf_setup_pdesc(af, dir, &pd, m0, &action, &reason, kif, default_actions) == -1) { if (action != PF_PASS) diff --git a/sys/netpfil/pf/pf.h b/sys/netpfil/pf/pf.h index 2009d2907985..cfff58064922 100644 --- a/sys/netpfil/pf/pf.h +++ b/sys/netpfil/pf/pf.h @@ -140,7 +140,7 @@ enum { PF_ADDR_ADDRMASK, PF_ADDR_NOROUTE, PF_ADDR_DYNIFTL, #define PF_LOG 0x01 #define PF_LOG_ALL 0x02 -#define PF_LOG_SOCKET_LOOKUP 0x04 +#define PF_LOG_USER 0x04 #define PF_LOG_FORCE 0x08 #define PF_LOG_MATCHES 0x10 @@ -490,6 +490,7 @@ struct pf_osfp_ioctl { #define PF_ANCHOR_NAME_SIZE 64 #define PF_ANCHOR_MAXPATH (MAXPATHLEN - PF_ANCHOR_NAME_SIZE - 1) +#define PF_OPTIMIZER_TABLE_PFX "__automatic_" struct pf_rule { struct pf_rule_addr src; diff --git a/sys/netpfil/pf/pf_ioctl.c b/sys/netpfil/pf/pf_ioctl.c index c96741023db9..3caa0d2e3b11 100644 --- a/sys/netpfil/pf/pf_ioctl.c +++ b/sys/netpfil/pf/pf_ioctl.c @@ -1274,7 +1274,9 @@ pf_hash_rule_addr(MD5_CTX *ctx, struct pf_rule_addr *pfr) PF_MD5_UPD(pfr, addr.iflags); break; case PF_ADDR_TABLE: - PF_MD5_UPD(pfr, addr.v.tblname); + if (strncmp(pfr->addr.v.tblname, PF_OPTIMIZER_TABLE_PFX, + strlen(PF_OPTIMIZER_TABLE_PFX))) + PF_MD5_UPD(pfr, addr.v.tblname); break; case PF_ADDR_ADDRMASK: /* XXX ignore af? */ @@ -1357,7 +1359,7 @@ static int pf_commit_rules(u_int32_t ticket, int rs_num, char *anchor) { struct pf_kruleset *rs; - struct pf_krule *rule, **old_array, *old_rule; + struct pf_krule *rule, *old_rule; struct pf_krulequeue *old_rules; struct pf_krule_global *old_tree; int error; @@ -1382,13 +1384,10 @@ pf_commit_rules(u_int32_t ticket, int rs_num, char *anchor) /* Swap rules, keep the old. */ old_rules = rs->rules[rs_num].active.ptr; old_rcount = rs->rules[rs_num].active.rcount; - old_array = rs->rules[rs_num].active.ptr_array; old_tree = rs->rules[rs_num].active.tree; rs->rules[rs_num].active.ptr = rs->rules[rs_num].inactive.ptr; - rs->rules[rs_num].active.ptr_array = - rs->rules[rs_num].inactive.ptr_array; rs->rules[rs_num].active.tree = rs->rules[rs_num].inactive.tree; rs->rules[rs_num].active.rcount = @@ -1418,7 +1417,6 @@ pf_commit_rules(u_int32_t ticket, int rs_num, char *anchor) } rs->rules[rs_num].inactive.ptr = old_rules; - rs->rules[rs_num].inactive.ptr_array = old_array; rs->rules[rs_num].inactive.tree = NULL; /* important for pf_ioctl_addrule */ rs->rules[rs_num].inactive.rcount = old_rcount; @@ -1431,9 +1429,6 @@ pf_commit_rules(u_int32_t ticket, int rs_num, char *anchor) while ((rule = TAILQ_FIRST(old_rules)) != NULL) pf_unlink_rule_locked(old_rules, rule); PF_UNLNKDRULES_UNLOCK(); - if (rs->rules[rs_num].inactive.ptr_array) - free(rs->rules[rs_num].inactive.ptr_array, M_TEMP); - rs->rules[rs_num].inactive.ptr_array = NULL; rs->rules[rs_num].inactive.rcount = 0; rs->rules[rs_num].inactive.open = 0; pf_remove_if_empty_kruleset(rs); @@ -1456,24 +1451,11 @@ pf_setup_pfsync_matching(struct pf_kruleset *rs) if (rs_cnt == PF_RULESET_SCRUB) continue; - if (rs->rules[rs_cnt].inactive.ptr_array) - free(rs->rules[rs_cnt].inactive.ptr_array, M_TEMP); - rs->rules[rs_cnt].inactive.ptr_array = NULL; - if (rs->rules[rs_cnt].inactive.rcount) { - rs->rules[rs_cnt].inactive.ptr_array = - mallocarray(rs->rules[rs_cnt].inactive.rcount, - sizeof(struct pf_rule **), - M_TEMP, M_NOWAIT); - - if (!rs->rules[rs_cnt].inactive.ptr_array) - return (ENOMEM); - } - - TAILQ_FOREACH(rule, rs->rules[rs_cnt].inactive.ptr, - entries) { - pf_hash_rule_rolling(&ctx, rule); - 
(rs->rules[rs_cnt].inactive.ptr_array)[rule->nr] = rule; + TAILQ_FOREACH(rule, rs->rules[rs_cnt].inactive.ptr, + entries) { + pf_hash_rule_rolling(&ctx, rule); + } } } @@ -2059,6 +2041,19 @@ pf_ioctl_getrules(struct pfioc_rule *pr) return (0); } +static int +pf_validate_range(uint8_t op, uint16_t port[2]) +{ + uint16_t a = ntohs(port[0]); + uint16_t b = ntohs(port[1]); + + if ((op == PF_OP_RRG && a > b) || /* 34:12, i.e. none */ + (op == PF_OP_IRG && a >= b) || /* 34><12, i.e. none */ + (op == PF_OP_XRG && a > b)) /* 34<>22, i.e. all */ + return 1; + return 0; +} + int pf_ioctl_addrule(struct pf_krule *rule, uint32_t ticket, uint32_t pool_ticket, const char *anchor, const char *anchor_call, @@ -2078,6 +2073,11 @@ pf_ioctl_addrule(struct pf_krule *rule, uint32_t ticket, #define ERROUT(x) ERROUT_FUNCTION(errout, x) + if (pf_validate_range(rule->src.port_op, rule->src.port)) + ERROUT(EINVAL); + if (pf_validate_range(rule->dst.port_op, rule->dst.port)) + ERROUT(EINVAL); + if (rule->ifname[0]) kif = pf_kkif_create(M_WAITOK); if (rule->rcv_ifname[0]) diff --git a/sys/netpfil/pf/pf_lb.c b/sys/netpfil/pf/pf_lb.c index 308d76c46e5b..26f7ab41eef4 100644 --- a/sys/netpfil/pf/pf_lb.c +++ b/sys/netpfil/pf/pf_lb.c @@ -80,7 +80,6 @@ static enum pf_test_status pf_step_into_translation_anchor(int, struct pf_test_c struct pf_krule *); static int pf_get_sport(struct pf_pdesc *, struct pf_krule *, struct pf_addr *, uint16_t *, uint16_t, uint16_t, - struct pf_ksrc_node **, struct pf_srchash **, struct pf_kpool *, struct pf_udp_mapping **, pf_sn_types_t); static bool pf_islinklocal(const sa_family_t, const struct pf_addr *); @@ -291,10 +290,8 @@ pf_match_translation(int rs_num, struct pf_test_ctx *ctx) } static int -pf_get_sport(struct pf_pdesc *pd, struct pf_krule *r, - struct pf_addr *naddr, uint16_t *nport, uint16_t low, - uint16_t high, struct pf_ksrc_node **sn, - struct pf_srchash **sh, struct pf_kpool *rpool, +pf_get_sport(struct pf_pdesc *pd, struct pf_krule *r, struct pf_addr *naddr, + uint16_t *nport, uint16_t low, uint16_t high, struct pf_kpool *rpool, struct pf_udp_mapping **udp_mapping, pf_sn_types_t sn_type) { struct pf_state_key_cmp key; @@ -322,19 +319,24 @@ pf_get_sport(struct pf_pdesc *pd, struct pf_krule *r, pf_addrcpy(&udp_source.addr, &pd->nsaddr, pd->af); udp_source.port = pd->nsport; if (udp_mapping) { + struct pf_ksrc_node *sn = NULL; + struct pf_srchash *sh = NULL; *udp_mapping = pf_udp_mapping_find(&udp_source); if (*udp_mapping) { pf_addrcpy(naddr, &(*udp_mapping)->endpoints[1].addr, pd->af); *nport = (*udp_mapping)->endpoints[1].port; - /* Try to find a src_node as per pf_map_addr(). */ - if (*sn == NULL && rpool->opts & PF_POOL_STICKYADDR && + /* + * Try to find a src_node as per pf_map_addr(). + * XXX: Why? This code seems to do nothing. 
+ */ + if (rpool->opts & PF_POOL_STICKYADDR && (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) - *sn = pf_find_src_node(&pd->nsaddr, r, - pd->af, sh, sn_type, false); - if (*sn != NULL) - PF_SRC_NODE_UNLOCK(*sn); + sn = pf_find_src_node(&pd->nsaddr, r, + pd->af, &sh, sn_type, false); + if (sn != NULL) + PF_SRC_NODE_UNLOCK(sn); return (0); } else { *udp_mapping = pf_udp_mapping_create(pd->af, &pd->nsaddr, @@ -346,7 +348,7 @@ pf_get_sport(struct pf_pdesc *pd, struct pf_krule *r, } if (pf_map_addr_sn(pd->naf, r, &pd->nsaddr, naddr, NULL, &init_addr, - sn, sh, rpool, sn_type)) + rpool, sn_type)) goto failed; if (pd->proto == IPPROTO_ICMP) { @@ -470,9 +472,8 @@ pf_get_sport(struct pf_pdesc *pd, struct pf_krule *r, * pick a different source address since we're out * of free port choices for the current one. */ - (*sn) = NULL; if (pf_map_addr_sn(pd->naf, r, &pd->nsaddr, naddr, NULL, - &init_addr, sn, sh, rpool, sn_type)) + &init_addr, rpool, sn_type)) return (1); break; case PF_POOL_NONE: @@ -503,7 +504,6 @@ pf_islinklocal(const sa_family_t af, const struct pf_addr *addr) static int pf_get_mape_sport(struct pf_pdesc *pd, struct pf_krule *r, struct pf_addr *naddr, uint16_t *nport, - struct pf_ksrc_node **sn, struct pf_srchash **sh, struct pf_udp_mapping **udp_mapping, struct pf_kpool *rpool) { uint16_t psmask, low, highmask; @@ -523,16 +523,14 @@ pf_get_mape_sport(struct pf_pdesc *pd, struct pf_krule *r, for (i = cut; i <= ahigh; i++) { low = (i << ashift) | psmask; - if (!pf_get_sport(pd, r, - naddr, nport, low, low | highmask, sn, sh, rpool, - udp_mapping, PF_SN_NAT)) + if (!pf_get_sport(pd, r, naddr, nport, low, low | highmask, + rpool, udp_mapping, PF_SN_NAT)) return (0); } for (i = cut - 1; i > 0; i--) { low = (i << ashift) | psmask; - if (!pf_get_sport(pd, r, - naddr, nport, low, low | highmask, sn, sh, rpool, - udp_mapping, PF_SN_NAT)) + if (!pf_get_sport(pd, r, naddr, nport, low, low | highmask, + rpool, udp_mapping, PF_SN_NAT)) return (0); } return (1); @@ -545,6 +543,7 @@ pf_map_addr(sa_family_t af, struct pf_krule *r, struct pf_addr *saddr, { u_short reason = PFRES_MATCH; struct pf_addr *raddr = NULL, *rmask = NULL; + struct pfr_ktable *kt; uint64_t hashidx; int cnt; @@ -600,29 +599,25 @@ pf_map_addr(sa_family_t af, struct pf_krule *r, struct pf_addr *saddr, pf_poolmask(naddr, raddr, rmask, saddr, af); break; case PF_POOL_RANDOM: - if (rpool->cur->addr.type == PF_ADDR_TABLE) { - cnt = rpool->cur->addr.p.tbl->pfrkt_cnt; - if (cnt == 0) - rpool->tblidx = 0; + if (rpool->cur->addr.type == PF_ADDR_TABLE || + rpool->cur->addr.type == PF_ADDR_DYNIFTL) { + if (rpool->cur->addr.type == PF_ADDR_TABLE) + kt = rpool->cur->addr.p.tbl; else - rpool->tblidx = (int)arc4random_uniform(cnt); - memset(&rpool->counter, 0, sizeof(rpool->counter)); - if (pfr_pool_get(rpool->cur->addr.p.tbl, - &rpool->tblidx, &rpool->counter, af, NULL)) { + kt = rpool->cur->addr.p.dyn->pfid_kt; + kt = pfr_ktable_select_active(kt); + if (kt == NULL) { reason = PFRES_MAPFAILED; goto done_pool_mtx; /* unsupported */ } - pf_addrcpy(naddr, &rpool->counter, af); - } else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) { - cnt = rpool->cur->addr.p.dyn->pfid_kt->pfrkt_cnt; + cnt = kt->pfrkt_cnt; if (cnt == 0) rpool->tblidx = 0; else rpool->tblidx = (int)arc4random_uniform(cnt); memset(&rpool->counter, 0, sizeof(rpool->counter)); - if (pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt, - &rpool->tblidx, &rpool->counter, af, - pf_islinklocal)) { + if (pfr_pool_get(kt, &rpool->tblidx, &rpool->counter, + af, pf_islinklocal, false)) { reason 
= PFRES_MAPFAILED; goto done_pool_mtx; /* unsupported */ } @@ -671,29 +666,25 @@ pf_map_addr(sa_family_t af, struct pf_krule *r, struct pf_addr *saddr, hashidx = pf_hash(saddr, (struct pf_addr *)&hash, &rpool->key, af); - if (rpool->cur->addr.type == PF_ADDR_TABLE) { - cnt = rpool->cur->addr.p.tbl->pfrkt_cnt; - if (cnt == 0) - rpool->tblidx = 0; + if (rpool->cur->addr.type == PF_ADDR_TABLE || + rpool->cur->addr.type == PF_ADDR_DYNIFTL) { + if (rpool->cur->addr.type == PF_ADDR_TABLE) + kt = rpool->cur->addr.p.tbl; else - rpool->tblidx = (int)(hashidx % cnt); - memset(&rpool->counter, 0, sizeof(rpool->counter)); - if (pfr_pool_get(rpool->cur->addr.p.tbl, - &rpool->tblidx, &rpool->counter, af, NULL)) { + kt = rpool->cur->addr.p.dyn->pfid_kt; + kt = pfr_ktable_select_active(kt); + if (kt == NULL) { reason = PFRES_MAPFAILED; goto done_pool_mtx; /* unsupported */ } - pf_addrcpy(naddr, &rpool->counter, af); - } else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) { - cnt = rpool->cur->addr.p.dyn->pfid_kt->pfrkt_cnt; + cnt = kt->pfrkt_cnt; if (cnt == 0) rpool->tblidx = 0; else rpool->tblidx = (int)(hashidx % cnt); memset(&rpool->counter, 0, sizeof(rpool->counter)); - if (pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt, - &rpool->tblidx, &rpool->counter, af, - pf_islinklocal)) { + if (pfr_pool_get(kt, &rpool->tblidx, &rpool->counter, + af, pf_islinklocal, false)) { reason = PFRES_MAPFAILED; goto done_pool_mtx; /* unsupported */ } @@ -710,11 +701,12 @@ pf_map_addr(sa_family_t af, struct pf_krule *r, struct pf_addr *saddr, if (rpool->cur->addr.type == PF_ADDR_TABLE) { if (!pfr_pool_get(rpool->cur->addr.p.tbl, - &rpool->tblidx, &rpool->counter, af, NULL)) + &rpool->tblidx, &rpool->counter, af, NULL, true)) goto get_addr; } else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) { if (!pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt, - &rpool->tblidx, &rpool->counter, af, pf_islinklocal)) + &rpool->tblidx, &rpool->counter, af, pf_islinklocal, + true)) goto get_addr; } else if (pf_match_addr(0, raddr, rmask, &rpool->counter, af)) goto get_addr; @@ -724,9 +716,10 @@ pf_map_addr(sa_family_t af, struct pf_krule *r, struct pf_addr *saddr, rpool->cur = TAILQ_FIRST(&rpool->list); else rpool->cur = TAILQ_NEXT(rpool->cur, entries); + rpool->tblidx = -1; if (rpool->cur->addr.type == PF_ADDR_TABLE) { if (pfr_pool_get(rpool->cur->addr.p.tbl, - &rpool->tblidx, &rpool->counter, af, NULL)) { + &rpool->tblidx, &rpool->counter, af, NULL, true)) { /* table contains no address of type 'af' */ if (rpool->cur != acur) goto try_next; @@ -734,9 +727,9 @@ pf_map_addr(sa_family_t af, struct pf_krule *r, struct pf_addr *saddr, goto done_pool_mtx; } } else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) { - rpool->tblidx = -1; if (pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt, - &rpool->tblidx, &rpool->counter, af, pf_islinklocal)) { + &rpool->tblidx, &rpool->counter, af, pf_islinklocal, + true)) { /* table contains no address of type 'af' */ if (rpool->cur != acur) goto try_next; @@ -764,48 +757,41 @@ pf_map_addr(sa_family_t af, struct pf_krule *r, struct pf_addr *saddr, done_pool_mtx: mtx_unlock(&rpool->mtx); - if (reason) { - counter_u64_add(V_pf_status.counters[reason], 1); - } - return (reason); } u_short pf_map_addr_sn(sa_family_t af, struct pf_krule *r, struct pf_addr *saddr, struct pf_addr *naddr, struct pfi_kkif **nkif, struct pf_addr *init_addr, - struct pf_ksrc_node **sn, struct pf_srchash **sh, struct pf_kpool *rpool, - pf_sn_types_t sn_type) + struct pf_kpool *rpool, pf_sn_types_t sn_type) { + struct pf_ksrc_node *sn = NULL; + 
struct pf_srchash *sh = NULL; u_short reason = 0; - KASSERT(*sn == NULL, ("*sn not NULL")); - /* * If this is a sticky-address rule, try to find an existing src_node. - * Request the sh to be unlocked if sn was not found, as we never - * insert a new sn when parsing the ruleset. */ if (rpool->opts & PF_POOL_STICKYADDR && (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) - *sn = pf_find_src_node(saddr, r, af, sh, sn_type, false); + sn = pf_find_src_node(saddr, r, af, &sh, sn_type, false); - if (*sn != NULL) { - PF_SRC_NODE_LOCK_ASSERT(*sn); + if (sn != NULL) { + PF_SRC_NODE_LOCK_ASSERT(sn); /* If the supplied address is the same as the current one we've * been asked before, so tell the caller that there's no other * address to be had. */ - if (PF_AEQ(naddr, &(*sn)->raddr, af)) { + if (PF_AEQ(naddr, &(sn->raddr), af)) { reason = PFRES_MAPFAILED; goto done; } - pf_addrcpy(naddr, &(*sn)->raddr, af); + pf_addrcpy(naddr, &(sn->raddr), af); if (nkif) - *nkif = (*sn)->rkif; + *nkif = sn->rkif; if (V_pf_status.debug >= PF_DEBUG_NOISY) { - printf("pf_map_addr: src tracking maps "); + printf("%s: src tracking maps ", __func__); pf_print_host(saddr, 0, af); printf(" to "); pf_print_host(naddr, 0, af); @@ -820,14 +806,16 @@ pf_map_addr_sn(sa_family_t af, struct pf_krule *r, struct pf_addr *saddr, * Source node has not been found. Find a new address and store it * in variables given by the caller. */ - if (pf_map_addr(af, r, saddr, naddr, nkif, init_addr, rpool) != 0) { - /* pf_map_addr() sets reason counters on its own */ + if ((reason = pf_map_addr(af, r, saddr, naddr, nkif, init_addr, + rpool)) != 0) { + if (V_pf_status.debug >= PF_DEBUG_MISC) + printf("%s: pf_map_addr has failed\n", __func__); goto done; } if (V_pf_status.debug >= PF_DEBUG_NOISY && (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) { - printf("pf_map_addr: selected address "); + printf("%s: selected address ", __func__); pf_print_host(naddr, 0, af); if (nkif) printf("@%s", (*nkif)->pfik_name); @@ -835,12 +823,8 @@ pf_map_addr_sn(sa_family_t af, struct pf_krule *r, struct pf_addr *saddr, } done: - if ((*sn) != NULL) - PF_SRC_NODE_UNLOCK(*sn); - - if (reason) { - counter_u64_add(V_pf_status.counters[reason], 1); - } + if (sn != NULL) + PF_SRC_NODE_UNLOCK(sn); return (reason); } @@ -890,8 +874,6 @@ pf_get_transaddr(struct pf_test_ctx *ctx, struct pf_krule *r, { struct pf_pdesc *pd = ctx->pd; struct pf_addr *naddr; - struct pf_ksrc_node *sn = NULL; - struct pf_srchash *sh = NULL; uint16_t *nportp; uint16_t low, high; u_short reason; @@ -919,8 +901,8 @@ pf_get_transaddr(struct pf_test_ctx *ctx, struct pf_krule *r, high = rpool->proxy_port[1]; } if (rpool->mape.offset > 0) { - if (pf_get_mape_sport(pd, r, naddr, nportp, &sn, - &sh, &ctx->udp_mapping, rpool)) { + if (pf_get_mape_sport(pd, r, naddr, nportp, + &ctx->udp_mapping, rpool)) { DPFPRINTF(PF_DEBUG_MISC, ("pf: MAP-E port allocation (%u/%u/%u)" " failed\n", @@ -930,8 +912,8 @@ pf_get_transaddr(struct pf_test_ctx *ctx, struct pf_krule *r, reason = PFRES_MAPFAILED; goto notrans; } - } else if (pf_get_sport(pd, r, naddr, nportp, low, high, &sn, - &sh, rpool, &ctx->udp_mapping, PF_SN_NAT)) { + } else if (pf_get_sport(pd, r, naddr, nportp, low, high, + rpool, &ctx->udp_mapping, PF_SN_NAT)) { DPFPRINTF(PF_DEBUG_MISC, ("pf: NAT proxy port allocation (%u-%u) failed\n", rpool->proxy_port[0], rpool->proxy_port[1])); @@ -1017,7 +999,7 @@ pf_get_transaddr(struct pf_test_ctx *ctx, struct pf_krule *r, uint16_t cut, low, high, nport; reason = pf_map_addr_sn(pd->af, r, &pd->nsaddr, naddr, NULL, - 
NULL, &sn, &sh, rpool, PF_SN_NAT); + NULL, rpool, PF_SN_NAT); if (reason != 0) goto notrans; if ((rpool->opts & PF_POOL_TYPEMASK) == PF_POOL_BITMASK) @@ -1134,8 +1116,6 @@ pf_get_transaddr_af(struct pf_krule *r, struct pf_pdesc *pd) struct pf_addr ndaddr, nsaddr, naddr; u_int16_t nport = 0; int prefixlen = 96; - struct pf_srchash *sh = NULL; - struct pf_ksrc_node *sns = NULL; bzero(&nsaddr, sizeof(nsaddr)); bzero(&ndaddr, sizeof(ndaddr)); @@ -1154,9 +1134,8 @@ pf_get_transaddr_af(struct pf_krule *r, struct pf_pdesc *pd) panic("pf_get_transaddr_af: no nat pool for source address"); /* get source address and port */ - if (pf_get_sport(pd, r, &nsaddr, &nport, - r->nat.proxy_port[0], r->nat.proxy_port[1], &sns, &sh, &r->nat, - NULL, PF_SN_NAT)) { + if (pf_get_sport(pd, r, &nsaddr, &nport, r->nat.proxy_port[0], + r->nat.proxy_port[1], &r->nat, NULL, PF_SN_NAT)) { DPFPRINTF(PF_DEBUG_MISC, ("pf: af-to NAT proxy port allocation (%u-%u) failed", r->nat.proxy_port[0], r->nat.proxy_port[1])); @@ -1182,7 +1161,7 @@ pf_get_transaddr_af(struct pf_krule *r, struct pf_pdesc *pd) /* get the destination address and port */ if (! TAILQ_EMPTY(&r->rdr.list)) { if (pf_map_addr_sn(pd->naf, r, &nsaddr, &naddr, NULL, NULL, - &sns, NULL, &r->rdr, PF_SN_NAT)) + &r->rdr, PF_SN_NAT)) return (-1); if (r->rdr.proxy_port[0]) pd->ndport = htons(r->rdr.proxy_port[0]); diff --git a/sys/netpfil/pf/pf_table.c b/sys/netpfil/pf/pf_table.c index 43e4366845a2..9c0151b7da2b 100644 --- a/sys/netpfil/pf/pf_table.c +++ b/sys/netpfil/pf/pf_table.c @@ -819,10 +819,10 @@ pfr_create_kentry(struct pfr_addr *ad, bool counters) static void pfr_destroy_kentries(struct pfr_kentryworkq *workq) { - struct pfr_kentry *p, *q; + struct pfr_kentry *p; - for (p = SLIST_FIRST(workq); p != NULL; p = q) { - q = SLIST_NEXT(p, pfrke_workq); + while ((p = SLIST_FIRST(workq)) != NULL) { + SLIST_REMOVE_HEAD(workq, pfrke_workq); pfr_destroy_kentry(p); } } @@ -1680,8 +1680,7 @@ pfr_ina_commit(struct pfr_table *trs, u_int32_t ticket, int *nadd, } if (!(flags & PFR_FLAG_DUMMY)) { - for (p = SLIST_FIRST(&workq); p != NULL; p = q) { - q = SLIST_NEXT(p, pfrkt_workq); + SLIST_FOREACH_SAFE(p, &workq, pfrkt_workq, q) { pfr_commit_ktable(p, tzero); } rs->topen = 0; @@ -1710,7 +1709,7 @@ pfr_commit_ktable(struct pfr_ktable *kt, time_t tzero) } else if (kt->pfrkt_flags & PFR_TFLAG_ACTIVE) { /* kt might contain addresses */ struct pfr_kentryworkq addrq, addq, changeq, delq, garbageq; - struct pfr_kentry *p, *q, *next; + struct pfr_kentry *p, *q; struct pfr_addr ad; pfr_enqueue_addrs(shadow, &addrq, NULL, 0); @@ -1720,7 +1719,8 @@ pfr_commit_ktable(struct pfr_ktable *kt, time_t tzero) SLIST_INIT(&delq); SLIST_INIT(&garbageq); pfr_clean_node_mask(shadow, &addrq); - SLIST_FOREACH_SAFE(p, &addrq, pfrke_workq, next) { + while ((p = SLIST_FIRST(&addrq)) != NULL) { + SLIST_REMOVE_HEAD(&addrq, pfrke_workq); pfr_copyout_addr(&ad, p); q = pfr_lookup_addr(kt, &ad, 1); if (q != NULL) { @@ -1864,8 +1864,7 @@ pfr_setflags_ktables(struct pfr_ktableworkq *workq) { struct pfr_ktable *p, *q; - for (p = SLIST_FIRST(workq); p; p = q) { - q = SLIST_NEXT(p, pfrkt_workq); + SLIST_FOREACH_SAFE(p, workq, pfrkt_workq, q) { pfr_setflags_ktable(p, p->pfrkt_nflags); } } @@ -2015,10 +2014,10 @@ pfr_create_ktable(struct pfr_table *tbl, time_t tzero, int attachruleset) static void pfr_destroy_ktables(struct pfr_ktableworkq *workq, int flushaddr) { - struct pfr_ktable *p, *q; + struct pfr_ktable *p; - for (p = SLIST_FIRST(workq); p; p = q) { - q = SLIST_NEXT(p, pfrkt_workq); + while ((p = 
SLIST_FIRST(workq)) != NULL) { + SLIST_REMOVE_HEAD(workq, pfrkt_workq); pfr_destroy_ktable(p, flushaddr); } } @@ -2074,17 +2073,16 @@ pfr_lookup_table(struct pfr_table *tbl) (struct pfr_ktable *)tbl)); } -int -pfr_match_addr(struct pfr_ktable *kt, struct pf_addr *a, sa_family_t af) +static struct pfr_kentry * +pfr_kentry_byaddr(struct pfr_ktable *kt, struct pf_addr *a, sa_family_t af, + int exact) { struct pfr_kentry *ke = NULL; - int match; PF_RULES_RASSERT(); - if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL) - kt = kt->pfrkt_root; - if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE)) + kt = pfr_ktable_select_active(kt); + if (kt == NULL) return (0); switch (af) { @@ -2121,11 +2119,26 @@ pfr_match_addr(struct pfr_ktable *kt, struct pf_addr *a, sa_family_t af) default: unhandled_af(af); } + if (exact && ke && KENTRY_NETWORK(ke)) + ke = NULL; + + return (ke); +} + +int +pfr_match_addr(struct pfr_ktable *kt, struct pf_addr *a, sa_family_t af) +{ + struct pfr_kentry *ke = NULL; + int match; + + ke = pfr_kentry_byaddr(kt, a, af, 0); + match = (ke && !ke->pfrke_not); if (match) pfr_kstate_counter_add(&kt->pfrkt_match, 1); else pfr_kstate_counter_add(&kt->pfrkt_nomatch, 1); + return (match); } @@ -2135,9 +2148,8 @@ pfr_update_stats(struct pfr_ktable *kt, struct pf_addr *a, sa_family_t af, { struct pfr_kentry *ke = NULL; - if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL) - kt = kt->pfrkt_root; - if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE)) + kt = pfr_ktable_select_active(kt); + if (kt == NULL) return; switch (af) { @@ -2281,7 +2293,7 @@ pfr_detach_table(struct pfr_ktable *kt) int pfr_pool_get(struct pfr_ktable *kt, int *pidx, struct pf_addr *counter, - sa_family_t af, pf_addr_filter_func_t filter) + sa_family_t af, pf_addr_filter_func_t filter, bool loop_once) { struct pf_addr *addr, cur, mask, umask_addr; union sockaddr_union uaddr, umask; @@ -2306,9 +2318,8 @@ pfr_pool_get(struct pfr_ktable *kt, int *pidx, struct pf_addr *counter, unhandled_af(af); } - if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL) - kt = kt->pfrkt_root; - if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE)) + kt = pfr_ktable_select_active(kt); + if (kt == NULL) return (-1); idx = *pidx; @@ -2327,7 +2338,7 @@ _next_block: ke = pfr_kentry_byidx(kt, idx, af); if (ke == NULL) { /* we don't have this idx, try looping */ - if (loop || (ke = pfr_kentry_byidx(kt, 0, af)) == NULL) { + if ((loop || loop_once) || (ke = pfr_kentry_byidx(kt, 0, af)) == NULL) { pfr_kstate_counter_add(&kt->pfrkt_nomatch, 1); return (1); } @@ -2455,3 +2466,14 @@ pfr_dynaddr_update(struct pfr_ktable *kt, struct pfi_dynaddr *dyn) unhandled_af(dyn->pfid_af); } } + +struct pfr_ktable * +pfr_ktable_select_active(struct pfr_ktable *kt) +{ + if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL) + kt = kt->pfrkt_root; + if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE)) + return (NULL); + + return (kt); +} diff --git a/sys/powerpc/aim/mmu_oea.c b/sys/powerpc/aim/mmu_oea.c index 7746b668265d..ae17b3289593 100644 --- a/sys/powerpc/aim/mmu_oea.c +++ b/sys/powerpc/aim/mmu_oea.c @@ -1469,6 +1469,9 @@ moea_page_set_memattr(vm_page_t m, vm_memattr_t ma) pmap_t pmap; u_int lo; + if (m->md.mdpg_cache_attrs == ma) + return; + if ((m->oflags & VPO_UNMANAGED) != 0) { m->md.mdpg_cache_attrs = ma; return; diff --git a/sys/powerpc/aim/mmu_oea64.c b/sys/powerpc/aim/mmu_oea64.c index 79cea408bb5f..796b1719b8ba 100644 --- a/sys/powerpc/aim/mmu_oea64.c +++ b/sys/powerpc/aim/mmu_oea64.c @@ -2134,6 +2134,9 @@ 
moea64_page_set_memattr(vm_page_t m, vm_memattr_t ma) CTR3(KTR_PMAP, "%s: pa=%#jx, ma=%#x", __func__, (uintmax_t)VM_PAGE_TO_PHYS(m), ma); + if (m->md.mdpg_cache_attrs == ma) + return; + if ((m->oflags & VPO_UNMANAGED) != 0) { m->md.mdpg_cache_attrs = ma; return; diff --git a/sys/powerpc/aim/mmu_radix.c b/sys/powerpc/aim/mmu_radix.c index 45f7bef8bcc9..a12142fc2d7b 100644 --- a/sys/powerpc/aim/mmu_radix.c +++ b/sys/powerpc/aim/mmu_radix.c @@ -5937,6 +5937,10 @@ mmu_radix_page_set_memattr(vm_page_t m, vm_memattr_t ma) { CTR3(KTR_PMAP, "%s(%p, %#x)", __func__, m, ma); + + if (m->md.mdpg_cache_attrs == ma) + return; + m->md.mdpg_cache_attrs = ma; /* diff --git a/sys/powerpc/include/pcb.h b/sys/powerpc/include/pcb.h index 050ada6b0f64..0230cf78aba7 100644 --- a/sys/powerpc/include/pcb.h +++ b/sys/powerpc/include/pcb.h @@ -66,16 +66,8 @@ struct pcb { #define PCB_VECREGS 0x200 /* Process had Altivec registers initialized */ struct fpu { union { -#if _BYTE_ORDER == _BIG_ENDIAN - double fpr; - uint32_t vsr[4]; -#else uint32_t vsr[4]; - struct { - double padding; - double fpr; - }; -#endif + double fpr; } fpr[32]; double fpscr; /* FPSCR stored as double for easier access */ } pcb_fpu; /* Floating point processor */ diff --git a/sys/powerpc/include/ucontext.h b/sys/powerpc/include/ucontext.h index d35c6c773fe0..dc87edd578bc 100644 --- a/sys/powerpc/include/ucontext.h +++ b/sys/powerpc/include/ucontext.h @@ -41,6 +41,7 @@ typedef struct __mcontext { int mc_flags; #define _MC_FP_VALID 0x01 #define _MC_AV_VALID 0x02 +#define _MC_VS_VALID 0x04 int mc_onstack; /* saved onstack flag */ int mc_len; /* sizeof(__mcontext) */ __uint64_t mc_avec[32*2]; /* vector register file */ @@ -56,6 +57,7 @@ typedef struct __mcontext32 { int mc_flags; #define _MC_FP_VALID 0x01 #define _MC_AV_VALID 0x02 +#define _MC_VS_VALID 0x04 int mc_onstack; /* saved onstack flag */ int mc_len; /* sizeof(__mcontext) */ uint64_t mc_avec[32*2]; /* vector register file */ diff --git a/sys/powerpc/powerpc/exec_machdep.c b/sys/powerpc/powerpc/exec_machdep.c index 1893d79f29a8..8a33d0f589a7 100644 --- a/sys/powerpc/powerpc/exec_machdep.c +++ b/sys/powerpc/powerpc/exec_machdep.c @@ -214,10 +214,10 @@ sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) sfpsize = sizeof(sf); #ifdef __powerpc64__ /* - * 64-bit PPC defines a 288 byte scratch region - * below the stack. + * 64-bit PPC defines a 512 byte red zone below + * the existing stack (ELF ABI v2 §2.2.2.4) */ - rndfsize = 288 + roundup(sizeof(sf), 48); + rndfsize = 512 + roundup(sizeof(sf), 48); #else rndfsize = roundup(sizeof(sf), 16); #endif @@ -349,13 +349,6 @@ sys_sigreturn(struct thread *td, struct sigreturn_args *uap) if (error != 0) return (error); - /* - * Save FPU state if needed.
User may have changed it on - * signal handler - */ - if (uc.uc_mcontext.mc_srr1 & PSL_FP) - save_fpu(td); - kern_sigprocmask(td, SIG_SETMASK, &uc.uc_sigmask, NULL, 0); CTR3(KTR_SIG, "sigreturn: return td=%p pc=%#x sp=%#x", @@ -432,6 +425,7 @@ grab_mcontext(struct thread *td, mcontext_t *mcp, int flags) } if (pcb->pcb_flags & PCB_VSX) { + mcp->mc_flags |= _MC_VS_VALID; for (i = 0; i < 32; i++) memcpy(&mcp->mc_vsxfpreg[i], &pcb->pcb_fpu.fpr[i].vsr[2], sizeof(double)); @@ -481,6 +475,7 @@ set_mcontext(struct thread *td, mcontext_t *mcp) struct pcb *pcb; struct trapframe *tf; register_t tls; + register_t msr; int i; pcb = td->td_pcb; @@ -531,6 +526,22 @@ set_mcontext(struct thread *td, mcontext_t *mcp) tf->srr1 &= ~(PSL_FP | PSL_VSX | PSL_VEC); pcb->pcb_flags &= ~(PCB_FPU | PCB_VSX | PCB_VEC); + /* + * Ensure the FPU is also disabled in hardware. + * + * Without this, it's possible for the register reload to fail if we + * don't switch to an FPU-disabled context before resuming the original + * thread. Specifically, if the FPU/VSX unavailable exception is never + * hit, then whatever data is still in the FP/VSX registers when + * sigresume is called will be used by the resumed thread, instead of the + * previously saved data from the mcontext. + */ + critical_enter(); + msr = mfmsr() & ~(PSL_FP | PSL_VSX | PSL_VEC); + isync(); + mtmsr(msr); + critical_exit(); + if (mcp->mc_flags & _MC_FP_VALID) { /* enable_fpu() will happen lazily on a fault */ pcb->pcb_flags |= PCB_FPREGS; @@ -539,8 +550,12 @@ set_mcontext(struct thread *td, mcontext_t *mcp) for (i = 0; i < 32; i++) { memcpy(&pcb->pcb_fpu.fpr[i].fpr, &mcp->mc_fpreg[i], sizeof(double)); - memcpy(&pcb->pcb_fpu.fpr[i].vsr[2], - &mcp->mc_vsxfpreg[i], sizeof(double)); + } + if (mcp->mc_flags & _MC_VS_VALID) { + for (i = 0; i < 32; i++) { + memcpy(&pcb->pcb_fpu.fpr[i].vsr[2], + &mcp->mc_vsxfpreg[i], sizeof(double)); + } + } } } diff --git a/sys/powerpc/powerpc/fpu.c b/sys/powerpc/powerpc/fpu.c index 0eaff2ea4932..cc8f22f7dda3 100644 --- a/sys/powerpc/powerpc/fpu.c +++ b/sys/powerpc/powerpc/fpu.c @@ -64,8 +64,19 @@ save_fpu_int(struct thread *td) * Save the floating-point registers and FPSCR to the PCB */ if (pcb->pcb_flags & PCB_VSX) { - #define SFP(n) __asm ("stxvw4x " #n ", 0,%0" \ +#if _BYTE_ORDER == _BIG_ENDIAN + #define SFP(n) __asm("stxvw4x " #n ", 0,%0" \ :: "b"(&pcb->pcb_fpu.fpr[n])); +#else + /* + * stxvw4x will swap words within the FP double word on LE systems, + * leading to corruption if VSX is used to store state and FP is + * subsequently used to restore state. + * Use stxvd2x instead. + */ + #define SFP(n) __asm("stxvd2x " #n ", 0,%0" \ + :: "b"(&pcb->pcb_fpu.fpr[n])); +#endif SFP(0); SFP(1); SFP(2); SFP(3); SFP(4); SFP(5); SFP(6); SFP(7); SFP(8); SFP(9); SFP(10); SFP(11); @@ -76,7 +87,7 @@ save_fpu_int(struct thread *td) SFP(28); SFP(29); SFP(30); SFP(31); #undef SFP } else { - #define SFP(n) __asm ("stfd " #n ", 0(%0)" \ + #define SFP(n) __asm("stfd " #n ", 0(%0)" \ :: "b"(&pcb->pcb_fpu.fpr[n].fpr)); SFP(0); SFP(1); SFP(2); SFP(3); SFP(4); SFP(5); SFP(6); SFP(7); @@ -149,8 +160,19 @@ enable_fpu(struct thread *td) :: "b"(&pcb->pcb_fpu.fpscr)); if (pcb->pcb_flags & PCB_VSX) { - #define LFP(n) __asm ("lxvw4x " #n ", 0,%0" \ +#if _BYTE_ORDER == _BIG_ENDIAN + #define LFP(n) __asm("lxvw4x " #n ", 0,%0" \ + :: "b"(&pcb->pcb_fpu.fpr[n])); +#else + /* + * lxvw4x will swap words within the FP double word on LE systems, + * leading to corruption if FP is used to store state and VSX is + * subsequently used to restore state. + * Use lxvd2x instead.
+ */ + #define LFP(n) __asm("lxvd2x " #n ", 0,%0" \ :: "b"(&pcb->pcb_fpu.fpr[n])); +#endif LFP(0); LFP(1); LFP(2); LFP(3); LFP(4); LFP(5); LFP(6); LFP(7); LFP(8); LFP(9); LFP(10); LFP(11); @@ -161,7 +183,7 @@ enable_fpu(struct thread *td) LFP(28); LFP(29); LFP(30); LFP(31); #undef LFP } else { - #define LFP(n) __asm ("lfd " #n ", 0(%0)" \ + #define LFP(n) __asm("lfd " #n ", 0(%0)" \ :: "b"(&pcb->pcb_fpu.fpr[n].fpr)); LFP(0); LFP(1); LFP(2); LFP(3); LFP(4); LFP(5); LFP(6); LFP(7); diff --git a/sys/riscv/riscv/pmap.c b/sys/riscv/riscv/pmap.c index 5d15bd671285..26efaecc64d1 100644 --- a/sys/riscv/riscv/pmap.c +++ b/sys/riscv/riscv/pmap.c @@ -4838,6 +4838,8 @@ pmap_unmapbios(void *p, vm_size_t size) void pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma) { + if (m->md.pv_memattr == ma) + return; m->md.pv_memattr = ma; diff --git a/sys/sys/exterrvar.h b/sys/sys/exterrvar.h index 15557c614f88..7bf1d264ff5e 100644 --- a/sys/sys/exterrvar.h +++ b/sys/sys/exterrvar.h @@ -21,6 +21,7 @@ #define EXTERRCTL_ENABLE 1 #define EXTERRCTL_DISABLE 2 +#define EXTERRCTL_UD 3 #define EXTERRCTLF_FORCE 0x00000001 diff --git a/sys/sys/param.h b/sys/sys/param.h index af116d6e3f7a..a8e9635242dd 100644 --- a/sys/sys/param.h +++ b/sys/sys/param.h @@ -74,7 +74,7 @@ * cannot include sys/param.h and should only be updated here. */ #undef __FreeBSD_version -#define __FreeBSD_version 1500051 +#define __FreeBSD_version 1500052 /* * __FreeBSD_kernel__ indicates that this system uses the kernel of FreeBSD, diff --git a/sys/vm/swap_pager.c b/sys/vm/swap_pager.c index 86b75a2d7989..d6bd06226d04 100644 --- a/sys/vm/swap_pager.c +++ b/sys/vm/swap_pager.c @@ -384,8 +384,8 @@ swap_release_by_cred(vm_ooffset_t decr, struct ucred *cred) #endif } -static int swap_pager_full = 2; /* swap space exhaustion (task killing) */ -static int swap_pager_almost_full = 1; /* swap space exhaustion (w/hysteresis)*/ +static bool swap_pager_full = true; /* swap space exhaustion (task killing) */ +static bool swap_pager_almost_full = true; /* swap space exhaustion (w/hysteresis) */ static struct mtx swbuf_mtx; /* to sync nsw_wcount_async */ static int nsw_wcount_async; /* limit async write buffers */ static int nsw_wcount_async_max;/* assigned maximum */ @@ -642,14 +642,14 @@ swp_sizecheck(void) { if (swap_pager_avail < nswap_lowat) { - if (swap_pager_almost_full == 0) { + if (!swap_pager_almost_full) { printf("swap_pager: out of swap space\n"); - swap_pager_almost_full = 1; + swap_pager_almost_full = true; } } else { - swap_pager_full = 0; + swap_pager_full = false; if (swap_pager_avail > nswap_hiwat) - swap_pager_almost_full = 0; + swap_pager_almost_full = false; } } @@ -958,11 +958,10 @@ swp_pager_getswapspace(int *io_npages) swp_sizecheck(); swdevhd = TAILQ_NEXT(sp, sw_list); } else { - if (swap_pager_full != 2) { + if (!swap_pager_full) { printf("swp_pager_getswapspace(%d): failed\n", *io_npages); - swap_pager_full = 2; - swap_pager_almost_full = 1; + swap_pager_full = swap_pager_almost_full = true; } swdevhd = NULL; } @@ -2863,10 +2862,8 @@ swapoff_one(struct swdevt *sp, struct ucred *cred, u_int flags) sp->sw_id = NULL; TAILQ_REMOVE(&swtailq, sp, sw_list); nswapdev--; - if (nswapdev == 0) { - swap_pager_full = 2; - swap_pager_almost_full = 1; - } + if (nswapdev == 0) + swap_pager_full = swap_pager_almost_full = true; if (swdevhd == sp) swdevhd = NULL; mtx_unlock(&sw_dev_mtx); diff --git a/sys/vm/vm_domainset.c b/sys/vm/vm_domainset.c index 7b8bf4c77663..b44bdb96b0d4 100644 --- a/sys/vm/vm_domainset.c +++ b/sys/vm/vm_domainset.c @@ -131,8 
+131,7 @@ static void vm_domainset_iter_next(struct vm_domainset_iter *di, int *domain) { - KASSERT(di->di_n > 0, - ("vm_domainset_iter_first: Invalid n %d", di->di_n)); + KASSERT(di->di_n > 0, ("%s: Invalid n %d", __func__, di->di_n)); switch (di->di_policy) { case DOMAINSET_POLICY_FIRSTTOUCH: /* @@ -149,11 +148,10 @@ vm_domainset_iter_next(struct vm_domainset_iter *di, int *domain) vm_domainset_iter_prefer(di, domain); break; default: - panic("vm_domainset_iter_first: Unknown policy %d", - di->di_policy); + panic("%s: Unknown policy %d", __func__, di->di_policy); } KASSERT(*domain < vm_ndomains, - ("vm_domainset_iter_next: Invalid domain %d", *domain)); + ("%s: Invalid domain %d", __func__, *domain)); } static void @@ -189,13 +187,11 @@ vm_domainset_iter_first(struct vm_domainset_iter *di, int *domain) di->di_n = di->di_domain->ds_cnt; break; default: - panic("vm_domainset_iter_first: Unknown policy %d", - di->di_policy); + panic("%s: Unknown policy %d", __func__, di->di_policy); } - KASSERT(di->di_n > 0, - ("vm_domainset_iter_first: Invalid n %d", di->di_n)); + KASSERT(di->di_n > 0, ("%s: Invalid n %d", __func__, di->di_n)); KASSERT(*domain < vm_ndomains, - ("vm_domainset_iter_first: Invalid domain %d", *domain)); + ("%s: Invalid domain %d", __func__, *domain)); } void diff --git a/sys/vm/vm_kern.c b/sys/vm/vm_kern.c index 875c22d27628..e7d7b6726d2c 100644 --- a/sys/vm/vm_kern.c +++ b/sys/vm/vm_kern.c @@ -110,11 +110,18 @@ u_int exec_map_entry_size; u_int exec_map_entries; SYSCTL_ULONG(_vm, OID_AUTO, min_kernel_address, CTLFLAG_RD, - SYSCTL_NULL_ULONG_PTR, VM_MIN_KERNEL_ADDRESS, "Min kernel address"); +#if defined(__amd64__) + &kva_layout.km_low, 0, +#else + SYSCTL_NULL_ULONG_PTR, VM_MIN_KERNEL_ADDRESS, +#endif + "Min kernel address"); SYSCTL_ULONG(_vm, OID_AUTO, max_kernel_address, CTLFLAG_RD, #if defined(__arm__) &vm_max_kernel_address, 0, +#elif defined(__amd64__) + &kva_layout.km_high, 0, #else SYSCTL_NULL_ULONG_PTR, VM_MAX_KERNEL_ADDRESS, #endif diff --git a/sys/vm/vm_pagequeue.h b/sys/vm/vm_pagequeue.h index cbbd27389662..9bd3b389fb60 100644 --- a/sys/vm/vm_pagequeue.h +++ b/sys/vm/vm_pagequeue.h @@ -260,9 +260,9 @@ struct vm_domain { u_int vmd_inactive_shortage; /* Per-thread shortage. */ blockcount_t vmd_inactive_running; /* Number of inactive threads. */ blockcount_t vmd_inactive_starting; /* Number of threads started. */ - volatile u_int vmd_addl_shortage; /* Shortage accumulator. */ - volatile u_int vmd_inactive_freed; /* Successful inactive frees. */ - volatile u_int vmd_inactive_us; /* Microseconds for above. */ + u_int vmd_addl_shortage; /* (a) Shortage accumulator. */ + u_int vmd_inactive_freed; /* (a) Successful inactive frees. */ + u_int vmd_inactive_us; /* (a) Microseconds for above. */ u_int vmd_inactive_pps; /* Exponential decay frees/second. */ int vmd_oom_seq; int vmd_last_active_scan; |
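
The pf_validate_range() check added in pf_ioctl.c rejects port ranges that can never match (or, for the except-range operator, would match everything). A minimal userland sketch of the same check, exercising the values from its comments; the OP_* constants are stand-ins for pf's PF_OP_* values:

#include <arpa/inet.h>          /* ntohs(), htons() */
#include <stdint.h>
#include <stdio.h>

enum { OP_RRG, OP_IRG, OP_XRG };        /* stand-ins for PF_OP_RRG etc. */

/*
 * Same shape as the new kernel check: ports arrive in network byte
 * order; a range is invalid when its bounds are inverted.
 */
static int
validate_range(uint8_t op, uint16_t port[2])
{
        uint16_t a = ntohs(port[0]);
        uint16_t b = ntohs(port[1]);

        if ((op == OP_RRG && a > b) ||  /* 34:12, i.e. none */
            (op == OP_IRG && a >= b) || /* 34><12, i.e. none */
            (op == OP_XRG && a > b))    /* 34<>22, i.e. all */
                return (1);
        return (0);
}

int
main(void)
{
        uint16_t inverted[2] = { htons(34), htons(12) };
        uint16_t normal[2] = { htons(12), htons(34) };

        printf("34:12 invalid=%d\n", validate_range(OP_RRG, inverted));
        printf("12:34 invalid=%d\n", validate_range(OP_RRG, normal));
        return (0);
}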
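
The PF_POOL_RANDOM rework in pf_lb.c funnels table and dynamic-interface pools through a single randomized index selection. A sketch of just that selection step; pick_tblidx() is an invented name:

#include <stdio.h>
#include <stdlib.h>     /* arc4random_uniform(), FreeBSD/OpenBSD libc */

/*
 * Index selection as in the PF_POOL_RANDOM path: an empty table falls
 * back to index 0, anything else gets a uniformly distributed index
 * in [0, cnt), matching the arc4random_uniform() call in the patch.
 */
static int
pick_tblidx(int cnt)
{
        if (cnt == 0)
                return (0);
        return ((int)arc4random_uniform(cnt));
}

int
main(void)
{
        printf("8-entry table: idx %d\n", pick_tblidx(8));
        printf("empty table: idx %d\n", pick_tblidx(0));
        return (0);
}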
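
The pf_table.c hunks convert several destroy loops from manual first/next bookkeeping to head-removal drains. A self-contained sketch of that idiom over a <sys/queue.h> SLIST, with all names invented here:

#include <sys/queue.h>
#include <stdio.h>
#include <stdlib.h>

struct entry {
        int                     val;
        SLIST_ENTRY(entry)      link;
};

SLIST_HEAD(entry_head, entry);

/*
 * Drain idiom adopted by the patch: unlink the head before freeing it,
 * so no pointer into freed memory is carried across iterations and the
 * list is left empty on return.
 */
static void
destroy_entries(struct entry_head *head)
{
        struct entry *p;

        while ((p = SLIST_FIRST(head)) != NULL) {
                SLIST_REMOVE_HEAD(head, link);
                free(p);
        }
}

int
main(void)
{
        struct entry_head head = SLIST_HEAD_INITIALIZER(head);
        struct entry *e;
        int i;

        for (i = 0; i < 3; i++) {
                if ((e = malloc(sizeof(*e))) == NULL)
                        return (1);
                e->val = i;
                SLIST_INSERT_HEAD(&head, e, link);
        }
        destroy_entries(&head);
        printf("list empty: %d\n", SLIST_EMPTY(&head));
        return (0);
}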
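
The sendsig() hunk grows the area reserved below the 64-bit user stack pointer from 288 to 512 bytes for the ELFv2 red zone. A sketch of the frame-size arithmetic it performs, assuming an illustrative sigframe size since the real struct is not shown in the hunk:

#include <stdio.h>

/* roundup() as defined in sys/param.h. */
#define roundup(x, y)   ((((x) + ((y) - 1)) / (y)) * (y))

/* Stand-in for the kernel's struct sigframe; the real size differs. */
struct sigframe {
        char data[360];
};

int
main(void)
{
        /*
         * ELFv2 reserves a 512-byte red zone below the stack pointer,
         * so the signal frame is pushed past it: red zone plus the
         * frame rounded up to 48-byte stack alignment.
         */
        unsigned long rndfsize = 512 + roundup(sizeof(struct sigframe), 48);

        printf("reserved below sp: %lu bytes\n", rndfsize);
        return (0);
}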