31 files changed, 713 insertions, 185 deletions
diff --git a/sys/amd64/acpica/acpi_wakeup.c b/sys/amd64/acpica/acpi_wakeup.c
index 99565fbb69ca..8cada2f4f911 100644
--- a/sys/amd64/acpica/acpi_wakeup.c
+++ b/sys/amd64/acpica/acpi_wakeup.c
@@ -74,7 +74,7 @@ extern int		acpi_susp_bounce;
 extern struct susppcb	**susppcbs;
 static cpuset_t		suspcpus;
 
-static void		acpi_stop_beep(void *);
+static void		acpi_stop_beep(void *, enum power_stype);
 
 static int		acpi_wakeup_ap(struct acpi_softc *, int);
 static void		acpi_wakeup_cpus(struct acpi_softc *);
@@ -88,7 +88,7 @@ static void		acpi_wakeup_cpus(struct acpi_softc *);
 } while (0)
 
 static void
-acpi_stop_beep(void *arg)
+acpi_stop_beep(void *arg, enum power_stype stype)
 {
 
 	if (acpi_resume_beep != 0)
diff --git a/sys/arm64/include/armreg.h b/sys/arm64/include/armreg.h
index da051e8f7c8a..393d6d89da0c 100644
--- a/sys/arm64/include/armreg.h
+++ b/sys/arm64/include/armreg.h
@@ -2180,6 +2180,7 @@
 #define	OSLAR_EL1_CRn			1
 #define	OSLAR_EL1_CRm			0
 #define	OSLAR_EL1_op2			4
+#define	OSLAR_OSLK			(0x1ul << 0)	/* OS Lock */
 
 /* OSLSR_EL1 */
 #define	OSLSR_EL1_op0			2
@@ -2187,6 +2188,10 @@
 #define	OSLSR_EL1_CRn			1
 #define	OSLSR_EL1_CRm			1
 #define	OSLSR_EL1_op2			4
+#define	OSLSR_OSLM_1			(0x1ul << 3)	/* OSLM[1] */
+#define	OSLSR_nTT			(0x1ul << 2)
+#define	OSLSR_OSLK			(0x1ul << 1)	/* OS Lock status */
+#define	OSLSR_OSLM_0			(0x1ul << 0)	/* OSLM[0] */
 
 /* PAR_EL1 - Physical Address Register */
 #define	PAR_F_SHIFT		0
diff --git a/sys/arm64/vmm/arm64.h b/sys/arm64/vmm/arm64.h
index 334b795832a3..f9b74aef7188 100644
--- a/sys/arm64/vmm/arm64.h
+++ b/sys/arm64/vmm/arm64.h
@@ -119,6 +119,7 @@ struct hypctx {
 	struct vgic_v3_regs	vgic_v3_regs;
 	struct vgic_v3_cpu	*vgic_cpu;
 	bool			has_exception;
+	bool			dbg_oslock;
 };
 
 struct hyp {
diff --git a/sys/arm64/vmm/vmm.c b/sys/arm64/vmm/vmm.c
index 1dcefa1489e9..a551a2807183 100644
--- a/sys/arm64/vmm/vmm.c
+++ b/sys/arm64/vmm/vmm.c
@@ -651,6 +651,33 @@ vmm_reg_wi(struct vcpu *vcpu, uint64_t wval, void *arg)
 	return (0);
 }
 
+static int
+vmm_write_oslar_el1(struct vcpu *vcpu, uint64_t wval, void *arg)
+{
+	struct hypctx *hypctx;
+
+	hypctx = vcpu_get_cookie(vcpu);
+	/* Only record OSLK (not enforced yet); all other fields are RES0 */
+	/* TODO: Disable access to other debug state when locked */
+	hypctx->dbg_oslock = (wval & OSLAR_OSLK) == OSLAR_OSLK;
+	return (0);	/* never fails */
+}
+
+static int
+vmm_read_oslsr_el1(struct vcpu *vcpu, uint64_t *rval, void *arg)
+{
+	struct hypctx *hypctx;
+	uint64_t val;
+
+	hypctx = vcpu_get_cookie(vcpu);
+	val = OSLSR_OSLM_1;	/* OSLM = 0b10: OS Lock implemented */
+	if (hypctx->dbg_oslock)
+		val |= OSLSR_OSLK;	/* set by vmm_write_oslar_el1() */
+	*rval = val;
+
+	return (0);
+}
+
 static const struct vmm_special_reg vmm_special_regs[] = {
 #define	SPECIAL_REG(_reg, _read, _write)				\
 	{								\
@@ -707,6 +734,13 @@ static const struct vmm_special_reg vmm_special_regs[] = {
 	SPECIAL_REG(CNTP_TVAL_EL0, vtimer_phys_tval_read,
 	    vtimer_phys_tval_write),
 	SPECIAL_REG(CNTPCT_EL0, vtimer_phys_cnt_read, vtimer_phys_cnt_write),
+
+	/* Debug registers */
+	SPECIAL_REG(DBGPRCR_EL1, vmm_reg_raz, vmm_reg_wi),
+	SPECIAL_REG(OSDLR_EL1, vmm_reg_raz, vmm_reg_wi),
+	/* TODO: Exceptions on invalid access */
+	SPECIAL_REG(OSLAR_EL1, vmm_reg_raz, vmm_write_oslar_el1),
+	SPECIAL_REG(OSLSR_EL1, vmm_read_oslsr_el1, vmm_reg_wi),
 #undef SPECIAL_REG
 };
 
diff --git a/sys/cam/ata/ata_da.c b/sys/cam/ata/ata_da.c
index 1facab47473c..0d844a6fbf9e 100644
--- a/sys/cam/ata/ata_da.c
+++ b/sys/cam/ata/ata_da.c
@@ -44,6 +44,7 @@
 #include <sys/malloc.h>
 #include <sys/endian.h>
 #include <sys/cons.h>
+#include <sys/power.h>
 #include <sys/proc.h>
 #include <sys/reboot.h>
 #include <sys/sbuf.h>
@@ -878,8 +879,8 @@ static  int		adaerror(union ccb *ccb, uint32_t cam_flags,
 				uint32_t sense_flags);
 static callout_func_t	adasendorderedtag;
 static void		adashutdown(void *arg, int howto);
-static void		adasuspend(void *arg);
-static void		adaresume(void *arg);
+static void		adasuspend(void *arg, enum power_stype stype);
+static void		adaresume(void *arg, enum power_stype stype);
 
 #ifndef ADA_DEFAULT_TIMEOUT
 #define ADA_DEFAULT_TIMEOUT 30	/* Timeout in seconds */
@@ -3747,7 +3748,7 @@ adashutdown(void *arg, int howto)
 }
 
 static void
-adasuspend(void *arg)
+adasuspend(void *arg, enum power_stype stype)
 {
 
 	adaflush();
@@ -3760,7 +3761,7 @@ adasuspend(void *arg)
 }
 
 static void
-adaresume(void *arg)
+adaresume(void *arg, enum power_stype stype)
 {
 	struct cam_periph *periph;
 	struct ada_softc *softc;
diff --git a/sys/cam/nvme/nvme_da.c b/sys/cam/nvme/nvme_da.c
index 1c0d5e8381d8..9c4707da482c 100644
--- a/sys/cam/nvme/nvme_da.c
+++ b/sys/cam/nvme/nvme_da.c
@@ -43,6 +43,7 @@
 #include <sys/eventhandler.h>
 #include <sys/malloc.h>
 #include <sys/cons.h>
+#include <sys/power.h>
 #include <sys/proc.h>
 #include <sys/reboot.h>
 #include <sys/sbuf.h>
@@ -159,7 +160,7 @@ static	void		ndadone(struct cam_periph *periph,
 static  int		ndaerror(union ccb *ccb, uint32_t cam_flags,
 				uint32_t sense_flags);
 static void		ndashutdown(void *arg, int howto);
-static void		ndasuspend(void *arg);
+static void		ndasuspend(void *arg, enum power_stype stype);
 
 #ifndef	NDA_DEFAULT_SEND_ORDERED
 #define	NDA_DEFAULT_SEND_ORDERED	1
@@ -1365,7 +1366,7 @@ ndashutdown(void *arg, int howto)
 }
 
 static void
-ndasuspend(void *arg)
+ndasuspend(void *arg, enum power_stype stype)
 {
 
 	ndaflush();
diff --git a/sys/compat/linuxkpi/common/src/linux_acpi.c b/sys/compat/linuxkpi/common/src/linux_acpi.c
index 43783bb8727b..c7d62c745c7e 100644
--- a/sys/compat/linuxkpi/common/src/linux_acpi.c
+++ b/sys/compat/linuxkpi/common/src/linux_acpi.c
@@ -33,6 +33,7 @@
 #include <sys/bus.h>
 #include <sys/eventhandler.h>
 #include <sys/kernel.h>
+#include <sys/power.h>
 
 #include <contrib/dev/acpica/include/acpi.h>
 #include <dev/acpica/acpivar.h>
@@ -118,20 +119,32 @@ acpi_evaluate_dsm(ACPI_HANDLE ObjHandle, const guid_t *guid,
 }
 
 static void
-linux_handle_power_suspend_event(void *arg __unused)
+linux_handle_power_suspend_event(void *arg __unused, enum power_stype stype)
 {
-	/*
-	 * Only support S3 for now.
-	 * acpi_sleep_event isn't always called so we use power_suspend_early
-	 * instead which means we don't know what state we're switching to.
-	 * TODO: Make acpi_sleep_event consistent
-	 */
-	linux_acpi_target_sleep_state = ACPI_STATE_S3;
-	pm_suspend_target_state = PM_SUSPEND_MEM;
+	switch (stype) {
+	case POWER_STYPE_SUSPEND_TO_IDLE:
+		/*
+		 * XXX: obiwac Not 100% sure this is correct, but
+		 * acpi_target_sleep_state does seem to be set to
+		 * ACPI_STATE_S3 during s2idle on Linux.
+		 */
+		linux_acpi_target_sleep_state = ACPI_STATE_S3;
+		pm_suspend_target_state = PM_SUSPEND_TO_IDLE;
+		break;
+	case POWER_STYPE_SUSPEND_TO_MEM:
+		linux_acpi_target_sleep_state = ACPI_STATE_S3;
+		pm_suspend_target_state = PM_SUSPEND_MEM;
+		break;
+	default:	/* unhandled type: targets left unchanged */
+		printf("%s: sleep type %d not yet supported\n",
+		    __func__, stype);
+		break;
+	}
 }
 
 static void
-linux_handle_power_resume_event(void *arg __unused)
+linux_handle_power_resume_event(void *arg __unused,
+    enum power_stype stype __unused)
 {
 	linux_acpi_target_sleep_state = ACPI_STATE_S0;
 	pm_suspend_target_state = PM_SUSPEND_ON;
diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/sysctl_os.c b/sys/contrib/openzfs/module/os/freebsd/zfs/sysctl_os.c
index 393bfaa65ff5..ace2360c032d 100644
--- a/sys/contrib/openzfs/module/os/freebsd/zfs/sysctl_os.c
+++ b/sys/contrib/openzfs/module/os/freebsd/zfs/sysctl_os.c
@@ -188,6 +188,11 @@ param_set_arc_max(SYSCTL_HANDLER_ARGS)
 	return (0);
 }
 
+SYSCTL_PROC(_vfs_zfs, OID_AUTO, arc_max,
+	CTLTYPE_ULONG | CTLFLAG_RWTUN | CTLFLAG_MPSAFE,
+	NULL, 0, param_set_arc_max, "LU",
+	"Maximum ARC size in bytes (LEGACY)");
+
 int
 param_set_arc_min(SYSCTL_HANDLER_ARGS)
 {
@@ -212,6 +217,11 @@ param_set_arc_min(SYSCTL_HANDLER_ARGS)
 	return (0);
 }
 
+SYSCTL_PROC(_vfs_zfs, OID_AUTO, arc_min,
+	CTLTYPE_ULONG | CTLFLAG_RWTUN | CTLFLAG_MPSAFE,
+	NULL, 0, param_set_arc_min, "LU",
+	"Minimum ARC size in bytes (LEGACY)");
+
 extern uint_t zfs_arc_free_target;
 
 int
@@ -235,6 +245,16 @@ param_set_arc_free_target(SYSCTL_HANDLER_ARGS)
 	return (0);
 }
 
+/*
+ * NOTE: This sysctl is CTLFLAG_RW not CTLFLAG_RWTUN due to its dependency on
+ * pagedaemon initialization.
+ */
+SYSCTL_PROC(_vfs_zfs, OID_AUTO, arc_free_target,
+	CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE,
+	NULL, 0, param_set_arc_free_target, "IU",
+	"Desired number of free pages below which ARC triggers reclaim"
+	" (LEGACY)");
+
 int
 param_set_arc_no_grow_shift(SYSCTL_HANDLER_ARGS)
 {
@@ -253,6 +273,187 @@ param_set_arc_no_grow_shift(SYSCTL_HANDLER_ARGS)
 	return (0);
 }
 
+SYSCTL_PROC(_vfs_zfs, OID_AUTO, arc_no_grow_shift,
+	CTLTYPE_INT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE,
+	NULL, 0, param_set_arc_no_grow_shift, "I",
+	"log2(fraction of ARC which must be free to allow growing) (LEGACY)");
+
+extern uint64_t l2arc_write_max;
+
+SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, l2arc_write_max,
+	CTLFLAG_RWTUN, &l2arc_write_max, 0,
+	"Max write bytes per interval (LEGACY)");
+
+extern uint64_t l2arc_write_boost;
+
+SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, l2arc_write_boost,
+	CTLFLAG_RWTUN, &l2arc_write_boost, 0,
+	"Extra write bytes during device warmup (LEGACY)");
+
+extern uint64_t l2arc_headroom;
+
+SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, l2arc_headroom,
+	CTLFLAG_RWTUN, &l2arc_headroom, 0,
+	"Number of max device writes to precache (LEGACY)");
+
+extern uint64_t l2arc_headroom_boost;
+
+SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, l2arc_headroom_boost,
+	CTLFLAG_RWTUN, &l2arc_headroom_boost, 0,
+	"Compressed l2arc_headroom multiplier (LEGACY)");
+
+extern uint64_t l2arc_feed_secs;
+
+SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, l2arc_feed_secs,
+	CTLFLAG_RWTUN, &l2arc_feed_secs, 0,
+	"Seconds between L2ARC writing (LEGACY)");
+
+extern uint64_t l2arc_feed_min_ms;
+
+SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, l2arc_feed_min_ms,
+	CTLFLAG_RWTUN, &l2arc_feed_min_ms, 0,
+	"Min feed interval in milliseconds (LEGACY)");
+
+extern int l2arc_noprefetch;
+
+SYSCTL_INT(_vfs_zfs, OID_AUTO, l2arc_noprefetch,
+	CTLFLAG_RWTUN, &l2arc_noprefetch, 0,
+	"Skip caching prefetched buffers (LEGACY)");
+
+extern int l2arc_feed_again;
+
+SYSCTL_INT(_vfs_zfs, OID_AUTO, l2arc_feed_again,
+	CTLFLAG_RWTUN, &l2arc_feed_again, 0,
+	"Turbo L2ARC warmup (LEGACY)");
+
+extern int l2arc_norw;
+
+SYSCTL_INT(_vfs_zfs, OID_AUTO, l2arc_norw,
+	CTLFLAG_RWTUN, &l2arc_norw, 0,
+	"No reads during writes (LEGACY)");
+
+static int
+param_get_arc_state_size(SYSCTL_HANDLER_ARGS)
+{
+	arc_state_t *state = (arc_state_t *)arg1;	/* set per OID */
+	int64_t val;	/* data + metadata total */
+
+	val = zfs_refcount_count(&state->arcs_size[ARC_BUFC_DATA]) +
+	    zfs_refcount_count(&state->arcs_size[ARC_BUFC_METADATA]);
+	return (sysctl_handle_64(oidp, &val, 0, req));
+}
+
+extern arc_state_t ARC_anon;
+
+SYSCTL_PROC(_vfs_zfs, OID_AUTO, anon_size,
+	CTLTYPE_S64 | CTLFLAG_RD | CTLFLAG_MPSAFE,
+	&ARC_anon, 0, param_get_arc_state_size, "Q",
+	"size of anonymous state");
+SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, anon_metadata_esize, CTLFLAG_RD,
+	&ARC_anon.arcs_esize[ARC_BUFC_METADATA].rc_count, 0,
+	"size of evictable metadata in anonymous state");
+SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, anon_data_esize, CTLFLAG_RD,
+	&ARC_anon.arcs_esize[ARC_BUFC_DATA].rc_count, 0,
+	"size of evictable data in anonymous state");
+
+extern arc_state_t ARC_mru;
+
+SYSCTL_PROC(_vfs_zfs, OID_AUTO, mru_size,
+	CTLTYPE_S64 | CTLFLAG_RD | CTLFLAG_MPSAFE,
+	&ARC_mru, 0, param_get_arc_state_size, "Q",
+	"size of mru state");
+SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mru_metadata_esize, CTLFLAG_RD,
+	&ARC_mru.arcs_esize[ARC_BUFC_METADATA].rc_count, 0,
+	"size of evictable metadata in mru state");
+SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mru_data_esize, CTLFLAG_RD,
+	&ARC_mru.arcs_esize[ARC_BUFC_DATA].rc_count, 0,
+	"size of evictable data in mru state");
+
+extern arc_state_t ARC_mru_ghost;
+
+SYSCTL_PROC(_vfs_zfs, OID_AUTO, mru_ghost_size,
+	CTLTYPE_S64 | CTLFLAG_RD | CTLFLAG_MPSAFE,
+	&ARC_mru_ghost, 0, param_get_arc_state_size, "Q",
+	"size of mru ghost state");
+SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mru_ghost_metadata_esize, CTLFLAG_RD,
+	&ARC_mru_ghost.arcs_esize[ARC_BUFC_METADATA].rc_count, 0,
+	"size of evictable metadata in mru ghost state");
+SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mru_ghost_data_esize, CTLFLAG_RD,
+	&ARC_mru_ghost.arcs_esize[ARC_BUFC_DATA].rc_count, 0,
+	"size of evictable data in mru ghost state");
+
+extern arc_state_t ARC_mfu;
+
+SYSCTL_PROC(_vfs_zfs, OID_AUTO, mfu_size,
+	CTLTYPE_S64 | CTLFLAG_RD | CTLFLAG_MPSAFE,
+	&ARC_mfu, 0, param_get_arc_state_size, "Q",
+	"size of mfu state");
+SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mfu_metadata_esize, CTLFLAG_RD,
+	&ARC_mfu.arcs_esize[ARC_BUFC_METADATA].rc_count, 0,
+	"size of evictable metadata in mfu state");
+SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mfu_data_esize, CTLFLAG_RD,
+	&ARC_mfu.arcs_esize[ARC_BUFC_DATA].rc_count, 0,
+	"size of evictable data in mfu state");
+
+extern arc_state_t ARC_mfu_ghost;
+
+SYSCTL_PROC(_vfs_zfs, OID_AUTO, mfu_ghost_size,
+	CTLTYPE_S64 | CTLFLAG_RD | CTLFLAG_MPSAFE,
+	&ARC_mfu_ghost, 0, param_get_arc_state_size, "Q",
+	"size of mfu ghost state");
+SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mfu_ghost_metadata_esize, CTLFLAG_RD,
+	&ARC_mfu_ghost.arcs_esize[ARC_BUFC_METADATA].rc_count, 0,
+	"size of evictable metadata in mfu ghost state");
+SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mfu_ghost_data_esize, CTLFLAG_RD,
+	&ARC_mfu_ghost.arcs_esize[ARC_BUFC_DATA].rc_count, 0,
+	"size of evictable data in mfu ghost state");
+
+extern arc_state_t ARC_uncached;
+
+SYSCTL_PROC(_vfs_zfs, OID_AUTO, uncached_size,
+	CTLTYPE_S64 | CTLFLAG_RD | CTLFLAG_MPSAFE,
+	&ARC_uncached, 0, param_get_arc_state_size, "Q",
+	"size of uncached state");
+SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, uncached_metadata_esize, CTLFLAG_RD,
+	&ARC_uncached.arcs_esize[ARC_BUFC_METADATA].rc_count, 0,
+	"size of evictable metadata in uncached state");
+SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, uncached_data_esize, CTLFLAG_RD,
+	&ARC_uncached.arcs_esize[ARC_BUFC_DATA].rc_count, 0,
+	"size of evictable data in uncached state");
+
+extern arc_state_t ARC_l2c_only;
+
+SYSCTL_PROC(_vfs_zfs, OID_AUTO, l2c_only_size,
+	CTLTYPE_S64 | CTLFLAG_RD | CTLFLAG_MPSAFE,
+	&ARC_l2c_only, 0, param_get_arc_state_size, "Q",
+	"size of l2c_only state");
+
+/* dbuf.c */
+
+/* dmu.c */
+
+/* dmu_zfetch.c */
+
+SYSCTL_NODE(_vfs_zfs, OID_AUTO, zfetch, CTLFLAG_RW, 0, "ZFS ZFETCH (LEGACY)");
+
+extern uint32_t	zfetch_max_distance;
+
+SYSCTL_UINT(_vfs_zfs_zfetch, OID_AUTO, max_distance,
+	CTLFLAG_RWTUN, &zfetch_max_distance, 0,
+	"Max bytes to prefetch per stream (LEGACY)");
+
+extern uint32_t	zfetch_max_idistance;
+
+SYSCTL_UINT(_vfs_zfs_zfetch, OID_AUTO, max_idistance,
+	CTLFLAG_RWTUN, &zfetch_max_idistance, 0,
+	"Max bytes to prefetch indirects for per stream (LEGACY)");
+
+/* dsl_pool.c */
+
+/* dnode.c */
+
+/* dsl_scan.c */
+
 /* metaslab.c */
 
 int
@@ -313,6 +514,19 @@ SYSCTL_UINT(_vfs_zfs, OID_AUTO, condense_pct,
 	"Condense on-disk spacemap when it is more than this many percents"
 	" of in-memory counterpart");
 
+extern uint_t zfs_remove_max_segment;
+
+SYSCTL_UINT(_vfs_zfs, OID_AUTO, remove_max_segment,
+	CTLFLAG_RWTUN, &zfs_remove_max_segment, 0,
+	"Largest contiguous segment ZFS will attempt to allocate when removing"
+	" a device");
+
+extern int zfs_removal_suspend_progress;
+
+SYSCTL_INT(_vfs_zfs, OID_AUTO, removal_suspend_progress,
+	CTLFLAG_RWTUN, &zfs_removal_suspend_progress, 0,
+	"Ensures certain actions can happen while in the middle of a removal");
+
 /*
  * Minimum size which forces the dynamic allocator to change
  * it's allocation strategy.  Once the space map cannot satisfy
@@ -535,6 +749,12 @@ param_set_min_auto_ashift(SYSCTL_HANDLER_ARGS)
 	return (0);
 }
 
+SYSCTL_PROC(_vfs_zfs, OID_AUTO, min_auto_ashift,
+	CTLTYPE_UINT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE,
+	&zfs_vdev_min_auto_ashift, sizeof (zfs_vdev_min_auto_ashift),
+	param_set_min_auto_ashift, "IU",
+	"Min ashift used when creating new top-level vdev. (LEGACY)");
+
 int
 param_set_max_auto_ashift(SYSCTL_HANDLER_ARGS)
 {
@@ -554,6 +774,13 @@ param_set_max_auto_ashift(SYSCTL_HANDLER_ARGS)
 	return (0);
 }
 
+SYSCTL_PROC(_vfs_zfs, OID_AUTO, max_auto_ashift,
+	CTLTYPE_UINT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE,
+	&zfs_vdev_max_auto_ashift, sizeof (zfs_vdev_max_auto_ashift),
+	param_set_max_auto_ashift, "IU",
+	"Max ashift used when optimizing for logical -> physical sector size on"
+	" new top-level vdevs. (LEGACY)");
+
 /*
  * Since the DTL space map of a vdev is not expected to have a lot of
  * entries, we default its block size to 4K.
@@ -575,6 +802,23 @@ SYSCTL_INT(_vfs_zfs, OID_AUTO, standard_sm_blksz,
 	CTLFLAG_RDTUN, &zfs_vdev_standard_sm_blksz, 0,
 	"Block size for standard space map.  Power of 2 greater than 4096.");
 
+extern int vdev_validate_skip;
+
+SYSCTL_INT(_vfs_zfs, OID_AUTO, validate_skip,
+	CTLFLAG_RDTUN, &vdev_validate_skip, 0,
+	"Enable to bypass vdev_validate().");
+
+/* vdev_mirror.c */
+
+/* vdev_queue.c */
+
+extern uint_t zfs_vdev_max_active;
+
+SYSCTL_UINT(_vfs_zfs, OID_AUTO, top_maxinflight,
+	CTLFLAG_RWTUN, &zfs_vdev_max_active, 0,
+	"The maximum number of I/Os of all types active for each device."
+	" (LEGACY)");
+
 /* zio.c */
 
 SYSCTL_INT(_vfs_zfs_zio, OID_AUTO, exclude_metadata,
diff --git a/sys/contrib/openzfs/module/zfs/arc.c b/sys/contrib/openzfs/module/zfs/arc.c
index 591e2dade59e..b677f90280d7 100644
--- a/sys/contrib/openzfs/module/zfs/arc.c
+++ b/sys/contrib/openzfs/module/zfs/arc.c
@@ -486,13 +486,13 @@ static taskq_t *arc_flush_taskq;
 static uint_t zfs_arc_evict_threads = 0;
 
 /* The 7 states: */
-static arc_state_t ARC_anon;
-/*  */ arc_state_t ARC_mru;
-static arc_state_t ARC_mru_ghost;
-/*  */ arc_state_t ARC_mfu;
-static arc_state_t ARC_mfu_ghost;
-static arc_state_t ARC_l2c_only;
-static arc_state_t ARC_uncached;
+arc_state_t ARC_anon;
+arc_state_t ARC_mru;
+arc_state_t ARC_mru_ghost;
+arc_state_t ARC_mfu;
+arc_state_t ARC_mfu_ghost;
+arc_state_t ARC_l2c_only;
+arc_state_t ARC_uncached;
 
 arc_stats_t arc_stats = {
 	{ "hits",			KSTAT_DATA_UINT64 },
@@ -832,15 +832,15 @@ typedef struct arc_async_flush {
 #define	L2ARC_FEED_TYPES	4
 
 /* L2ARC Performance Tunables */
-static uint64_t l2arc_write_max = L2ARC_WRITE_SIZE;	/* def max write size */
-static uint64_t l2arc_write_boost = L2ARC_WRITE_SIZE;	/* extra warmup write */
-static uint64_t l2arc_headroom = L2ARC_HEADROOM;	/* # of dev writes */
-static uint64_t l2arc_headroom_boost = L2ARC_HEADROOM_BOOST;
-static uint64_t l2arc_feed_secs = L2ARC_FEED_SECS;	/* interval seconds */
-static uint64_t l2arc_feed_min_ms = L2ARC_FEED_MIN_MS;	/* min interval msecs */
-static int l2arc_noprefetch = B_TRUE;		/* don't cache prefetch bufs */
-static int l2arc_feed_again = B_TRUE;		/* turbo warmup */
-static int l2arc_norw = B_FALSE;		/* no reads during writes */
+uint64_t l2arc_write_max = L2ARC_WRITE_SIZE;	/* def max write size */
+uint64_t l2arc_write_boost = L2ARC_WRITE_SIZE;	/* extra warmup write */
+uint64_t l2arc_headroom = L2ARC_HEADROOM;	/* # of dev writes */
+uint64_t l2arc_headroom_boost = L2ARC_HEADROOM_BOOST;
+uint64_t l2arc_feed_secs = L2ARC_FEED_SECS;	/* interval seconds */
+uint64_t l2arc_feed_min_ms = L2ARC_FEED_MIN_MS;	/* min interval msecs */
+int l2arc_noprefetch = B_TRUE;			/* don't cache prefetch bufs */
+int l2arc_feed_again = B_TRUE;			/* turbo warmup */
+int l2arc_norw = B_FALSE;			/* no reads during writes */
 static uint_t l2arc_meta_percent = 33;	/* limit on headers size */
 
 /*
diff --git a/sys/contrib/openzfs/module/zfs/dmu_zfetch.c b/sys/contrib/openzfs/module/zfs/dmu_zfetch.c
index 3d3a9c713568..51165d0bf723 100644
--- a/sys/contrib/openzfs/module/zfs/dmu_zfetch.c
+++ b/sys/contrib/openzfs/module/zfs/dmu_zfetch.c
@@ -57,19 +57,19 @@ static unsigned int	zfetch_max_sec_reap = 2;
 /* min bytes to prefetch per stream (default 2MB) */
 static unsigned int	zfetch_min_distance = 2 * 1024 * 1024;
 /* max bytes to prefetch per stream (default 8MB) */
-static unsigned int	zfetch_max_distance = 8 * 1024 * 1024;
+unsigned int	zfetch_max_distance = 8 * 1024 * 1024;
 #else
 /* min bytes to prefetch per stream (default 4MB) */
 static unsigned int	zfetch_min_distance = 4 * 1024 * 1024;
 /* max bytes to prefetch per stream (default 64MB) */
-static unsigned int	zfetch_max_distance = 64 * 1024 * 1024;
+unsigned int	zfetch_max_distance = 64 * 1024 * 1024;
 #endif
 /* max bytes to prefetch indirects for per stream (default 128MB) */
-static unsigned int	zfetch_max_idistance = 128 * 1024 * 1024;
+unsigned int	zfetch_max_idistance = 128 * 1024 * 1024;
 /* max request reorder distance within a stream (default 16MB) */
-static unsigned int	zfetch_max_reorder = 16 * 1024 * 1024;
+unsigned int	zfetch_max_reorder = 16 * 1024 * 1024;
 /* Max log2 fraction of holes in a stream */
-static unsigned int	zfetch_hole_shift = 2;
+unsigned int	zfetch_hole_shift = 2;
 
 typedef struct zfetch_stats {
 	kstat_named_t zfetchstat_hits;
diff --git a/sys/contrib/openzfs/module/zfs/vdev.c b/sys/contrib/openzfs/module/zfs/vdev.c
index 654e034de9e1..c8d7280387a2 100644
--- a/sys/contrib/openzfs/module/zfs/vdev.c
+++ b/sys/contrib/openzfs/module/zfs/vdev.c
@@ -100,7 +100,7 @@ static uint_t zfs_vdev_default_ms_shift = 29;
 /* upper limit for metaslab size (16G) */
 static uint_t zfs_vdev_max_ms_shift = 34;
 
-static int vdev_validate_skip = B_FALSE;
+int vdev_validate_skip = B_FALSE;
 
 /*
  * Since the DTL space map of a vdev is not expected to have a lot of
diff --git a/sys/contrib/openzfs/module/zfs/vdev_queue.c b/sys/contrib/openzfs/module/zfs/vdev_queue.c
index e69e5598939e..c12713b107bf 100644
--- a/sys/contrib/openzfs/module/zfs/vdev_queue.c
+++ b/sys/contrib/openzfs/module/zfs/vdev_queue.c
@@ -122,7 +122,7 @@
  * The maximum number of i/os active to each device.  Ideally, this will be >=
  * the sum of each queue's max_active.
  */
-static uint_t zfs_vdev_max_active = 1000;
+uint_t zfs_vdev_max_active = 1000;
 
 /*
  * Per-queue limits on the number of i/os active to each device.  If the
diff --git a/sys/contrib/openzfs/module/zfs/vdev_removal.c b/sys/contrib/openzfs/module/zfs/vdev_removal.c
index 2ce0121324ad..2f7a739da241 100644
--- a/sys/contrib/openzfs/module/zfs/vdev_removal.c
+++ b/sys/contrib/openzfs/module/zfs/vdev_removal.c
@@ -105,7 +105,7 @@ static const uint_t zfs_remove_max_copy_bytes = 64 * 1024 * 1024;
  *
  * See also the accessor function spa_remove_max_segment().
  */
-static uint_t zfs_remove_max_segment = SPA_MAXBLOCKSIZE;
+uint_t zfs_remove_max_segment = SPA_MAXBLOCKSIZE;
 
 /*
  * Ignore hard IO errors during device removal.  When set if a device
@@ -137,7 +137,7 @@ uint_t vdev_removal_max_span = 32 * 1024;
  * This is used by the test suite so that it can ensure that certain
  * actions happen while in the middle of a removal.
  */
-static int zfs_removal_suspend_progress = 0;
+int zfs_removal_suspend_progress = 0;
 
 #define	VDEV_REMOVAL_ZAP_OBJS	"lzap"
 
diff --git a/sys/contrib/openzfs/tests/zfs-tests/include/tunables.cfg b/sys/contrib/openzfs/tests/zfs-tests/include/tunables.cfg
index 54b50c9dba77..127ea188f17f 100644
--- a/sys/contrib/openzfs/tests/zfs-tests/include/tunables.cfg
+++ b/sys/contrib/openzfs/tests/zfs-tests/include/tunables.cfg
@@ -76,8 +76,8 @@ READ_SIT_OUT_SECS		vdev.read_sit_out_secs		vdev_read_sit_out_secs
 SIT_OUT_CHECK_INTERVAL		vdev.raidz_outlier_check_interval_ms	vdev_raidz_outlier_check_interval_ms
 SIT_OUT_INSENSITIVITY		vdev.raidz_outlier_insensitivity	vdev_raidz_outlier_insensitivity
 REBUILD_SCRUB_ENABLED		rebuild_scrub_enabled		zfs_rebuild_scrub_enabled
-REMOVAL_SUSPEND_PROGRESS	vdev.removal_suspend_progress	zfs_removal_suspend_progress
-REMOVE_MAX_SEGMENT		vdev.remove_max_segment		zfs_remove_max_segment
+REMOVAL_SUSPEND_PROGRESS	removal_suspend_progress	zfs_removal_suspend_progress
+REMOVE_MAX_SEGMENT		remove_max_segment		zfs_remove_max_segment
 RESILVER_MIN_TIME_MS		resilver_min_time_ms		zfs_resilver_min_time_ms
 RESILVER_DEFER_PERCENT		resilver_defer_percent		zfs_resilver_defer_percent
 SCAN_LEGACY			scan_legacy			zfs_scan_legacy
diff --git a/sys/dev/acpica/acpi.c b/sys/dev/acpica/acpi.c
index 7f9ca6e39df8..3f0a7b40245d 100644
--- a/sys/dev/acpica/acpi.c
+++ b/sys/dev/acpica/acpi.c
@@ -3468,10 +3468,10 @@ acpi_EnterSleepState(struct acpi_softc *sc, enum power_stype stype)
 	return_ACPI_STATUS (AE_OK);
     }
 
-    EVENTHANDLER_INVOKE(power_suspend_early);
+    EVENTHANDLER_INVOKE(power_suspend_early, stype);
     stop_all_proc();
     suspend_all_fs();
-    EVENTHANDLER_INVOKE(power_suspend);
+    EVENTHANDLER_INVOKE(power_suspend, stype);
 
 #ifdef EARLY_AP_STARTUP
     MPASS(mp_ncpus == 1 || smp_started);
@@ -3632,7 +3632,7 @@ backout:
     resume_all_fs();
     resume_all_proc();
 
-    EVENTHANDLER_INVOKE(power_resume);
+    EVENTHANDLER_INVOKE(power_resume, stype);
 
     /* Allow another sleep request after a while. */
     callout_schedule(&acpi_sleep_timer, hz * ACPI_MINIMUM_AWAKETIME);
diff --git a/sys/dev/acpica/acpi_timer.c b/sys/dev/acpica/acpi_timer.c
index 3d51a4211b80..b20912e2f5fb 100644
--- a/sys/dev/acpica/acpi_timer.c
+++ b/sys/dev/acpica/acpi_timer.c
@@ -34,6 +34,7 @@
 #include <sys/module.h>
 #include <sys/sysctl.h>
 #include <sys/timetc.h>
+#include <sys/power.h>
 
 #include <machine/bus.h>
 #include <machine/resource.h>
@@ -69,8 +70,10 @@ bool acpi_timer_disabled = false;
 static void	acpi_timer_identify(driver_t *driver, device_t parent);
 static int	acpi_timer_probe(device_t dev);
 static int	acpi_timer_attach(device_t dev);
-static void	acpi_timer_resume_handler(struct timecounter *);
-static void	acpi_timer_suspend_handler(struct timecounter *);
+static void	acpi_timer_resume_handler(struct timecounter *,
+		    enum power_stype);
+static void	acpi_timer_suspend_handler(struct timecounter *,
+		    enum power_stype);
 static u_int	acpi_timer_get_timecount(struct timecounter *tc);
 static u_int	acpi_timer_get_timecount_safe(struct timecounter *tc);
 static int	acpi_timer_sysctl_freq(SYSCTL_HANDLER_ARGS);
@@ -235,7 +238,7 @@ acpi_timer_attach(device_t dev)
 }
 
 static void
-acpi_timer_resume_handler(struct timecounter *newtc)
+acpi_timer_resume_handler(struct timecounter *newtc, enum power_stype stype)
 {
 	struct timecounter *tc;
 
@@ -251,7 +254,7 @@ acpi_timer_resume_handler(struct timecounter *newtc)
 }
 
 static void
-acpi_timer_suspend_handler(struct timecounter *newtc)
+acpi_timer_suspend_handler(struct timecounter *newtc, enum power_stype stype)
 {
 	struct timecounter *tc;
 
diff --git a/sys/dev/nvme/nvme_private.h b/sys/dev/nvme/nvme_private.h
index 52f9e12f8f9a..52e9fcbbebcd 100644
--- a/sys/dev/nvme/nvme_private.h
+++ b/sys/dev/nvme/nvme_private.h
@@ -463,13 +463,13 @@ static __inline void
 nvme_completion_poll(struct nvme_completion_poll_status *status)
 {
 	int timeout = ticks + 10 * hz;
-	sbintime_t delta_t = SBT_1US;
+	sbintime_t delta = SBT_1US;	/* sleep step: x1.5, max 1ms */
 
 	while (!atomic_load_acq_int(&status->done)) {
 		if (timeout - ticks < 0)
 			panic("NVME polled command failed to complete within 10s.");
-		pause_sbt("nvme", delta_t, 0, C_PREL(1));
-		delta_t = min(SBT_1MS, delta_t * 3 / 2);
+		pause_sbt("nvme", delta, 0, C_PREL(1));
+		delta = min(SBT_1MS, delta + delta / 2);
 	}
 }
 
diff --git a/sys/dev/vt/vt_core.c b/sys/dev/vt/vt_core.c
index b51ef6766de4..bcf67ddc9689 100644
--- a/sys/dev/vt/vt_core.c
+++ b/sys/dev/vt/vt_core.c
@@ -195,8 +195,8 @@ static void vt_update_static(void *);
 #ifndef SC_NO_CUTPASTE
 static void vt_mouse_paste(void);
 #endif
-static void vt_suspend_handler(void *priv);
-static void vt_resume_handler(void *priv);
+static void vt_suspend_handler(void *priv, enum power_stype stype);
+static void vt_resume_handler(void *priv, enum power_stype stype);
 
 SET_DECLARE(vt_drv_set, struct vt_driver);
 
@@ -3330,7 +3330,7 @@ vt_replace_backend(const struct vt_driver *drv, void *softc)
 }
 
 static void
-vt_suspend_handler(void *priv)
+vt_suspend_handler(void *priv, enum power_stype stype)
 {
 	struct vt_device *vd;
 
@@ -3341,7 +3341,7 @@ vt_suspend_handler(void *priv)
 }
 
 static void
-vt_resume_handler(void *priv)
+vt_resume_handler(void *priv, enum power_stype stype)
 {
 	struct vt_device *vd;
 
diff --git a/sys/dev/xen/control/control.c b/sys/dev/xen/control/control.c
index 123df4992894..2c61b48c0451 100644
--- a/sys/dev/xen/control/control.c
+++ b/sys/dev/xen/control/control.c
@@ -91,6 +91,7 @@
 #include <sys/smp.h>
 #include <sys/eventhandler.h>
 #include <sys/timetc.h>
+#include <sys/power.h>
 
 #include <geom/geom.h>
 
@@ -175,12 +176,12 @@ xctrl_suspend(void)
 	cpuset_t cpu_suspend_map;
 #endif
 
-	EVENTHANDLER_INVOKE(power_suspend_early);
+	EVENTHANDLER_INVOKE(power_suspend_early, POWER_STYPE_SUSPEND_TO_MEM);
 	xs_lock();
 	stop_all_proc();
 	xs_unlock();
 	suspend_all_fs();
-	EVENTHANDLER_INVOKE(power_suspend);
+	EVENTHANDLER_INVOKE(power_suspend, POWER_STYPE_SUSPEND_TO_MEM);
 
 #ifdef EARLY_AP_STARTUP
 	MPASS(mp_ncpus == 1 || smp_started);
@@ -297,7 +298,7 @@ xctrl_suspend(void)
 	resume_all_fs();
 	resume_all_proc();
 
-	EVENTHANDLER_INVOKE(power_resume);
+	EVENTHANDLER_INVOKE(power_resume, POWER_STYPE_SUSPEND_TO_MEM);
 
 	if (bootverbose)
 		printf("System resumed after suspension\n");
diff --git a/sys/fs/nullfs/null_subr.c b/sys/fs/nullfs/null_subr.c
index bb0ff9966dfd..d7f847d449d0 100644
--- a/sys/fs/nullfs/null_subr.c
+++ b/sys/fs/nullfs/null_subr.c
@@ -181,7 +181,7 @@ null_hashins(struct mount *mp, struct null_node *xp)
 
 	hd = NULL_NHASH(xp->null_lowervp);
 #ifdef INVARIANTS
-	CK_LIST_FOREACH(oxp, hd, null_hash) {
+	CK_SLIST_FOREACH(oxp, hd, null_hash) {
 		if (oxp->null_lowervp == xp->null_lowervp &&
 		    NULLTOV(oxp)->v_mount == mp) {
 			VNASSERT(0, NULLTOV(oxp),
diff --git a/sys/geom/part/g_part.c b/sys/geom/part/g_part.c
index 4c0d0c3aa902..1e4236507fa4 100644
--- a/sys/geom/part/g_part.c
+++ b/sys/geom/part/g_part.c
@@ -122,13 +122,13 @@ struct g_part_alias_list {
 	{ "ntfs", G_PART_ALIAS_MS_NTFS },
 	{ "openbsd-data", G_PART_ALIAS_OPENBSD_DATA },
 	{ "prep-boot", G_PART_ALIAS_PREP_BOOT },
-        { "solaris-boot", G_PART_ALIAS_SOLARIS_BOOT },
-        { "solaris-root", G_PART_ALIAS_SOLARIS_ROOT },
-        { "solaris-swap", G_PART_ALIAS_SOLARIS_SWAP },
-        { "solaris-backup", G_PART_ALIAS_SOLARIS_BACKUP },
-        { "solaris-var", G_PART_ALIAS_SOLARIS_VAR },
-        { "solaris-home", G_PART_ALIAS_SOLARIS_HOME },
-        { "solaris-altsec", G_PART_ALIAS_SOLARIS_ALTSEC },
+	{ "solaris-boot", G_PART_ALIAS_SOLARIS_BOOT },
+	{ "solaris-root", G_PART_ALIAS_SOLARIS_ROOT },
+	{ "solaris-swap", G_PART_ALIAS_SOLARIS_SWAP },
+	{ "solaris-backup", G_PART_ALIAS_SOLARIS_BACKUP },
+	{ "solaris-var", G_PART_ALIAS_SOLARIS_VAR },
+	{ "solaris-home", G_PART_ALIAS_SOLARIS_HOME },
+	{ "solaris-altsec", G_PART_ALIAS_SOLARIS_ALTSEC },
 	{ "solaris-reserved", G_PART_ALIAS_SOLARIS_RESERVED },
 	{ "u-boot-env", G_PART_ALIAS_U_BOOT_ENV },
 	{ "vmware-reserved", G_PART_ALIAS_VMRESERVED },
diff --git a/sys/i386/acpica/acpi_wakeup.c b/sys/i386/acpica/acpi_wakeup.c
index 2d60d5e037a0..96be64de017b 100644
--- a/sys/i386/acpica/acpi_wakeup.c
+++ b/sys/i386/acpica/acpi_wakeup.c
@@ -84,7 +84,7 @@ static cpuset_t		suspcpus;
 static struct susppcb	**susppcbs;
 #endif
 
-static void		acpi_stop_beep(void *);
+static void		acpi_stop_beep(void *, enum power_stype);
 
 #ifdef SMP
 static int		acpi_wakeup_ap(struct acpi_softc *, int);
@@ -100,7 +100,7 @@ static void		acpi_wakeup_cpus(struct acpi_softc *);
 } while (0)
 
 static void
-acpi_stop_beep(void *arg)
+acpi_stop_beep(void *arg, enum power_stype stype)
 {
 
 	if (acpi_resume_beep != 0)
diff --git a/sys/net80211/ieee80211.c b/sys/net80211/ieee80211.c
index 2b7cf635b9f5..1299f86ebdc7 100644
--- a/sys/net80211/ieee80211.c
+++ b/sys/net80211/ieee80211.c
@@ -2689,13 +2689,18 @@ ieee80211_channel_type_char(const struct ieee80211_channel *c)
 	return 'f';
 }
 
-/*
- * Determine whether the given key in the given VAP is a global key.
+/**
+ * @brief Determine whether the given key in the given VAP is a global key.
+ *
  * (key index 0..3, shared between all stations on a VAP.)
  *
  * This is either a WEP key or a GROUP key.
  *
  * Note this will NOT return true if it is a IGTK key.
+ *
+ * @param vap the current VAP
+ * @param key ieee80211_key to use/check
+ * @returns true if it's a global/WEP key, false otherwise
  */
 bool
 ieee80211_is_key_global(const struct ieee80211vap *vap,
@@ -2705,8 +2710,23 @@ ieee80211_is_key_global(const struct ieee80211vap *vap,
 	    key < &vap->iv_nw_keys[IEEE80211_WEP_NKID]);
 }
 
-/*
- * Determine whether the given key in the given VAP is a unicast key.
+/**
+ * @brief Determine whether the given key in the given VAP is a unicast key.
+ *
+ * This only returns true if it's a unicast key.
+ *
+ * Note: For now net80211 only supports a single unicast key, stored in
+ * an ieee80211_node entry.
+ *
+ * Code should use this to know if it's a unicast key and then call
+ * ieee80211_crypto_get_keyid() to get the 802.11 key ID (0..3 for
+ * unicast/global keys, 4..5 for IGTK keys.)  Since the unicast
+ * and global key indexes "overlap", callers will need to check
+ * both the type and id.
+ *
+ * @param vap the current VAP
+ * @param key ieee80211_key to use/check
+ * @returns true if the key is a unicast key, false if it is not
  */
 bool
 ieee80211_is_key_unicast(const struct ieee80211vap *vap,
diff --git a/sys/net80211/ieee80211_crypto.c b/sys/net80211/ieee80211_crypto.c
index 1e63ca46f28f..566f0b2e0c23 100644
--- a/sys/net80211/ieee80211_crypto.c
+++ b/sys/net80211/ieee80211_crypto.c
@@ -611,11 +611,15 @@ ieee80211_crypto_setkey(struct ieee80211vap *vap, struct ieee80211_key *key)
 	return dev_key_set(vap, key);
 }
 
-/*
- * Return index if the key is a WEP key (0..3); -1 otherwise.
+/**
+ * @brief Return index if the key is a WEP key (0..3); -1 otherwise.
  *
  * This is different to "get_keyid" which defaults to returning
  * 0 for unicast keys; it assumes that it won't be used for WEP.
+ *
+ * @param vap the current VAP
+ * @param k ieee80211_key to check
+ * @returns 0..3 if it's a global/WEP key, -1 otherwise.
  */
 int
 ieee80211_crypto_get_key_wepidx(const struct ieee80211vap *vap,
@@ -628,8 +632,18 @@ ieee80211_crypto_get_key_wepidx(const struct ieee80211vap *vap,
 	return (-1);
 }
 
-/*
- * Note: only supports a single unicast key (0).
+/**
+ * @brief Return the index of a unicast, global or IGTK key.
+ *
+ * Return the index of a key.  For unicast keys the index is 0..1.
+ * For global/WEP keys it's 0..3.  For IGTK keys it's 4..5.
+ *
+ * TODO: support >1 unicast key
+ * TODO: support IGTK keys
+ *
+ * @param vap the current VAP
+ * @param k ieee80211_key to check
+ * @returns 0..3 for a WEP/global key, 0..1 for unicast key, 4..5 for IGTK key
  */
 uint8_t
 ieee80211_crypto_get_keyid(struct ieee80211vap *vap, struct ieee80211_key *k)
@@ -641,6 +655,19 @@ ieee80211_crypto_get_keyid(struct ieee80211vap *vap, struct ieee80211_key *k)
 	return (0);
 }
 
+/**
+ * @brief Return the key to use for encrypting an mbuf frame to a node
+ *
+ * This routine chooses a suitable key used to encrypt the given frame with.
+ * It doesn't do the encryption; it only chooses the key.  If a key is not
+ * available then the routine will return NULL.
+ *
+ * It's up to the caller to enforce whether a key is absolutely required or not.
+ *
+ * @param ni The ieee80211_node to send the frame to
+ * @param m the mbuf to encrypt
+ * @returns the ieee80211_key to encrypt with, or NULL if there's no suitable key
+ */
 struct ieee80211_key *
 ieee80211_crypto_get_txkey(struct ieee80211_node *ni, struct mbuf *m)
 {
@@ -676,8 +703,28 @@ ieee80211_crypto_get_txkey(struct ieee80211_node *ni, struct mbuf *m)
 	return &ni->ni_ucastkey;
 }
 
-/*
- * Add privacy headers appropriate for the specified key.
+/**
+ * @brief Privacy encapsulate and encrypt the given mbuf.
+ *
+ * This routine handles the mechanics of encryption - expanding the
+ * mbuf to add privacy headers, IV, ICV, MIC, MMIC, and then encrypts
+ * the given mbuf if required.
+ *
+ * This should be called by the driver in its TX path as part of
+ * encapsulation before passing frames to the hardware/firmware
+ * queues.
+ *
+ * Drivers/hardware which implement their own fully offloaded path
+ * should still call this for completeness - it indicates to the
+ * driver that the frame itself should be encrypted.
+ *
+ * The driver should have set capability bits in the attach /
+ * key allocation path to disable various encapsulation/encryption
+ * features.
+ *
+ * @param ni ieee80211_node for this frame
+ * @param m mbuf to modify
+ * @returns the key used if the frame is to be encrypted, NULL otherwise
  */
 struct ieee80211_key *
 ieee80211_crypto_encap(struct ieee80211_node *ni, struct mbuf *m)
@@ -693,9 +740,31 @@ ieee80211_crypto_encap(struct ieee80211_node *ni, struct mbuf *m)
 	return NULL;
 }
 
-/*
- * Validate and strip privacy headers (and trailer) for a
- * received frame that has the WEP/Privacy bit set.
+/**
+ * @brief Decapsulate and validate an encrypted frame.
+ *
+ * This handles an encrypted frame (one with the privacy bit set.)
+ * It also obeys the key / config / receive packet flags for how
+ * the driver says it has already been processed.
+ *
+ * Unlike ieee80211_crypto_encap(), this isn't called in the driver.
+ * Instead, drivers pass up the potentially decrypted frame - fully,
+ * partially, or not at all - and net80211 will call this as appropriate.
+ *
+ * This handles NICs (like ath(4)) which have a variable size between
+ * the 802.11 header and 802.11 payload due to DMA alignment / encryption
+ * engine concerns.
+ *
+ * If the frame was decrypted and validated successfully then 1 is returned
+ * and the mbuf can be treated as an 802.11 frame.  If it is not decrypted
+ * successfully or it was decrypted but failed validation/checks, then
+ * 0 is returned.
+ *
+ * @param ni ieee80211_node for received frame
+ * @param m mbuf frame to receive
+ * @param hdrlen length of the 802.11 header, including trailing null bytes
+ * @param key pointer to ieee80211_key that will be set if appropriate
+ * @returns 0 if the frame wasn't decrypted/validated, 1 if decrypted/validated.
  */
 int
 ieee80211_crypto_decap(struct ieee80211_node *ni, struct mbuf *m, int hdrlen,
diff --git a/sys/netinet/tcp_lro.c b/sys/netinet/tcp_lro.c
index 64efa4bf060f..9b5baf115855 100644
--- a/sys/netinet/tcp_lro.c
+++ b/sys/netinet/tcp_lro.c
@@ -1475,10 +1475,11 @@ tcp_lro_queue_mbuf(struct lro_ctrl *lc, struct mbuf *mb)
  	}
 
 	/* create sequence number */
-	lc->lro_mbuf_data[lc->lro_mbuf_count].seq =
-	    (((uint64_t)M_HASHTYPE_GET(mb)) << 56) |
-	    (((uint64_t)mb->m_pkthdr.flowid) << 24) |
-	    ((uint64_t)lc->lro_mbuf_count);
+	lc->lro_mbuf_data[lc->lro_mbuf_count].seq = lc->lro_mbuf_count;
+	if (M_HASHTYPE_ISHASH(mb))
+		lc->lro_mbuf_data[lc->lro_mbuf_count].seq |=
+		    (((uint64_t)M_HASHTYPE_GET(mb)) << 56) |
+		    (((uint64_t)mb->m_pkthdr.flowid) << 24);
 
 	/* enter mbuf */
 	lc->lro_mbuf_data[lc->lro_mbuf_count].mb = mb;
diff --git a/sys/netinet/tcp_syncache.c b/sys/netinet/tcp_syncache.c
index 2bb99596f965..f842a5678fa1 100644
--- a/sys/netinet/tcp_syncache.c
+++ b/sys/netinet/tcp_syncache.c
@@ -1285,7 +1285,8 @@ syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
 				    "segment rejected\n",
 				    s, __func__, th->th_ack, sc->sc_iss + 1);
 			SCH_UNLOCK(sch);
-			goto failed;
+			free(s, M_TCPLOG);
+			return (0);  /* Do send RST, do not free sc. */
 		}
 
 		TAILQ_REMOVE(&sch->sch_bucket, sc, sc_hash);
diff --git a/sys/netinet6/in6.c b/sys/netinet6/in6.c
index 4f756a75fac7..b98703bdfbfe 100644
--- a/sys/netinet6/in6.c
+++ b/sys/netinet6/in6.c
@@ -1295,8 +1295,8 @@ in6_addifaddr(struct ifnet *ifp, struct in6_aliasreq *ifra, struct in6_ifaddr *i
 	 */
 	bzero(&pr0, sizeof(pr0));
 	pr0.ndpr_ifp = ifp;
-	pr0.ndpr_plen = in6_mask2len(&ifra->ifra_prefixmask.sin6_addr,
-	    NULL);
+	pr0.ndpr_plen = ia->ia_plen =
+	    in6_mask2len(&ifra->ifra_prefixmask.sin6_addr, NULL);
 	if (pr0.ndpr_plen == 128) {
 		/* we don't need to install a host route. */
 		goto aifaddr_out;
@@ -1490,16 +1490,16 @@ in6_unlink_ifa(struct in6_ifaddr *ia, struct ifnet *ifp)
 	 * positive reference.
 	 */
 	remove_lle = 0;
-	if (ia->ia6_ndpr == NULL) {
-		nd6log((LOG_NOTICE,
-		    "in6_unlink_ifa: autoconf'ed address "
-		    "%s has no prefix\n", ip6_sprintf(ip6buf, IA6_IN6(ia))));
-	} else {
+	if (ia->ia6_ndpr != NULL) {
 		ia->ia6_ndpr->ndpr_addrcnt--;
 		/* Do not delete lles within prefix if refcont != 0 */
 		if (ia->ia6_ndpr->ndpr_addrcnt == 0)
 			remove_lle = 1;
 		ia->ia6_ndpr = NULL;
+	} else if (ia->ia_plen < 128) {
+		nd6log((LOG_NOTICE,
+		    "in6_unlink_ifa: autoconf'ed address "
+		    "%s has no prefix\n", ip6_sprintf(ip6buf, IA6_IN6(ia))));
 	}
 
 	nd6_rem_ifa_lle(ia, remove_lle);
diff --git a/sys/netlink/netlink_snl.h b/sys/netlink/netlink_snl.h
index 6dd8a9cbdb35..57f7e1e29d08 100644
--- a/sys/netlink/netlink_snl.h
+++ b/sys/netlink/netlink_snl.h
@@ -1068,14 +1068,14 @@ snl_init_writer(struct snl_state *ss, struct snl_writer *nw)
 {
 	nw->size = SNL_WRITER_BUFFER_SIZE;
 	nw->base = (char *)snl_allocz(ss, nw->size);
-	if (nw->base == NULL) {
+	if (__predict_false(nw->base == NULL)) {
 		nw->error = true;
 		nw->size = 0;
-	}
+	} else
+		nw->error = false;
 
 	nw->offset = 0;
 	nw->hdr = NULL;
-	nw->error = false;
 	nw->ss = ss;
 }
 
diff --git a/sys/sys/eventhandler.h b/sys/sys/eventhandler.h
index c0d9811dd1b9..29a16b393b52 100644
--- a/sys/sys/eventhandler.h
+++ b/sys/sys/eventhandler.h
@@ -33,6 +33,7 @@
 #include <sys/lock.h>
 #include <sys/ktr.h>
 #include <sys/mutex.h>
+#include <sys/power.h>
 #include <sys/queue.h>
 
 #ifdef VIMAGE
@@ -201,7 +202,7 @@ EVENTHANDLER_DECLARE(shutdown_post_sync, shutdown_fn);	/* after fs sync */
 EVENTHANDLER_DECLARE(shutdown_final, shutdown_fn);
 
 /* Power state change events */
-typedef void (*power_change_fn)(void *);
+typedef void (*power_change_fn)(void *, enum power_stype stype);
 EVENTHANDLER_DECLARE(power_resume, power_change_fn);
 EVENTHANDLER_DECLARE(power_suspend, power_change_fn);
 EVENTHANDLER_DECLARE(power_suspend_early, power_change_fn);
diff --git a/sys/vm/uma_core.c b/sys/vm/uma_core.c
index 679b2e20e88b..b80b5cc781f7 100644
--- a/sys/vm/uma_core.c
+++ b/sys/vm/uma_core.c
@@ -4009,21 +4009,15 @@ restart:
 	/*
 	 * Use the keg's policy if upper layers haven't already specified a
 	 * domain (as happens with first-touch zones).
-	 *
-	 * To avoid races we run the iterator with the keg lock held, but that
-	 * means that we cannot allow the vm_domainset layer to sleep.  Thus,
-	 * clear M_WAITOK and handle low memory conditions locally.
 	 */
 	rr = rdomain == UMA_ANYDOMAIN;
+	aflags = flags;
 	if (rr) {
-		aflags = (flags & ~M_WAITOK) | M_NOWAIT;
 		if (vm_domainset_iter_policy_ref_init(&di, &keg->uk_dr, &domain,
 		    &aflags) != 0)
 			return (NULL);
-	} else {
-		aflags = flags;
+	} else
 		domain = rdomain;
-	}
 
 	for (;;) {
 		slab = keg_fetch_free_slab(keg, domain, rr, flags);
@@ -4053,13 +4047,8 @@ restart:
 			if ((flags & M_WAITOK) == 0)
 				break;
 			vm_wait_domain(domain);
-		} else if (vm_domainset_iter_policy(&di, &domain) != 0) {
-			if ((flags & M_WAITOK) != 0) {
-				vm_wait_doms(&keg->uk_dr.dr_policy->ds_mask, 0);
-				goto restart;
-			}
+		} else if (vm_domainset_iter_policy(&di, &domain) != 0)
 			break;
-		}
 	}
 
 	/*
@@ -5245,7 +5234,7 @@ uma_prealloc(uma_zone_t zone, int items)
 	KEG_GET(zone, keg);
 	slabs = howmany(items, keg->uk_ipers);
 	while (slabs-- > 0) {
-		aflags = M_NOWAIT;
+		aflags = M_WAITOK;
 		if (vm_domainset_iter_policy_ref_init(&di, &keg->uk_dr, &domain,
 		    &aflags) != 0)
 			panic("%s: Domainset is empty", __func__);
@@ -5266,7 +5255,8 @@ uma_prealloc(uma_zone_t zone, int items)
 				break;
 			}
 			if (vm_domainset_iter_policy(&di, &domain) != 0)
-				vm_wait_doms(&keg->uk_dr.dr_policy->ds_mask, 0);
+				panic("%s: Cannot allocate from any domain",
+				    __func__);
 		}
 	}
 }
diff --git a/sys/x86/x86/mca.c b/sys/x86/x86/mca.c
index e43c88b3a27b..735efe307215 100644
--- a/sys/x86/x86/mca.c
+++ b/sys/x86/x86/mca.c
@@ -46,9 +46,11 @@
 #include <sys/malloc.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
+#include <sys/sbuf.h>
 #include <sys/sched.h>
 #include <sys/smp.h>
 #include <sys/sysctl.h>
+#include <sys/syslog.h>
 #include <sys/systm.h>
 #include <sys/taskqueue.h>
 #include <machine/intr_machdep.h>
@@ -124,6 +126,22 @@ SYSCTL_INT(_hw_mca, OID_AUTO, erratum383, CTLFLAG_RDTUN,
     &workaround_erratum383, 0,
     "Is the workaround for Erratum 383 on AMD Family 10h processors enabled?");
 
+#ifdef DIAGNOSTIC
+static uint64_t fake_status;
+SYSCTL_U64(_hw_mca, OID_AUTO, fake_status, CTLFLAG_RW,
+    &fake_status, 0,
+    "Insert artificial MCA with given status (testing purpose only)");
+static int fake_bank;
+SYSCTL_INT(_hw_mca, OID_AUTO, fake_bank, CTLFLAG_RW,
+    &fake_bank, 0,
+    "Bank to use for artificial MCAs (testing purpose only)");
+#endif
+
+static bool mca_uselog = false;
+SYSCTL_BOOL(_hw_mca, OID_AUTO, uselog, CTLFLAG_RWTUN, &mca_uselog, 0,
+    "Should the system send non-fatal machine check errors to the log "
+    "(instead of the console)?");
+
 static STAILQ_HEAD(, mca_internal) mca_freelist;
 static int mca_freecount;
 static STAILQ_HEAD(, mca_internal) mca_records;
@@ -136,12 +154,40 @@ static struct timeout_task mca_scan_task;
 static struct mtx mca_lock;
 static bool mca_startup_done = false;
 
-/* Statistics on number of MCA events by type, updated atomically. */
+/* Static buffer to compose messages while in an interrupt context. */
+static char mca_msg_buf[1024];
+static struct mtx mca_msg_buf_lock;
+
+/* Statistics on number of MCA events by type, updated with mca_lock held. */
 static uint64_t mca_stats[MCA_T_COUNT];
 SYSCTL_OPAQUE(_hw_mca, OID_AUTO, stats, CTLFLAG_RD | CTLFLAG_SKIP,
     mca_stats, MCA_T_COUNT * sizeof(mca_stats[0]),
     "S", "Array of MCA events by type");
 
+/* Variables to track and control message rate limiting. */
+static struct timeval mca_last_log_time;
+static struct timeval mca_log_interval;
+static int mca_log_skipped;
+
+static int
+sysctl_mca_log_interval(SYSCTL_HANDLER_ARGS)
+{
+	int error;
+	u_int val;
+
+	val = mca_log_interval.tv_sec;
+	error = sysctl_handle_int(oidp, &val, 0, req);
+	if (error != 0 || req->newptr == NULL)
+		return (error);
+	mca_log_interval.tv_sec = val;
+	return (0);
+}
+SYSCTL_PROC(_hw_mca, OID_AUTO, log_interval,
+    CTLTYPE_UINT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, &mca_log_interval, 0,
+    sysctl_mca_log_interval, "IU",
+    "Minimum number of seconds between logging correctable MCAs"
+    " (0 = no limit)");
+
 static unsigned int
 mca_ia32_ctl_reg(int bank)
 {
@@ -437,98 +483,111 @@ mca_mute(const struct mca_record *rec)
 
 /* Dump details about a single machine check. */
 static void
-mca_log(const struct mca_record *rec)
+mca_log(enum scan_mode mode, const struct mca_record *rec, bool fatal)
 {
+	int error, numskipped;
 	uint16_t mca_error;
 	enum mca_stat_types event_type;
+	struct sbuf sb;
+	bool uncor, using_shared_buf;
 
 	if (mca_mute(rec))
 		return;
 
-	if (!log_corrected && (rec->mr_status & MC_STATUS_UC) == 0 &&
-	    (!tes_supported(rec->mr_mcg_cap) ||
+	uncor = (rec->mr_status & MC_STATUS_UC) != 0;
+
+	if (!log_corrected && !uncor && (!tes_supported(rec->mr_mcg_cap) ||
 	    ((rec->mr_status & MC_STATUS_TES_STATUS) >> 53) != 0x2))
 		return;
 
-	printf("MCA: Bank %d, Status 0x%016llx\n", rec->mr_bank,
+	/* Try to use an allocated buffer when not in an interrupt context. */
+	if (mode == POLLED && sbuf_new(&sb, NULL, 512, SBUF_AUTOEXTEND) != NULL)
+		using_shared_buf = false;
+	else {
+		using_shared_buf = true;
+		mtx_lock_spin(&mca_msg_buf_lock);
+		sbuf_new(&sb, mca_msg_buf, sizeof(mca_msg_buf), SBUF_FIXEDLEN);
+	}
+
+	sbuf_printf(&sb, "MCA: Bank %d, Status 0x%016llx\n", rec->mr_bank,
 	    (long long)rec->mr_status);
-	printf("MCA: Global Cap 0x%016llx, Status 0x%016llx\n",
+	sbuf_printf(&sb, "MCA: Global Cap 0x%016llx, Status 0x%016llx\n",
 	    (long long)rec->mr_mcg_cap, (long long)rec->mr_mcg_status);
-	printf("MCA: Vendor \"%s\", ID 0x%x, APIC ID %d\n", cpu_vendor,
-	    rec->mr_cpu_id, rec->mr_apic_id);
-	printf("MCA: CPU %d ", rec->mr_cpu);
+	sbuf_printf(&sb, "MCA: Vendor \"%s\", ID 0x%x, APIC ID %d\n",
+	    cpu_vendor, rec->mr_cpu_id, rec->mr_apic_id);
+	sbuf_printf(&sb, "MCA: CPU %d ", rec->mr_cpu);
 	if (rec->mr_status & MC_STATUS_UC)
-		printf("UNCOR ");
+		sbuf_printf(&sb, "UNCOR ");
 	else {
-		printf("COR ");
+		sbuf_printf(&sb, "COR ");
 		if (cmci_supported(rec->mr_mcg_cap))
-			printf("(%lld) ", ((long long)rec->mr_status &
+			sbuf_printf(&sb, "(%lld) ", ((long long)rec->mr_status &
 			    MC_STATUS_COR_COUNT) >> 38);
 		if (tes_supported(rec->mr_mcg_cap)) {
 			switch ((rec->mr_status & MC_STATUS_TES_STATUS) >> 53) {
 			case 0x1:
-				printf("(Green) ");
+				sbuf_printf(&sb, "(Green) ");
 				break;
 			case 0x2:
-				printf("(Yellow) ");
+				sbuf_printf(&sb, "(Yellow) ");
 				break;
 			}
 		}
 	}
 	if (rec->mr_status & MC_STATUS_EN)
-		printf("EN ");
+		sbuf_printf(&sb, "EN ");
 	if (rec->mr_status & MC_STATUS_PCC)
-		printf("PCC ");
+		sbuf_printf(&sb, "PCC ");
 	if (ser_supported(rec->mr_mcg_cap)) {
 		if (rec->mr_status & MC_STATUS_S)
-			printf("S ");
+			sbuf_printf(&sb, "S ");
 		if (rec->mr_status & MC_STATUS_AR)
-			printf("AR ");
+			sbuf_printf(&sb, "AR ");
 	}
 	if (rec->mr_status & MC_STATUS_OVER)
-		printf("OVER ");
+		sbuf_printf(&sb, "OVER ");
 	mca_error = rec->mr_status & MC_STATUS_MCA_ERROR;
 	event_type = MCA_T_COUNT;
 	switch (mca_error) {
 		/* Simple error codes. */
 	case 0x0000:
-		printf("no error");
+		sbuf_printf(&sb, "no error");
 		event_type = MCA_T_NONE;
 		break;
 	case 0x0001:
-		printf("unclassified error");
+		sbuf_printf(&sb, "unclassified error");
 		event_type = MCA_T_UNCLASSIFIED;
 		break;
 	case 0x0002:
-		printf("ucode ROM parity error");
+		sbuf_printf(&sb, "ucode ROM parity error");
 		event_type = MCA_T_UCODE_ROM_PARITY;
 		break;
 	case 0x0003:
-		printf("external error");
+		sbuf_printf(&sb, "external error");
 		event_type = MCA_T_EXTERNAL;
 		break;
 	case 0x0004:
-		printf("FRC error");
+		sbuf_printf(&sb, "FRC error");
 		event_type = MCA_T_FRC;
 		break;
 	case 0x0005:
-		printf("internal parity error");
+		sbuf_printf(&sb, "internal parity error");
 		event_type = MCA_T_INTERNAL_PARITY;
 		break;
 	case 0x0006:
-		printf("SMM handler code access violation");
+		sbuf_printf(&sb, "SMM handler code access violation");
 		event_type = MCA_T_SMM_HANDLER;
 		break;
 	case 0x0400:
-		printf("internal timer error");
+		sbuf_printf(&sb, "internal timer error");
 		event_type = MCA_T_INTERNAL_TIMER;
 		break;
 	case 0x0e0b:
-		printf("generic I/O error");
+		sbuf_printf(&sb, "generic I/O error");
 		event_type = MCA_T_GENERIC_IO;
 		if (rec->mr_cpu_vendor_id == CPU_VENDOR_INTEL &&
 		    (rec->mr_status & MC_STATUS_MISCV)) {
-			printf(" (pci%d:%d:%d:%d)",
+			sbuf_printf(&sb, " (pci%d:%d:%d:%d)",
 			    (int)((rec->mr_misc & MC_MISC_PCIE_SEG) >> 32),
 			    (int)((rec->mr_misc & MC_MISC_PCIE_BUS) >> 24),
 			    (int)((rec->mr_misc & MC_MISC_PCIE_SLOT) >> 19),
@@ -537,7 +596,8 @@ mca_log(const struct mca_record *rec)
 		break;
 	default:
 		if ((mca_error & 0xfc00) == 0x0400) {
-			printf("internal error %x", mca_error & 0x03ff);
+			sbuf_printf(&sb, "internal error %x",
+			    mca_error & 0x03ff);
 			event_type = MCA_T_INTERNAL;
 			break;
 		}
@@ -546,14 +606,16 @@ mca_log(const struct mca_record *rec)
 
 		/* Memory hierarchy error. */
 		if ((mca_error & 0xeffc) == 0x000c) {
-			printf("%s memory error", mca_error_level(mca_error));
+			sbuf_printf(&sb, "%s memory error",
+			    mca_error_level(mca_error));
 			event_type = MCA_T_MEMORY;
 			break;
 		}
 
 		/* TLB error. */
 		if ((mca_error & 0xeff0) == 0x0010) {
-			printf("%sTLB %s error", mca_error_ttype(mca_error),
+			sbuf_printf(&sb, "%sTLB %s error",
+			    mca_error_ttype(mca_error),
 			    mca_error_level(mca_error));
 			event_type = MCA_T_TLB;
 			break;
@@ -561,19 +623,19 @@ mca_log(const struct mca_record *rec)
 
 		/* Memory controller error. */
 		if ((mca_error & 0xef80) == 0x0080) {
-			printf("%s channel ", mca_error_mmtype(mca_error,
-			    &event_type));
+			sbuf_printf(&sb, "%s channel ",
+			    mca_error_mmtype(mca_error, &event_type));
 			if ((mca_error & 0x000f) != 0x000f)
-				printf("%d", mca_error & 0x000f);
+				sbuf_printf(&sb, "%d", mca_error & 0x000f);
 			else
-				printf("??");
-			printf(" memory error");
+				sbuf_printf(&sb, "??");
+			sbuf_printf(&sb, " memory error");
 			break;
 		}
 
 		/* Cache error. */
 		if ((mca_error & 0xef00) == 0x0100) {
-			printf("%sCACHE %s %s error",
+			sbuf_printf(&sb, "%sCACHE %s %s error",
 			    mca_error_ttype(mca_error),
 			    mca_error_level(mca_error),
 			    mca_error_request(mca_error));
@@ -583,77 +645,129 @@ mca_log(const struct mca_record *rec)
 
 		/* Extended memory error. */
 		if ((mca_error & 0xef80) == 0x0280) {
-			printf("%s channel ", mca_error_mmtype(mca_error,
-			    &event_type));
+			sbuf_printf(&sb, "%s channel ",
+			    mca_error_mmtype(mca_error, &event_type));
 			if ((mca_error & 0x000f) != 0x000f)
-				printf("%d", mca_error & 0x000f);
+				sbuf_printf(&sb, "%d", mca_error & 0x000f);
 			else
-				printf("??");
-			printf(" extended memory error");
+				sbuf_printf(&sb, "??");
+			sbuf_printf(&sb, " extended memory error");
 			break;
 		}
 
 		/* Bus and/or Interconnect error. */
 		if ((mca_error & 0xe800) == 0x0800) {
-			printf("BUS%s ", mca_error_level(mca_error));
+			sbuf_printf(&sb, "BUS%s ", mca_error_level(mca_error));
 			event_type = MCA_T_BUS;
 			switch ((mca_error & 0x0600) >> 9) {
 			case 0:
-				printf("Source");
+				sbuf_printf(&sb, "Source");
 				break;
 			case 1:
-				printf("Responder");
+				sbuf_printf(&sb, "Responder");
 				break;
 			case 2:
-				printf("Observer");
+				sbuf_printf(&sb, "Observer");
 				break;
 			default:
-				printf("???");
+				sbuf_printf(&sb, "???");
 				break;
 			}
-			printf(" %s ", mca_error_request(mca_error));
+			sbuf_printf(&sb, " %s ", mca_error_request(mca_error));
 			switch ((mca_error & 0x000c) >> 2) {
 			case 0:
-				printf("Memory");
+				sbuf_printf(&sb, "Memory");
 				break;
 			case 2:
-				printf("I/O");
+				sbuf_printf(&sb, "I/O");
 				break;
 			case 3:
-				printf("Other");
+				sbuf_printf(&sb, "Other");
 				break;
 			default:
-				printf("???");
+				sbuf_printf(&sb, "???");
 				break;
 			}
 			if (mca_error & 0x0100)
-				printf(" timed out");
+				sbuf_printf(&sb, " timed out");
 			break;
 		}
 
-		printf("unknown error %x", mca_error);
+		sbuf_printf(&sb, "unknown error %x", mca_error);
 		event_type = MCA_T_UNKNOWN;
 		break;
 	}
-	printf("\n");
+	sbuf_printf(&sb, "\n");
 	if (rec->mr_status & MC_STATUS_ADDRV) {
-		printf("MCA: Address 0x%llx", (long long)rec->mr_addr);
+		sbuf_printf(&sb, "MCA: Address 0x%llx",
+		    (long long)rec->mr_addr);
 		if (ser_supported(rec->mr_mcg_cap) &&
 		    (rec->mr_status & MC_STATUS_MISCV)) {
-			printf(" (Mode: %s, LSB: %d)",
+			sbuf_printf(&sb, " (Mode: %s, LSB: %d)",
 			    mca_addres_mode(rec->mr_misc),
 			    (int)(rec->mr_misc & MC_MISC_RA_LSB));
 		}
-		printf("\n");
+		sbuf_printf(&sb, "\n");
 	}
 	if (rec->mr_status & MC_STATUS_MISCV)
-		printf("MCA: Misc 0x%llx\n", (long long)rec->mr_misc);
+		sbuf_printf(&sb, "MCA: Misc 0x%llx\n", (long long)rec->mr_misc);
+
 	if (event_type < 0 || event_type >= MCA_T_COUNT) {
 		KASSERT(0, ("%s: invalid event type (%d)", __func__,
 		    event_type));
 		event_type = MCA_T_UNKNOWN;
 	}
-	atomic_add_64(&mca_stats[event_type], 1);
+	numskipped = 0;
+	if (!fatal && !uncor) {
+		/*
+		 * Update statistics and check the rate limit for
+		 * correctable errors. The rate limit is only applied
+		 * after the system records a reasonable number of errors
+		 * of the same type. The goal is to reduce the impact of
+		 * the system seeing and attempting to log a burst of
+		 * similar errors, which (especially when printed to the
+		 * console) can be expensive.
+		 */
+		mtx_lock_spin(&mca_lock);
+		mca_stats[event_type]++;
+		if (mca_log_interval.tv_sec > 0 && mca_stats[event_type] > 50 &&
+		    ratecheck(&mca_last_log_time, &mca_log_interval) == 0) {
+			mca_log_skipped++;
+			mtx_unlock_spin(&mca_lock);
+			goto done;
+		}
+		numskipped = mca_log_skipped;
+		mca_log_skipped = 0;
+		mtx_unlock_spin(&mca_lock);
+	}
+
+	error = sbuf_finish(&sb);
+	if (fatal || !mca_uselog) {
+		if (numskipped > 0)
+			printf("MCA: %d events skipped due to rate limit\n",
+			    numskipped);
+		if (error)
+			printf("MCA: error logging message (sbuf error %d)\n",
+			    error);
+		else
+			sbuf_putbuf(&sb);
+	} else {
+		if (numskipped > 0)
+			log(LOG_ERR,
+			    "MCA: %d events skipped due to rate limit\n",
+			    numskipped);
+		if (error)
+			log(LOG_ERR,
+			    "MCA: error logging message (sbuf error %d)\n",
+			    error);
+		else
+			log(uncor ? LOG_CRIT : LOG_ERR, "%s", sbuf_data(&sb));
+	}
+
+done:
+	sbuf_delete(&sb);
+	if (using_shared_buf)
+		mtx_unlock_spin(&mca_msg_buf_lock);
 }
 
 static bool
@@ -701,8 +815,24 @@ mca_check_status(enum scan_mode mode, uint64_t mcg_cap, int bank,
 	bool mce, recover;
 
 	status = rdmsr(mca_msr_ops.status(bank));
-	if (!(status & MC_STATUS_VAL))
+	if (!(status & MC_STATUS_VAL)) {
+#ifdef DIAGNOSTIC
+		/*
+		 * Check if we have a pending artificial event to generate.
+		 * Note that this is potentially racy with the sysctl. The
+		 * tradeoff is deemed acceptable given the test nature
+		 * of the code.
+		 */
+		if (fake_status && bank == fake_bank) {
+			status = fake_status;
+			fake_status = 0;
+		}
+		if (!(status & MC_STATUS_VAL))
+			return (0);
+#else
 		return (0);
+#endif
+	}
 
 	recover = *recoverablep;
 	mce = mca_is_mce(mcg_cap, status, &recover);
@@ -796,9 +926,9 @@ mca_record_entry(enum scan_mode mode, const struct mca_record *record)
 		mtx_lock_spin(&mca_lock);
 		rec = STAILQ_FIRST(&mca_freelist);
 		if (rec == NULL) {
-			printf("MCA: Unable to allocate space for an event.\n");
-			mca_log(record);
 			mtx_unlock_spin(&mca_lock);
+			printf("MCA: Unable to allocate space for an event.\n");
+			mca_log(mode, record, false);
 			return;
 		}
 		STAILQ_REMOVE_HEAD(&mca_freelist, link);
@@ -955,7 +1085,7 @@ mca_scan(enum scan_mode mode, bool *recoverablep)
 			if (*recoverablep)
 				mca_record_entry(mode, &rec);
 			else
-				mca_log(&rec);
+				mca_log(mode, &rec, true);
 		}
 
 #ifdef DEV_APIC
@@ -1017,6 +1147,7 @@ static void
 mca_process_records(enum scan_mode mode)
 {
 	struct mca_internal *mca;
+	STAILQ_HEAD(, mca_internal) tmplist;
 
 	/*
 	 * If in an interrupt context, defer the post-scan activities to a
@@ -1028,10 +1159,21 @@ mca_process_records(enum scan_mode mode)
 		return;
 	}
 
+	/*
+	 * Copy the pending list to the stack so we can drop the spin lock
+	 * while we are emitting logs.
+	 */
+	STAILQ_INIT(&tmplist);
+	mtx_lock_spin(&mca_lock);
+	STAILQ_SWAP(&mca_pending, &tmplist, mca_internal);
+	mtx_unlock_spin(&mca_lock);
+
+	STAILQ_FOREACH(mca, &tmplist, link)
+		mca_log(mode, &mca->rec, false);
+
 	mtx_lock_spin(&mca_lock);
-	while ((mca = STAILQ_FIRST(&mca_pending)) != NULL) {
-		STAILQ_REMOVE_HEAD(&mca_pending, link);
-		mca_log(&mca->rec);
+	while ((mca = STAILQ_FIRST(&tmplist)) != NULL) {
+		STAILQ_REMOVE_HEAD(&tmplist, link);
 		mca_store_record(mca);
 	}
 	mtx_unlock_spin(&mca_lock);
@@ -1192,6 +1334,7 @@ mca_setup(uint64_t mcg_cap)
 
 	mca_banks = mcg_cap & MCG_CAP_COUNT;
 	mtx_init(&mca_lock, "mca", NULL, MTX_SPIN);
+	mtx_init(&mca_msg_buf_lock, "mca_msg_buf", NULL, MTX_SPIN);
 	STAILQ_INIT(&mca_records);
 	STAILQ_INIT(&mca_pending);
 	mca_tq = taskqueue_create_fast("mca", M_WAITOK,