Diffstat (limited to 'sys/dev')
-rw-r--r-- | sys/dev/amdsmu/amdsmu.c | 466
-rw-r--r-- | sys/dev/amdsmu/amdsmu.h | 95
-rw-r--r-- | sys/dev/amdsmu/amdsmu_reg.h | 84
-rw-r--r-- | sys/dev/cxgbe/tom/t4_cpl_io.c | 6
-rw-r--r-- | sys/dev/cxgbe/tom/t4_tls.c | 4
-rw-r--r-- | sys/dev/drm2/drm_fb_helper.c | 2
-rw-r--r-- | sys/dev/efidev/efirt.c | 42
-rw-r--r-- | sys/dev/ice/ice_iov.c | 17
-rw-r--r-- | sys/dev/md/md.c | 23
-rw-r--r-- | sys/dev/nvme/nvme_ctrlr.c | 295
-rw-r--r-- | sys/dev/nvme/nvme_private.h | 4
-rw-r--r-- | sys/dev/ofw/ofw_bus_subr.c | 101
-rw-r--r-- | sys/dev/qlnx/qlnxe/qlnx_os.c | 5
-rw-r--r-- | sys/dev/random/fortuna.c | 7
-rw-r--r-- | sys/dev/random/random_harvestq.c | 232
-rw-r--r-- | sys/dev/random/random_harvestq.h | 2
-rw-r--r-- | sys/dev/random/randomdev.c | 2
-rw-r--r-- | sys/dev/vmm/vmm_dev.c | 135
-rw-r--r-- | sys/dev/vmm/vmm_mem.c | 15
-rw-r--r-- | sys/dev/vmm/vmm_mem.h | 27
-rw-r--r-- | sys/dev/vt/hw/vga/vt_vga.c | 2
-rw-r--r-- | sys/dev/vt/vt_core.c | 4
22 files changed, 1388 insertions, 182 deletions
diff --git a/sys/dev/amdsmu/amdsmu.c b/sys/dev/amdsmu/amdsmu.c new file mode 100644 index 000000000000..416f875c6176 --- /dev/null +++ b/sys/dev/amdsmu/amdsmu.c @@ -0,0 +1,466 @@ +/* + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2025 The FreeBSD Foundation + * + * This software was developed by Aymeric Wibo <obiwac@freebsd.org> + * under sponsorship from the FreeBSD Foundation. + */ + +#include <sys/param.h> +#include <sys/bus.h> +#include <sys/kernel.h> +#include <sys/module.h> +#include <sys/rman.h> +#include <sys/sysctl.h> + +#include <dev/pci/pcivar.h> +#include <dev/amdsmu/amdsmu.h> + +static bool +amdsmu_match(device_t dev, const struct amdsmu_product **product_out) +{ + const uint16_t vendorid = pci_get_vendor(dev); + const uint16_t deviceid = pci_get_device(dev); + + for (size_t i = 0; i < nitems(amdsmu_products); i++) { + const struct amdsmu_product *prod = &amdsmu_products[i]; + + if (vendorid == prod->amdsmu_vendorid && + deviceid == prod->amdsmu_deviceid) { + if (product_out != NULL) + *product_out = prod; + return (true); + } + } + return (false); +} + +static void +amdsmu_identify(driver_t *driver, device_t parent) +{ + if (device_find_child(parent, "amdsmu", -1) != NULL) + return; + + if (amdsmu_match(parent, NULL)) { + if (device_add_child(parent, "amdsmu", -1) == NULL) + device_printf(parent, "add amdsmu child failed\n"); + } +} + +static int +amdsmu_probe(device_t dev) +{ + if (resource_disabled("amdsmu", 0)) + return (ENXIO); + if (!amdsmu_match(device_get_parent(dev), NULL)) + return (ENXIO); + device_set_descf(dev, "AMD System Management Unit"); + + return (BUS_PROBE_GENERIC); +} + +static enum amdsmu_res +amdsmu_wait_res(device_t dev) +{ + struct amdsmu_softc *sc = device_get_softc(dev); + enum amdsmu_res res; + + /* + * The SMU has a response ready for us when the response register is + * set. Otherwise, we must wait. + */ + for (size_t i = 0; i < SMU_RES_READ_MAX; i++) { + res = amdsmu_read4(sc, SMU_REG_RESPONSE); + if (res != SMU_RES_WAIT) + return (res); + pause_sbt("amdsmu", ustosbt(SMU_RES_READ_PERIOD_US), 0, + C_HARDCLOCK); + } + device_printf(dev, "timed out waiting for response from SMU\n"); + return (SMU_RES_WAIT); +} + +static int +amdsmu_cmd(device_t dev, enum amdsmu_msg msg, uint32_t arg, uint32_t *ret) +{ + struct amdsmu_softc *sc = device_get_softc(dev); + enum amdsmu_res res; + + /* Wait for SMU to be ready. */ + if (amdsmu_wait_res(dev) == SMU_RES_WAIT) + return (ETIMEDOUT); + + /* Clear previous response. */ + amdsmu_write4(sc, SMU_REG_RESPONSE, SMU_RES_WAIT); + + /* Write out command to registers. */ + amdsmu_write4(sc, SMU_REG_MESSAGE, msg); + amdsmu_write4(sc, SMU_REG_ARGUMENT, arg); + + /* Wait for SMU response and handle it. 
*/ + res = amdsmu_wait_res(dev); + + switch (res) { + case SMU_RES_WAIT: + return (ETIMEDOUT); + case SMU_RES_OK: + if (ret != NULL) + *ret = amdsmu_read4(sc, SMU_REG_ARGUMENT); + return (0); + case SMU_RES_REJECT_BUSY: + device_printf(dev, "SMU is busy\n"); + return (EBUSY); + case SMU_RES_REJECT_PREREQ: + case SMU_RES_UNKNOWN: + case SMU_RES_FAILED: + device_printf(dev, "SMU error: %02x\n", res); + return (EIO); + } + + return (EINVAL); +} + +static int +amdsmu_get_vers(device_t dev) +{ + int err; + uint32_t smu_vers; + struct amdsmu_softc *sc = device_get_softc(dev); + + err = amdsmu_cmd(dev, SMU_MSG_GETSMUVERSION, 0, &smu_vers); + if (err != 0) { + device_printf(dev, "failed to get SMU version\n"); + return (err); + } + sc->smu_program = (smu_vers >> 24) & 0xFF; + sc->smu_maj = (smu_vers >> 16) & 0xFF; + sc->smu_min = (smu_vers >> 8) & 0xFF; + sc->smu_rev = smu_vers & 0xFF; + device_printf(dev, "SMU version: %d.%d.%d (program %d)\n", + sc->smu_maj, sc->smu_min, sc->smu_rev, sc->smu_program); + + return (0); +} + +static int +amdsmu_get_ip_blocks(device_t dev) +{ + struct amdsmu_softc *sc = device_get_softc(dev); + const uint16_t deviceid = pci_get_device(dev); + int err; + struct amdsmu_metrics *m = &sc->metrics; + bool active; + char sysctl_descr[32]; + + /* Get IP block count. */ + switch (deviceid) { + case PCI_DEVICEID_AMD_REMBRANDT_ROOT: + sc->ip_block_count = 12; + break; + case PCI_DEVICEID_AMD_PHOENIX_ROOT: + sc->ip_block_count = 21; + break; + /* TODO How many IP blocks does Strix Point (and the others) have? */ + case PCI_DEVICEID_AMD_STRIX_POINT_ROOT: + default: + sc->ip_block_count = nitems(amdsmu_ip_blocks_names); + } + KASSERT(sc->ip_block_count <= nitems(amdsmu_ip_blocks_names), + ("too many IP blocks for array")); + + /* Get and print out IP blocks. */ + err = amdsmu_cmd(dev, SMU_MSG_GET_SUP_CONSTRAINTS, 0, + &sc->active_ip_blocks); + if (err != 0) { + device_printf(dev, "failed to get IP blocks\n"); + return (err); + } + device_printf(dev, "Active IP blocks: "); + for (size_t i = 0; i < sc->ip_block_count; i++) { + active = (sc->active_ip_blocks & (1 << i)) != 0; + sc->ip_blocks_active[i] = active; + if (!active) + continue; + printf("%s%s", amdsmu_ip_blocks_names[i], + i + 1 < sc->ip_block_count ? " " : "\n"); + } + + /* Create a sysctl node for IP blocks. */ + sc->ip_blocks_sysctlnode = SYSCTL_ADD_NODE(sc->sysctlctx, + SYSCTL_CHILDREN(sc->sysctlnode), OID_AUTO, "ip_blocks", + CTLFLAG_RD, NULL, "SMU metrics"); + if (sc->ip_blocks_sysctlnode == NULL) { + device_printf(dev, "could not add sysctl node for IP blocks\n"); + return (ENOMEM); + } + + /* Create a sysctl node for each IP block. */ + for (size_t i = 0; i < sc->ip_block_count; i++) { + /* Create the sysctl node itself for the IP block. */ + snprintf(sysctl_descr, sizeof sysctl_descr, + "Metrics about the %s AMD IP block", + amdsmu_ip_blocks_names[i]); + sc->ip_block_sysctlnodes[i] = SYSCTL_ADD_NODE(sc->sysctlctx, + SYSCTL_CHILDREN(sc->ip_blocks_sysctlnode), OID_AUTO, + amdsmu_ip_blocks_names[i], CTLFLAG_RD, NULL, sysctl_descr); + if (sc->ip_block_sysctlnodes[i] == NULL) { + device_printf(dev, + "could not add sysctl node for \"%s\"\n", sysctl_descr); + continue; + } + /* + * Create sysctls for if the IP block is currently active, last + * active time, and total active time. 
+ */ + SYSCTL_ADD_BOOL(sc->sysctlctx, + SYSCTL_CHILDREN(sc->ip_block_sysctlnodes[i]), OID_AUTO, + "active", CTLFLAG_RD, &sc->ip_blocks_active[i], 0, + "IP block is currently active"); + SYSCTL_ADD_U64(sc->sysctlctx, + SYSCTL_CHILDREN(sc->ip_block_sysctlnodes[i]), OID_AUTO, + "last_time", CTLFLAG_RD, &m->ip_block_last_active_time[i], + 0, "How long the IP block was active for during the last" + " sleep (us)"); +#ifdef IP_BLOCK_TOTAL_ACTIVE_TIME + SYSCTL_ADD_U64(sc->sysctlctx, + SYSCTL_CHILDREN(sc->ip_block_sysctlnodes[i]), OID_AUTO, + "total_time", CTLFLAG_RD, &m->ip_block_total_active_time[i], + 0, "How long the IP block was active for during sleep in" + " total (us)"); +#endif + } + return (0); +} + +static int +amdsmu_init_metrics(device_t dev) +{ + struct amdsmu_softc *sc = device_get_softc(dev); + int err; + uint32_t metrics_addr_lo, metrics_addr_hi; + uint64_t metrics_addr; + + /* Get physical address of logging buffer. */ + err = amdsmu_cmd(dev, SMU_MSG_LOG_GETDRAM_ADDR_LO, 0, &metrics_addr_lo); + if (err != 0) + return (err); + err = amdsmu_cmd(dev, SMU_MSG_LOG_GETDRAM_ADDR_HI, 0, &metrics_addr_hi); + if (err != 0) + return (err); + metrics_addr = ((uint64_t) metrics_addr_hi << 32) | metrics_addr_lo; + + /* Map memory of logging buffer. */ + err = bus_space_map(sc->bus_tag, metrics_addr, + sizeof(struct amdsmu_metrics), 0, &sc->metrics_space); + if (err != 0) { + device_printf(dev, "could not map bus space for SMU metrics\n"); + return (err); + } + + /* Start logging for metrics. */ + amdsmu_cmd(dev, SMU_MSG_LOG_RESET, 0, NULL); + amdsmu_cmd(dev, SMU_MSG_LOG_START, 0, NULL); + return (0); +} + +static int +amdsmu_dump_metrics(device_t dev) +{ + struct amdsmu_softc *sc = device_get_softc(dev); + int err; + + err = amdsmu_cmd(dev, SMU_MSG_LOG_DUMP_DATA, 0, NULL); + if (err != 0) { + device_printf(dev, "failed to dump metrics\n"); + return (err); + } + bus_space_read_region_4(sc->bus_tag, sc->metrics_space, 0, + (uint32_t *)&sc->metrics, sizeof(sc->metrics) / sizeof(uint32_t)); + + return (0); +} + +static void +amdsmu_fetch_idlemask(device_t dev) +{ + struct amdsmu_softc *sc = device_get_softc(dev); + + sc->idlemask = amdsmu_read4(sc, SMU_REG_IDLEMASK); +} + +static int +amdsmu_attach(device_t dev) +{ + struct amdsmu_softc *sc = device_get_softc(dev); + int err; + uint32_t physbase_addr_lo, physbase_addr_hi; + uint64_t physbase_addr; + int rid = 0; + struct sysctl_oid *node; + + /* + * Find physical base address for SMU. + * XXX I am a little confused about the masks here. I'm just copying + * what Linux does in the amd-pmc driver to get the base address. + */ + pci_write_config(dev, SMU_INDEX_ADDRESS, SMU_PHYSBASE_ADDR_LO, 4); + physbase_addr_lo = pci_read_config(dev, SMU_INDEX_DATA, 4) & 0xFFF00000; + + pci_write_config(dev, SMU_INDEX_ADDRESS, SMU_PHYSBASE_ADDR_HI, 4); + physbase_addr_hi = pci_read_config(dev, SMU_INDEX_DATA, 4) & 0x0000FFFF; + + physbase_addr = (uint64_t)physbase_addr_hi << 32 | physbase_addr_lo; + + /* Map memory for SMU and its registers. 
*/ + sc->res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); + if (sc->res == NULL) { + device_printf(dev, "could not allocate resource\n"); + return (ENXIO); + } + + sc->bus_tag = rman_get_bustag(sc->res); + + if (bus_space_map(sc->bus_tag, physbase_addr, + SMU_MEM_SIZE, 0, &sc->smu_space) != 0) { + device_printf(dev, "could not map bus space for SMU\n"); + err = ENXIO; + goto err_smu_space; + } + if (bus_space_map(sc->bus_tag, physbase_addr + SMU_REG_SPACE_OFF, + SMU_MEM_SIZE, 0, &sc->reg_space) != 0) { + device_printf(dev, "could not map bus space for SMU regs\n"); + err = ENXIO; + goto err_reg_space; + } + + /* sysctl stuff. */ + sc->sysctlctx = device_get_sysctl_ctx(dev); + sc->sysctlnode = device_get_sysctl_tree(dev); + + /* Get version & add sysctls. */ + if ((err = amdsmu_get_vers(dev)) != 0) + goto err_dump; + + SYSCTL_ADD_U8(sc->sysctlctx, SYSCTL_CHILDREN(sc->sysctlnode), OID_AUTO, + "program", CTLFLAG_RD, &sc->smu_program, 0, "SMU program number"); + SYSCTL_ADD_U8(sc->sysctlctx, SYSCTL_CHILDREN(sc->sysctlnode), OID_AUTO, + "version_major", CTLFLAG_RD, &sc->smu_maj, 0, + "SMU firmware major version number"); + SYSCTL_ADD_U8(sc->sysctlctx, SYSCTL_CHILDREN(sc->sysctlnode), OID_AUTO, + "version_minor", CTLFLAG_RD, &sc->smu_min, 0, + "SMU firmware minor version number"); + SYSCTL_ADD_U8(sc->sysctlctx, SYSCTL_CHILDREN(sc->sysctlnode), OID_AUTO, + "version_revision", CTLFLAG_RD, &sc->smu_rev, 0, + "SMU firmware revision number"); + + /* Set up for getting metrics & add sysctls. */ + if ((err = amdsmu_init_metrics(dev)) != 0) + goto err_dump; + if ((err = amdsmu_dump_metrics(dev)) != 0) + goto err_dump; + + node = SYSCTL_ADD_NODE(sc->sysctlctx, SYSCTL_CHILDREN(sc->sysctlnode), + OID_AUTO, "metrics", CTLFLAG_RD, NULL, "SMU metrics"); + if (node == NULL) { + device_printf(dev, "could not add sysctl node for metrics\n"); + err = ENOMEM; + goto err_dump; + } + + SYSCTL_ADD_U32(sc->sysctlctx, SYSCTL_CHILDREN(node), OID_AUTO, + "table_version", CTLFLAG_RD, &sc->metrics.table_version, 0, + "SMU metrics table version"); + SYSCTL_ADD_U32(sc->sysctlctx, SYSCTL_CHILDREN(node), OID_AUTO, + "hint_count", CTLFLAG_RD, &sc->metrics.hint_count, 0, + "How many times the sleep hint was set"); + SYSCTL_ADD_U32(sc->sysctlctx, SYSCTL_CHILDREN(node), OID_AUTO, + "s0i3_last_entry_status", CTLFLAG_RD, + &sc->metrics.s0i3_last_entry_status, 0, + "1 if last S0i3 entry was successful"); + SYSCTL_ADD_U32(sc->sysctlctx, SYSCTL_CHILDREN(node), OID_AUTO, + "time_last_in_s0i2", CTLFLAG_RD, &sc->metrics.time_last_in_s0i2, 0, + "Time spent in S0i2 during last sleep (us)"); + SYSCTL_ADD_U64(sc->sysctlctx, SYSCTL_CHILDREN(node), OID_AUTO, + "time_last_entering_s0i3", CTLFLAG_RD, + &sc->metrics.time_last_entering_s0i3, 0, + "Time spent entering S0i3 during last sleep (us)"); + SYSCTL_ADD_U64(sc->sysctlctx, SYSCTL_CHILDREN(node), OID_AUTO, + "total_time_entering_s0i3", CTLFLAG_RD, + &sc->metrics.total_time_entering_s0i3, 0, + "Total time spent entering S0i3 (us)"); + SYSCTL_ADD_U64(sc->sysctlctx, SYSCTL_CHILDREN(node), OID_AUTO, + "time_last_resuming", CTLFLAG_RD, &sc->metrics.time_last_resuming, + 0, "Time spent resuming from last sleep (us)"); + SYSCTL_ADD_U64(sc->sysctlctx, SYSCTL_CHILDREN(node), OID_AUTO, + "total_time_resuming", CTLFLAG_RD, &sc->metrics.total_time_resuming, + 0, "Total time spent resuming from sleep (us)"); + SYSCTL_ADD_U64(sc->sysctlctx, SYSCTL_CHILDREN(node), OID_AUTO, + "time_last_in_s0i3", CTLFLAG_RD, &sc->metrics.time_last_in_s0i3, 0, + "Time spent in S0i3 during last sleep (us)"); 
+ SYSCTL_ADD_U64(sc->sysctlctx, SYSCTL_CHILDREN(node), OID_AUTO, + "total_time_in_s0i3", CTLFLAG_RD, &sc->metrics.total_time_in_s0i3, + 0, "Total time spent in S0i3 (us)"); + SYSCTL_ADD_U64(sc->sysctlctx, SYSCTL_CHILDREN(node), OID_AUTO, + "time_last_in_sw_drips", CTLFLAG_RD, + &sc->metrics.time_last_in_sw_drips, 0, + "Time spent in awake during last sleep (us)"); + SYSCTL_ADD_U64(sc->sysctlctx, SYSCTL_CHILDREN(node), OID_AUTO, + "total_time_in_sw_drips", CTLFLAG_RD, + &sc->metrics.total_time_in_sw_drips, 0, + "Total time spent awake (us)"); + + /* Get IP blocks & add sysctls. */ + err = amdsmu_get_ip_blocks(dev); + if (err != 0) + goto err_dump; + + /* Get idlemask & add sysctl. */ + amdsmu_fetch_idlemask(dev); + SYSCTL_ADD_U32(sc->sysctlctx, SYSCTL_CHILDREN(sc->sysctlnode), OID_AUTO, + "idlemask", CTLFLAG_RD, &sc->idlemask, 0, "SMU idlemask. This " + "value is not documented - only used to help AMD internally debug " + "issues"); + + return (0); +err_dump: + bus_space_unmap(sc->bus_tag, sc->reg_space, SMU_MEM_SIZE); +err_reg_space: + bus_space_unmap(sc->bus_tag, sc->smu_space, SMU_MEM_SIZE); +err_smu_space: + bus_release_resource(dev, SYS_RES_MEMORY, rid, sc->res); + return (err); +} + +static int +amdsmu_detach(device_t dev) +{ + struct amdsmu_softc *sc = device_get_softc(dev); + int rid = 0; + + bus_space_unmap(sc->bus_tag, sc->smu_space, SMU_MEM_SIZE); + bus_space_unmap(sc->bus_tag, sc->reg_space, SMU_MEM_SIZE); + + bus_release_resource(dev, SYS_RES_MEMORY, rid, sc->res); + return (0); +} + +static device_method_t amdsmu_methods[] = { + DEVMETHOD(device_identify, amdsmu_identify), + DEVMETHOD(device_probe, amdsmu_probe), + DEVMETHOD(device_attach, amdsmu_attach), + DEVMETHOD(device_detach, amdsmu_detach), + DEVMETHOD_END +}; + +static driver_t amdsmu_driver = { + "amdsmu", + amdsmu_methods, + sizeof(struct amdsmu_softc), +}; + +DRIVER_MODULE(amdsmu, hostb, amdsmu_driver, NULL, NULL); +MODULE_VERSION(amdsmu, 1); +MODULE_DEPEND(amdsmu, amdsmn, 1, 1, 1); +MODULE_PNP_INFO("U16:vendor;U16:device", pci, amdsmu, amdsmu_products, + nitems(amdsmu_products)); diff --git a/sys/dev/amdsmu/amdsmu.h b/sys/dev/amdsmu/amdsmu.h new file mode 100644 index 000000000000..025887f7fe5a --- /dev/null +++ b/sys/dev/amdsmu/amdsmu.h @@ -0,0 +1,95 @@ +/* + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2025 The FreeBSD Foundation + * + * This software was developed by Aymeric Wibo <obiwac@freebsd.org> + * under sponsorship from the FreeBSD Foundation. 
+ */ +#ifndef _AMDSMU_H_ +#define _AMDSMU_H_ + +#include <sys/param.h> +#include <sys/bus.h> +#include <sys/kernel.h> +#include <machine/bus.h> +#include <x86/cputypes.h> + +#include <dev/amdsmu/amdsmu_reg.h> + +#define SMU_RES_READ_PERIOD_US 50 +#define SMU_RES_READ_MAX 20000 + +static const struct amdsmu_product { + uint16_t amdsmu_vendorid; + uint16_t amdsmu_deviceid; +} amdsmu_products[] = { + { CPU_VENDOR_AMD, PCI_DEVICEID_AMD_REMBRANDT_ROOT }, + { CPU_VENDOR_AMD, PCI_DEVICEID_AMD_PHOENIX_ROOT }, + { CPU_VENDOR_AMD, PCI_DEVICEID_AMD_STRIX_POINT_ROOT }, +}; + +static const char *const amdsmu_ip_blocks_names[] = { + "DISPLAY", + "CPU", + "GFX", + "VDD", + "ACP", + "VCN", + "ISP", + "NBIO", + "DF", + "USB3_0", + "USB3_1", + "LAPIC", + "USB3_2", + "USB3_3", + "USB3_4", + "USB4_0", + "USB4_1", + "MPM", + "JPEG", + "IPU", + "UMSCH", + "VPE", +}; + +CTASSERT(nitems(amdsmu_ip_blocks_names) <= 32); + +struct amdsmu_softc { + struct sysctl_ctx_list *sysctlctx; + struct sysctl_oid *sysctlnode; + + struct resource *res; + bus_space_tag_t bus_tag; + + bus_space_handle_t smu_space; + bus_space_handle_t reg_space; + + uint8_t smu_program; + uint8_t smu_maj, smu_min, smu_rev; + + uint32_t active_ip_blocks; + struct sysctl_oid *ip_blocks_sysctlnode; + size_t ip_block_count; + struct sysctl_oid *ip_block_sysctlnodes[nitems(amdsmu_ip_blocks_names)]; + bool ip_blocks_active[nitems(amdsmu_ip_blocks_names)]; + + bus_space_handle_t metrics_space; + struct amdsmu_metrics metrics; + uint32_t idlemask; +}; + +static inline uint32_t +amdsmu_read4(const struct amdsmu_softc *sc, bus_size_t reg) +{ + return (bus_space_read_4(sc->bus_tag, sc->reg_space, reg)); +} + +static inline void +amdsmu_write4(const struct amdsmu_softc *sc, bus_size_t reg, uint32_t val) +{ + bus_space_write_4(sc->bus_tag, sc->reg_space, reg, val); +} + +#endif /* _AMDSMU_H_ */ diff --git a/sys/dev/amdsmu/amdsmu_reg.h b/sys/dev/amdsmu/amdsmu_reg.h new file mode 100644 index 000000000000..e685b34e6883 --- /dev/null +++ b/sys/dev/amdsmu/amdsmu_reg.h @@ -0,0 +1,84 @@ +/* + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2025 The FreeBSD Foundation + * + * This software was developed by Aymeric Wibo <obiwac@freebsd.org> + * under sponsorship from the FreeBSD Foundation. + */ +#ifndef _AMDSMU_REG_H_ +#define _AMDSMU_REG_H_ + +#include <sys/types.h> + +/* + * TODO These are in common with amdtemp; should we find a way to factor these + * out? Also, there are way more of these. I couldn't find a centralized place + * which lists them though. + */ +#define PCI_DEVICEID_AMD_REMBRANDT_ROOT 0x14B5 +#define PCI_DEVICEID_AMD_PHOENIX_ROOT 0x14E8 +#define PCI_DEVICEID_AMD_STRIX_POINT_ROOT 0x14A4 + +#define SMU_INDEX_ADDRESS 0xB8 +#define SMU_INDEX_DATA 0xBC + +#define SMU_PHYSBASE_ADDR_LO 0x13B102E8 +#define SMU_PHYSBASE_ADDR_HI 0x13B102EC + +#define SMU_MEM_SIZE 0x1000 +#define SMU_REG_SPACE_OFF 0x10000 + +#define SMU_REG_MESSAGE 0x538 +#define SMU_REG_RESPONSE 0x980 +#define SMU_REG_ARGUMENT 0x9BC +#define SMU_REG_IDLEMASK 0xD14 + +enum amdsmu_res { + SMU_RES_WAIT = 0x00, + SMU_RES_OK = 0x01, + SMU_RES_REJECT_BUSY = 0xFC, + SMU_RES_REJECT_PREREQ = 0xFD, + SMU_RES_UNKNOWN = 0xFE, + SMU_RES_FAILED = 0xFF, +}; + +enum amdsmu_msg { + SMU_MSG_GETSMUVERSION = 0x02, + SMU_MSG_LOG_GETDRAM_ADDR_HI = 0x04, + SMU_MSG_LOG_GETDRAM_ADDR_LO = 0x05, + SMU_MSG_LOG_START = 0x06, + SMU_MSG_LOG_RESET = 0x07, + SMU_MSG_LOG_DUMP_DATA = 0x08, + SMU_MSG_GET_SUP_CONSTRAINTS = 0x09, +}; + +/* XXX Copied from Linux struct smu_metrics. 
*/ +struct amdsmu_metrics { + uint32_t table_version; + uint32_t hint_count; + uint32_t s0i3_last_entry_status; + uint32_t time_last_in_s0i2; + uint64_t time_last_entering_s0i3; + uint64_t total_time_entering_s0i3; + uint64_t time_last_resuming; + uint64_t total_time_resuming; + uint64_t time_last_in_s0i3; + uint64_t total_time_in_s0i3; + uint64_t time_last_in_sw_drips; + uint64_t total_time_in_sw_drips; + /* + * This is how long each IP block was active for (us), i.e., blocking + * entry to S0i3. In Linux, these are called "timecondition_notmet_*". + * + * XXX Total active time for IP blocks seems to be buggy and reporting + * garbage (at least on Phoenix), so it's disabled for now. The last + * active time for the USB4_0 IP block also seems to be buggy. + */ + uint64_t ip_block_last_active_time[32]; +#ifdef IP_BLOCK_TOTAL_ACTIVE_TIME + uint64_t ip_block_total_active_time[32]; +#endif +} __attribute__((packed)); + +#endif /* _AMDSMU_REG_H_ */ diff --git a/sys/dev/cxgbe/tom/t4_cpl_io.c b/sys/dev/cxgbe/tom/t4_cpl_io.c index 8547f21586e1..7a6b1cbdd736 100644 --- a/sys/dev/cxgbe/tom/t4_cpl_io.c +++ b/sys/dev/cxgbe/tom/t4_cpl_io.c @@ -703,7 +703,7 @@ t4_push_frames(struct adapter *sc, struct toepcb *toep, int drop) for (m = sndptr; m != NULL; m = m->m_next) { int n; - if ((m->m_flags & M_NOTAVAIL) != 0) + if ((m->m_flags & M_NOTREADY) != 0) break; if (m->m_flags & M_EXTPG) { #ifdef KERN_TLS @@ -787,7 +787,7 @@ t4_push_frames(struct adapter *sc, struct toepcb *toep, int drop) /* nothing to send */ if (plen == 0) { - KASSERT(m == NULL || (m->m_flags & M_NOTAVAIL) != 0, + KASSERT(m == NULL || (m->m_flags & M_NOTREADY) != 0, ("%s: nothing to send, but m != NULL is ready", __func__)); break; @@ -880,7 +880,7 @@ t4_push_frames(struct adapter *sc, struct toepcb *toep, int drop) toep->txsd_avail--; t4_l2t_send(sc, wr, toep->l2te); - } while (m != NULL && (m->m_flags & M_NOTAVAIL) == 0); + } while (m != NULL && (m->m_flags & M_NOTREADY) == 0); /* Send a FIN if requested, but only if there's no more data to send */ if (m == NULL && toep->flags & TPF_SEND_FIN) diff --git a/sys/dev/cxgbe/tom/t4_tls.c b/sys/dev/cxgbe/tom/t4_tls.c index c6377980fca9..27c16b9988ae 100644 --- a/sys/dev/cxgbe/tom/t4_tls.c +++ b/sys/dev/cxgbe/tom/t4_tls.c @@ -563,7 +563,7 @@ t4_push_ktls(struct adapter *sc, struct toepcb *toep, int drop) * If there is no ready data to send, wait until more * data arrives. */ - if (m == NULL || (m->m_flags & M_NOTAVAIL) != 0) { + if (m == NULL || (m->m_flags & M_NOTREADY) != 0) { if (sowwakeup) sowwakeup_locked(so); else @@ -614,7 +614,7 @@ t4_push_ktls(struct adapter *sc, struct toepcb *toep, int drop) /* Shove if there is no additional data pending. */ shove = ((m->m_next == NULL || - (m->m_next->m_flags & M_NOTAVAIL) != 0)) && + (m->m_next->m_flags & M_NOTREADY) != 0)) && (tp->t_flags & TF_MORETOCOME) == 0; if (sb->sb_flags & SB_AUTOSIZE && diff --git a/sys/dev/drm2/drm_fb_helper.c b/sys/dev/drm2/drm_fb_helper.c index f67cc9f60d02..1f4abd255690 100644 --- a/sys/dev/drm2/drm_fb_helper.c +++ b/sys/dev/drm2/drm_fb_helper.c @@ -51,7 +51,7 @@ struct vt_kms_softc { struct task fb_mode_task; }; -/* Call restore out of vt(9) locks. */ +/* Call restore out of vt(4) locks. 
*/ static void vt_restore_fbdev_mode(void *arg, int pending) { diff --git a/sys/dev/efidev/efirt.c b/sys/dev/efidev/efirt.c index b0fa33daeca7..b55c1c191077 100644 --- a/sys/dev/efidev/efirt.c +++ b/sys/dev/efidev/efirt.c @@ -107,7 +107,8 @@ static int efi_status2err[25] = { enum efi_table_type { TYPE_ESRT = 0, - TYPE_PROP + TYPE_PROP, + TYPE_MEMORY_ATTR }; static int efi_enter(void); @@ -445,6 +446,42 @@ get_table_length(enum efi_table_type type, size_t *table_len, void **taddr) free(buf, M_TEMP); return (0); } + case TYPE_MEMORY_ATTR: + { + efi_guid_t guid = EFI_MEMORY_ATTRIBUTES_TABLE; + struct efi_memory_attribute_table *tbl_addr, *mem_addr; + int error; + void *buf; + size_t len = sizeof(struct efi_memory_attribute_table); + + error = efi_get_table(&guid, (void **)&tbl_addr); + if (error) + return (error); + + buf = malloc(len, M_TEMP, M_WAITOK); + error = physcopyout((vm_paddr_t)tbl_addr, buf, len); + if (error) { + free(buf, M_TEMP); + return (error); + } + + mem_addr = (struct efi_memory_attribute_table *)buf; + if (mem_addr->version != 2) { + free(buf, M_TEMP); + return (EINVAL); + } + len += mem_addr->descriptor_size * mem_addr->num_ents; + if (len > EFI_TABLE_ALLOC_MAX) { + free(buf, M_TEMP); + return (ENOMEM); + } + + *table_len = len; + if (taddr != NULL) + *taddr = tbl_addr; + free(buf, M_TEMP); + return (0); + } } return (ENOENT); } @@ -457,7 +494,8 @@ copy_table(efi_guid_t *guid, void **buf, size_t buf_len, size_t *table_len) enum efi_table_type type; } tables[] = { { EFI_TABLE_ESRT, TYPE_ESRT }, - { EFI_PROPERTIES_TABLE, TYPE_PROP } + { EFI_PROPERTIES_TABLE, TYPE_PROP }, + { EFI_MEMORY_ATTRIBUTES_TABLE, TYPE_MEMORY_ATTR } }; size_t table_idx; void *taddr; diff --git a/sys/dev/ice/ice_iov.c b/sys/dev/ice/ice_iov.c index e06c7eb56f7a..c5a3e1060e44 100644 --- a/sys/dev/ice/ice_iov.c +++ b/sys/dev/ice/ice_iov.c @@ -345,7 +345,7 @@ ice_iov_add_vf(struct ice_softc *sc, uint16_t vfnum, const nvlist_t *params) if (nvlist_exists_binary(params, "mac-addr")) { mac = nvlist_get_binary(params, "mac-addr", &size); - bcopy(mac, vf->mac, ETHER_ADDR_LEN); + memcpy(vf->mac, mac, ETHER_ADDR_LEN); if (nvlist_get_bool(params, "allow-set-mac")) vf->vf_flags |= VF_FLAG_SET_MAC_CAP; @@ -617,12 +617,14 @@ ice_vc_get_vf_res_msg(struct ice_softc *sc, struct ice_vf *vf, u8 *msg_buf) { struct ice_hw *hw = &sc->hw; struct virtchnl_vf_resource *vf_res; + struct virtchnl_vsi_resource *vsi_res; u16 vf_res_len; u32 vf_caps; /* XXX: Only support one VSI per VF, so this size doesn't need adjusting */ vf_res_len = sizeof(struct virtchnl_vf_resource); - vf_res = (struct virtchnl_vf_resource *)malloc(vf_res_len, M_ICE, M_WAITOK | M_ZERO); + vf_res = (struct virtchnl_vf_resource *)malloc(vf_res_len, M_ICE, + M_WAITOK | M_ZERO); vf_res->num_vsis = 1; vf_res->num_queue_pairs = vf->vsi->num_tx_queues; @@ -643,10 +645,13 @@ ice_vc_get_vf_res_msg(struct ice_softc *sc, struct ice_vf *vf, u8 *msg_buf) vf_res->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_WB_ON_ITR; } - vf_res->vsi_res[0].vsi_id = vf->vsi->idx; - vf_res->vsi_res[0].num_queue_pairs = vf->vsi->num_tx_queues; - vf_res->vsi_res[0].vsi_type = VIRTCHNL_VSI_SRIOV; - vf_res->vsi_res[0].qset_handle = 0; + vsi_res = &vf_res->vsi_res[0]; + vsi_res->vsi_id = vf->vsi->idx; + vsi_res->num_queue_pairs = vf->vsi->num_tx_queues; + vsi_res->vsi_type = VIRTCHNL_VSI_SRIOV; + vsi_res->qset_handle = 0; + if (!ETHER_IS_ZERO(vf->mac)) + memcpy(vsi_res->default_mac_addr, vf->mac, ETHER_ADDR_LEN); ice_aq_send_msg_to_vf(hw, vf->vf_num, VIRTCHNL_OP_GET_VF_RESOURCES, VIRTCHNL_STATUS_SUCCESS, 
(u8 *)vf_res, vf_res_len, NULL); diff --git a/sys/dev/md/md.c b/sys/dev/md/md.c index 29dc0c880e3a..ec1664fac701 100644 --- a/sys/dev/md/md.c +++ b/sys/dev/md/md.c @@ -89,6 +89,8 @@ #include <sys/unistd.h> #include <sys/vnode.h> #include <sys/disk.h> +#include <sys/param.h> +#include <sys/bus.h> #include <geom/geom.h> #include <geom/geom_int.h> @@ -2082,8 +2084,10 @@ g_md_init(struct g_class *mp __unused) { caddr_t mod; u_char *ptr, *name, *type; + u_char scratch[40]; unsigned len; int i; + vm_offset_t paddr; /* figure out log2(NINDIR) */ for (i = NINDIR, nshift = -1; i; nshift++) @@ -2123,6 +2127,25 @@ g_md_init(struct g_class *mp __unused) sx_xunlock(&md_sx); } } + + /* + * Load up to 32 pre-loaded disks + */ + for (int i = 0; i < 32; i++) { + if (resource_long_value("md", i, "physaddr", + (long *) &paddr) != 0 || + resource_int_value("md", i, "len", &len) != 0) + break; + ptr = (char *)pmap_map(NULL, paddr, paddr + len, VM_PROT_READ); + if (ptr != NULL && len != 0) { + sprintf(scratch, "preload%d 0x%016jx", i, + (uintmax_t)paddr); + sx_xlock(&md_sx); + md_preloaded(ptr, len, scratch); + sx_xunlock(&md_sx); + } + } + status_dev = make_dev(&mdctl_cdevsw, INT_MAX, UID_ROOT, GID_WHEEL, 0600, MDCTL_NAME); g_topology_lock(); diff --git a/sys/dev/nvme/nvme_ctrlr.c b/sys/dev/nvme/nvme_ctrlr.c index 73a7cee4aad0..fd7f00ced14b 100644 --- a/sys/dev/nvme/nvme_ctrlr.c +++ b/sys/dev/nvme/nvme_ctrlr.c @@ -48,7 +48,7 @@ #define B4_CHK_RDY_DELAY_MS 2300 /* work around controller bug */ static void nvme_ctrlr_construct_and_submit_aer(struct nvme_controller *ctrlr, - struct nvme_async_event_request *aer); + struct nvme_async_event_request *aer); static void nvme_ctrlr_barrier(struct nvme_controller *ctrlr, int flags) @@ -680,96 +680,6 @@ nvme_ctrlr_log_critical_warnings(struct nvme_controller *ctrlr, } static void -nvme_ctrlr_async_event_log_page_cb(void *arg, const struct nvme_completion *cpl) -{ - struct nvme_async_event_request *aer = arg; - struct nvme_health_information_page *health_info; - struct nvme_ns_list *nsl; - struct nvme_error_information_entry *err; - int i; - - /* - * If the log page fetch for some reason completed with an error, - * don't pass log page data to the consumers. In practice, this case - * should never happen. 
- */ - if (nvme_completion_is_error(cpl)) - nvme_notify_async_consumers(aer->ctrlr, &aer->cpl, - aer->log_page_id, NULL, 0); - else { - /* Convert data to host endian */ - switch (aer->log_page_id) { - case NVME_LOG_ERROR: - err = (struct nvme_error_information_entry *)aer->log_page_buffer; - for (i = 0; i < (aer->ctrlr->cdata.elpe + 1); i++) - nvme_error_information_entry_swapbytes(err++); - break; - case NVME_LOG_HEALTH_INFORMATION: - nvme_health_information_page_swapbytes( - (struct nvme_health_information_page *)aer->log_page_buffer); - break; - case NVME_LOG_CHANGED_NAMESPACE: - nvme_ns_list_swapbytes( - (struct nvme_ns_list *)aer->log_page_buffer); - break; - case NVME_LOG_COMMAND_EFFECT: - nvme_command_effects_page_swapbytes( - (struct nvme_command_effects_page *)aer->log_page_buffer); - break; - case NVME_LOG_RES_NOTIFICATION: - nvme_res_notification_page_swapbytes( - (struct nvme_res_notification_page *)aer->log_page_buffer); - break; - case NVME_LOG_SANITIZE_STATUS: - nvme_sanitize_status_page_swapbytes( - (struct nvme_sanitize_status_page *)aer->log_page_buffer); - break; - default: - break; - } - - if (aer->log_page_id == NVME_LOG_HEALTH_INFORMATION) { - health_info = (struct nvme_health_information_page *) - aer->log_page_buffer; - nvme_ctrlr_log_critical_warnings(aer->ctrlr, - health_info->critical_warning); - /* - * Critical warnings reported through the - * SMART/health log page are persistent, so - * clear the associated bits in the async event - * config so that we do not receive repeated - * notifications for the same event. - */ - aer->ctrlr->async_event_config &= - ~health_info->critical_warning; - nvme_ctrlr_cmd_set_async_event_config(aer->ctrlr, - aer->ctrlr->async_event_config, NULL, NULL); - } else if (aer->log_page_id == NVME_LOG_CHANGED_NAMESPACE && - !nvme_use_nvd) { - nsl = (struct nvme_ns_list *)aer->log_page_buffer; - for (i = 0; i < nitems(nsl->ns) && nsl->ns[i] != 0; i++) { - if (nsl->ns[i] > NVME_MAX_NAMESPACES) - break; - nvme_notify_ns(aer->ctrlr, nsl->ns[i]); - } - } - - /* - * Pass the cpl data from the original async event completion, - * not the log page fetch. - */ - nvme_notify_async_consumers(aer->ctrlr, &aer->cpl, - aer->log_page_id, aer->log_page_buffer, aer->log_page_size); - } - - /* - * Repost another asynchronous event request to replace the one - * that just completed. - */ - nvme_ctrlr_construct_and_submit_aer(aer->ctrlr, aer); -} - -static void nvme_ctrlr_async_event_cb(void *arg, const struct nvme_completion *cpl) { struct nvme_async_event_request *aer = arg; @@ -784,33 +694,18 @@ nvme_ctrlr_async_event_cb(void *arg, const struct nvme_completion *cpl) return; } - /* Associated log page is in bits 23:16 of completion entry dw0. */ + /* + * Save the completion status and associated log page is in bits 23:16 + * of completion entry dw0. Print a message and queue it for further + * processing. 
+ */ + memcpy(&aer->cpl, cpl, sizeof(*cpl)); aer->log_page_id = NVMEV(NVME_ASYNC_EVENT_LOG_PAGE_ID, cpl->cdw0); - nvme_printf(aer->ctrlr, "async event occurred (type 0x%x, info 0x%02x," " page 0x%02x)\n", NVMEV(NVME_ASYNC_EVENT_TYPE, cpl->cdw0), NVMEV(NVME_ASYNC_EVENT_INFO, cpl->cdw0), aer->log_page_id); - - if (is_log_page_id_valid(aer->log_page_id)) { - aer->log_page_size = nvme_ctrlr_get_log_page_size(aer->ctrlr, - aer->log_page_id); - memcpy(&aer->cpl, cpl, sizeof(*cpl)); - nvme_ctrlr_cmd_get_log_page(aer->ctrlr, aer->log_page_id, - NVME_GLOBAL_NAMESPACE_TAG, aer->log_page_buffer, - aer->log_page_size, nvme_ctrlr_async_event_log_page_cb, - aer); - /* Wait to notify consumers until after log page is fetched. */ - } else { - nvme_notify_async_consumers(aer->ctrlr, cpl, aer->log_page_id, - NULL, 0); - - /* - * Repost another asynchronous event request to replace the one - * that just completed. - */ - nvme_ctrlr_construct_and_submit_aer(aer->ctrlr, aer); - } + taskqueue_enqueue(aer->ctrlr->taskqueue, &aer->task); } static void @@ -819,15 +714,21 @@ nvme_ctrlr_construct_and_submit_aer(struct nvme_controller *ctrlr, { struct nvme_request *req; - aer->ctrlr = ctrlr; /* - * XXX-MJ this should be M_WAITOK but we might be in a non-sleepable - * callback context. AER completions should be handled on a dedicated - * thread. + * We're racing the reset thread, so let that process submit this again. + * XXX does this really solve that race? And is that race even possible + * since we only reset when we've no theard from the card in a long + * time. Why would we get an AER in the middle of that just before we + * kick off the reset? */ - req = nvme_allocate_request_null(M_NOWAIT, nvme_ctrlr_async_event_cb, + if (ctrlr->is_resetting) + return; + + aer->ctrlr = ctrlr; + req = nvme_allocate_request_null(M_WAITOK, nvme_ctrlr_async_event_cb, aer); aer->req = req; + aer->log_page_id = 0; /* Not a valid page */ /* * Disable timeout here, since asynchronous event requests should by @@ -1203,6 +1104,140 @@ nvme_ctrlr_reset_task(void *arg, int pending) atomic_cmpset_32(&ctrlr->is_resetting, 1, 0); } +static void +nvme_ctrlr_aer_done(void *arg, const struct nvme_completion *cpl) +{ + struct nvme_async_event_request *aer = arg; + + mtx_lock(&aer->mtx); + if (nvme_completion_is_error(cpl)) + aer->log_page_size = (uint32_t)-1; + else + aer->log_page_size = nvme_ctrlr_get_log_page_size( + aer->ctrlr, aer->log_page_id); + wakeup(aer); + mtx_unlock(&aer->mtx); +} + +static void +nvme_ctrlr_aer_task(void *arg, int pending) +{ + struct nvme_async_event_request *aer = arg; + struct nvme_controller *ctrlr = aer->ctrlr; + uint32_t len; + + /* + * We're resetting, so just punt. + */ + if (ctrlr->is_resetting) + return; + + if (!is_log_page_id_valid(aer->log_page_id)) { + /* + * Repost another asynchronous event request to replace the one + * that just completed. 
+ */ + nvme_notify_async_consumers(ctrlr, &aer->cpl, aer->log_page_id, + NULL, 0); + nvme_ctrlr_construct_and_submit_aer(ctrlr, aer); + goto out; + } + + aer->log_page_size = 0; + len = nvme_ctrlr_get_log_page_size(aer->ctrlr, aer->log_page_id); + nvme_ctrlr_cmd_get_log_page(aer->ctrlr, aer->log_page_id, + NVME_GLOBAL_NAMESPACE_TAG, aer->log_page_buffer, len, + nvme_ctrlr_aer_done, aer); + mtx_lock(&aer->mtx); + while (aer->log_page_size == 0) + mtx_sleep(aer, &aer->mtx, PRIBIO, "nvme_pt", 0); + mtx_unlock(&aer->mtx); + + if (aer->log_page_size != (uint32_t)-1) { + /* + * If the log page fetch for some reason completed with an + * error, don't pass log page data to the consumers. In + * practice, this case should never happen. + */ + nvme_notify_async_consumers(aer->ctrlr, &aer->cpl, + aer->log_page_id, NULL, 0); + goto out; + } + + /* Convert data to host endian */ + switch (aer->log_page_id) { + case NVME_LOG_ERROR: { + struct nvme_error_information_entry *err = + (struct nvme_error_information_entry *)aer->log_page_buffer; + for (int i = 0; i < (aer->ctrlr->cdata.elpe + 1); i++) + nvme_error_information_entry_swapbytes(err++); + break; + } + case NVME_LOG_HEALTH_INFORMATION: + nvme_health_information_page_swapbytes( + (struct nvme_health_information_page *)aer->log_page_buffer); + break; + case NVME_LOG_CHANGED_NAMESPACE: + nvme_ns_list_swapbytes( + (struct nvme_ns_list *)aer->log_page_buffer); + break; + case NVME_LOG_COMMAND_EFFECT: + nvme_command_effects_page_swapbytes( + (struct nvme_command_effects_page *)aer->log_page_buffer); + break; + case NVME_LOG_RES_NOTIFICATION: + nvme_res_notification_page_swapbytes( + (struct nvme_res_notification_page *)aer->log_page_buffer); + break; + case NVME_LOG_SANITIZE_STATUS: + nvme_sanitize_status_page_swapbytes( + (struct nvme_sanitize_status_page *)aer->log_page_buffer); + break; + default: + break; + } + + if (aer->log_page_id == NVME_LOG_HEALTH_INFORMATION) { + struct nvme_health_information_page *health_info = + (struct nvme_health_information_page *)aer->log_page_buffer; + + /* + * Critical warnings reported through the SMART/health log page + * are persistent, so clear the associated bits in the async + * event config so that we do not receive repeated notifications + * for the same event. + */ + nvme_ctrlr_log_critical_warnings(aer->ctrlr, + health_info->critical_warning); + aer->ctrlr->async_event_config &= + ~health_info->critical_warning; + nvme_ctrlr_cmd_set_async_event_config(aer->ctrlr, + aer->ctrlr->async_event_config, NULL, NULL); + } else if (aer->log_page_id == NVME_LOG_CHANGED_NAMESPACE) { + struct nvme_ns_list *nsl = + (struct nvme_ns_list *)aer->log_page_buffer; + for (int i = 0; i < nitems(nsl->ns) && nsl->ns[i] != 0; i++) { + if (nsl->ns[i] > NVME_MAX_NAMESPACES) + break; + nvme_notify_ns(aer->ctrlr, nsl->ns[i]); + } + } + + /* + * Pass the cpl data from the original async event completion, not the + * log page fetch. + */ + nvme_notify_async_consumers(aer->ctrlr, &aer->cpl, + aer->log_page_id, aer->log_page_buffer, aer->log_page_size); + + /* + * Repost another asynchronous event request to replace the one + * that just completed. + */ +out: + nvme_ctrlr_construct_and_submit_aer(ctrlr, aer); +} + /* * Poll all the queues enabled on the device for completion. */ @@ -1574,13 +1609,8 @@ nvme_ctrlr_construct(struct nvme_controller *ctrlr, device_t dev) /* * Create 2 threads for the taskqueue. The reset thread will block when * it detects that the controller has failed until all I/O has been - * failed up the stack. 
The fail_req task needs to be able to run in - * this case to finish the request failure for some cases. - * - * We could partially solve this race by draining the failed requeust - * queue before proceding to free the sim, though nothing would stop - * new I/O from coming in after we do that drain, but before we reach - * cam_sim_free, so this big hammer is used instead. + * failed up the stack. The second thread is used for AER events, which + * can block, but only briefly for memory and log page fetching. */ ctrlr->taskqueue = taskqueue_create("nvme_taskq", M_WAITOK, taskqueue_thread_enqueue, &ctrlr->taskqueue); @@ -1590,7 +1620,12 @@ nvme_ctrlr_construct(struct nvme_controller *ctrlr, device_t dev) ctrlr->is_initialized = false; ctrlr->notification_sent = 0; TASK_INIT(&ctrlr->reset_task, 0, nvme_ctrlr_reset_task, ctrlr); - STAILQ_INIT(&ctrlr->fail_req); + for (int i = 0; i < NVME_MAX_ASYNC_EVENTS; i++) { + struct nvme_async_event_request *aer = &ctrlr->aer[i]; + + TASK_INIT(&aer->task, 0, nvme_ctrlr_aer_task, aer); + mtx_init(&aer->mtx, "AER mutex", NULL, MTX_DEF); + } ctrlr->is_failed = false; make_dev_args_init(&md_args); @@ -1678,8 +1713,14 @@ nvme_ctrlr_destruct(struct nvme_controller *ctrlr, device_t dev) } noadminq: - if (ctrlr->taskqueue) + if (ctrlr->taskqueue) { taskqueue_free(ctrlr->taskqueue); + for (int i = 0; i < NVME_MAX_ASYNC_EVENTS; i++) { + struct nvme_async_event_request *aer = &ctrlr->aer[i]; + + mtx_destroy(&aer->mtx); + } + } if (ctrlr->tag) bus_teardown_intr(ctrlr->dev, ctrlr->res, ctrlr->tag); diff --git a/sys/dev/nvme/nvme_private.h b/sys/dev/nvme/nvme_private.h index 949e69ec9290..36f00fedc48e 100644 --- a/sys/dev/nvme/nvme_private.h +++ b/sys/dev/nvme/nvme_private.h @@ -123,6 +123,8 @@ struct nvme_request { struct nvme_async_event_request { struct nvme_controller *ctrlr; struct nvme_request *req; + struct task task; + struct mtx mtx; struct nvme_completion cpl; uint32_t log_page_id; uint32_t log_page_size; @@ -307,8 +309,6 @@ struct nvme_controller { bool isr_warned; bool is_initialized; - STAILQ_HEAD(, nvme_request) fail_req; - /* Host Memory Buffer */ int hmb_nchunks; size_t hmb_chunk; diff --git a/sys/dev/ofw/ofw_bus_subr.c b/sys/dev/ofw/ofw_bus_subr.c index 4d0479dfb957..b99d784929bc 100644 --- a/sys/dev/ofw/ofw_bus_subr.c +++ b/sys/dev/ofw/ofw_bus_subr.c @@ -634,11 +634,89 @@ ofw_bus_find_iparent(phandle_t node) return (iparent); } +static phandle_t +ofw_bus_search_iparent(phandle_t node) +{ + phandle_t iparent; + + do { + if (OF_getencprop(node, "interrupt-parent", &iparent, + sizeof(iparent)) > 0) { + node = OF_node_from_xref(iparent); + } else { + node = OF_parent(node); + } + if (node == 0) + return (0); + } while (!OF_hasprop(node, "#interrupt-cells")); + + return (OF_xref_from_node(node)); +} + +static int +ofw_bus_traverse_imap(phandle_t inode, phandle_t node, uint32_t *intr, + int intrsz, pcell_t *res, int ressz, phandle_t *iparentp) +{ + struct ofw_bus_iinfo ii; + void *reg; + uint32_t *intrp; + phandle_t iparent; + int rv = 0; + + /* We already have an interrupt controller */ + if (OF_hasprop(node, "interrupt-controller")) + return (0); + + intrp = malloc(intrsz, M_OFWPROP, M_WAITOK); + memcpy(intrp, intr, intrsz); + + while (true) { + /* There is no interrupt-map to follow */ + if (!OF_hasprop(inode, "interrupt-map")) { + free(intrp, M_OFWPROP); + return (0); + } + + memset(&ii, 0, sizeof(ii)); + ofw_bus_setup_iinfo(inode, &ii, sizeof(cell_t)); + + reg = NULL; + if (ii.opi_addrc > 0) + reg = malloc(ii.opi_addrc, M_OFWPROP, M_WAITOK); + + rv = 
ofw_bus_lookup_imap(node, &ii, reg, ii.opi_addrc, intrp, + intrsz, res, ressz, &iparent); + + free(reg, M_OFWPROP); + free(ii.opi_imap, M_OFWPROP); + free(ii.opi_imapmsk, M_OFWPROP); + free(intrp, M_OFWPROP); + + if (rv == 0) + return (0); + + node = inode; + inode = OF_node_from_xref(iparent); + + /* Stop when we have an interrupt controller */ + if (OF_hasprop(inode, "interrupt-controller")) { + *iparentp = iparent; + return (rv); + } + + intrsz = rv * sizeof(pcell_t); + intrp = malloc(intrsz, M_OFWPROP, M_WAITOK); + memcpy(intrp, res, intrsz); + } +} + int ofw_bus_intr_to_rl(device_t dev, phandle_t node, struct resource_list *rl, int *rlen) { - phandle_t iparent; + phandle_t iparent, iparent_node; + uint32_t result[16]; + uint32_t intrpcells, *intrp; uint32_t icells, *intr; int err, i, irqnum, nintr, rid; bool extended; @@ -646,15 +724,16 @@ ofw_bus_intr_to_rl(device_t dev, phandle_t node, nintr = OF_getencprop_alloc_multi(node, "interrupts", sizeof(*intr), (void **)&intr); if (nintr > 0) { - iparent = ofw_bus_find_iparent(node); + iparent = ofw_bus_search_iparent(node); if (iparent == 0) { device_printf(dev, "No interrupt-parent found, " "assuming direct parent\n"); iparent = OF_parent(node); iparent = OF_xref_from_node(iparent); } - if (OF_searchencprop(OF_node_from_xref(iparent), - "#interrupt-cells", &icells, sizeof(icells)) == -1) { + iparent_node = OF_node_from_xref(iparent); + if (OF_searchencprop(iparent_node, "#interrupt-cells", &icells, + sizeof(icells)) == -1) { device_printf(dev, "Missing #interrupt-cells " "property, assuming <1>\n"); icells = 1; @@ -677,7 +756,8 @@ ofw_bus_intr_to_rl(device_t dev, phandle_t node, for (i = 0; i < nintr; i += icells) { if (extended) { iparent = intr[i++]; - if (OF_searchencprop(OF_node_from_xref(iparent), + iparent_node = OF_node_from_xref(iparent); + if (OF_searchencprop(iparent_node, "#interrupt-cells", &icells, sizeof(icells)) == -1) { device_printf(dev, "Missing #interrupt-cells " "property\n"); @@ -691,7 +771,16 @@ ofw_bus_intr_to_rl(device_t dev, phandle_t node, break; } } - irqnum = ofw_bus_map_intr(dev, iparent, icells, &intr[i]); + + intrp = &intr[i]; + intrpcells = ofw_bus_traverse_imap(iparent_node, node, intrp, + icells * sizeof(intr[0]), result, sizeof(result), &iparent); + if (intrpcells > 0) + intrp = result; + else + intrpcells = icells; + + irqnum = ofw_bus_map_intr(dev, iparent, intrpcells, intrp); resource_list_add(rl, SYS_RES_IRQ, rid++, irqnum, irqnum, 1); } if (rlen != NULL) diff --git a/sys/dev/qlnx/qlnxe/qlnx_os.c b/sys/dev/qlnx/qlnxe/qlnx_os.c index 9d23d5df1d2b..4ad190374f87 100644 --- a/sys/dev/qlnx/qlnxe/qlnx_os.c +++ b/sys/dev/qlnx/qlnxe/qlnx_os.c @@ -2308,8 +2308,6 @@ qlnx_init_ifnet(device_t dev, qlnx_host_t *ha) else if (device_id == QLOGIC_PCI_DEVICE_ID_1644) if_setbaudrate(ifp, IF_Gbps(100)); - if_setcapabilities(ifp, IFCAP_LINKSTATE); - if_setinitfn(ifp, qlnx_init); if_setsoftc(ifp, ha); if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST); @@ -2343,7 +2341,6 @@ qlnx_init_ifnet(device_t dev, qlnx_host_t *ha) if_setcapabilities(ifp, IFCAP_HWCSUM); if_setcapabilitiesbit(ifp, IFCAP_JUMBO_MTU, 0); - if_setcapabilitiesbit(ifp, IFCAP_VLAN_MTU, 0); if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWTAGGING, 0); if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWFILTER, 0); @@ -2352,6 +2349,8 @@ qlnx_init_ifnet(device_t dev, qlnx_host_t *ha) if_setcapabilitiesbit(ifp, IFCAP_TSO4, 0); if_setcapabilitiesbit(ifp, IFCAP_TSO6, 0); if_setcapabilitiesbit(ifp, IFCAP_LRO, 0); + if_setcapabilitiesbit(ifp, IFCAP_LINKSTATE, 0); + 
if_setcapabilitiesbit(ifp, IFCAP_HWSTATS, 0); if_sethwtsomax(ifp, QLNX_MAX_TSO_FRAME_SIZE - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN)); diff --git a/sys/dev/random/fortuna.c b/sys/dev/random/fortuna.c index c4282c723a44..8363de99a60a 100644 --- a/sys/dev/random/fortuna.c +++ b/sys/dev/random/fortuna.c @@ -341,6 +341,13 @@ random_fortuna_process_event(struct harvest_event *event) u_int pl; RANDOM_RESEED_LOCK(); + /* + * Run SP 800-90B health tests on the source if so configured. + */ + if (!random_harvest_healthtest(event)) { + RANDOM_RESEED_UNLOCK(); + return; + } /*- * FS&K - P_i = P_i|<harvested stuff> * Accumulate the event into the appropriate pool diff --git a/sys/dev/random/random_harvestq.c b/sys/dev/random/random_harvestq.c index 395310b115fb..c7762967c4fb 100644 --- a/sys/dev/random/random_harvestq.c +++ b/sys/dev/random/random_harvestq.c @@ -88,6 +88,8 @@ static void random_sources_feed(void); static __read_mostly bool epoch_inited; static __read_mostly epoch_t rs_epoch; +static const char *random_source_descr[ENTROPYSOURCE]; + /* * How many events to queue up. We create this many items in * an 'empty' queue, then transfer them to the 'harvest' queue with @@ -299,6 +301,230 @@ random_sources_feed(void) explicit_bzero(entropy, sizeof(entropy)); } +/* + * State used for conducting NIST SP 800-90B health tests on entropy sources. + */ +static struct health_test_softc { + uint32_t ht_rct_value[HARVESTSIZE + 1]; + u_int ht_rct_count; /* number of samples with the same value */ + u_int ht_rct_limit; /* constant after init */ + + uint32_t ht_apt_value[HARVESTSIZE + 1]; + u_int ht_apt_count; /* number of samples with the same value */ + u_int ht_apt_seq; /* sequence number of the last sample */ + u_int ht_apt_cutoff; /* constant after init */ + + uint64_t ht_total_samples; + bool ondemand; /* Set to true to restart the state machine */ + enum { + INIT = 0, /* initial state */ + DISABLED, /* health checking is disabled */ + STARTUP, /* doing startup tests, samples are discarded */ + STEADY, /* steady-state operation */ + FAILED, /* health check failed, discard samples */ + } ht_state; +} healthtest[ENTROPYSOURCE]; + +#define RANDOM_SELFTEST_STARTUP_SAMPLES 1024 /* 4.3, requirement 4 */ +#define RANDOM_SELFTEST_APT_WINDOW 512 /* 4.4.2 */ + +static void +copy_event(uint32_t dst[static HARVESTSIZE + 1], + const struct harvest_event *event) +{ + memset(dst, 0, sizeof(uint32_t) * (HARVESTSIZE + 1)); + memcpy(dst, event->he_entropy, event->he_size); + dst[HARVESTSIZE] = event->he_somecounter; +} + +static void +random_healthtest_rct_init(struct health_test_softc *ht, + const struct harvest_event *event) +{ + ht->ht_rct_count = 1; + copy_event(ht->ht_rct_value, event); +} + +/* + * Apply the repitition count test to a sample. + * + * Return false if the test failed, i.e., we observed >= C consecutive samples + * with the same value, and true otherwise. 
+ */ +static bool +random_healthtest_rct_next(struct health_test_softc *ht, + const struct harvest_event *event) +{ + uint32_t val[HARVESTSIZE + 1]; + + copy_event(val, event); + if (memcmp(val, ht->ht_rct_value, sizeof(ht->ht_rct_value)) != 0) { + ht->ht_rct_count = 1; + memcpy(ht->ht_rct_value, val, sizeof(ht->ht_rct_value)); + return (true); + } else { + ht->ht_rct_count++; + return (ht->ht_rct_count < ht->ht_rct_limit); + } +} + +static void +random_healthtest_apt_init(struct health_test_softc *ht, + const struct harvest_event *event) +{ + ht->ht_apt_count = 1; + ht->ht_apt_seq = 1; + copy_event(ht->ht_apt_value, event); +} + +static bool +random_healthtest_apt_next(struct health_test_softc *ht, + const struct harvest_event *event) +{ + uint32_t val[HARVESTSIZE + 1]; + + if (ht->ht_apt_seq == 0) { + random_healthtest_apt_init(ht, event); + return (true); + } + + copy_event(val, event); + if (memcmp(val, ht->ht_apt_value, sizeof(ht->ht_apt_value)) == 0) { + ht->ht_apt_count++; + if (ht->ht_apt_count >= ht->ht_apt_cutoff) + return (false); + } + + ht->ht_apt_seq++; + if (ht->ht_apt_seq == RANDOM_SELFTEST_APT_WINDOW) + ht->ht_apt_seq = 0; + + return (true); +} + +/* + * Run the health tests for the given event. This is assumed to be called from + * a serialized context. + */ +bool +random_harvest_healthtest(const struct harvest_event *event) +{ + struct health_test_softc *ht; + + ht = &healthtest[event->he_source]; + + /* + * Was on-demand testing requested? Restart the state machine if so, + * restarting the startup tests. + */ + if (atomic_load_bool(&ht->ondemand)) { + atomic_store_bool(&ht->ondemand, false); + ht->ht_state = INIT; + } + + switch (ht->ht_state) { + case __predict_false(INIT): + /* Store the first sample and initialize test state. */ + random_healthtest_rct_init(ht, event); + random_healthtest_apt_init(ht, event); + ht->ht_total_samples = 0; + ht->ht_state = STARTUP; + return (false); + case DISABLED: + /* No health testing for this source. */ + return (true); + case STEADY: + case STARTUP: + ht->ht_total_samples++; + if (random_healthtest_rct_next(ht, event) && + random_healthtest_apt_next(ht, event)) { + if (ht->ht_state == STARTUP && + ht->ht_total_samples >= + RANDOM_SELFTEST_STARTUP_SAMPLES) { + printf( + "random: health test passed for source %s\n", + random_source_descr[event->he_source]); + ht->ht_state = STEADY; + } + return (ht->ht_state == STEADY); + } + ht->ht_state = FAILED; + printf( + "random: health test failed for source %s, discarding samples\n", + random_source_descr[event->he_source]); + /* FALLTHROUGH */ + case FAILED: + return (false); + } +} + +static bool nist_healthtest_enabled = false; +SYSCTL_BOOL(_kern_random, OID_AUTO, nist_healthtest_enabled, + CTLFLAG_RDTUN, &nist_healthtest_enabled, 0, + "Enable NIST SP 800-90B health tests for noise sources"); + +static void +random_healthtest_init(enum random_entropy_source source) +{ + struct health_test_softc *ht; + + ht = &healthtest[source]; + KASSERT(ht->ht_state == INIT, + ("%s: health test state is %d for source %d", + __func__, ht->ht_state, source)); + + /* + * If health-testing is enabled, validate all sources except CACHED and + * VMGENID: they are deterministic sources used only a small, fixed + * number of times, so statistical testing is not applicable. 
+ */ + if (!nist_healthtest_enabled || + source == RANDOM_CACHED || source == RANDOM_PURE_VMGENID) { + ht->ht_state = DISABLED; + return; + } + + /* + * Set cutoff values for the two tests, assuming that each sample has + * min-entropy of 1 bit and allowing for an error rate of 1 in 2^{34}. + * With a sample rate of RANDOM_KTHREAD_HZ, we expect to see an false + * positive once in ~54.5 years. + * + * The RCT limit comes from the formula in section 4.4.1. + * + * The APT cutoff is calculated using the formula in section 4.4.2 + * footnote 10 with the window size changed from 512 to 511, since the + * test as written counts the number of samples equal to the first + * sample in the window, and thus tests W-1 samples. + */ + ht->ht_rct_limit = 35; + ht->ht_apt_cutoff = 330; +} + +static int +random_healthtest_ondemand(SYSCTL_HANDLER_ARGS) +{ + u_int mask, source; + int error; + + mask = 0; + error = sysctl_handle_int(oidp, &mask, 0, req); + if (error != 0 || req->newptr == NULL) + return (error); + + while (mask != 0) { + source = ffs(mask) - 1; + if (source < nitems(healthtest)) + atomic_store_bool(&healthtest[source].ondemand, true); + mask &= ~(1u << source); + } + return (0); +} +SYSCTL_PROC(_kern_random, OID_AUTO, nist_healthtest_ondemand, + CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, 0, + random_healthtest_ondemand, "I", + "Re-run NIST SP 800-90B startup health tests for a noise source"); + static int random_check_uint_harvestmask(SYSCTL_HANDLER_ARGS) { @@ -362,7 +588,8 @@ static const char *random_source_descr[ENTROPYSOURCE] = { [RANDOM_SWI] = "SWI", [RANDOM_FS_ATIME] = "FS_ATIME", [RANDOM_UMA] = "UMA", - [RANDOM_CALLOUT] = "CALLOUT", /* ENVIRONMENTAL_END */ + [RANDOM_CALLOUT] = "CALLOUT", + [RANDOM_RANDOMDEV] = "RANDOMDEV", /* ENVIRONMENTAL_END */ [RANDOM_PURE_OCTEON] = "PURE_OCTEON", /* PURE_START */ [RANDOM_PURE_SAFE] = "PURE_SAFE", [RANDOM_PURE_GLXSB] = "PURE_GLXSB", @@ -424,6 +651,9 @@ random_harvestq_init(void *unused __unused) hc_source_mask = almost_everything_mask; RANDOM_HARVEST_INIT_LOCK(); harvest_context.hc_active_buf = 0; + + for (int i = 0; i < ENTROPYSOURCE; i++) + random_healthtest_init(i); } SYSINIT(random_device_h_init, SI_SUB_RANDOM, SI_ORDER_THIRD, random_harvestq_init, NULL); diff --git a/sys/dev/random/random_harvestq.h b/sys/dev/random/random_harvestq.h index 7804bf52aa4f..1d462500df85 100644 --- a/sys/dev/random/random_harvestq.h +++ b/sys/dev/random/random_harvestq.h @@ -49,4 +49,6 @@ random_get_cyclecount(void) return ((uint32_t)get_cyclecount()); } +bool random_harvest_healthtest(const struct harvest_event *event); + #endif /* SYS_DEV_RANDOM_RANDOM_HARVESTQ_H_INCLUDED */ diff --git a/sys/dev/random/randomdev.c b/sys/dev/random/randomdev.c index 9d1c7b1167c8..ced4dd8067d9 100644 --- a/sys/dev/random/randomdev.c +++ b/sys/dev/random/randomdev.c @@ -312,7 +312,7 @@ randomdev_accumulate(uint8_t *buf, u_int count) for (i = 0; i < RANDOM_KEYSIZE_WORDS; i += sizeof(event.he_entropy)/sizeof(event.he_entropy[0])) { event.he_somecounter = random_get_cyclecount(); event.he_size = sizeof(event.he_entropy); - event.he_source = RANDOM_CACHED; + event.he_source = RANDOM_RANDOMDEV; event.he_destination = destination++; /* Harmless cheating */ memcpy(event.he_entropy, entropy_data + i, sizeof(event.he_entropy)); p_random_alg_context->ra_event_processor(&event); diff --git a/sys/dev/vmm/vmm_dev.c b/sys/dev/vmm/vmm_dev.c index 819debadd1ac..9f2b009d02ec 100644 --- a/sys/dev/vmm/vmm_dev.c +++ b/sys/dev/vmm/vmm_dev.c @@ -30,7 +30,8 @@ #include <dev/vmm/vmm_mem.h> #include 
<dev/vmm/vmm_stat.h> -#if defined(__amd64__) && defined(COMPAT_FREEBSD12) +#ifdef __amd64__ +#ifdef COMPAT_FREEBSD12 struct vm_memseg_12 { int segid; size_t len; @@ -42,7 +43,22 @@ _Static_assert(sizeof(struct vm_memseg_12) == 80, "COMPAT_FREEBSD12 ABI"); _IOW('v', IOCNUM_ALLOC_MEMSEG, struct vm_memseg_12) #define VM_GET_MEMSEG_12 \ _IOWR('v', IOCNUM_GET_MEMSEG, struct vm_memseg_12) -#endif +#endif /* COMPAT_FREEBSD12 */ +#ifdef COMPAT_FREEBSD14 +struct vm_memseg_14 { + int segid; + size_t len; + char name[VM_MAX_SUFFIXLEN + 1]; +}; +_Static_assert(sizeof(struct vm_memseg_14) == (VM_MAX_SUFFIXLEN + 1 + 16), + "COMPAT_FREEBSD14 ABI"); + +#define VM_ALLOC_MEMSEG_14 \ + _IOW('v', IOCNUM_ALLOC_MEMSEG, struct vm_memseg_14) +#define VM_GET_MEMSEG_14 \ + _IOWR('v', IOCNUM_GET_MEMSEG, struct vm_memseg_14) +#endif /* COMPAT_FREEBSD14 */ +#endif /* __amd64__ */ struct devmem_softc { int segid; @@ -257,7 +273,8 @@ get_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg, size_t len) } static int -alloc_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg, size_t len) +alloc_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg, size_t len, + struct domainset *domainset) { char *name; int error; @@ -278,8 +295,7 @@ alloc_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg, size_t len) if (error) goto done; } - - error = vm_alloc_memseg(sc->vm, mseg->segid, mseg->len, sysmem); + error = vm_alloc_memseg(sc->vm, mseg->segid, mseg->len, sysmem, domainset); if (error) goto done; @@ -295,6 +311,20 @@ done: return (error); } +#if defined(__amd64__) && \ + (defined(COMPAT_FREEBSD14) || defined(COMPAT_FREEBSD12)) +/* + * Translate pre-15.0 memory segment identifiers into their 15.0 counterparts. + */ +static void +adjust_segid(struct vm_memseg *mseg) +{ + if (mseg->segid != VM_SYSMEM) { + mseg->segid += (VM_BOOTROM - 1); + } +} +#endif + static int vm_get_register_set(struct vcpu *vcpu, unsigned int count, int *regnum, uint64_t *regval) @@ -353,10 +383,16 @@ static const struct vmmdev_ioctl vmmdev_ioctls[] = { VMMDEV_IOCTL(VM_STATS, VMMDEV_IOCTL_LOCK_ONE_VCPU), VMMDEV_IOCTL(VM_STAT_DESC, 0), -#if defined(__amd64__) && defined(COMPAT_FREEBSD12) +#ifdef __amd64__ +#ifdef COMPAT_FREEBSD12 VMMDEV_IOCTL(VM_ALLOC_MEMSEG_12, VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS), #endif +#ifdef COMPAT_FREEBSD14 + VMMDEV_IOCTL(VM_ALLOC_MEMSEG_14, + VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS), +#endif +#endif /* __amd64__ */ VMMDEV_IOCTL(VM_ALLOC_MEMSEG, VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS), VMMDEV_IOCTL(VM_MMAP_MEMSEG, @@ -366,9 +402,14 @@ static const struct vmmdev_ioctl vmmdev_ioctls[] = { VMMDEV_IOCTL(VM_REINIT, VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS), -#if defined(__amd64__) && defined(COMPAT_FREEBSD12) +#ifdef __amd64__ +#if defined(COMPAT_FREEBSD12) VMMDEV_IOCTL(VM_GET_MEMSEG_12, VMMDEV_IOCTL_SLOCK_MEMSEGS), #endif +#ifdef COMPAT_FREEBSD14 + VMMDEV_IOCTL(VM_GET_MEMSEG_14, VMMDEV_IOCTL_SLOCK_MEMSEGS), +#endif +#endif /* __amd64__ */ VMMDEV_IOCTL(VM_GET_MEMSEG, VMMDEV_IOCTL_SLOCK_MEMSEGS), VMMDEV_IOCTL(VM_MMAP_GETNEXT, VMMDEV_IOCTL_SLOCK_MEMSEGS), @@ -388,6 +429,7 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag, struct vmmdev_softc *sc; struct vcpu *vcpu; const struct vmmdev_ioctl *ioctl; + struct vm_memseg *mseg; int error, vcpuid; sc = vmmdev_lookup2(cdev); @@ -499,20 +541,77 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag, error = vm_munmap_memseg(sc->vm, mu->gpa, mu->len); break; } -#if 
@@ -353,10 +383,16 @@ static const struct vmmdev_ioctl vmmdev_ioctls[] = {
 	VMMDEV_IOCTL(VM_STATS, VMMDEV_IOCTL_LOCK_ONE_VCPU),
 	VMMDEV_IOCTL(VM_STAT_DESC, 0),
 
-#if defined(__amd64__) && defined(COMPAT_FREEBSD12)
+#ifdef __amd64__
+#ifdef COMPAT_FREEBSD12
 	VMMDEV_IOCTL(VM_ALLOC_MEMSEG_12,
 	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
 #endif
+#ifdef COMPAT_FREEBSD14
+	VMMDEV_IOCTL(VM_ALLOC_MEMSEG_14,
+	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
+#endif
+#endif /* __amd64__ */
 	VMMDEV_IOCTL(VM_ALLOC_MEMSEG,
 	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
 	VMMDEV_IOCTL(VM_MMAP_MEMSEG,
@@ -366,9 +402,14 @@ static const struct vmmdev_ioctl vmmdev_ioctls[] = {
 	VMMDEV_IOCTL(VM_REINIT,
 	    VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
 
-#if defined(__amd64__) && defined(COMPAT_FREEBSD12)
+#ifdef __amd64__
+#if defined(COMPAT_FREEBSD12)
 	VMMDEV_IOCTL(VM_GET_MEMSEG_12, VMMDEV_IOCTL_SLOCK_MEMSEGS),
 #endif
+#ifdef COMPAT_FREEBSD14
+	VMMDEV_IOCTL(VM_GET_MEMSEG_14, VMMDEV_IOCTL_SLOCK_MEMSEGS),
+#endif
+#endif /* __amd64__ */
 	VMMDEV_IOCTL(VM_GET_MEMSEG, VMMDEV_IOCTL_SLOCK_MEMSEGS),
 	VMMDEV_IOCTL(VM_MMAP_GETNEXT, VMMDEV_IOCTL_SLOCK_MEMSEGS),
@@ -388,6 +429,7 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
 	struct vmmdev_softc *sc;
 	struct vcpu *vcpu;
 	const struct vmmdev_ioctl *ioctl;
+	struct vm_memseg *mseg;
 	int error, vcpuid;
 
 	sc = vmmdev_lookup2(cdev);
@@ -499,20 +541,77 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
 		error = vm_munmap_memseg(sc->vm, mu->gpa, mu->len);
 		break;
 	}
-#if defined(__amd64__) && defined(COMPAT_FREEBSD12)
+#ifdef __amd64__
+#ifdef COMPAT_FREEBSD12
 	case VM_ALLOC_MEMSEG_12:
-		error = alloc_memseg(sc, (struct vm_memseg *)data,
-		    sizeof(((struct vm_memseg_12 *)0)->name));
+		mseg = (struct vm_memseg *)data;
+
+		adjust_segid(mseg);
+		error = alloc_memseg(sc, mseg,
+		    sizeof(((struct vm_memseg_12 *)0)->name), NULL);
 		break;
 	case VM_GET_MEMSEG_12:
-		error = get_memseg(sc, (struct vm_memseg *)data,
+		mseg = (struct vm_memseg *)data;
+
+		adjust_segid(mseg);
+		error = get_memseg(sc, mseg,
 		    sizeof(((struct vm_memseg_12 *)0)->name));
 		break;
-#endif
-	case VM_ALLOC_MEMSEG:
-		error = alloc_memseg(sc, (struct vm_memseg *)data,
-		    sizeof(((struct vm_memseg *)0)->name));
+#endif /* COMPAT_FREEBSD12 */
+#ifdef COMPAT_FREEBSD14
+	case VM_ALLOC_MEMSEG_14:
+		mseg = (struct vm_memseg *)data;
+
+		adjust_segid(mseg);
+		error = alloc_memseg(sc, mseg,
+		    sizeof(((struct vm_memseg_14 *)0)->name), NULL);
+		break;
+	case VM_GET_MEMSEG_14:
+		mseg = (struct vm_memseg *)data;
+
+		adjust_segid(mseg);
+		error = get_memseg(sc, mseg,
+		    sizeof(((struct vm_memseg_14 *)0)->name));
+		break;
+#endif /* COMPAT_FREEBSD14 */
+#endif /* __amd64__ */
+	case VM_ALLOC_MEMSEG: {
+		domainset_t *mask;
+		struct domainset *domainset, domain;
+
+		domainset = NULL;
+		mseg = (struct vm_memseg *)data;
+		if (mseg->ds_policy != DOMAINSET_POLICY_INVALID && mseg->ds_mask != NULL) {
+			if (mseg->ds_mask_size < sizeof(domainset_t) ||
+			    mseg->ds_mask_size > DOMAINSET_MAXSIZE / NBBY) {
+				error = ERANGE;
+				break;
+			}
+			memset(&domain, 0, sizeof(domain));
+			mask = malloc(mseg->ds_mask_size, M_VMMDEV, M_WAITOK);
+			error = copyin(mseg->ds_mask, mask, mseg->ds_mask_size);
+			if (error) {
+				free(mask, M_VMMDEV);
+				break;
+			}
+			error = domainset_populate(&domain, mask, mseg->ds_policy,
+			    mseg->ds_mask_size);
+			if (error) {
+				free(mask, M_VMMDEV);
+				break;
+			}
+			domainset = domainset_create(&domain);
+			if (domainset == NULL) {
+				error = EINVAL;
+				free(mask, M_VMMDEV);
+				break;
+			}
+			free(mask, M_VMMDEV);
+		}
+		error = alloc_memseg(sc, mseg, sizeof(mseg->name), domainset);
+		break;
+	}
 	case VM_GET_MEMSEG:
 		error = get_memseg(sc, (struct vm_memseg *)data,
 		    sizeof(((struct vm_memseg *)0)->name));
@@ -820,7 +919,6 @@ sysctl_vmm_destroy(SYSCTL_HANDLER_ARGS)
 
 	buflen = VM_MAX_NAMELEN + 1;
 	buf = malloc(buflen, M_VMMDEV, M_WAITOK | M_ZERO);
-	strlcpy(buf, "beavis", buflen);
 	error = sysctl_handle_string(oidp, buf, buflen, req);
 	if (error == 0 && req->newptr != NULL)
 		error = vmmdev_lookup_and_destroy(buf, req->td->td_ucred);
@@ -830,7 +928,7 @@
 SYSCTL_PROC(_hw_vmm, OID_AUTO, destroy, CTLTYPE_STRING | CTLFLAG_RW |
     CTLFLAG_PRISON | CTLFLAG_MPSAFE, NULL, 0, sysctl_vmm_destroy, "A",
-    NULL);
+    "Destroy a vmm(4) instance (legacy interface)");
 
 static struct cdevsw vmmdevsw = {
 	.d_name = "vmmdev",
@@ -909,7 +1007,6 @@ sysctl_vmm_create(SYSCTL_HANDLER_ARGS)
 
 	buflen = VM_MAX_NAMELEN + 1;
 	buf = malloc(buflen, M_VMMDEV, M_WAITOK | M_ZERO);
-	strlcpy(buf, "beavis", buflen);
 	error = sysctl_handle_string(oidp, buf, buflen, req);
 	if (error == 0 && req->newptr != NULL)
 		error = vmmdev_create(buf, req->td->td_ucred);
@@ -919,7 +1016,7 @@
 SYSCTL_PROC(_hw_vmm, OID_AUTO, create, CTLTYPE_STRING | CTLFLAG_RW |
     CTLFLAG_PRISON | CTLFLAG_MPSAFE, NULL, 0, sysctl_vmm_create, "A",
-    NULL);
+    "Create a vmm(4) instance (legacy interface)");
 
 static int
 vmmctl_open(struct cdev *cdev, int flags, int fmt, struct thread *td)
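On the new VM_ALLOC_MEMSEG path, userspace can attach a NUMA allocation policy to a segment before the kernel builds the backing VM object. A hypothetical caller-side sketch, assuming struct vm_memseg exposes the ds_policy, ds_mask and ds_mask_size fields consumed by the handler above, and that <machine/vmm_dev.h> provides the ioctl definitions:

/*
 * Hypothetical userspace sketch: back a guest memory segment with
 * pages from host NUMA domain 0.  Field names follow the handler
 * above; the headers and struct layout are assumptions.
 */
#include <sys/ioctl.h>
#include <sys/domainset.h>
#include <machine/vmm.h>
#include <machine/vmm_dev.h>
#include <string.h>

static int
alloc_sysmem_on_domain0(int vmfd, size_t len)
{
	struct vm_memseg mseg;
	domainset_t mask;

	memset(&mseg, 0, sizeof(mseg));
	mseg.segid = VM_SYSMEM;		/* first guest NUMA domain */
	mseg.len = len;

	DOMAINSET_ZERO(&mask);
	DOMAINSET_SET(0, &mask);	/* host domain 0 only */
	mseg.ds_policy = DOMAINSET_POLICY_PREFER;
	mseg.ds_mask = &mask;
	mseg.ds_mask_size = sizeof(mask);

	return (ioctl(vmfd, VM_ALLOC_MEMSEG, &mseg));
}

DOMAINSET_POLICY_PREFER with a single-bit mask asks for pages from host domain 0 when possible; vm_alloc_memseg() in the next file rejects a policy whose mask names no memory-bearing domain via the domainset_empty_vm() check.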
c61ae2d44b96..be59e37de33d 100644
--- a/sys/dev/vmm/vmm_mem.c
+++ b/sys/dev/vmm/vmm_mem.c
@@ -7,6 +7,7 @@
 
 #include <sys/types.h>
 #include <sys/lock.h>
+#include <sys/malloc.h>
 #include <sys/sx.h>
 #include <sys/systm.h>
@@ -156,10 +157,11 @@ vm_mem_allocated(struct vcpu *vcpu, vm_paddr_t gpa)
 }
 
 int
-vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem)
+vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem,
+    struct domainset *obj_domainset)
 {
-	struct vm_mem *mem;
 	struct vm_mem_seg *seg;
+	struct vm_mem *mem;
 	vm_object_t obj;
 
 	mem = vm_mem(vm);
@@ -179,13 +181,22 @@ vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem)
 		return (EINVAL);
 	}
 
+	/*
+	 * When given an impossible policy, signal an
+	 * error to the user.
+	 */
+	if (obj_domainset != NULL && domainset_empty_vm(obj_domainset))
+		return (EINVAL);
 	obj = vm_object_allocate(OBJT_SWAP, len >> PAGE_SHIFT);
 	if (obj == NULL)
 		return (ENOMEM);
 
 	seg->len = len;
 	seg->object = obj;
+	if (obj_domainset != NULL)
+		seg->object->domain.dr_policy = obj_domainset;
 	seg->sysmem = sysmem;
+
 	return (0);
 }
diff --git a/sys/dev/vmm/vmm_mem.h b/sys/dev/vmm/vmm_mem.h
index a4be4c1c57aa..856470cf2590 100644
--- a/sys/dev/vmm/vmm_mem.h
+++ b/sys/dev/vmm/vmm_mem.h
@@ -8,6 +8,27 @@
 #ifndef _DEV_VMM_MEM_H_
 #define	_DEV_VMM_MEM_H_
 
+/* Maximum number of NUMA domains in a guest. */
+#define	VM_MAXMEMDOM	8
+#define	VM_MAXSYSMEM	VM_MAXMEMDOM
+
+/*
+ * Identifiers for memory segments.
+ * Each guest NUMA domain is represented by a single system
+ * memory segment from [VM_SYSMEM, VM_MAXSYSMEM).
+ * The remaining identifiers can be used to create devmem segments.
+ */
+enum {
+	VM_SYSMEM = 0,
+	VM_BOOTROM = VM_MAXSYSMEM,
+	VM_FRAMEBUFFER,
+	VM_PCIROM,
+	VM_MEMSEG_END
+};
+
+#define	VM_MAX_MEMSEGS	VM_MEMSEG_END
+#define	VM_MAX_MEMMAPS	(VM_MAX_MEMSEGS * 2)
+
 #ifdef _KERNEL
 
 #include <sys/types.h>
@@ -31,9 +52,6 @@ struct vm_mem_map {
 	int	flags;
 };
 
-#define	VM_MAX_MEMSEGS	4
-#define	VM_MAX_MEMMAPS	8
-
 struct vm_mem {
 	struct vm_mem_map	mem_maps[VM_MAX_MEMMAPS];
 	struct vm_mem_seg	mem_segs[VM_MAX_MEMSEGS];
@@ -55,7 +73,8 @@ void	vm_assert_memseg_xlocked(struct vm *vm);
 int	vm_mmap_memseg(struct vm *vm, vm_paddr_t gpa, int segid, vm_ooffset_t off,
 	    size_t len, int prot, int flags);
 int	vm_munmap_memseg(struct vm *vm, vm_paddr_t gpa, size_t len);
-int	vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem);
+int	vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem,
+	    struct domainset *obj_domainset);
 void	vm_free_memseg(struct vm *vm, int ident);
 
 /*
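The enum above sizes the segment table: with VM_MAXMEMDOM = 8 guest domains the layout yields VM_MEMSEG_END = 11 segments (8 sysmem plus bootrom, framebuffer and pcirom) and VM_MAX_MEMMAPS = 22, up from the previous fixed 4 and 8. A small illustrative helper, not part of the patch, for picking the sysmem segid of a guest NUMA domain:

/* Hypothetical helper: sysmem segid for a guest NUMA domain under the
 * new layout (one segment per domain in [VM_SYSMEM, VM_MAXSYSMEM)). */
static int
sysmem_segid(int domain)
{
	if (domain < 0 || domain >= VM_MAXMEMDOM)
		return (-1);		/* no such guest domain */
	return (VM_SYSMEM + domain);
}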
diff --git a/sys/dev/vt/hw/vga/vt_vga.c b/sys/dev/vt/hw/vga/vt_vga.c
index 64039575c0ad..675c0573bd7e 100644
--- a/sys/dev/vt/hw/vga/vt_vga.c
+++ b/sys/dev/vt/hw/vga/vt_vga.c
@@ -1347,7 +1347,7 @@ vga_postswitch(struct vt_device *vd)
 	/* Reinit VGA mode, to restore view after app which change mode. */
 	vga_initialize(vd, (vd->vd_flags & VDF_TEXTMODE));
 
-	/* Ask vt(9) to update chars on visible area. */
+	/* Ask vt(4) to update chars on visible area. */
 	vd->vd_flags |= VDF_INVALID;
 }
 
diff --git a/sys/dev/vt/vt_core.c b/sys/dev/vt/vt_core.c
index b0f58b38a6f1..b51ef6766de4 100644
--- a/sys/dev/vt/vt_core.c
+++ b/sys/dev/vt/vt_core.c
@@ -125,10 +125,10 @@ static const struct terminal_class vt_termclass = {
 	(vw)->vw_number)
 
 static SYSCTL_NODE(_kern, OID_AUTO, vt, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
-    "vt(9) parameters");
+    "vt(4) parameters");
 static VT_SYSCTL_INT(enable_altgr, 1, "Enable AltGr key (Do not assume R.Alt as Alt)");
 static VT_SYSCTL_INT(enable_bell, 0, "Enable bell");
-static VT_SYSCTL_INT(debug, 0, "vt(9) debug level");
+static VT_SYSCTL_INT(debug, 0, "vt(4) debug level");
 static VT_SYSCTL_INT(deadtimer, 15, "Time to wait busy process in VT_PROCESS mode");
 static VT_SYSCTL_INT(suspendswitch, 1, "Switch to VT0 before suspend");