about summary refs log tree commit diff
path: root/sys/dev
diff options
context:
space:
mode:
Diffstat (limited to 'sys/dev')
-rw-r--r--sys/dev/amdsmu/amdsmu.c466
-rw-r--r--sys/dev/amdsmu/amdsmu.h95
-rw-r--r--sys/dev/amdsmu/amdsmu_reg.h84
-rw-r--r--sys/dev/cxgbe/tom/t4_cpl_io.c6
-rw-r--r--sys/dev/cxgbe/tom/t4_tls.c4
-rw-r--r--sys/dev/drm2/drm_fb_helper.c2
-rw-r--r--sys/dev/efidev/efirt.c42
-rw-r--r--sys/dev/iicbus/iichid.c74
-rw-r--r--sys/dev/md/md.c23
-rw-r--r--sys/dev/nvme/nvme_ctrlr.c295
-rw-r--r--sys/dev/nvme/nvme_private.h4
-rw-r--r--sys/dev/nvmf/controller/nvmft_subr.c40
-rw-r--r--sys/dev/pci/pci_iov.c25
-rw-r--r--sys/dev/pci/pci_iov_private.h2
-rw-r--r--sys/dev/qlnx/qlnxe/qlnx_os.c5
-rw-r--r--sys/dev/usb/input/usbhid.c2
-rw-r--r--sys/dev/vmm/vmm_dev.c135
-rw-r--r--sys/dev/vmm/vmm_mem.c15
-rw-r--r--sys/dev/vmm/vmm_mem.h27
-rw-r--r--sys/dev/vt/hw/vga/vt_vga.c2
-rw-r--r--sys/dev/vt/vt_core.c4
21 files changed, 1092 insertions, 260 deletions
diff --git a/sys/dev/amdsmu/amdsmu.c b/sys/dev/amdsmu/amdsmu.c
new file mode 100644
index 000000000000..416f875c6176
--- /dev/null
+++ b/sys/dev/amdsmu/amdsmu.c
@@ -0,0 +1,466 @@
+/*
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2025 The FreeBSD Foundation
+ *
+ * This software was developed by Aymeric Wibo <obiwac@freebsd.org>
+ * under sponsorship from the FreeBSD Foundation.
+ */
+
+#include <sys/param.h>
+#include <sys/bus.h>
+#include <sys/kernel.h>
+#include <sys/module.h>
+#include <sys/rman.h>
+#include <sys/sysctl.h>
+
+#include <dev/pci/pcivar.h>
+#include <dev/amdsmu/amdsmu.h>
+
+static bool
+amdsmu_match(device_t dev, const struct amdsmu_product **product_out)
+{
+ const uint16_t vendorid = pci_get_vendor(dev);
+ const uint16_t deviceid = pci_get_device(dev);
+
+ for (size_t i = 0; i < nitems(amdsmu_products); i++) {
+ const struct amdsmu_product *prod = &amdsmu_products[i];
+
+ if (vendorid == prod->amdsmu_vendorid &&
+ deviceid == prod->amdsmu_deviceid) {
+ if (product_out != NULL)
+ *product_out = prod;
+ return (true);
+ }
+ }
+ return (false);
+}
+
+static void
+amdsmu_identify(driver_t *driver, device_t parent)
+{
+ if (device_find_child(parent, "amdsmu", -1) != NULL)
+ return;
+
+ if (amdsmu_match(parent, NULL)) {
+ if (device_add_child(parent, "amdsmu", -1) == NULL)
+ device_printf(parent, "add amdsmu child failed\n");
+ }
+}
+
+static int
+amdsmu_probe(device_t dev)
+{
+ if (resource_disabled("amdsmu", 0))
+ return (ENXIO);
+ if (!amdsmu_match(device_get_parent(dev), NULL))
+ return (ENXIO);
+ device_set_descf(dev, "AMD System Management Unit");
+
+ return (BUS_PROBE_GENERIC);
+}
+
+static enum amdsmu_res
+amdsmu_wait_res(device_t dev)
+{
+ struct amdsmu_softc *sc = device_get_softc(dev);
+ enum amdsmu_res res;
+
+ /*
+ * The SMU has a response ready for us when the response register is
+ * set. Otherwise, we must wait.
+ */
+ for (size_t i = 0; i < SMU_RES_READ_MAX; i++) {
+ res = amdsmu_read4(sc, SMU_REG_RESPONSE);
+ if (res != SMU_RES_WAIT)
+ return (res);
+ pause_sbt("amdsmu", ustosbt(SMU_RES_READ_PERIOD_US), 0,
+ C_HARDCLOCK);
+ }
+ device_printf(dev, "timed out waiting for response from SMU\n");
+ return (SMU_RES_WAIT);
+}
+
+static int
+amdsmu_cmd(device_t dev, enum amdsmu_msg msg, uint32_t arg, uint32_t *ret)
+{
+ struct amdsmu_softc *sc = device_get_softc(dev);
+ enum amdsmu_res res;
+
+ /* Wait for SMU to be ready. */
+ if (amdsmu_wait_res(dev) == SMU_RES_WAIT)
+ return (ETIMEDOUT);
+
+ /* Clear previous response. */
+ amdsmu_write4(sc, SMU_REG_RESPONSE, SMU_RES_WAIT);
+
+ /* Write out command to registers. */
+ amdsmu_write4(sc, SMU_REG_MESSAGE, msg);
+ amdsmu_write4(sc, SMU_REG_ARGUMENT, arg);
+
+ /* Wait for SMU response and handle it. */
+ res = amdsmu_wait_res(dev);
+
+ switch (res) {
+ case SMU_RES_WAIT:
+ return (ETIMEDOUT);
+ case SMU_RES_OK:
+ if (ret != NULL)
+ *ret = amdsmu_read4(sc, SMU_REG_ARGUMENT);
+ return (0);
+ case SMU_RES_REJECT_BUSY:
+ device_printf(dev, "SMU is busy\n");
+ return (EBUSY);
+ case SMU_RES_REJECT_PREREQ:
+ case SMU_RES_UNKNOWN:
+ case SMU_RES_FAILED:
+ device_printf(dev, "SMU error: %02x\n", res);
+ return (EIO);
+ }
+
+ return (EINVAL);
+}
+
+static int
+amdsmu_get_vers(device_t dev)
+{
+ int err;
+ uint32_t smu_vers;
+ struct amdsmu_softc *sc = device_get_softc(dev);
+
+ err = amdsmu_cmd(dev, SMU_MSG_GETSMUVERSION, 0, &smu_vers);
+ if (err != 0) {
+ device_printf(dev, "failed to get SMU version\n");
+ return (err);
+ }
+ sc->smu_program = (smu_vers >> 24) & 0xFF;
+ sc->smu_maj = (smu_vers >> 16) & 0xFF;
+ sc->smu_min = (smu_vers >> 8) & 0xFF;
+ sc->smu_rev = smu_vers & 0xFF;
+ device_printf(dev, "SMU version: %d.%d.%d (program %d)\n",
+ sc->smu_maj, sc->smu_min, sc->smu_rev, sc->smu_program);
+
+ return (0);
+}
+
+static int
+amdsmu_get_ip_blocks(device_t dev)
+{
+ struct amdsmu_softc *sc = device_get_softc(dev);
+ const uint16_t deviceid = pci_get_device(dev);
+ int err;
+ struct amdsmu_metrics *m = &sc->metrics;
+ bool active;
+ char sysctl_descr[32];
+
+ /* Get IP block count. */
+ switch (deviceid) {
+ case PCI_DEVICEID_AMD_REMBRANDT_ROOT:
+ sc->ip_block_count = 12;
+ break;
+ case PCI_DEVICEID_AMD_PHOENIX_ROOT:
+ sc->ip_block_count = 21;
+ break;
+ /* TODO How many IP blocks does Strix Point (and the others) have? */
+ case PCI_DEVICEID_AMD_STRIX_POINT_ROOT:
+ default:
+ sc->ip_block_count = nitems(amdsmu_ip_blocks_names);
+ }
+ KASSERT(sc->ip_block_count <= nitems(amdsmu_ip_blocks_names),
+ ("too many IP blocks for array"));
+
+ /* Get and print out IP blocks. */
+ err = amdsmu_cmd(dev, SMU_MSG_GET_SUP_CONSTRAINTS, 0,
+ &sc->active_ip_blocks);
+ if (err != 0) {
+ device_printf(dev, "failed to get IP blocks\n");
+ return (err);
+ }
+ device_printf(dev, "Active IP blocks: ");
+ for (size_t i = 0; i < sc->ip_block_count; i++) {
+ active = (sc->active_ip_blocks & (1 << i)) != 0;
+ sc->ip_blocks_active[i] = active;
+ if (!active)
+ continue;
+ printf("%s%s", amdsmu_ip_blocks_names[i],
+ i + 1 < sc->ip_block_count ? " " : "\n");
+ }
+
+ /* Create a sysctl node for IP blocks. */
+ sc->ip_blocks_sysctlnode = SYSCTL_ADD_NODE(sc->sysctlctx,
+ SYSCTL_CHILDREN(sc->sysctlnode), OID_AUTO, "ip_blocks",
+ CTLFLAG_RD, NULL, "SMU metrics");
+ if (sc->ip_blocks_sysctlnode == NULL) {
+ device_printf(dev, "could not add sysctl node for IP blocks\n");
+ return (ENOMEM);
+ }
+
+ /* Create a sysctl node for each IP block. */
+ for (size_t i = 0; i < sc->ip_block_count; i++) {
+ /* Create the sysctl node itself for the IP block. */
+ snprintf(sysctl_descr, sizeof sysctl_descr,
+ "Metrics about the %s AMD IP block",
+ amdsmu_ip_blocks_names[i]);
+ sc->ip_block_sysctlnodes[i] = SYSCTL_ADD_NODE(sc->sysctlctx,
+ SYSCTL_CHILDREN(sc->ip_blocks_sysctlnode), OID_AUTO,
+ amdsmu_ip_blocks_names[i], CTLFLAG_RD, NULL, sysctl_descr);
+ if (sc->ip_block_sysctlnodes[i] == NULL) {
+ device_printf(dev,
+ "could not add sysctl node for \"%s\"\n", sysctl_descr);
+ continue;
+ }
+ /*
+ * Create sysctls for if the IP block is currently active, last
+ * active time, and total active time.
+ */
+ SYSCTL_ADD_BOOL(sc->sysctlctx,
+ SYSCTL_CHILDREN(sc->ip_block_sysctlnodes[i]), OID_AUTO,
+ "active", CTLFLAG_RD, &sc->ip_blocks_active[i], 0,
+ "IP block is currently active");
+ SYSCTL_ADD_U64(sc->sysctlctx,
+ SYSCTL_CHILDREN(sc->ip_block_sysctlnodes[i]), OID_AUTO,
+ "last_time", CTLFLAG_RD, &m->ip_block_last_active_time[i],
+ 0, "How long the IP block was active for during the last"
+ " sleep (us)");
+#ifdef IP_BLOCK_TOTAL_ACTIVE_TIME
+ SYSCTL_ADD_U64(sc->sysctlctx,
+ SYSCTL_CHILDREN(sc->ip_block_sysctlnodes[i]), OID_AUTO,
+ "total_time", CTLFLAG_RD, &m->ip_block_total_active_time[i],
+ 0, "How long the IP block was active for during sleep in"
+ " total (us)");
+#endif
+ }
+ return (0);
+}
+
+static int
+amdsmu_init_metrics(device_t dev)
+{
+ struct amdsmu_softc *sc = device_get_softc(dev);
+ int err;
+ uint32_t metrics_addr_lo, metrics_addr_hi;
+ uint64_t metrics_addr;
+
+ /* Get physical address of logging buffer. */
+ err = amdsmu_cmd(dev, SMU_MSG_LOG_GETDRAM_ADDR_LO, 0, &metrics_addr_lo);
+ if (err != 0)
+ return (err);
+ err = amdsmu_cmd(dev, SMU_MSG_LOG_GETDRAM_ADDR_HI, 0, &metrics_addr_hi);
+ if (err != 0)
+ return (err);
+ metrics_addr = ((uint64_t) metrics_addr_hi << 32) | metrics_addr_lo;
+
+ /* Map memory of logging buffer. */
+ err = bus_space_map(sc->bus_tag, metrics_addr,
+ sizeof(struct amdsmu_metrics), 0, &sc->metrics_space);
+ if (err != 0) {
+ device_printf(dev, "could not map bus space for SMU metrics\n");
+ return (err);
+ }
+
+ /* Start logging for metrics. */
+ amdsmu_cmd(dev, SMU_MSG_LOG_RESET, 0, NULL);
+ amdsmu_cmd(dev, SMU_MSG_LOG_START, 0, NULL);
+ return (0);
+}
+
+static int
+amdsmu_dump_metrics(device_t dev)
+{
+ struct amdsmu_softc *sc = device_get_softc(dev);
+ int err;
+
+ err = amdsmu_cmd(dev, SMU_MSG_LOG_DUMP_DATA, 0, NULL);
+ if (err != 0) {
+ device_printf(dev, "failed to dump metrics\n");
+ return (err);
+ }
+ bus_space_read_region_4(sc->bus_tag, sc->metrics_space, 0,
+ (uint32_t *)&sc->metrics, sizeof(sc->metrics) / sizeof(uint32_t));
+
+ return (0);
+}
+
+static void
+amdsmu_fetch_idlemask(device_t dev)
+{
+ struct amdsmu_softc *sc = device_get_softc(dev);
+
+ sc->idlemask = amdsmu_read4(sc, SMU_REG_IDLEMASK);
+}
+
+static int
+amdsmu_attach(device_t dev)
+{
+ struct amdsmu_softc *sc = device_get_softc(dev);
+ int err;
+ uint32_t physbase_addr_lo, physbase_addr_hi;
+ uint64_t physbase_addr;
+ int rid = 0;
+ struct sysctl_oid *node;
+
+ /*
+ * Find physical base address for SMU.
+ * XXX I am a little confused about the masks here. I'm just copying
+ * what Linux does in the amd-pmc driver to get the base address.
+ */
+ pci_write_config(dev, SMU_INDEX_ADDRESS, SMU_PHYSBASE_ADDR_LO, 4);
+ physbase_addr_lo = pci_read_config(dev, SMU_INDEX_DATA, 4) & 0xFFF00000;
+
+ pci_write_config(dev, SMU_INDEX_ADDRESS, SMU_PHYSBASE_ADDR_HI, 4);
+ physbase_addr_hi = pci_read_config(dev, SMU_INDEX_DATA, 4) & 0x0000FFFF;
+
+ physbase_addr = (uint64_t)physbase_addr_hi << 32 | physbase_addr_lo;
+
+ /* Map memory for SMU and its registers. */
+ sc->res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE);
+ if (sc->res == NULL) {
+ device_printf(dev, "could not allocate resource\n");
+ return (ENXIO);
+ }
+
+ sc->bus_tag = rman_get_bustag(sc->res);
+
+ if (bus_space_map(sc->bus_tag, physbase_addr,
+ SMU_MEM_SIZE, 0, &sc->smu_space) != 0) {
+ device_printf(dev, "could not map bus space for SMU\n");
+ err = ENXIO;
+ goto err_smu_space;
+ }
+ if (bus_space_map(sc->bus_tag, physbase_addr + SMU_REG_SPACE_OFF,
+ SMU_MEM_SIZE, 0, &sc->reg_space) != 0) {
+ device_printf(dev, "could not map bus space for SMU regs\n");
+ err = ENXIO;
+ goto err_reg_space;
+ }
+
+ /* sysctl stuff. */
+ sc->sysctlctx = device_get_sysctl_ctx(dev);
+ sc->sysctlnode = device_get_sysctl_tree(dev);
+
+ /* Get version & add sysctls. */
+ if ((err = amdsmu_get_vers(dev)) != 0)
+ goto err_dump;
+
+ SYSCTL_ADD_U8(sc->sysctlctx, SYSCTL_CHILDREN(sc->sysctlnode), OID_AUTO,
+ "program", CTLFLAG_RD, &sc->smu_program, 0, "SMU program number");
+ SYSCTL_ADD_U8(sc->sysctlctx, SYSCTL_CHILDREN(sc->sysctlnode), OID_AUTO,
+ "version_major", CTLFLAG_RD, &sc->smu_maj, 0,
+ "SMU firmware major version number");
+ SYSCTL_ADD_U8(sc->sysctlctx, SYSCTL_CHILDREN(sc->sysctlnode), OID_AUTO,
+ "version_minor", CTLFLAG_RD, &sc->smu_min, 0,
+ "SMU firmware minor version number");
+ SYSCTL_ADD_U8(sc->sysctlctx, SYSCTL_CHILDREN(sc->sysctlnode), OID_AUTO,
+ "version_revision", CTLFLAG_RD, &sc->smu_rev, 0,
+ "SMU firmware revision number");
+
+ /* Set up for getting metrics & add sysctls. */
+ if ((err = amdsmu_init_metrics(dev)) != 0)
+ goto err_dump;
+ if ((err = amdsmu_dump_metrics(dev)) != 0)
+ goto err_dump;
+
+ node = SYSCTL_ADD_NODE(sc->sysctlctx, SYSCTL_CHILDREN(sc->sysctlnode),
+ OID_AUTO, "metrics", CTLFLAG_RD, NULL, "SMU metrics");
+ if (node == NULL) {
+ device_printf(dev, "could not add sysctl node for metrics\n");
+ err = ENOMEM;
+ goto err_dump;
+ }
+
+ SYSCTL_ADD_U32(sc->sysctlctx, SYSCTL_CHILDREN(node), OID_AUTO,
+ "table_version", CTLFLAG_RD, &sc->metrics.table_version, 0,
+ "SMU metrics table version");
+ SYSCTL_ADD_U32(sc->sysctlctx, SYSCTL_CHILDREN(node), OID_AUTO,
+ "hint_count", CTLFLAG_RD, &sc->metrics.hint_count, 0,
+ "How many times the sleep hint was set");
+ SYSCTL_ADD_U32(sc->sysctlctx, SYSCTL_CHILDREN(node), OID_AUTO,
+ "s0i3_last_entry_status", CTLFLAG_RD,
+ &sc->metrics.s0i3_last_entry_status, 0,
+ "1 if last S0i3 entry was successful");
+ SYSCTL_ADD_U32(sc->sysctlctx, SYSCTL_CHILDREN(node), OID_AUTO,
+ "time_last_in_s0i2", CTLFLAG_RD, &sc->metrics.time_last_in_s0i2, 0,
+ "Time spent in S0i2 during last sleep (us)");
+ SYSCTL_ADD_U64(sc->sysctlctx, SYSCTL_CHILDREN(node), OID_AUTO,
+ "time_last_entering_s0i3", CTLFLAG_RD,
+ &sc->metrics.time_last_entering_s0i3, 0,
+ "Time spent entering S0i3 during last sleep (us)");
+ SYSCTL_ADD_U64(sc->sysctlctx, SYSCTL_CHILDREN(node), OID_AUTO,
+ "total_time_entering_s0i3", CTLFLAG_RD,
+ &sc->metrics.total_time_entering_s0i3, 0,
+ "Total time spent entering S0i3 (us)");
+ SYSCTL_ADD_U64(sc->sysctlctx, SYSCTL_CHILDREN(node), OID_AUTO,
+ "time_last_resuming", CTLFLAG_RD, &sc->metrics.time_last_resuming,
+ 0, "Time spent resuming from last sleep (us)");
+ SYSCTL_ADD_U64(sc->sysctlctx, SYSCTL_CHILDREN(node), OID_AUTO,
+ "total_time_resuming", CTLFLAG_RD, &sc->metrics.total_time_resuming,
+ 0, "Total time spent resuming from sleep (us)");
+ SYSCTL_ADD_U64(sc->sysctlctx, SYSCTL_CHILDREN(node), OID_AUTO,
+ "time_last_in_s0i3", CTLFLAG_RD, &sc->metrics.time_last_in_s0i3, 0,
+ "Time spent in S0i3 during last sleep (us)");
+ SYSCTL_ADD_U64(sc->sysctlctx, SYSCTL_CHILDREN(node), OID_AUTO,
+ "total_time_in_s0i3", CTLFLAG_RD, &sc->metrics.total_time_in_s0i3,
+ 0, "Total time spent in S0i3 (us)");
+ SYSCTL_ADD_U64(sc->sysctlctx, SYSCTL_CHILDREN(node), OID_AUTO,
+ "time_last_in_sw_drips", CTLFLAG_RD,
+ &sc->metrics.time_last_in_sw_drips, 0,
+ "Time spent in awake during last sleep (us)");
+ SYSCTL_ADD_U64(sc->sysctlctx, SYSCTL_CHILDREN(node), OID_AUTO,
+ "total_time_in_sw_drips", CTLFLAG_RD,
+ &sc->metrics.total_time_in_sw_drips, 0,
+ "Total time spent awake (us)");
+
+ /* Get IP blocks & add sysctls. */
+ err = amdsmu_get_ip_blocks(dev);
+ if (err != 0)
+ goto err_dump;
+
+ /* Get idlemask & add sysctl. */
+ amdsmu_fetch_idlemask(dev);
+ SYSCTL_ADD_U32(sc->sysctlctx, SYSCTL_CHILDREN(sc->sysctlnode), OID_AUTO,
+ "idlemask", CTLFLAG_RD, &sc->idlemask, 0, "SMU idlemask. This "
+ "value is not documented - only used to help AMD internally debug "
+ "issues");
+
+ return (0);
+err_dump:
+ bus_space_unmap(sc->bus_tag, sc->reg_space, SMU_MEM_SIZE);
+err_reg_space:
+ bus_space_unmap(sc->bus_tag, sc->smu_space, SMU_MEM_SIZE);
+err_smu_space:
+ bus_release_resource(dev, SYS_RES_MEMORY, rid, sc->res);
+ return (err);
+}
+
+static int
+amdsmu_detach(device_t dev)
+{
+ struct amdsmu_softc *sc = device_get_softc(dev);
+ int rid = 0;
+
+ bus_space_unmap(sc->bus_tag, sc->smu_space, SMU_MEM_SIZE);
+ bus_space_unmap(sc->bus_tag, sc->reg_space, SMU_MEM_SIZE);
+
+ bus_release_resource(dev, SYS_RES_MEMORY, rid, sc->res);
+ return (0);
+}
+
+static device_method_t amdsmu_methods[] = {
+ DEVMETHOD(device_identify, amdsmu_identify),
+ DEVMETHOD(device_probe, amdsmu_probe),
+ DEVMETHOD(device_attach, amdsmu_attach),
+ DEVMETHOD(device_detach, amdsmu_detach),
+ DEVMETHOD_END
+};
+
+static driver_t amdsmu_driver = {
+ "amdsmu",
+ amdsmu_methods,
+ sizeof(struct amdsmu_softc),
+};
+
+DRIVER_MODULE(amdsmu, hostb, amdsmu_driver, NULL, NULL);
+MODULE_VERSION(amdsmu, 1);
+MODULE_DEPEND(amdsmu, amdsmn, 1, 1, 1);
+MODULE_PNP_INFO("U16:vendor;U16:device", pci, amdsmu, amdsmu_products,
+ nitems(amdsmu_products));
diff --git a/sys/dev/amdsmu/amdsmu.h b/sys/dev/amdsmu/amdsmu.h
new file mode 100644
index 000000000000..025887f7fe5a
--- /dev/null
+++ b/sys/dev/amdsmu/amdsmu.h
@@ -0,0 +1,95 @@
+/*
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2025 The FreeBSD Foundation
+ *
+ * This software was developed by Aymeric Wibo <obiwac@freebsd.org>
+ * under sponsorship from the FreeBSD Foundation.
+ */
+#ifndef _AMDSMU_H_
+#define _AMDSMU_H_
+
+#include <sys/param.h>
+#include <sys/bus.h>
+#include <sys/kernel.h>
+#include <machine/bus.h>
+#include <x86/cputypes.h>
+
+#include <dev/amdsmu/amdsmu_reg.h>
+
+#define SMU_RES_READ_PERIOD_US 50
+#define SMU_RES_READ_MAX 20000
+
+static const struct amdsmu_product {
+ uint16_t amdsmu_vendorid;
+ uint16_t amdsmu_deviceid;
+} amdsmu_products[] = {
+ { CPU_VENDOR_AMD, PCI_DEVICEID_AMD_REMBRANDT_ROOT },
+ { CPU_VENDOR_AMD, PCI_DEVICEID_AMD_PHOENIX_ROOT },
+ { CPU_VENDOR_AMD, PCI_DEVICEID_AMD_STRIX_POINT_ROOT },
+};
+
+static const char *const amdsmu_ip_blocks_names[] = {
+ "DISPLAY",
+ "CPU",
+ "GFX",
+ "VDD",
+ "ACP",
+ "VCN",
+ "ISP",
+ "NBIO",
+ "DF",
+ "USB3_0",
+ "USB3_1",
+ "LAPIC",
+ "USB3_2",
+ "USB3_3",
+ "USB3_4",
+ "USB4_0",
+ "USB4_1",
+ "MPM",
+ "JPEG",
+ "IPU",
+ "UMSCH",
+ "VPE",
+};
+
+CTASSERT(nitems(amdsmu_ip_blocks_names) <= 32);
+
+struct amdsmu_softc {
+ struct sysctl_ctx_list *sysctlctx;
+ struct sysctl_oid *sysctlnode;
+
+ struct resource *res;
+ bus_space_tag_t bus_tag;
+
+ bus_space_handle_t smu_space;
+ bus_space_handle_t reg_space;
+
+ uint8_t smu_program;
+ uint8_t smu_maj, smu_min, smu_rev;
+
+ uint32_t active_ip_blocks;
+ struct sysctl_oid *ip_blocks_sysctlnode;
+ size_t ip_block_count;
+ struct sysctl_oid *ip_block_sysctlnodes[nitems(amdsmu_ip_blocks_names)];
+ bool ip_blocks_active[nitems(amdsmu_ip_blocks_names)];
+
+ bus_space_handle_t metrics_space;
+ struct amdsmu_metrics metrics;
+ uint32_t idlemask;
+};
+
+static inline uint32_t
+amdsmu_read4(const struct amdsmu_softc *sc, bus_size_t reg)
+{
+ return (bus_space_read_4(sc->bus_tag, sc->reg_space, reg));
+}
+
+static inline void
+amdsmu_write4(const struct amdsmu_softc *sc, bus_size_t reg, uint32_t val)
+{
+ bus_space_write_4(sc->bus_tag, sc->reg_space, reg, val);
+}
+
+#endif /* _AMDSMU_H_ */
diff --git a/sys/dev/amdsmu/amdsmu_reg.h b/sys/dev/amdsmu/amdsmu_reg.h
new file mode 100644
index 000000000000..e685b34e6883
--- /dev/null
+++ b/sys/dev/amdsmu/amdsmu_reg.h
@@ -0,0 +1,84 @@
+/*
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2025 The FreeBSD Foundation
+ *
+ * This software was developed by Aymeric Wibo <obiwac@freebsd.org>
+ * under sponsorship from the FreeBSD Foundation.
+ */
+#ifndef _AMDSMU_REG_H_
+#define _AMDSMU_REG_H_
+
+#include <sys/types.h>
+
+/*
+ * TODO These are in common with amdtemp; should we find a way to factor these
+ * out? Also, there are way more of these. I couldn't find a centralized place
+ * which lists them though.
+ */
+#define PCI_DEVICEID_AMD_REMBRANDT_ROOT 0x14B5
+#define PCI_DEVICEID_AMD_PHOENIX_ROOT 0x14E8
+#define PCI_DEVICEID_AMD_STRIX_POINT_ROOT 0x14A4
+
+#define SMU_INDEX_ADDRESS 0xB8
+#define SMU_INDEX_DATA 0xBC
+
+#define SMU_PHYSBASE_ADDR_LO 0x13B102E8
+#define SMU_PHYSBASE_ADDR_HI 0x13B102EC
+
+#define SMU_MEM_SIZE 0x1000
+#define SMU_REG_SPACE_OFF 0x10000
+
+#define SMU_REG_MESSAGE 0x538
+#define SMU_REG_RESPONSE 0x980
+#define SMU_REG_ARGUMENT 0x9BC
+#define SMU_REG_IDLEMASK 0xD14
+
+enum amdsmu_res {
+ SMU_RES_WAIT = 0x00,
+ SMU_RES_OK = 0x01,
+ SMU_RES_REJECT_BUSY = 0xFC,
+ SMU_RES_REJECT_PREREQ = 0xFD,
+ SMU_RES_UNKNOWN = 0xFE,
+ SMU_RES_FAILED = 0xFF,
+};
+
+enum amdsmu_msg {
+ SMU_MSG_GETSMUVERSION = 0x02,
+ SMU_MSG_LOG_GETDRAM_ADDR_HI = 0x04,
+ SMU_MSG_LOG_GETDRAM_ADDR_LO = 0x05,
+ SMU_MSG_LOG_START = 0x06,
+ SMU_MSG_LOG_RESET = 0x07,
+ SMU_MSG_LOG_DUMP_DATA = 0x08,
+ SMU_MSG_GET_SUP_CONSTRAINTS = 0x09,
+};
+
+/* XXX Copied from Linux struct smu_metrics. */
+struct amdsmu_metrics {
+ uint32_t table_version;
+ uint32_t hint_count;
+ uint32_t s0i3_last_entry_status;
+ uint32_t time_last_in_s0i2;
+ uint64_t time_last_entering_s0i3;
+ uint64_t total_time_entering_s0i3;
+ uint64_t time_last_resuming;
+ uint64_t total_time_resuming;
+ uint64_t time_last_in_s0i3;
+ uint64_t total_time_in_s0i3;
+ uint64_t time_last_in_sw_drips;
+ uint64_t total_time_in_sw_drips;
+ /*
+ * This is how long each IP block was active for (us), i.e., blocking
+ * entry to S0i3. In Linux, these are called "timecondition_notmet_*".
+ *
+ * XXX Total active time for IP blocks seems to be buggy and reporting
+ * garbage (at least on Phoenix), so it's disabled for now. The last
+ * active time for the USB4_0 IP block also seems to be buggy.
+ */
+ uint64_t ip_block_last_active_time[32];
+#ifdef IP_BLOCK_TOTAL_ACTIVE_TIME
+ uint64_t ip_block_total_active_time[32];
+#endif
+} __attribute__((packed));
+
+#endif /* _AMDSMU_REG_H_ */
diff --git a/sys/dev/cxgbe/tom/t4_cpl_io.c b/sys/dev/cxgbe/tom/t4_cpl_io.c
index 8547f21586e1..7a6b1cbdd736 100644
--- a/sys/dev/cxgbe/tom/t4_cpl_io.c
+++ b/sys/dev/cxgbe/tom/t4_cpl_io.c
@@ -703,7 +703,7 @@ t4_push_frames(struct adapter *sc, struct toepcb *toep, int drop)
for (m = sndptr; m != NULL; m = m->m_next) {
int n;
- if ((m->m_flags & M_NOTAVAIL) != 0)
+ if ((m->m_flags & M_NOTREADY) != 0)
break;
if (m->m_flags & M_EXTPG) {
#ifdef KERN_TLS
@@ -787,7 +787,7 @@ t4_push_frames(struct adapter *sc, struct toepcb *toep, int drop)
/* nothing to send */
if (plen == 0) {
- KASSERT(m == NULL || (m->m_flags & M_NOTAVAIL) != 0,
+ KASSERT(m == NULL || (m->m_flags & M_NOTREADY) != 0,
("%s: nothing to send, but m != NULL is ready",
__func__));
break;
@@ -880,7 +880,7 @@ t4_push_frames(struct adapter *sc, struct toepcb *toep, int drop)
toep->txsd_avail--;
t4_l2t_send(sc, wr, toep->l2te);
- } while (m != NULL && (m->m_flags & M_NOTAVAIL) == 0);
+ } while (m != NULL && (m->m_flags & M_NOTREADY) == 0);
/* Send a FIN if requested, but only if there's no more data to send */
if (m == NULL && toep->flags & TPF_SEND_FIN)
diff --git a/sys/dev/cxgbe/tom/t4_tls.c b/sys/dev/cxgbe/tom/t4_tls.c
index c6377980fca9..27c16b9988ae 100644
--- a/sys/dev/cxgbe/tom/t4_tls.c
+++ b/sys/dev/cxgbe/tom/t4_tls.c
@@ -563,7 +563,7 @@ t4_push_ktls(struct adapter *sc, struct toepcb *toep, int drop)
* If there is no ready data to send, wait until more
* data arrives.
*/
- if (m == NULL || (m->m_flags & M_NOTAVAIL) != 0) {
+ if (m == NULL || (m->m_flags & M_NOTREADY) != 0) {
if (sowwakeup)
sowwakeup_locked(so);
else
@@ -614,7 +614,7 @@ t4_push_ktls(struct adapter *sc, struct toepcb *toep, int drop)
/* Shove if there is no additional data pending. */
shove = ((m->m_next == NULL ||
- (m->m_next->m_flags & M_NOTAVAIL) != 0)) &&
+ (m->m_next->m_flags & M_NOTREADY) != 0)) &&
(tp->t_flags & TF_MORETOCOME) == 0;
if (sb->sb_flags & SB_AUTOSIZE &&
diff --git a/sys/dev/drm2/drm_fb_helper.c b/sys/dev/drm2/drm_fb_helper.c
index f67cc9f60d02..1f4abd255690 100644
--- a/sys/dev/drm2/drm_fb_helper.c
+++ b/sys/dev/drm2/drm_fb_helper.c
@@ -51,7 +51,7 @@ struct vt_kms_softc {
struct task fb_mode_task;
};
-/* Call restore out of vt(9) locks. */
+/* Call restore out of vt(4) locks. */
static void
vt_restore_fbdev_mode(void *arg, int pending)
{
diff --git a/sys/dev/efidev/efirt.c b/sys/dev/efidev/efirt.c
index b0fa33daeca7..b55c1c191077 100644
--- a/sys/dev/efidev/efirt.c
+++ b/sys/dev/efidev/efirt.c
@@ -107,7 +107,8 @@ static int efi_status2err[25] = {
enum efi_table_type {
TYPE_ESRT = 0,
- TYPE_PROP
+ TYPE_PROP,
+ TYPE_MEMORY_ATTR
};
static int efi_enter(void);
@@ -445,6 +446,42 @@ get_table_length(enum efi_table_type type, size_t *table_len, void **taddr)
free(buf, M_TEMP);
return (0);
}
+ case TYPE_MEMORY_ATTR:
+ {
+ efi_guid_t guid = EFI_MEMORY_ATTRIBUTES_TABLE;
+ struct efi_memory_attribute_table *tbl_addr, *mem_addr;
+ int error;
+ void *buf;
+ size_t len = sizeof(struct efi_memory_attribute_table);
+
+ error = efi_get_table(&guid, (void **)&tbl_addr);
+ if (error)
+ return (error);
+
+ buf = malloc(len, M_TEMP, M_WAITOK);
+ error = physcopyout((vm_paddr_t)tbl_addr, buf, len);
+ if (error) {
+ free(buf, M_TEMP);
+ return (error);
+ }
+
+ mem_addr = (struct efi_memory_attribute_table *)buf;
+ if (mem_addr->version != 2) {
+ free(buf, M_TEMP);
+ return (EINVAL);
+ }
+ len += mem_addr->descriptor_size * mem_addr->num_ents;
+ if (len > EFI_TABLE_ALLOC_MAX) {
+ free(buf, M_TEMP);
+ return (ENOMEM);
+ }
+
+ *table_len = len;
+ if (taddr != NULL)
+ *taddr = tbl_addr;
+ free(buf, M_TEMP);
+ return (0);
+ }
}
return (ENOENT);
}
@@ -457,7 +494,8 @@ copy_table(efi_guid_t *guid, void **buf, size_t buf_len, size_t *table_len)
enum efi_table_type type;
} tables[] = {
{ EFI_TABLE_ESRT, TYPE_ESRT },
- { EFI_PROPERTIES_TABLE, TYPE_PROP }
+ { EFI_PROPERTIES_TABLE, TYPE_PROP },
+ { EFI_MEMORY_ATTRIBUTES_TABLE, TYPE_MEMORY_ATTR }
};
size_t table_idx;
void *taddr;
diff --git a/sys/dev/iicbus/iichid.c b/sys/dev/iicbus/iichid.c
index 9c0324a24685..3f1d7a0cefba 100644
--- a/sys/dev/iicbus/iichid.c
+++ b/sys/dev/iicbus/iichid.c
@@ -275,62 +275,36 @@ iichid_cmd_read(struct iichid_softc* sc, void *buf, iichid_size_t maxlen,
* 6.1.3 - Retrieval of Input Reports
* DEVICE returns the length (2 Bytes) and the entire Input Report.
*/
- uint8_t actbuf[2] = { 0, 0 };
- /* Read actual input report length. */
+
+ memset(buf, 0xaa, 2); // In case nothing gets read
struct iic_msg msgs[] = {
- { sc->addr, IIC_M_RD | IIC_M_NOSTOP, sizeof(actbuf), actbuf },
+ { sc->addr, IIC_M_RD, maxlen, buf },
};
- uint16_t actlen;
int error;
error = iicbus_transfer(sc->dev, msgs, nitems(msgs));
if (error != 0)
return (error);
- actlen = actbuf[0] | actbuf[1] << 8;
-#ifdef IICHID_SAMPLING
- if ((actlen == 0 && sc->sampling_rate_slow < 0) ||
- (maxlen == 0 && sc->sampling_rate_slow >= 0)) {
-#else
+ DPRINTFN(sc, 5, "%*D\n", msgs[0].len, msgs[0].buf, " ");
+
+ uint16_t actlen = le16dec(buf);
+
if (actlen == 0) {
-#endif
- /* Read and discard reset command response. */
- msgs[0] = (struct iic_msg)
- { sc->addr, IIC_M_RD | IIC_M_NOSTART,
- le16toh(sc->desc.wMaxInputLength) - 2, sc->intr_buf };
- actlen = 0;
if (!sc->reset_acked) {
mtx_lock(&sc->mtx);
sc->reset_acked = true;
wakeup(&sc->reset_acked);
mtx_unlock(&sc->mtx);
}
-#ifdef IICHID_SAMPLING
- } else if ((actlen <= 2 || actlen == 0xFFFF) &&
- sc->sampling_rate_slow >= 0) {
- /* Read and discard 1 byte to send I2C STOP condition. */
- msgs[0] = (struct iic_msg)
- { sc->addr, IIC_M_RD | IIC_M_NOSTART, 1, actbuf };
- actlen = 0;
-#endif
- } else {
- actlen -= 2;
- if (actlen > maxlen) {
- DPRINTF(sc, "input report too big. requested=%d "
- "received=%d\n", maxlen, actlen);
- actlen = maxlen;
- }
- /* Read input report itself. */
- msgs[0] = (struct iic_msg)
- { sc->addr, IIC_M_RD | IIC_M_NOSTART, actlen, buf };
}
- error = iicbus_transfer(sc->dev, msgs, 1);
- if (error == 0 && actual_len != NULL)
+ if (actlen <= 2 || actlen > maxlen) {
+ actlen = 0;
+ }
+ if (actual_len != NULL) {
*actual_len = actlen;
-
- DPRINTFN(sc, 5,
- "%*D - %*D\n", 2, actbuf, " ", msgs[0].len, msgs[0].buf, " ");
+ }
return (error);
}
@@ -566,7 +540,7 @@ iichid_sampling_task(void *context, int pending)
error = iichid_cmd_read(sc, sc->intr_buf, sc->intr_bufsize, &actual);
if (error == 0) {
if (actual > 0) {
- sc->intr_handler(sc->intr_ctx, sc->intr_buf, actual);
+ sc->intr_handler(sc->intr_ctx, sc->intr_buf + 2, actual);
sc->missing_samples = 0;
if (sc->dup_size != actual ||
memcmp(sc->dup_buf, sc->intr_buf, actual) != 0) {
@@ -577,7 +551,7 @@ iichid_sampling_task(void *context, int pending)
++sc->dup_samples;
} else {
if (++sc->missing_samples == 1)
- sc->intr_handler(sc->intr_ctx, sc->intr_buf, 0);
+ sc->intr_handler(sc->intr_ctx, sc->intr_buf + 2, 0);
sc->dup_samples = 0;
}
} else
@@ -632,7 +606,7 @@ iichid_intr(void *context)
if (error == 0) {
if (sc->power_on && sc->open) {
if (actual != 0)
- sc->intr_handler(sc->intr_ctx, sc->intr_buf,
+ sc->intr_handler(sc->intr_ctx, sc->intr_buf + 2,
actual);
else
DPRINTF(sc, "no data received\n");
@@ -842,11 +816,12 @@ iichid_intr_setup(device_t dev, device_t child __unused, hid_intr_t intr,
sc = device_get_softc(dev);
/*
- * Do not rely on wMaxInputLength, as some devices may set it to
- * a wrong length. Find the longest input report in report descriptor.
+ * Do not rely just on wMaxInputLength, as some devices (which?)
+ * may set it to a wrong length. Also find the longest input report
+ * in report descriptor, and add two for the length field.
*/
- rdesc->rdsize =
- MAX(rdesc->isize, le16toh(sc->desc.wMaxInputLength) - 2);
+ rdesc->rdsize = 2 +
+ MAX(rdesc->isize, le16toh(sc->desc.wMaxInputLength));
/* Write and get/set_report sizes are limited by I2C-HID protocol. */
rdesc->grsize = rdesc->srsize = IICHID_SIZE_MAX;
rdesc->wrsize = IICHID_SIZE_MAX;
@@ -919,7 +894,7 @@ iichid_intr_poll(device_t dev, device_t child __unused)
sc = device_get_softc(dev);
error = iichid_cmd_read(sc, sc->intr_buf, sc->intr_bufsize, &actual);
if (error == 0 && actual != 0)
- sc->intr_handler(sc->intr_ctx, sc->intr_buf, actual);
+ sc->intr_handler(sc->intr_ctx, sc->intr_buf + 2, actual);
}
/*
@@ -946,6 +921,7 @@ iichid_read(device_t dev, device_t child __unused, void *buf,
{
struct iichid_softc *sc;
device_t parent;
+ uint8_t *tmpbuf;
int error;
if (maxlen > IICHID_SIZE_MAX)
@@ -954,8 +930,12 @@ iichid_read(device_t dev, device_t child __unused, void *buf,
parent = device_get_parent(sc->dev);
error = iicbus_request_bus(parent, sc->dev, IIC_WAIT);
if (error == 0) {
- error = iichid_cmd_read(sc, buf, maxlen, actlen);
+ tmpbuf = malloc(maxlen + 2, M_DEVBUF, M_WAITOK | M_ZERO);
+ error = iichid_cmd_read(sc, tmpbuf, maxlen + 2, actlen);
iicbus_release_bus(parent, sc->dev);
+ if (*actlen > 0)
+ memcpy(buf, tmpbuf + 2, *actlen);
+ free(tmpbuf, M_DEVBUF);
}
return (iic2errno(error));
}
diff --git a/sys/dev/md/md.c b/sys/dev/md/md.c
index 29dc0c880e3a..ec1664fac701 100644
--- a/sys/dev/md/md.c
+++ b/sys/dev/md/md.c
@@ -89,6 +89,8 @@
#include <sys/unistd.h>
#include <sys/vnode.h>
#include <sys/disk.h>
+#include <sys/param.h>
+#include <sys/bus.h>
#include <geom/geom.h>
#include <geom/geom_int.h>
@@ -2082,8 +2084,10 @@ g_md_init(struct g_class *mp __unused)
{
caddr_t mod;
u_char *ptr, *name, *type;
+ u_char scratch[40];
unsigned len;
int i;
+ vm_offset_t paddr;
/* figure out log2(NINDIR) */
for (i = NINDIR, nshift = -1; i; nshift++)
@@ -2123,6 +2127,25 @@ g_md_init(struct g_class *mp __unused)
sx_xunlock(&md_sx);
}
}
+
+ /*
+ * Load up to 32 pre-loaded disks
+ */
+ for (int i = 0; i < 32; i++) {
+ if (resource_long_value("md", i, "physaddr",
+ (long *) &paddr) != 0 ||
+ resource_int_value("md", i, "len", &len) != 0)
+ break;
+ ptr = (char *)pmap_map(NULL, paddr, paddr + len, VM_PROT_READ);
+ if (ptr != NULL && len != 0) {
+ sprintf(scratch, "preload%d 0x%016jx", i,
+ (uintmax_t)paddr);
+ sx_xlock(&md_sx);
+ md_preloaded(ptr, len, scratch);
+ sx_xunlock(&md_sx);
+ }
+ }
+
status_dev = make_dev(&mdctl_cdevsw, INT_MAX, UID_ROOT, GID_WHEEL,
0600, MDCTL_NAME);
g_topology_lock();
diff --git a/sys/dev/nvme/nvme_ctrlr.c b/sys/dev/nvme/nvme_ctrlr.c
index 73a7cee4aad0..fd7f00ced14b 100644
--- a/sys/dev/nvme/nvme_ctrlr.c
+++ b/sys/dev/nvme/nvme_ctrlr.c
@@ -48,7 +48,7 @@
#define B4_CHK_RDY_DELAY_MS 2300 /* work around controller bug */
static void nvme_ctrlr_construct_and_submit_aer(struct nvme_controller *ctrlr,
- struct nvme_async_event_request *aer);
+ struct nvme_async_event_request *aer);
static void
nvme_ctrlr_barrier(struct nvme_controller *ctrlr, int flags)
@@ -680,96 +680,6 @@ nvme_ctrlr_log_critical_warnings(struct nvme_controller *ctrlr,
}
static void
-nvme_ctrlr_async_event_log_page_cb(void *arg, const struct nvme_completion *cpl)
-{
- struct nvme_async_event_request *aer = arg;
- struct nvme_health_information_page *health_info;
- struct nvme_ns_list *nsl;
- struct nvme_error_information_entry *err;
- int i;
-
- /*
- * If the log page fetch for some reason completed with an error,
- * don't pass log page data to the consumers. In practice, this case
- * should never happen.
- */
- if (nvme_completion_is_error(cpl))
- nvme_notify_async_consumers(aer->ctrlr, &aer->cpl,
- aer->log_page_id, NULL, 0);
- else {
- /* Convert data to host endian */
- switch (aer->log_page_id) {
- case NVME_LOG_ERROR:
- err = (struct nvme_error_information_entry *)aer->log_page_buffer;
- for (i = 0; i < (aer->ctrlr->cdata.elpe + 1); i++)
- nvme_error_information_entry_swapbytes(err++);
- break;
- case NVME_LOG_HEALTH_INFORMATION:
- nvme_health_information_page_swapbytes(
- (struct nvme_health_information_page *)aer->log_page_buffer);
- break;
- case NVME_LOG_CHANGED_NAMESPACE:
- nvme_ns_list_swapbytes(
- (struct nvme_ns_list *)aer->log_page_buffer);
- break;
- case NVME_LOG_COMMAND_EFFECT:
- nvme_command_effects_page_swapbytes(
- (struct nvme_command_effects_page *)aer->log_page_buffer);
- break;
- case NVME_LOG_RES_NOTIFICATION:
- nvme_res_notification_page_swapbytes(
- (struct nvme_res_notification_page *)aer->log_page_buffer);
- break;
- case NVME_LOG_SANITIZE_STATUS:
- nvme_sanitize_status_page_swapbytes(
- (struct nvme_sanitize_status_page *)aer->log_page_buffer);
- break;
- default:
- break;
- }
-
- if (aer->log_page_id == NVME_LOG_HEALTH_INFORMATION) {
- health_info = (struct nvme_health_information_page *)
- aer->log_page_buffer;
- nvme_ctrlr_log_critical_warnings(aer->ctrlr,
- health_info->critical_warning);
- /*
- * Critical warnings reported through the
- * SMART/health log page are persistent, so
- * clear the associated bits in the async event
- * config so that we do not receive repeated
- * notifications for the same event.
- */
- aer->ctrlr->async_event_config &=
- ~health_info->critical_warning;
- nvme_ctrlr_cmd_set_async_event_config(aer->ctrlr,
- aer->ctrlr->async_event_config, NULL, NULL);
- } else if (aer->log_page_id == NVME_LOG_CHANGED_NAMESPACE &&
- !nvme_use_nvd) {
- nsl = (struct nvme_ns_list *)aer->log_page_buffer;
- for (i = 0; i < nitems(nsl->ns) && nsl->ns[i] != 0; i++) {
- if (nsl->ns[i] > NVME_MAX_NAMESPACES)
- break;
- nvme_notify_ns(aer->ctrlr, nsl->ns[i]);
- }
- }
-
- /*
- * Pass the cpl data from the original async event completion,
- * not the log page fetch.
- */
- nvme_notify_async_consumers(aer->ctrlr, &aer->cpl,
- aer->log_page_id, aer->log_page_buffer, aer->log_page_size);
- }
-
- /*
- * Repost another asynchronous event request to replace the one
- * that just completed.
- */
- nvme_ctrlr_construct_and_submit_aer(aer->ctrlr, aer);
-}
-
-static void
nvme_ctrlr_async_event_cb(void *arg, const struct nvme_completion *cpl)
{
struct nvme_async_event_request *aer = arg;
@@ -784,33 +694,18 @@ nvme_ctrlr_async_event_cb(void *arg, const struct nvme_completion *cpl)
return;
}
- /* Associated log page is in bits 23:16 of completion entry dw0. */
+ /*
+ * Save the completion status. The associated log page id is in bits
+ * 23:16 of completion entry dw0. Print a message and queue it for
+ * further processing.
+ */
+ memcpy(&aer->cpl, cpl, sizeof(*cpl));
aer->log_page_id = NVMEV(NVME_ASYNC_EVENT_LOG_PAGE_ID, cpl->cdw0);
-
nvme_printf(aer->ctrlr, "async event occurred (type 0x%x, info 0x%02x,"
" page 0x%02x)\n", NVMEV(NVME_ASYNC_EVENT_TYPE, cpl->cdw0),
NVMEV(NVME_ASYNC_EVENT_INFO, cpl->cdw0),
aer->log_page_id);
-
- if (is_log_page_id_valid(aer->log_page_id)) {
- aer->log_page_size = nvme_ctrlr_get_log_page_size(aer->ctrlr,
- aer->log_page_id);
- memcpy(&aer->cpl, cpl, sizeof(*cpl));
- nvme_ctrlr_cmd_get_log_page(aer->ctrlr, aer->log_page_id,
- NVME_GLOBAL_NAMESPACE_TAG, aer->log_page_buffer,
- aer->log_page_size, nvme_ctrlr_async_event_log_page_cb,
- aer);
- /* Wait to notify consumers until after log page is fetched. */
- } else {
- nvme_notify_async_consumers(aer->ctrlr, cpl, aer->log_page_id,
- NULL, 0);
-
- /*
- * Repost another asynchronous event request to replace the one
- * that just completed.
- */
- nvme_ctrlr_construct_and_submit_aer(aer->ctrlr, aer);
- }
+ taskqueue_enqueue(aer->ctrlr->taskqueue, &aer->task);
}
static void
@@ -819,15 +714,21 @@ nvme_ctrlr_construct_and_submit_aer(struct nvme_controller *ctrlr,
{
struct nvme_request *req;
- aer->ctrlr = ctrlr;
/*
- * XXX-MJ this should be M_WAITOK but we might be in a non-sleepable
- * callback context. AER completions should be handled on a dedicated
- * thread.
+ * We're racing the reset thread, so let that process submit this again.
+ * XXX does this really solve that race? And is that race even possible
+ * since we only reset when we've not heard from the card in a long
+ * time. Why would we get an AER in the middle of that just before we
+ * kick off the reset?
*/
- req = nvme_allocate_request_null(M_NOWAIT, nvme_ctrlr_async_event_cb,
+ if (ctrlr->is_resetting)
+ return;
+
+ aer->ctrlr = ctrlr;
+ req = nvme_allocate_request_null(M_WAITOK, nvme_ctrlr_async_event_cb,
aer);
aer->req = req;
+ aer->log_page_id = 0; /* Not a valid page */
/*
* Disable timeout here, since asynchronous event requests should by
@@ -1203,6 +1104,140 @@ nvme_ctrlr_reset_task(void *arg, int pending)
atomic_cmpset_32(&ctrlr->is_resetting, 1, 0);
}
+static void
+nvme_ctrlr_aer_done(void *arg, const struct nvme_completion *cpl)
+{
+ struct nvme_async_event_request *aer = arg;
+
+ mtx_lock(&aer->mtx);
+ if (nvme_completion_is_error(cpl))
+ aer->log_page_size = (uint32_t)-1;
+ else
+ aer->log_page_size = nvme_ctrlr_get_log_page_size(
+ aer->ctrlr, aer->log_page_id);
+ wakeup(aer);
+ mtx_unlock(&aer->mtx);
+}
+
+static void
+nvme_ctrlr_aer_task(void *arg, int pending)
+{
+ struct nvme_async_event_request *aer = arg;
+ struct nvme_controller *ctrlr = aer->ctrlr;
+ uint32_t len;
+
+ /*
+ * We're resetting, so just punt.
+ */
+ if (ctrlr->is_resetting)
+ return;
+
+ if (!is_log_page_id_valid(aer->log_page_id)) {
+ /*
+ * Repost another asynchronous event request to replace the one
+ * that just completed.
+ */
+ nvme_notify_async_consumers(ctrlr, &aer->cpl, aer->log_page_id,
+ NULL, 0);
+ /* The repost is done at the "out" label below; don't do it twice. */
+ goto out;
+ }
+
+ aer->log_page_size = 0;
+ len = nvme_ctrlr_get_log_page_size(aer->ctrlr, aer->log_page_id);
+ nvme_ctrlr_cmd_get_log_page(aer->ctrlr, aer->log_page_id,
+ NVME_GLOBAL_NAMESPACE_TAG, aer->log_page_buffer, len,
+ nvme_ctrlr_aer_done, aer);
+ mtx_lock(&aer->mtx);
+ while (aer->log_page_size == 0)
+ mtx_sleep(aer, &aer->mtx, PRIBIO, "nvme_pt", 0);
+ mtx_unlock(&aer->mtx);
+
+ if (aer->log_page_size == (uint32_t)-1) {
+ /*
+ * If the log page fetch for some reason completed with an
+ * error, don't pass log page data to the consumers. In
+ * practice, this case should never happen.
+ */
+ nvme_notify_async_consumers(aer->ctrlr, &aer->cpl,
+ aer->log_page_id, NULL, 0);
+ goto out;
+ }
+
+ /* Convert data to host endian */
+ switch (aer->log_page_id) {
+ case NVME_LOG_ERROR: {
+ struct nvme_error_information_entry *err =
+ (struct nvme_error_information_entry *)aer->log_page_buffer;
+ for (int i = 0; i < (aer->ctrlr->cdata.elpe + 1); i++)
+ nvme_error_information_entry_swapbytes(err++);
+ break;
+ }
+ case NVME_LOG_HEALTH_INFORMATION:
+ nvme_health_information_page_swapbytes(
+ (struct nvme_health_information_page *)aer->log_page_buffer);
+ break;
+ case NVME_LOG_CHANGED_NAMESPACE:
+ nvme_ns_list_swapbytes(
+ (struct nvme_ns_list *)aer->log_page_buffer);
+ break;
+ case NVME_LOG_COMMAND_EFFECT:
+ nvme_command_effects_page_swapbytes(
+ (struct nvme_command_effects_page *)aer->log_page_buffer);
+ break;
+ case NVME_LOG_RES_NOTIFICATION:
+ nvme_res_notification_page_swapbytes(
+ (struct nvme_res_notification_page *)aer->log_page_buffer);
+ break;
+ case NVME_LOG_SANITIZE_STATUS:
+ nvme_sanitize_status_page_swapbytes(
+ (struct nvme_sanitize_status_page *)aer->log_page_buffer);
+ break;
+ default:
+ break;
+ }
+
+ if (aer->log_page_id == NVME_LOG_HEALTH_INFORMATION) {
+ struct nvme_health_information_page *health_info =
+ (struct nvme_health_information_page *)aer->log_page_buffer;
+
+ /*
+ * Critical warnings reported through the SMART/health log page
+ * are persistent, so clear the associated bits in the async
+ * event config so that we do not receive repeated notifications
+ * for the same event.
+ */
+ nvme_ctrlr_log_critical_warnings(aer->ctrlr,
+ health_info->critical_warning);
+ aer->ctrlr->async_event_config &=
+ ~health_info->critical_warning;
+ nvme_ctrlr_cmd_set_async_event_config(aer->ctrlr,
+ aer->ctrlr->async_event_config, NULL, NULL);
+ } else if (aer->log_page_id == NVME_LOG_CHANGED_NAMESPACE) {
+ struct nvme_ns_list *nsl =
+ (struct nvme_ns_list *)aer->log_page_buffer;
+ for (int i = 0; i < nitems(nsl->ns) && nsl->ns[i] != 0; i++) {
+ if (nsl->ns[i] > NVME_MAX_NAMESPACES)
+ break;
+ nvme_notify_ns(aer->ctrlr, nsl->ns[i]);
+ }
+ }
+
+ /*
+ * Pass the cpl data from the original async event completion, not the
+ * log page fetch.
+ */
+ nvme_notify_async_consumers(aer->ctrlr, &aer->cpl,
+ aer->log_page_id, aer->log_page_buffer, aer->log_page_size);
+
+ /*
+ * Repost another asynchronous event request to replace the one
+ * that just completed.
+ */
+out:
+ nvme_ctrlr_construct_and_submit_aer(ctrlr, aer);
+}
+
/*
* Poll all the queues enabled on the device for completion.
*/
@@ -1574,13 +1609,8 @@ nvme_ctrlr_construct(struct nvme_controller *ctrlr, device_t dev)
/*
* Create 2 threads for the taskqueue. The reset thread will block when
* it detects that the controller has failed until all I/O has been
- * failed up the stack. The fail_req task needs to be able to run in
- * this case to finish the request failure for some cases.
- *
- * We could partially solve this race by draining the failed requeust
- * queue before proceding to free the sim, though nothing would stop
- * new I/O from coming in after we do that drain, but before we reach
- * cam_sim_free, so this big hammer is used instead.
+ * failed up the stack. The second thread is used for AER events, which
+ * can block, but only briefly for memory and log page fetching.
*/
ctrlr->taskqueue = taskqueue_create("nvme_taskq", M_WAITOK,
taskqueue_thread_enqueue, &ctrlr->taskqueue);
@@ -1590,7 +1620,12 @@ nvme_ctrlr_construct(struct nvme_controller *ctrlr, device_t dev)
ctrlr->is_initialized = false;
ctrlr->notification_sent = 0;
TASK_INIT(&ctrlr->reset_task, 0, nvme_ctrlr_reset_task, ctrlr);
- STAILQ_INIT(&ctrlr->fail_req);
+ for (int i = 0; i < NVME_MAX_ASYNC_EVENTS; i++) {
+ struct nvme_async_event_request *aer = &ctrlr->aer[i];
+
+ TASK_INIT(&aer->task, 0, nvme_ctrlr_aer_task, aer);
+ mtx_init(&aer->mtx, "AER mutex", NULL, MTX_DEF);
+ }
ctrlr->is_failed = false;
make_dev_args_init(&md_args);
@@ -1678,8 +1713,14 @@ nvme_ctrlr_destruct(struct nvme_controller *ctrlr, device_t dev)
}
noadminq:
- if (ctrlr->taskqueue)
+ if (ctrlr->taskqueue) {
taskqueue_free(ctrlr->taskqueue);
+ for (int i = 0; i < NVME_MAX_ASYNC_EVENTS; i++) {
+ struct nvme_async_event_request *aer = &ctrlr->aer[i];
+
+ mtx_destroy(&aer->mtx);
+ }
+ }
if (ctrlr->tag)
bus_teardown_intr(ctrlr->dev, ctrlr->res, ctrlr->tag);
diff --git a/sys/dev/nvme/nvme_private.h b/sys/dev/nvme/nvme_private.h
index 949e69ec9290..36f00fedc48e 100644
--- a/sys/dev/nvme/nvme_private.h
+++ b/sys/dev/nvme/nvme_private.h
@@ -123,6 +123,8 @@ struct nvme_request {
struct nvme_async_event_request {
struct nvme_controller *ctrlr;
struct nvme_request *req;
+ struct task task;
+ struct mtx mtx;
struct nvme_completion cpl;
uint32_t log_page_id;
uint32_t log_page_size;
@@ -307,8 +309,6 @@ struct nvme_controller {
bool isr_warned;
bool is_initialized;
- STAILQ_HEAD(, nvme_request) fail_req;
-
/* Host Memory Buffer */
int hmb_nchunks;
size_t hmb_chunk;
diff --git a/sys/dev/nvmf/controller/nvmft_subr.c b/sys/dev/nvmf/controller/nvmft_subr.c
index bb2bc0988e81..245971813854 100644
--- a/sys/dev/nvmf/controller/nvmft_subr.c
+++ b/sys/dev/nvmf/controller/nvmft_subr.c
@@ -26,46 +26,6 @@ nvmf_nqn_valid(const char *nqn)
len = strnlen(nqn, NVME_NQN_FIELD_SIZE);
if (len == 0 || len > NVMF_NQN_MAX_LEN)
return (false);
-
-#ifdef STRICT_CHECKS
- /*
- * Stricter checks from the spec. Linux does not seem to
- * require these.
- */
-
- /*
- * NVMF_NQN_MIN_LEN does not include '.', and require at least
- * one character of a domain name.
- */
- if (len < NVMF_NQN_MIN_LEN + 2)
- return (false);
- if (memcmp("nqn.", nqn, strlen("nqn.")) != 0)
- return (false);
- nqn += strlen("nqn.");
-
- /* Next 4 digits must be a year. */
- for (u_int i = 0; i < 4; i++) {
- if (!isdigit(nqn[i]))
- return (false);
- }
- nqn += 4;
-
- /* '-' between year and month. */
- if (nqn[0] != '-')
- return (false);
- nqn++;
-
- /* 2 digit month. */
- for (u_int i = 0; i < 2; i++) {
- if (!isdigit(nqn[i]))
- return (false);
- }
- nqn += 2;
-
- /* '.' between month and reverse domain name. */
- if (nqn[0] != '.')
- return (false);
-#endif
return (true);
}
diff --git a/sys/dev/pci/pci_iov.c b/sys/dev/pci/pci_iov.c
index 1f72391fb6b4..0efcfeac9eff 100644
--- a/sys/dev/pci/pci_iov.c
+++ b/sys/dev/pci/pci_iov.c
@@ -734,11 +734,18 @@ pci_iov_config(struct cdev *cdev, struct pci_iov_arg *arg)
first_rid = pci_get_rid(dev) + rid_off;
last_rid = first_rid + (num_vfs - 1) * rid_stride;
- /* We don't yet support allocating extra bus numbers for VFs. */
if (pci_get_bus(dev) != PCI_RID2BUS(last_rid)) {
- device_printf(dev, "not enough PCIe bus numbers for VFs\n");
- error = ENOSPC;
- goto out;
+ int rid = 0;
+ uint16_t last_rid_bus = PCI_RID2BUS(last_rid);
+
+ iov->iov_bus_res = bus_alloc_resource(bus, PCI_RES_BUS, &rid,
+ last_rid_bus, last_rid_bus, 1, RF_ACTIVE);
+ if (iov->iov_bus_res == NULL) {
+ device_printf(dev,
+ "failed to allocate PCIe bus number for VFs\n");
+ error = ENOSPC;
+ goto out;
+ }
}
if (!ari_enabled && PCI_RID2SLOT(last_rid) != 0) {
@@ -786,6 +793,11 @@ out:
}
}
+ if (iov->iov_bus_res != NULL) {
+ bus_release_resource(bus, iov->iov_bus_res);
+ iov->iov_bus_res = NULL;
+ }
+
if (iov->iov_flags & IOV_RMAN_INITED) {
rman_fini(&iov->rman);
iov->iov_flags &= ~IOV_RMAN_INITED;
@@ -896,6 +908,11 @@ pci_iov_delete_iov_children(struct pci_devinfo *dinfo)
}
}
+ if (iov->iov_bus_res != NULL) {
+ bus_release_resource(bus, iov->iov_bus_res);
+ iov->iov_bus_res = NULL;
+ }
+
if (iov->iov_flags & IOV_RMAN_INITED) {
rman_fini(&iov->rman);
iov->iov_flags &= ~IOV_RMAN_INITED;
diff --git a/sys/dev/pci/pci_iov_private.h b/sys/dev/pci/pci_iov_private.h
index 7ae2219b936d..ecf0a9b21be5 100644
--- a/sys/dev/pci/pci_iov_private.h
+++ b/sys/dev/pci/pci_iov_private.h
@@ -39,6 +39,8 @@ struct pcicfg_iov {
struct cdev *iov_cdev;
nvlist_t *iov_schema;
+ struct resource *iov_bus_res;
+
struct pci_iov_bar iov_bar[PCIR_MAX_BAR_0 + 1];
struct rman rman;
char rman_name[64];
diff --git a/sys/dev/qlnx/qlnxe/qlnx_os.c b/sys/dev/qlnx/qlnxe/qlnx_os.c
index 9d23d5df1d2b..4ad190374f87 100644
--- a/sys/dev/qlnx/qlnxe/qlnx_os.c
+++ b/sys/dev/qlnx/qlnxe/qlnx_os.c
@@ -2308,8 +2308,6 @@ qlnx_init_ifnet(device_t dev, qlnx_host_t *ha)
else if (device_id == QLOGIC_PCI_DEVICE_ID_1644)
if_setbaudrate(ifp, IF_Gbps(100));
- if_setcapabilities(ifp, IFCAP_LINKSTATE);
-
if_setinitfn(ifp, qlnx_init);
if_setsoftc(ifp, ha);
if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST);
@@ -2343,7 +2341,6 @@ qlnx_init_ifnet(device_t dev, qlnx_host_t *ha)
if_setcapabilities(ifp, IFCAP_HWCSUM);
if_setcapabilitiesbit(ifp, IFCAP_JUMBO_MTU, 0);
-
if_setcapabilitiesbit(ifp, IFCAP_VLAN_MTU, 0);
if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWTAGGING, 0);
if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWFILTER, 0);
@@ -2352,6 +2349,8 @@ qlnx_init_ifnet(device_t dev, qlnx_host_t *ha)
if_setcapabilitiesbit(ifp, IFCAP_TSO4, 0);
if_setcapabilitiesbit(ifp, IFCAP_TSO6, 0);
if_setcapabilitiesbit(ifp, IFCAP_LRO, 0);
+ if_setcapabilitiesbit(ifp, IFCAP_LINKSTATE, 0);
+ if_setcapabilitiesbit(ifp, IFCAP_HWSTATS, 0);
if_sethwtsomax(ifp, QLNX_MAX_TSO_FRAME_SIZE -
(ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN));
diff --git a/sys/dev/usb/input/usbhid.c b/sys/dev/usb/input/usbhid.c
index 3bb7d5e594e3..df810012b3f8 100644
--- a/sys/dev/usb/input/usbhid.c
+++ b/sys/dev/usb/input/usbhid.c
@@ -76,7 +76,7 @@
#include "hid_if.h"
static SYSCTL_NODE(_hw_usb, OID_AUTO, usbhid, CTLFLAG_RW, 0, "USB usbhid");
-static int usbhid_enable = 0;
+static int usbhid_enable = 1;
SYSCTL_INT(_hw_usb_usbhid, OID_AUTO, enable, CTLFLAG_RWTUN,
&usbhid_enable, 0, "Enable usbhid and prefer it to other USB HID drivers");
#ifdef USB_DEBUG
diff --git a/sys/dev/vmm/vmm_dev.c b/sys/dev/vmm/vmm_dev.c
index 819debadd1ac..9f2b009d02ec 100644
--- a/sys/dev/vmm/vmm_dev.c
+++ b/sys/dev/vmm/vmm_dev.c
@@ -30,7 +30,8 @@
#include <dev/vmm/vmm_mem.h>
#include <dev/vmm/vmm_stat.h>
-#if defined(__amd64__) && defined(COMPAT_FREEBSD12)
+#ifdef __amd64__
+#ifdef COMPAT_FREEBSD12
struct vm_memseg_12 {
int segid;
size_t len;
@@ -42,7 +43,22 @@ _Static_assert(sizeof(struct vm_memseg_12) == 80, "COMPAT_FREEBSD12 ABI");
_IOW('v', IOCNUM_ALLOC_MEMSEG, struct vm_memseg_12)
#define VM_GET_MEMSEG_12 \
_IOWR('v', IOCNUM_GET_MEMSEG, struct vm_memseg_12)
-#endif
+#endif /* COMPAT_FREEBSD12 */
+#ifdef COMPAT_FREEBSD14
+struct vm_memseg_14 {
+ int segid;
+ size_t len;
+ char name[VM_MAX_SUFFIXLEN + 1];
+};
+_Static_assert(sizeof(struct vm_memseg_14) == (VM_MAX_SUFFIXLEN + 1 + 16),
+ "COMPAT_FREEBSD14 ABI");
+
+#define VM_ALLOC_MEMSEG_14 \
+ _IOW('v', IOCNUM_ALLOC_MEMSEG, struct vm_memseg_14)
+#define VM_GET_MEMSEG_14 \
+ _IOWR('v', IOCNUM_GET_MEMSEG, struct vm_memseg_14)
+#endif /* COMPAT_FREEBSD14 */
+#endif /* __amd64__ */
struct devmem_softc {
int segid;
@@ -257,7 +273,8 @@ get_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg, size_t len)
}
static int
-alloc_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg, size_t len)
+alloc_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg, size_t len,
+ struct domainset *domainset)
{
char *name;
int error;
@@ -278,8 +295,7 @@ alloc_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg, size_t len)
if (error)
goto done;
}
-
- error = vm_alloc_memseg(sc->vm, mseg->segid, mseg->len, sysmem);
+ error = vm_alloc_memseg(sc->vm, mseg->segid, mseg->len, sysmem, domainset);
if (error)
goto done;
@@ -295,6 +311,20 @@ done:
return (error);
}
+#if defined(__amd64__) && \
+ (defined(COMPAT_FREEBSD14) || defined(COMPAT_FREEBSD12))
+/*
+ * Translate pre-15.0 memory segment identifiers into their 15.0 counterparts.
+ */
+static void
+adjust_segid(struct vm_memseg *mseg)
+{
+ if (mseg->segid != VM_SYSMEM) {
+ mseg->segid += (VM_BOOTROM - 1);
+ }
+}
+#endif
+
static int
vm_get_register_set(struct vcpu *vcpu, unsigned int count, int *regnum,
uint64_t *regval)
@@ -353,10 +383,16 @@ static const struct vmmdev_ioctl vmmdev_ioctls[] = {
VMMDEV_IOCTL(VM_STATS, VMMDEV_IOCTL_LOCK_ONE_VCPU),
VMMDEV_IOCTL(VM_STAT_DESC, 0),
-#if defined(__amd64__) && defined(COMPAT_FREEBSD12)
+#ifdef __amd64__
+#ifdef COMPAT_FREEBSD12
VMMDEV_IOCTL(VM_ALLOC_MEMSEG_12,
VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
#endif
+#ifdef COMPAT_FREEBSD14
+ VMMDEV_IOCTL(VM_ALLOC_MEMSEG_14,
+ VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
+#endif
+#endif /* __amd64__ */
VMMDEV_IOCTL(VM_ALLOC_MEMSEG,
VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
VMMDEV_IOCTL(VM_MMAP_MEMSEG,
@@ -366,9 +402,14 @@ static const struct vmmdev_ioctl vmmdev_ioctls[] = {
VMMDEV_IOCTL(VM_REINIT,
VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
-#if defined(__amd64__) && defined(COMPAT_FREEBSD12)
+#ifdef __amd64__
+#if defined(COMPAT_FREEBSD12)
VMMDEV_IOCTL(VM_GET_MEMSEG_12, VMMDEV_IOCTL_SLOCK_MEMSEGS),
#endif
+#ifdef COMPAT_FREEBSD14
+ VMMDEV_IOCTL(VM_GET_MEMSEG_14, VMMDEV_IOCTL_SLOCK_MEMSEGS),
+#endif
+#endif /* __amd64__ */
VMMDEV_IOCTL(VM_GET_MEMSEG, VMMDEV_IOCTL_SLOCK_MEMSEGS),
VMMDEV_IOCTL(VM_MMAP_GETNEXT, VMMDEV_IOCTL_SLOCK_MEMSEGS),
@@ -388,6 +429,7 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
struct vmmdev_softc *sc;
struct vcpu *vcpu;
const struct vmmdev_ioctl *ioctl;
+ struct vm_memseg *mseg;
int error, vcpuid;
sc = vmmdev_lookup2(cdev);
@@ -499,20 +541,77 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
error = vm_munmap_memseg(sc->vm, mu->gpa, mu->len);
break;
}
-#if defined(__amd64__) && defined(COMPAT_FREEBSD12)
+#ifdef __amd64__
+#ifdef COMPAT_FREEBSD12
case VM_ALLOC_MEMSEG_12:
- error = alloc_memseg(sc, (struct vm_memseg *)data,
- sizeof(((struct vm_memseg_12 *)0)->name));
+ mseg = (struct vm_memseg *)data;
+
+ adjust_segid(mseg);
+ error = alloc_memseg(sc, mseg,
+ sizeof(((struct vm_memseg_12 *)0)->name), NULL);
break;
case VM_GET_MEMSEG_12:
- error = get_memseg(sc, (struct vm_memseg *)data,
+ mseg = (struct vm_memseg *)data;
+
+ adjust_segid(mseg);
+ error = get_memseg(sc, mseg,
sizeof(((struct vm_memseg_12 *)0)->name));
break;
-#endif
- case VM_ALLOC_MEMSEG:
- error = alloc_memseg(sc, (struct vm_memseg *)data,
- sizeof(((struct vm_memseg *)0)->name));
+#endif /* COMPAT_FREEBSD12 */
+#ifdef COMPAT_FREEBSD14
+ case VM_ALLOC_MEMSEG_14:
+ mseg = (struct vm_memseg *)data;
+
+ adjust_segid(mseg);
+ error = alloc_memseg(sc, mseg,
+ sizeof(((struct vm_memseg_14 *)0)->name), NULL);
+ break;
+ case VM_GET_MEMSEG_14:
+ mseg = (struct vm_memseg *)data;
+
+ adjust_segid(mseg);
+ error = get_memseg(sc, mseg,
+ sizeof(((struct vm_memseg_14 *)0)->name));
+ break;
+#endif /* COMPAT_FREEBSD14 */
+#endif /* __amd64__ */
+ case VM_ALLOC_MEMSEG: {
+ domainset_t *mask;
+ struct domainset *domainset, domain;
+
+ domainset = NULL;
+ mseg = (struct vm_memseg *)data;
+ if (mseg->ds_policy != DOMAINSET_POLICY_INVALID && mseg->ds_mask != NULL) {
+ if (mseg->ds_mask_size < sizeof(domainset_t) ||
+ mseg->ds_mask_size > DOMAINSET_MAXSIZE / NBBY) {
+ error = ERANGE;
+ break;
+ }
+ memset(&domain, 0, sizeof(domain));
+ mask = malloc(mseg->ds_mask_size, M_VMMDEV, M_WAITOK);
+ error = copyin(mseg->ds_mask, mask, mseg->ds_mask_size);
+ if (error) {
+ free(mask, M_VMMDEV);
+ break;
+ }
+ error = domainset_populate(&domain, mask, mseg->ds_policy,
+ mseg->ds_mask_size);
+ if (error) {
+ free(mask, M_VMMDEV);
+ break;
+ }
+ domainset = domainset_create(&domain);
+ if (domainset == NULL) {
+ error = EINVAL;
+ free(mask, M_VMMDEV);
+ break;
+ }
+ free(mask, M_VMMDEV);
+ }
+ error = alloc_memseg(sc, mseg, sizeof(mseg->name), domainset);
+
break;
+ }
case VM_GET_MEMSEG:
error = get_memseg(sc, (struct vm_memseg *)data,
sizeof(((struct vm_memseg *)0)->name));
@@ -820,7 +919,6 @@ sysctl_vmm_destroy(SYSCTL_HANDLER_ARGS)
buflen = VM_MAX_NAMELEN + 1;
buf = malloc(buflen, M_VMMDEV, M_WAITOK | M_ZERO);
- strlcpy(buf, "beavis", buflen);
error = sysctl_handle_string(oidp, buf, buflen, req);
if (error == 0 && req->newptr != NULL)
error = vmmdev_lookup_and_destroy(buf, req->td->td_ucred);
@@ -830,7 +928,7 @@ sysctl_vmm_destroy(SYSCTL_HANDLER_ARGS)
SYSCTL_PROC(_hw_vmm, OID_AUTO, destroy,
CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
NULL, 0, sysctl_vmm_destroy, "A",
- NULL);
+ "Destroy a vmm(4) instance (legacy interface)");
static struct cdevsw vmmdevsw = {
.d_name = "vmmdev",
@@ -909,7 +1007,6 @@ sysctl_vmm_create(SYSCTL_HANDLER_ARGS)
buflen = VM_MAX_NAMELEN + 1;
buf = malloc(buflen, M_VMMDEV, M_WAITOK | M_ZERO);
- strlcpy(buf, "beavis", buflen);
error = sysctl_handle_string(oidp, buf, buflen, req);
if (error == 0 && req->newptr != NULL)
error = vmmdev_create(buf, req->td->td_ucred);
@@ -919,7 +1016,7 @@ sysctl_vmm_create(SYSCTL_HANDLER_ARGS)
SYSCTL_PROC(_hw_vmm, OID_AUTO, create,
CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
NULL, 0, sysctl_vmm_create, "A",
- NULL);
+ "Create a vmm(4) instance (legacy interface)");
static int
vmmctl_open(struct cdev *cdev, int flags, int fmt, struct thread *td)
diff --git a/sys/dev/vmm/vmm_mem.c b/sys/dev/vmm/vmm_mem.c
index c61ae2d44b96..be59e37de33d 100644
--- a/sys/dev/vmm/vmm_mem.c
+++ b/sys/dev/vmm/vmm_mem.c
@@ -7,6 +7,7 @@
#include <sys/types.h>
#include <sys/lock.h>
+#include <sys/malloc.h>
#include <sys/sx.h>
#include <sys/systm.h>
@@ -156,10 +157,11 @@ vm_mem_allocated(struct vcpu *vcpu, vm_paddr_t gpa)
}
int
-vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem)
+vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem,
+ struct domainset *obj_domainset)
{
- struct vm_mem *mem;
struct vm_mem_seg *seg;
+ struct vm_mem *mem;
vm_object_t obj;
mem = vm_mem(vm);
@@ -179,13 +181,22 @@ vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem)
return (EINVAL);
}
+ /*
+ * When given an impossible policy, signal an
+ * error to the user.
+ */
+ if (obj_domainset != NULL && domainset_empty_vm(obj_domainset))
+ return (EINVAL);
obj = vm_object_allocate(OBJT_SWAP, len >> PAGE_SHIFT);
if (obj == NULL)
return (ENOMEM);
seg->len = len;
seg->object = obj;
+ if (obj_domainset != NULL)
+ seg->object->domain.dr_policy = obj_domainset;
seg->sysmem = sysmem;
+
return (0);
}
diff --git a/sys/dev/vmm/vmm_mem.h b/sys/dev/vmm/vmm_mem.h
index a4be4c1c57aa..856470cf2590 100644
--- a/sys/dev/vmm/vmm_mem.h
+++ b/sys/dev/vmm/vmm_mem.h
@@ -8,6 +8,27 @@
#ifndef _DEV_VMM_MEM_H_
#define _DEV_VMM_MEM_H_
+/* Maximum number of NUMA domains in a guest. */
+#define VM_MAXMEMDOM 8
+#define VM_MAXSYSMEM VM_MAXMEMDOM
+
+/*
+ * Identifiers for memory segments.
+ * Each guest NUMA domain is represented by a single system
+ * memory segment from [VM_SYSMEM, VM_MAXSYSMEM).
+ * The remaining identifiers can be used to create devmem segments.
+ */
+enum {
+ VM_SYSMEM = 0,
+ VM_BOOTROM = VM_MAXSYSMEM,
+ VM_FRAMEBUFFER,
+ VM_PCIROM,
+ VM_MEMSEG_END
+};
+
+#define VM_MAX_MEMSEGS VM_MEMSEG_END
+#define VM_MAX_MEMMAPS (VM_MAX_MEMSEGS * 2)
+
#ifdef _KERNEL
#include <sys/types.h>
@@ -31,9 +52,6 @@ struct vm_mem_map {
int flags;
};
-#define VM_MAX_MEMSEGS 4
-#define VM_MAX_MEMMAPS 8
-
struct vm_mem {
struct vm_mem_map mem_maps[VM_MAX_MEMMAPS];
struct vm_mem_seg mem_segs[VM_MAX_MEMSEGS];
@@ -55,7 +73,8 @@ void vm_assert_memseg_xlocked(struct vm *vm);
int vm_mmap_memseg(struct vm *vm, vm_paddr_t gpa, int segid, vm_ooffset_t off,
size_t len, int prot, int flags);
int vm_munmap_memseg(struct vm *vm, vm_paddr_t gpa, size_t len);
-int vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem);
+int vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem,
+ struct domainset *obj_domainset);
void vm_free_memseg(struct vm *vm, int ident);
/*
diff --git a/sys/dev/vt/hw/vga/vt_vga.c b/sys/dev/vt/hw/vga/vt_vga.c
index 64039575c0ad..675c0573bd7e 100644
--- a/sys/dev/vt/hw/vga/vt_vga.c
+++ b/sys/dev/vt/hw/vga/vt_vga.c
@@ -1347,7 +1347,7 @@ vga_postswitch(struct vt_device *vd)
/* Reinit VGA mode, to restore view after app which change mode. */
vga_initialize(vd, (vd->vd_flags & VDF_TEXTMODE));
- /* Ask vt(9) to update chars on visible area. */
+ /* Ask vt(4) to update chars on visible area. */
vd->vd_flags |= VDF_INVALID;
}
diff --git a/sys/dev/vt/vt_core.c b/sys/dev/vt/vt_core.c
index b0f58b38a6f1..b51ef6766de4 100644
--- a/sys/dev/vt/vt_core.c
+++ b/sys/dev/vt/vt_core.c
@@ -125,10 +125,10 @@ static const struct terminal_class vt_termclass = {
(vw)->vw_number)
static SYSCTL_NODE(_kern, OID_AUTO, vt, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
- "vt(9) parameters");
+ "vt(4) parameters");
static VT_SYSCTL_INT(enable_altgr, 1, "Enable AltGr key (Do not assume R.Alt as Alt)");
static VT_SYSCTL_INT(enable_bell, 0, "Enable bell");
-static VT_SYSCTL_INT(debug, 0, "vt(9) debug level");
+static VT_SYSCTL_INT(debug, 0, "vt(4) debug level");
static VT_SYSCTL_INT(deadtimer, 15, "Time to wait busy process in VT_PROCESS mode");
static VT_SYSCTL_INT(suspendswitch, 1, "Switch to VT0 before suspend");