about summary refs log tree commit diff
path: root/sys/dev
diff options
context:
space:
mode:
Diffstat (limited to 'sys/dev')
-rw-r--r--sys/dev/amdsmu/amdsmu.c466
-rw-r--r--sys/dev/amdsmu/amdsmu.h95
-rw-r--r--sys/dev/amdsmu/amdsmu_reg.h84
-rw-r--r--sys/dev/cxgbe/tom/t4_cpl_io.c6
-rw-r--r--sys/dev/cxgbe/tom/t4_tls.c4
-rw-r--r--sys/dev/drm2/drm_fb_helper.c2
-rw-r--r--sys/dev/efidev/efirt.c42
-rw-r--r--sys/dev/iicbus/iichid.c74
-rw-r--r--sys/dev/md/md.c23
-rw-r--r--sys/dev/nvme/nvme_ctrlr.c295
-rw-r--r--sys/dev/nvme/nvme_private.h4
-rw-r--r--sys/dev/nvmf/controller/nvmft_subr.c40
-rw-r--r--sys/dev/pci/pci_iov.c25
-rw-r--r--sys/dev/pci/pci_iov_private.h2
-rw-r--r--sys/dev/qlnx/qlnxe/qlnx_os.c5
-rw-r--r--sys/dev/usb/input/usbhid.c2
-rw-r--r--sys/dev/vmm/vmm_dev.c135
-rw-r--r--sys/dev/vmm/vmm_mem.c15
-rw-r--r--sys/dev/vmm/vmm_mem.h27
-rw-r--r--sys/dev/vt/hw/vga/vt_vga.c2
-rw-r--r--sys/dev/vt/vt_core.c4
21 files changed, 1092 insertions, 260 deletions
diff --git a/sys/dev/amdsmu/amdsmu.c b/sys/dev/amdsmu/amdsmu.c
new file mode 100644
index 000000000000..416f875c6176
--- /dev/null
+++ b/sys/dev/amdsmu/amdsmu.c
@@ -0,0 +1,466 @@
+/*
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2025 The FreeBSD Foundation
+ *
+ * This software was developed by Aymeric Wibo <obiwac@freebsd.org>
+ * under sponsorship from the FreeBSD Foundation.
+ */
+
+#include <sys/param.h>
+#include <sys/bus.h>
+#include <sys/kernel.h>
+#include <sys/module.h>
+#include <sys/rman.h>
+#include <sys/sysctl.h>
+
+#include <dev/pci/pcivar.h>
+#include <dev/amdsmu/amdsmu.h>
+
+static bool
+amdsmu_match(device_t dev, const struct amdsmu_product **product_out)
+{
+ const uint16_t vendorid = pci_get_vendor(dev);
+ const uint16_t deviceid = pci_get_device(dev);
+
+ for (size_t i = 0; i < nitems(amdsmu_products); i++) {
+ const struct amdsmu_product *prod = &amdsmu_products[i];
+
+ if (vendorid == prod->amdsmu_vendorid &&
+ deviceid == prod->amdsmu_deviceid) {
+ if (product_out != NULL)
+ *product_out = prod;
+ return (true);
+ }
+ }
+ return (false);
+}
+
+static void
+amdsmu_identify(driver_t *driver, device_t parent)
+{
+ if (device_find_child(parent, "amdsmu", -1) != NULL)
+ return;
+
+ if (amdsmu_match(parent, NULL)) {
+ if (device_add_child(parent, "amdsmu", -1) == NULL)
+ device_printf(parent, "add amdsmu child failed\n");
+ }
+}
+
+static int
+amdsmu_probe(device_t dev)
+{
+ if (resource_disabled("amdsmu", 0))
+ return (ENXIO);
+ if (!amdsmu_match(device_get_parent(dev), NULL))
+ return (ENXIO);
+ device_set_descf(dev, "AMD System Management Unit");
+
+ return (BUS_PROBE_GENERIC);
+}
+
+static enum amdsmu_res
+amdsmu_wait_res(device_t dev)
+{
+ struct amdsmu_softc *sc = device_get_softc(dev);
+ enum amdsmu_res res;
+
+ /*
+ * The SMU has a response ready for us when the response register is
+ * set. Otherwise, we must wait.
+ */
+ for (size_t i = 0; i < SMU_RES_READ_MAX; i++) {
+ res = amdsmu_read4(sc, SMU_REG_RESPONSE);
+ if (res != SMU_RES_WAIT)
+ return (res);
+ pause_sbt("amdsmu", ustosbt(SMU_RES_READ_PERIOD_US), 0,
+ C_HARDCLOCK);
+ }
+ device_printf(dev, "timed out waiting for response from SMU\n");
+ return (SMU_RES_WAIT);
+}
+
+static int
+amdsmu_cmd(device_t dev, enum amdsmu_msg msg, uint32_t arg, uint32_t *ret)
+{
+ struct amdsmu_softc *sc = device_get_softc(dev);
+ enum amdsmu_res res;
+
+ /* Wait for SMU to be ready. */
+ if (amdsmu_wait_res(dev) == SMU_RES_WAIT)
+ return (ETIMEDOUT);
+
+ /* Clear previous response. */
+ amdsmu_write4(sc, SMU_REG_RESPONSE, SMU_RES_WAIT);
+
+ /* Write out command to registers. */
+ amdsmu_write4(sc, SMU_REG_MESSAGE, msg);
+ amdsmu_write4(sc, SMU_REG_ARGUMENT, arg);
+
+ /* Wait for SMU response and handle it. */
+ res = amdsmu_wait_res(dev);
+
+ switch (res) {
+ case SMU_RES_WAIT:
+ return (ETIMEDOUT);
+ case SMU_RES_OK:
+ if (ret != NULL)
+ *ret = amdsmu_read4(sc, SMU_REG_ARGUMENT);
+ return (0);
+ case SMU_RES_REJECT_BUSY:
+ device_printf(dev, "SMU is busy\n");
+ return (EBUSY);
+ case SMU_RES_REJECT_PREREQ:
+ case SMU_RES_UNKNOWN:
+ case SMU_RES_FAILED:
+ device_printf(dev, "SMU error: %02x\n", res);
+ return (EIO);
+ }
+
+ return (EINVAL);
+}
+
+static int
+amdsmu_get_vers(device_t dev)
+{
+ int err;
+ uint32_t smu_vers;
+ struct amdsmu_softc *sc = device_get_softc(dev);
+
+ err = amdsmu_cmd(dev, SMU_MSG_GETSMUVERSION, 0, &smu_vers);
+ if (err != 0) {
+ device_printf(dev, "failed to get SMU version\n");
+ return (err);
+ }
+ sc->smu_program = (smu_vers >> 24) & 0xFF;
+ sc->smu_maj = (smu_vers >> 16) & 0xFF;
+ sc->smu_min = (smu_vers >> 8) & 0xFF;
+ sc->smu_rev = smu_vers & 0xFF;
+ device_printf(dev, "SMU version: %d.%d.%d (program %d)\n",
+ sc->smu_maj, sc->smu_min, sc->smu_rev, sc->smu_program);
+
+ return (0);
+}
+
+static int
+amdsmu_get_ip_blocks(device_t dev)
+{
+ struct amdsmu_softc *sc = device_get_softc(dev);
+ const uint16_t deviceid = pci_get_device(dev);
+ int err;
+ struct amdsmu_metrics *m = &sc->metrics;
+ bool active;
+ char sysctl_descr[32];
+
+ /* Get IP block count. */
+ switch (deviceid) {
+ case PCI_DEVICEID_AMD_REMBRANDT_ROOT:
+ sc->ip_block_count = 12;
+ break;
+ case PCI_DEVICEID_AMD_PHOENIX_ROOT:
+ sc->ip_block_count = 21;
+ break;
+ /* TODO How many IP blocks does Strix Point (and the others) have? */
+ case PCI_DEVICEID_AMD_STRIX_POINT_ROOT:
+ default:
+ sc->ip_block_count = nitems(amdsmu_ip_blocks_names);
+ }
+ KASSERT(sc->ip_block_count <= nitems(amdsmu_ip_blocks_names),
+ ("too many IP blocks for array"));
+
+ /* Get and print out IP blocks. */
+ err = amdsmu_cmd(dev, SMU_MSG_GET_SUP_CONSTRAINTS, 0,
+ &sc->active_ip_blocks);
+ if (err != 0) {
+ device_printf(dev, "failed to get IP blocks\n");
+ return (err);
+ }
+ device_printf(dev, "Active IP blocks: ");
+ for (size_t i = 0; i < sc->ip_block_count; i++) {
+ active = (sc->active_ip_blocks & (1 << i)) != 0;
+ sc->ip_blocks_active[i] = active;
+ if (!active)
+ continue;
+ printf("%s%s", amdsmu_ip_blocks_names[i],
+ i + 1 < sc->ip_block_count ? " " : "\n");
+ }
+
+ /* Create a sysctl node for IP blocks. */
+ sc->ip_blocks_sysctlnode = SYSCTL_ADD_NODE(sc->sysctlctx,
+ SYSCTL_CHILDREN(sc->sysctlnode), OID_AUTO, "ip_blocks",
+ CTLFLAG_RD, NULL, "SMU metrics");
+ if (sc->ip_blocks_sysctlnode == NULL) {
+ device_printf(dev, "could not add sysctl node for IP blocks\n");
+ return (ENOMEM);
+ }
+
+ /* Create a sysctl node for each IP block. */
+ for (size_t i = 0; i < sc->ip_block_count; i++) {
+ /* Create the sysctl node itself for the IP block. */
+ snprintf(sysctl_descr, sizeof sysctl_descr,
+ "Metrics about the %s AMD IP block",
+ amdsmu_ip_blocks_names[i]);
+ sc->ip_block_sysctlnodes[i] = SYSCTL_ADD_NODE(sc->sysctlctx,
+ SYSCTL_CHILDREN(sc->ip_blocks_sysctlnode), OID_AUTO,
+ amdsmu_ip_blocks_names[i], CTLFLAG_RD, NULL, sysctl_descr);
+ if (sc->ip_block_sysctlnodes[i] == NULL) {
+ device_printf(dev,
+ "could not add sysctl node for \"%s\"\n", sysctl_descr);
+ continue;
+ }
+ /*
+ * Create sysctls for if the IP block is currently active, last
+ * active time, and total active time.
+ */
+ SYSCTL_ADD_BOOL(sc->sysctlctx,
+ SYSCTL_CHILDREN(sc->ip_block_sysctlnodes[i]), OID_AUTO,
+ "active", CTLFLAG_RD, &sc->ip_blocks_active[i], 0,
+ "IP block is currently active");
+ SYSCTL_ADD_U64(sc->sysctlctx,
+ SYSCTL_CHILDREN(sc->ip_block_sysctlnodes[i]), OID_AUTO,
+ "last_time", CTLFLAG_RD, &m->ip_block_last_active_time[i],
+ 0, "How long the IP block was active for during the last"
+ " sleep (us)");
+#ifdef IP_BLOCK_TOTAL_ACTIVE_TIME
+ SYSCTL_ADD_U64(sc->sysctlctx,
+ SYSCTL_CHILDREN(sc->ip_block_sysctlnodes[i]), OID_AUTO,
+ "total_time", CTLFLAG_RD, &m->ip_block_total_active_time[i],
+ 0, "How long the IP block was active for during sleep in"
+ " total (us)");
+#endif
+ }
+ return (0);
+}
+
+static int
+amdsmu_init_metrics(device_t dev)
+{
+ struct amdsmu_softc *sc = device_get_softc(dev);
+ int err;
+ uint32_t metrics_addr_lo, metrics_addr_hi;
+ uint64_t metrics_addr;
+
+ /* Get physical address of logging buffer. */
+ err = amdsmu_cmd(dev, SMU_MSG_LOG_GETDRAM_ADDR_LO, 0, &metrics_addr_lo);
+ if (err != 0)
+ return (err);
+ err = amdsmu_cmd(dev, SMU_MSG_LOG_GETDRAM_ADDR_HI, 0, &metrics_addr_hi);
+ if (err != 0)
+ return (err);
+ metrics_addr = ((uint64_t) metrics_addr_hi << 32) | metrics_addr_lo;
+
+ /* Map memory of logging buffer. */
+ err = bus_space_map(sc->bus_tag, metrics_addr,
+ sizeof(struct amdsmu_metrics), 0, &sc->metrics_space);
+ if (err != 0) {
+ device_printf(dev, "could not map bus space for SMU metrics\n");
+ return (err);
+ }
+
+ /* Start logging for metrics. */
+ amdsmu_cmd(dev, SMU_MSG_LOG_RESET, 0, NULL);
+ amdsmu_cmd(dev, SMU_MSG_LOG_START, 0, NULL);
+ return (0);
+}
+
+static int
+amdsmu_dump_metrics(device_t dev)
+{
+ struct amdsmu_softc *sc = device_get_softc(dev);
+ int err;
+
+ err = amdsmu_cmd(dev, SMU_MSG_LOG_DUMP_DATA, 0, NULL);
+ if (err != 0) {
+ device_printf(dev, "failed to dump metrics\n");
+ return (err);
+ }
+ bus_space_read_region_4(sc->bus_tag, sc->metrics_space, 0,
+ (uint32_t *)&sc->metrics, sizeof(sc->metrics) / sizeof(uint32_t));
+
+ return (0);
+}
+
+static void
+amdsmu_fetch_idlemask(device_t dev)
+{
+ struct amdsmu_softc *sc = device_get_softc(dev);
+
+ sc->idlemask = amdsmu_read4(sc, SMU_REG_IDLEMASK);
+}
+
+static int
+amdsmu_attach(device_t dev)
+{
+ struct amdsmu_softc *sc = device_get_softc(dev);
+ int err;
+ uint32_t physbase_addr_lo, physbase_addr_hi;
+ uint64_t physbase_addr;
+ int rid = 0;
+ struct sysctl_oid *node;
+
+ /*
+ * Find physical base address for SMU.
+ * XXX I am a little confused about the masks here. I'm just copying
+ * what Linux does in the amd-pmc driver to get the base address.
+ */
+ pci_write_config(dev, SMU_INDEX_ADDRESS, SMU_PHYSBASE_ADDR_LO, 4);
+ physbase_addr_lo = pci_read_config(dev, SMU_INDEX_DATA, 4) & 0xFFF00000;
+
+ pci_write_config(dev, SMU_INDEX_ADDRESS, SMU_PHYSBASE_ADDR_HI, 4);
+ physbase_addr_hi = pci_read_config(dev, SMU_INDEX_DATA, 4) & 0x0000FFFF;
+
+ physbase_addr = (uint64_t)physbase_addr_hi << 32 | physbase_addr_lo;
+
+ /* Map memory for SMU and its registers. */
+ sc->res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE);
+ if (sc->res == NULL) {
+ device_printf(dev, "could not allocate resource\n");
+ return (ENXIO);
+ }
+
+ sc->bus_tag = rman_get_bustag(sc->res);
+
+ if (bus_space_map(sc->bus_tag, physbase_addr,
+ SMU_MEM_SIZE, 0, &sc->smu_space) != 0) {
+ device_printf(dev, "could not map bus space for SMU\n");
+ err = ENXIO;
+ goto err_smu_space;
+ }
+ if (bus_space_map(sc->bus_tag, physbase_addr + SMU_REG_SPACE_OFF,
+ SMU_MEM_SIZE, 0, &sc->reg_space) != 0) {
+ device_printf(dev, "could not map bus space for SMU regs\n");
+ err = ENXIO;
+ goto err_reg_space;
+ }
+
+ /* sysctl stuff. */
+ sc->sysctlctx = device_get_sysctl_ctx(dev);
+ sc->sysctlnode = device_get_sysctl_tree(dev);
+
+ /* Get version & add sysctls. */
+ if ((err = amdsmu_get_vers(dev)) != 0)
+ goto err_dump;
+
+ SYSCTL_ADD_U8(sc->sysctlctx, SYSCTL_CHILDREN(sc->sysctlnode), OID_AUTO,
+ "program", CTLFLAG_RD, &sc->smu_program, 0, "SMU program number");
+ SYSCTL_ADD_U8(sc->sysctlctx, SYSCTL_CHILDREN(sc->sysctlnode), OID_AUTO,
+ "version_major", CTLFLAG_RD, &sc->smu_maj, 0,
+ "SMU firmware major version number");
+ SYSCTL_ADD_U8(sc->sysctlctx, SYSCTL_CHILDREN(sc->sysctlnode), OID_AUTO,
+ "version_minor", CTLFLAG_RD, &sc->smu_min, 0,
+ "SMU firmware minor version number");
+ SYSCTL_ADD_U8(sc->sysctlctx, SYSCTL_CHILDREN(sc->sysctlnode), OID_AUTO,
+ "version_revision", CTLFLAG_RD, &sc->smu_rev, 0,
+ "SMU firmware revision number");
+
+ /* Set up for getting metrics & add sysctls. */
+ if ((err = amdsmu_init_metrics(dev)) != 0)
+ goto err_dump;
+ if ((err = amdsmu_dump_metrics(dev)) != 0)
+ goto err_dump;
+
+ node = SYSCTL_ADD_NODE(sc->sysctlctx, SYSCTL_CHILDREN(sc->sysctlnode),
+ OID_AUTO, "metrics", CTLFLAG_RD, NULL, "SMU metrics");
+ if (node == NULL) {
+ device_printf(dev, "could not add sysctl node for metrics\n");
+ err = ENOMEM;
+ goto err_dump;
+ }
+
+ SYSCTL_ADD_U32(sc->sysctlctx, SYSCTL_CHILDREN(node), OID_AUTO,
+ "table_version", CTLFLAG_RD, &sc->metrics.table_version, 0,
+ "SMU metrics table version");
+ SYSCTL_ADD_U32(sc->sysctlctx, SYSCTL_CHILDREN(node), OID_AUTO,
+ "hint_count", CTLFLAG_RD, &sc->metrics.hint_count, 0,
+ "How many times the sleep hint was set");
+ SYSCTL_ADD_U32(sc->sysctlctx, SYSCTL_CHILDREN(node), OID_AUTO,
+ "s0i3_last_entry_status", CTLFLAG_RD,
+ &sc->metrics.s0i3_last_entry_status, 0,
+ "1 if last S0i3 entry was successful");
+ SYSCTL_ADD_U32(sc->sysctlctx, SYSCTL_CHILDREN(node), OID_AUTO,
+ "time_last_in_s0i2", CTLFLAG_RD, &sc->metrics.time_last_in_s0i2, 0,
+ "Time spent in S0i2 during last sleep (us)");
+ SYSCTL_ADD_U64(sc->sysctlctx, SYSCTL_CHILDREN(node), OID_AUTO,
+ "time_last_entering_s0i3", CTLFLAG_RD,
+ &sc->metrics.time_last_entering_s0i3, 0,
+ "Time spent entering S0i3 during last sleep (us)");
+ SYSCTL_ADD_U64(sc->sysctlctx, SYSCTL_CHILDREN(node), OID_AUTO,
+ "total_time_entering_s0i3", CTLFLAG_RD,
+ &sc->metrics.total_time_entering_s0i3, 0,
+ "Total time spent entering S0i3 (us)");
+ SYSCTL_ADD_U64(sc->sysctlctx, SYSCTL_CHILDREN(node), OID_AUTO,
+ "time_last_resuming", CTLFLAG_RD, &sc->metrics.time_last_resuming,
+ 0, "Time spent resuming from last sleep (us)");
+ SYSCTL_ADD_U64(sc->sysctlctx, SYSCTL_CHILDREN(node), OID_AUTO,
+ "total_time_resuming", CTLFLAG_RD, &sc->metrics.total_time_resuming,
+ 0, "Total time spent resuming from sleep (us)");
+ SYSCTL_ADD_U64(sc->sysctlctx, SYSCTL_CHILDREN(node), OID_AUTO,
+ "time_last_in_s0i3", CTLFLAG_RD, &sc->metrics.time_last_in_s0i3, 0,
+ "Time spent in S0i3 during last sleep (us)");
+ SYSCTL_ADD_U64(sc->sysctlctx, SYSCTL_CHILDREN(node), OID_AUTO,
+ "total_time_in_s0i3", CTLFLAG_RD, &sc->metrics.total_time_in_s0i3,
+ 0, "Total time spent in S0i3 (us)");
+ SYSCTL_ADD_U64(sc->sysctlctx, SYSCTL_CHILDREN(node), OID_AUTO,
+ "time_last_in_sw_drips", CTLFLAG_RD,
+ &sc->metrics.time_last_in_sw_drips, 0,
+ "Time spent in awake during last sleep (us)");
+ SYSCTL_ADD_U64(sc->sysctlctx, SYSCTL_CHILDREN(node), OID_AUTO,
+ "total_time_in_sw_drips", CTLFLAG_RD,
+ &sc->metrics.total_time_in_sw_drips, 0,
+ "Total time spent awake (us)");
+
+ /* Get IP blocks & add sysctls. */
+ err = amdsmu_get_ip_blocks(dev);
+ if (err != 0)
+ goto err_dump;
+
+ /* Get idlemask & add sysctl. */
+ amdsmu_fetch_idlemask(dev);
+ SYSCTL_ADD_U32(sc->sysctlctx, SYSCTL_CHILDREN(sc->sysctlnode), OID_AUTO,
+ "idlemask", CTLFLAG_RD, &sc->idlemask, 0, "SMU idlemask. This "
+ "value is not documented - only used to help AMD internally debug "
+ "issues");
+
+ return (0);
+err_dump:
+ bus_space_unmap(sc->bus_tag, sc->reg_space, SMU_MEM_SIZE);
+err_reg_space:
+ bus_space_unmap(sc->bus_tag, sc->smu_space, SMU_MEM_SIZE);
+err_smu_space:
+ bus_release_resource(dev, SYS_RES_MEMORY, rid, sc->res);
+ return (err);
+}
+
+static int
+amdsmu_detach(device_t dev)
+{
+ struct amdsmu_softc *sc = device_get_softc(dev);
+ int rid = 0;
+
+ bus_space_unmap(sc->bus_tag, sc->smu_space, SMU_MEM_SIZE);
+ bus_space_unmap(sc->bus_tag, sc->reg_space, SMU_MEM_SIZE);
+
+ bus_release_resource(dev, SYS_RES_MEMORY, rid, sc->res);
+ return (0);
+}
+
+static device_method_t amdsmu_methods[] = {
+ DEVMETHOD(device_identify, amdsmu_identify),
+ DEVMETHOD(device_probe, amdsmu_probe),
+ DEVMETHOD(device_attach, amdsmu_attach),
+ DEVMETHOD(device_detach, amdsmu_detach),
+ DEVMETHOD_END
+};
+
+static driver_t amdsmu_driver = {
+ "amdsmu",
+ amdsmu_methods,
+ sizeof(struct amdsmu_softc),
+};
+
+DRIVER_MODULE(amdsmu, hostb, amdsmu_driver, NULL, NULL);
+MODULE_VERSION(amdsmu, 1);
+MODULE_DEPEND(amdsmu, amdsmn, 1, 1, 1);
+MODULE_PNP_INFO("U16:vendor;U16:device", pci, amdsmu, amdsmu_products,
+ nitems(amdsmu_products));
diff --git a/sys/dev/amdsmu/amdsmu.h b/sys/dev/amdsmu/amdsmu.h
new file mode 100644
index 000000000000..025887f7fe5a
--- /dev/null
+++ b/sys/dev/amdsmu/amdsmu.h
@@ -0,0 +1,95 @@
+/*
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2025 The FreeBSD Foundation
+ *
+ * This software was developed by Aymeric Wibo <obiwac@freebsd.org>
+ * under sponsorship from the FreeBSD Foundation.
+ */
+#ifndef _AMDSMU_H_
+#define _AMDSMU_H_
+
+#include <sys/param.h>
+#include <sys/bus.h>
+#include <sys/kernel.h>
+#include <machine/bus.h>
+#include <x86/cputypes.h>
+
+#include <dev/amdsmu/amdsmu_reg.h>
+
+#define SMU_RES_READ_PERIOD_US 50
+#define SMU_RES_READ_MAX 20000
+
+static const struct amdsmu_product {
+ uint16_t amdsmu_vendorid;
+ uint16_t amdsmu_deviceid;
+} amdsmu_products[] = {
+ { CPU_VENDOR_AMD, PCI_DEVICEID_AMD_REMBRANDT_ROOT },
+ { CPU_VENDOR_AMD, PCI_DEVICEID_AMD_PHOENIX_ROOT },
+ { CPU_VENDOR_AMD, PCI_DEVICEID_AMD_STRIX_POINT_ROOT },
+};
+
+static const char *const amdsmu_ip_blocks_names[] = {
+ "DISPLAY",
+ "CPU",
+ "GFX",
+ "VDD",
+ "ACP",
+ "VCN",
+ "ISP",
+ "NBIO",
+ "DF",
+ "USB3_0",
+ "USB3_1",
+ "LAPIC",
+ "USB3_2",
+ "USB3_3",
+ "USB3_4",
+ "USB4_0",
+ "USB4_1",
+ "MPM",
+ "JPEG",
+ "IPU",
+ "UMSCH",
+ "VPE",
+};
+
+CTASSERT(nitems(amdsmu_ip_blocks_names) <= 32);
+
+struct amdsmu_softc {
+ struct sysctl_ctx_list *sysctlctx;
+ struct sysctl_oid *sysctlnode;
+
+ struct resource *res;
+ bus_space_tag_t bus_tag;
+
+ bus_space_handle_t smu_space;
+ bus_space_handle_t reg_space;
+
+ uint8_t smu_program;
+ uint8_t smu_maj, smu_min, smu_rev;
+
+ uint32_t active_ip_blocks;
+ struct sysctl_oid *ip_blocks_sysctlnode;
+ size_t ip_block_count;
+ struct sysctl_oid *ip_block_sysctlnodes[nitems(amdsmu_ip_blocks_names)];
+ bool ip_blocks_active[nitems(amdsmu_ip_blocks_names)];
+
+ bus_space_handle_t metrics_space;
+ struct amdsmu_metrics metrics;
+ uint32_t idlemask;
+};
+
+static inline uint32_t
+amdsmu_read4(const struct amdsmu_softc *sc, bus_size_t reg)
+{
+ return (bus_space_read_4(sc->bus_tag, sc->reg_space, reg));
+}
+
+static inline void
+amdsmu_write4(const struct amdsmu_softc *sc, bus_size_t reg, uint32_t val)
+{
+ bus_space_write_4(sc->bus_tag, sc->reg_space, reg, val);
+}
+
+#endif /* _AMDSMU_H_ */
diff --git a/sys/dev/amdsmu/amdsmu_reg.h b/sys/dev/amdsmu/amdsmu_reg.h
new file mode 100644
index 000000000000..e685b34e6883
--- /dev/null
+++ b/sys/dev/amdsmu/amdsmu_reg.h
@@ -0,0 +1,84 @@
+/*
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2025 The FreeBSD Foundation
+ *
+ * This software was developed by Aymeric Wibo <obiwac@freebsd.org>
+ * under sponsorship from the FreeBSD Foundation.
+ */
+#ifndef _AMDSMU_REG_H_
+#define _AMDSMU_REG_H_
+
+#include <sys/types.h>
+
+/*
+ * TODO These are in common with amdtemp; should we find a way to factor these
+ * out? Also, there are way more of these. I couldn't find a centralized place
+ * which lists them though.
+ */
+#define PCI_DEVICEID_AMD_REMBRANDT_ROOT 0x14B5
+#define PCI_DEVICEID_AMD_PHOENIX_ROOT 0x14E8
+#define PCI_DEVICEID_AMD_STRIX_POINT_ROOT 0x14A4
+
+#define SMU_INDEX_ADDRESS 0xB8
+#define SMU_INDEX_DATA 0xBC
+
+#define SMU_PHYSBASE_ADDR_LO 0x13B102E8
+#define SMU_PHYSBASE_ADDR_HI 0x13B102EC
+
+#define SMU_MEM_SIZE 0x1000
+#define SMU_REG_SPACE_OFF 0x10000
+
+#define SMU_REG_MESSAGE 0x538
+#define SMU_REG_RESPONSE 0x980
+#define SMU_REG_ARGUMENT 0x9BC
+#define SMU_REG_IDLEMASK 0xD14
+
+enum amdsmu_res {
+ SMU_RES_WAIT = 0x00,
+ SMU_RES_OK = 0x01,
+ SMU_RES_REJECT_BUSY = 0xFC,
+ SMU_RES_REJECT_PREREQ = 0xFD,
+ SMU_RES_UNKNOWN = 0xFE,
+ SMU_RES_FAILED = 0xFF,
+};
+
+enum amdsmu_msg {
+ SMU_MSG_GETSMUVERSION = 0x02,
+ SMU_MSG_LOG_GETDRAM_ADDR_HI = 0x04,
+ SMU_MSG_LOG_GETDRAM_ADDR_LO = 0x05,
+ SMU_MSG_LOG_START = 0x06,
+ SMU_MSG_LOG_RESET = 0x07,
+ SMU_MSG_LOG_DUMP_DATA = 0x08,
+ SMU_MSG_GET_SUP_CONSTRAINTS = 0x09,
+};
+
+/* XXX Copied from Linux struct smu_metrics. */
+struct amdsmu_metrics {
+ uint32_t table_version;
+ uint32_t hint_count;
+ uint32_t s0i3_last_entry_status;
+ uint32_t time_last_in_s0i2;
+ uint64_t time_last_entering_s0i3;
+ uint64_t total_time_entering_s0i3;
+ uint64_t time_last_resuming;
+ uint64_t total_time_resuming;
+ uint64_t time_last_in_s0i3;
+ uint64_t total_time_in_s0i3;
+ uint64_t time_last_in_sw_drips;
+ uint64_t total_time_in_sw_drips;
+ /*
+ * This is how long each IP block was active for (us), i.e., blocking
+ * entry to S0i3. In Linux, these are called "timecondition_notmet_*".
+ *
+ * XXX Total active time for IP blocks seems to be buggy and reporting
+ * garbage (at least on Phoenix), so it's disabled for now. The last
+ * active time for the USB4_0 IP block also seems to be buggy.
+ */
+ uint64_t ip_block_last_active_time[32];
+#ifdef IP_BLOCK_TOTAL_ACTIVE_TIME
+ uint64_t ip_block_total_active_time[32];
+#endif
+} __attribute__((packed));
+
+#endif /* _AMDSMU_REG_H_ */
diff --git a/sys/dev/cxgbe/tom/t4_cpl_io.c b/sys/dev/cxgbe/tom/t4_cpl_io.c
index 8547f21586e1..7a6b1cbdd736 100644
--- a/sys/dev/cxgbe/tom/t4_cpl_io.c
+++ b/sys/dev/cxgbe/tom/t4_cpl_io.c
@@ -703,7 +703,7 @@ t4_push_frames(struct adapter *sc, struct toepcb *toep, int drop)
for (m = sndptr; m != NULL; m = m->m_next) {
int n;
- if ((m->m_flags & M_NOTAVAIL) != 0)
+ if ((m->m_flags & M_NOTREADY) != 0)
break;
if (m->m_flags & M_EXTPG) {
#ifdef KERN_TLS
@@ -787,7 +787,7 @@ t4_push_frames(struct adapter *sc, struct toepcb *toep, int drop)
/* nothing to send */
if (plen == 0) {
- KASSERT(m == NULL || (m->m_flags & M_NOTAVAIL) != 0,
+ KASSERT(m == NULL || (m->m_flags & M_NOTREADY) != 0,
("%s: nothing to send, but m != NULL is ready",
__func__));
break;
@@ -880,7 +880,7 @@ t4_push_frames(struct adapter *sc, struct toepcb *toep, int drop)
toep->txsd_avail--;
t4_l2t_send(sc, wr, toep->l2te);
- } while (m != NULL && (m->m_flags & M_NOTAVAIL) == 0);
+ } while (m != NULL && (m->m_flags & M_NOTREADY) == 0);
/* Send a FIN if requested, but only if there's no more data to send */
if (m == NULL && toep->flags & TPF_SEND_FIN)
diff --git a/sys/dev/cxgbe/tom/t4_tls.c b/sys/dev/cxgbe/tom/t4_tls.c
index c6377980fca9..27c16b9988ae 100644
--- a/sys/dev/cxgbe/tom/t4_tls.c
+++ b/sys/dev/cxgbe/tom/t4_tls.c
@@ -563,7 +563,7 @@ t4_push_ktls(struct adapter *sc, struct toepcb *toep, int drop)
* If there is no ready data to send, wait until more
* data arrives.
*/
- if (m == NULL || (m->m_flags & M_NOTAVAIL) != 0) {
+ if (m == NULL || (m->m_flags & M_NOTREADY) != 0) {
if (sowwakeup)
sowwakeup_locked(so);
else
@@ -614,7 +614,7 @@ t4_push_ktls(struct adapter *sc, struct toepcb *toep, int drop)
/* Shove if there is no additional data pending. */
shove = ((m->m_next == NULL ||
- (m->m_next->m_flags & M_NOTAVAIL) != 0)) &&
+ (m->m_next->m_flags & M_NOTREADY) != 0)) &&
(tp->t_flags & TF_MORETOCOME) == 0;
if (sb->sb_flags & SB_AUTOSIZE &&
diff --git a/sys/dev/drm2/drm_fb_helper.c b/sys/dev/drm2/drm_fb_helper.c
index f67cc9f60d02..1f4abd255690 100644
--- a/sys/dev/drm2/drm_fb_helper.c
+++ b/sys/dev/drm2/drm_fb_helper.c
@@ -51,7 +51,7 @@ struct vt_kms_softc {
struct task fb_mode_task;
};
-/* Call restore out of vt(9) locks. */
+/* Call restore out of vt(4) locks. */
static void
vt_restore_fbdev_mode(void *arg, int pending)
{
diff --git a/sys/dev/efidev/efirt.c b/sys/dev/efidev/efirt.c
index b0fa33daeca7..b55c1c191077 100644
--- a/sys/dev/efidev/efirt.c
+++ b/sys/dev/efidev/efirt.c
@@ -107,7 +107,8 @@ static int efi_status2err[25] = {
enum efi_table_type {
TYPE_ESRT = 0,
- TYPE_PROP
+ TYPE_PROP,
+ TYPE_MEMORY_ATTR
};
static int efi_enter(void);
@@ -445,6 +446,42 @@ get_table_length(enum efi_table_type type, size_t *table_len, void **taddr)
free(buf, M_TEMP);
return (0);
}
+ case TYPE_MEMORY_ATTR:
+ {
+ efi_guid_t guid = EFI_MEMORY_ATTRIBUTES_TABLE;
+ struct efi_memory_attribute_table *tbl_addr, *mem_addr;
+ int error;
+ void *buf;
+ size_t len = sizeof(struct efi_memory_attribute_table);
+
+ error = efi_get_table(&guid, (void **)&tbl_addr);
+ if (error)
+ return (error);
+
+ buf = malloc(len, M_TEMP, M_WAITOK);
+ error = physcopyout((vm_paddr_t)tbl_addr, buf, len);
+ if (error) {
+ free(buf, M_TEMP);
+ return (error);
+ }
+
+ mem_addr = (struct efi_memory_attribute_table *)buf;
+ if (mem_addr->version != 2) {
+ free(buf, M_TEMP);
+ return (EINVAL);
+ }
+ len += mem_addr->descriptor_size * mem_addr->num_ents;
+ if (len > EFI_TABLE_ALLOC_MAX) {
+ free(buf, M_TEMP);
+ return (ENOMEM);
+ }
+
+ *table_len = len;
+ if (taddr != NULL)
+ *taddr = tbl_addr;
+ free(buf, M_TEMP);
+ return (0);
+ }
}
return (ENOENT);
}
@@ -457,7 +494,8 @@ copy_table(efi_guid_t *guid, void **buf, size_t buf_len, size_t *table_len)
enum efi_table_type type;
} tables[] = {
{ EFI_TABLE_ESRT, TYPE_ESRT },
- { EFI_PROPERTIES_TABLE, TYPE_PROP }
+ { EFI_PROPERTIES_TABLE, TYPE_PROP },
+ { EFI_MEMORY_ATTRIBUTES_TABLE, TYPE_MEMORY_ATTR }
};
size_t table_idx;
void *taddr;
diff --git a/sys/dev/iicbus/iichid.c b/sys/dev/iicbus/iichid.c
index 9c0324a24685..3f1d7a0cefba 100644
--- a/sys/dev/iicbus/iichid.c
+++ b/sys/dev/iicbus/iichid.c
@@ -275,62 +275,36 @@ iichid_cmd_read(struct iichid_softc* sc, void *buf, iichid_size_t maxlen,
* 6.1.3 - Retrieval of Input Reports
* DEVICE returns the length (2 Bytes) and the entire Input Report.
*/
- uint8_t actbuf[2] = { 0, 0 };
- /* Read actual input report length. */
+
+ memset(buf, 0xaa, 2); // In case nothing gets read
struct iic_msg msgs[] = {
- { sc->addr, IIC_M_RD | IIC_M_NOSTOP, sizeof(actbuf), actbuf },
+ { sc->addr, IIC_M_RD, maxlen, buf },
};
- uint16_t actlen;
int error;
error = iicbus_transfer(sc->dev, msgs, nitems(msgs));
if (error != 0)
return (error);
- actlen = actbuf[0] | actbuf[1] << 8;
-#ifdef IICHID_SAMPLING
- if ((actlen == 0 && sc->sampling_rate_slow < 0) ||
- (maxlen == 0 && sc->sampling_rate_slow >= 0)) {
-#else
+ DPRINTFN(sc, 5, "%*D\n", msgs[0].len, msgs[0].buf, " ");
+
+ uint16_t actlen = le16dec(buf);
+
if (actlen == 0) {
-#endif
- /* Read and discard reset command response. */
- msgs[0] = (struct iic_msg)
- { sc->addr, IIC_M_RD | IIC_M_NOSTART,
- le16toh(sc->desc.wMaxInputLength) - 2, sc->intr_buf };
- actlen = 0;
if (!sc->reset_acked) {
mtx_lock(&sc->mtx);
sc->reset_acked = true;
wakeup(&sc->reset_acked);
mtx_unlock(&sc->mtx);
}
-#ifdef IICHID_SAMPLING
- } else if ((actlen <= 2 || actlen == 0xFFFF) &&
- sc->sampling_rate_slow >= 0) {
- /* Read and discard 1 byte to send I2C STOP condition. */
- msgs[0] = (struct iic_msg)
- { sc->addr, IIC_M_RD | IIC_M_NOSTART, 1, actbuf };
- actlen = 0;
-#endif
- } else {
- actlen -= 2;
- if (actlen > maxlen) {
- DPRINTF(sc, "input report too big. requested=%d "
- "received=%d\n", maxlen, actlen);
- actlen = maxlen;
- }
- /* Read input report itself. */
- msgs[0] = (struct iic_msg)
- { sc->addr, IIC_M_RD | IIC_M_NOSTART, actlen, buf };
}
- error = iicbus_transfer(sc->dev, msgs, 1);
- if (error == 0 && actual_len != NULL)
+ if (actlen <= 2 || actlen > maxlen) {
+ actlen = 0;
+ }
+ if (actual_len != NULL) {
*actual_len = actlen;
-
- DPRINTFN(sc, 5,
- "%*D - %*D\n", 2, actbuf, " ", msgs[0].len, msgs[0].buf, " ");
+ }
return (error);
}
@@ -566,7 +540,7 @@ iichid_sampling_task(void *context, int pending)
error = iichid_cmd_read(sc, sc->intr_buf, sc->intr_bufsize, &actual);
if (error == 0) {
if (actual > 0) {
- sc->intr_handler(sc->intr_ctx, sc->intr_buf, actual);
+ sc->intr_handler(sc->intr_ctx, sc->intr_buf + 2, actual);
sc->missing_samples = 0;
if (sc->dup_size != actual ||
memcmp(sc->dup_buf, sc->intr_buf, actual) != 0) {
@@ -577,7 +551,7 @@ iichid_sampling_task(void *context, int pending)
++sc->dup_samples;
} else {
if (++sc->missing_samples == 1)
- sc->intr_handler(sc->intr_ctx, sc->intr_buf, 0);
+ sc->intr_handler(sc->intr_ctx, sc->intr_buf + 2, 0);
sc->dup_samples = 0;
}
} else
@@ -632,7 +606,7 @@ iichid_intr(void *context)
if (error == 0) {
if (sc->power_on && sc->open) {
if (actual != 0)
- sc->intr_handler(sc->intr_ctx, sc->intr_buf,
+ sc->intr_handler(sc->intr_ctx, sc->intr_buf + 2,
actual);
else
DPRINTF(sc, "no data received\n");
@@ -842,11 +816,12 @@ iichid_intr_setup(device_t dev, device_t child __unused, hid_intr_t intr,
sc = device_get_softc(dev);
/*
- * Do not rely on wMaxInputLength, as some devices may set it to
- * a wrong length. Find the longest input report in report descriptor.
+ * Do not rely just on wMaxInputLength, as some devices (which?)
+ * may set it to a wrong length. Also find the longest input report
+ * in report descriptor, and add two for the length field.
*/
- rdesc->rdsize =
- MAX(rdesc->isize, le16toh(sc->desc.wMaxInputLength) - 2);
+ rdesc->rdsize = 2 +
+ MAX(rdesc->isize, le16toh(sc->desc.wMaxInputLength));
/* Write and get/set_report sizes are limited by I2C-HID protocol. */
rdesc->grsize = rdesc->srsize = IICHID_SIZE_MAX;
rdesc->wrsize = IICHID_SIZE_MAX;
@@ -919,7 +894,7 @@ iichid_intr_poll(device_t dev, device_t child __unused)
sc = device_get_softc(dev);
error = iichid_cmd_read(sc, sc->intr_buf, sc->intr_bufsize, &actual);
if (error == 0 && actual != 0)
- sc->intr_handler(sc->intr_ctx, sc->intr_buf, actual);
+ sc->intr_handler(sc->intr_ctx, sc->intr_buf + 2, actual);
}
/*
@@ -946,6 +921,7 @@ iichid_read(device_t dev, device_t child __unused, void *buf,
{
struct iichid_softc *sc;
device_t parent;
+ uint8_t *tmpbuf;
int error;
if (maxlen > IICHID_SIZE_MAX)
@@ -954,8 +930,12 @@ iichid_read(device_t dev, device_t child __unused, void *buf,
parent = device_get_parent(sc->dev);
error = iicbus_request_bus(parent, sc->dev, IIC_WAIT);
if (error == 0) {
- error = iichid_cmd_read(sc, buf, maxlen, actlen);
+ tmpbuf = malloc(maxlen + 2, M_DEVBUF, M_WAITOK | M_ZERO);
+ error = iichid_cmd_read(sc, tmpbuf, maxlen + 2, actlen);
iicbus_release_bus(parent, sc->dev);
+ if (*actlen > 0)
+ memcpy(buf, tmpbuf + 2, *actlen);
+ free(tmpbuf, M_DEVBUF);
}
return (iic2errno(error));
}
diff --git a/sys/dev/md/md.c b/sys/dev/md/md.c
index 29dc0c880e3a..ec1664fac701 100644
--- a/sys/dev/md/md.c
+++ b/sys/dev/md/md.c
@@ -89,6 +89,8 @@
#include <sys/unistd.h>
#include <sys/vnode.h>
#include <sys/disk.h>
+#include <sys/param.h>
+#include <sys/bus.h>
#include <geom/geom.h>
#include <geom/geom_int.h>
@@ -2082,8 +2084,10 @@ g_md_init(struct g_class *mp __unused)
{
caddr_t mod;
u_char *ptr, *name, *type;
+ u_char scratch[40];
unsigned len;
int i;
+ vm_offset_t paddr;
/* figure out log2(NINDIR) */
for (i = NINDIR, nshift = -1; i; nshift++)
@@ -2123,6 +2127,25 @@ g_md_init(struct g_class *mp __unused)
sx_xunlock(&md_sx);
}
}
+
+ /*
+ * Load up to 32 pre-loaded disks
+ */
+ for (int i = 0; i < 32; i++) {
+ if (resource_long_value("md", i, "physaddr",
+ (long *) &paddr) != 0 ||
+ resource_int_value("md", i, "len", &len) != 0)
+ break;
+ ptr = (char *)pmap_map(NULL, paddr, paddr + len, VM_PROT_READ);
+ if (ptr != NULL && len != 0) {
+ sprintf(scratch, "preload%d 0x%016jx", i,
+ (uintmax_t)paddr);
+ sx_xlock(&md_sx);
+ md_preloaded(ptr, len, scratch);
+ sx_xunlock(&md_sx);
+ }
+ }
+
status_dev = make_dev(&mdctl_cdevsw, INT_MAX, UID_ROOT, GID_WHEEL,
0600, MDCTL_NAME);
g_topology_lock();
diff --git a/sys/dev/nvme/nvme_ctrlr.c b/sys/dev/nvme/nvme_ctrlr.c
index 73a7cee4aad0..fd7f00ced14b 100644
--- a/sys/dev/nvme/nvme_ctrlr.c
+++ b/sys/dev/nvme/nvme_ctrlr.c
@@ -48,7 +48,7 @@
#define B4_CHK_RDY_DELAY_MS 2300 /* work around controller bug */
static void nvme_ctrlr_construct_and_submit_aer(struct nvme_controller *ctrlr,
- struct nvme_async_event_request *aer);
+ struct nvme_async_event_request *aer);
static void
nvme_ctrlr_barrier(struct nvme_controller *ctrlr, int flags)
@@ -680,96 +680,6 @@ nvme_ctrlr_log_critical_warnings(struct nvme_controller *ctrlr,
}
static void
-nvme_ctrlr_async_event_log_page_cb(void *arg, const struct nvme_completion *cpl)
-{
- struct nvme_async_event_request *aer = arg;
- struct nvme_health_information_page *health_info;
- struct nvme_ns_list *nsl;
- struct nvme_error_information_entry *err;
- int i;
-
- /*
- * If the log page fetch for some reason completed with an error,
- * don't pass log page data to the consumers. In practice, this case
- * should never happen.
- */
- if (nvme_completion_is_error(cpl))
- nvme_notify_async_consumers(aer->ctrlr, &aer->cpl,
- aer->log_page_id, NULL, 0);
- else {
- /* Convert data to host endian */
- switch (aer->log_page_id) {
- case NVME_LOG_ERROR:
- err = (struct nvme_error_information_entry *)aer->log_page_buffer;
- for (i = 0; i < (aer->ctrlr->cdata.elpe + 1); i++)
- nvme_error_information_entry_swapbytes(err++);
- break;
- case NVME_LOG_HEALTH_INFORMATION:
- nvme_health_information_page_swapbytes(
- (struct nvme_health_information_page *)aer->log_page_buffer);
- break;
- case NVME_LOG_CHANGED_NAMESPACE:
- nvme_ns_list_swapbytes(
- (struct nvme_ns_list *)aer->log_page_buffer);
- break;
- case NVME_LOG_COMMAND_EFFECT:
- nvme_command_effects_page_swapbytes(
- (struct nvme_command_effects_page *)aer->log_page_buffer);
- break;
- case NVME_LOG_RES_NOTIFICATION:
- nvme_res_notification_page_swapbytes(
- (struct nvme_res_notification_page *)aer->log_page_buffer);
- break;
- case NVME_LOG_SANITIZE_STATUS:
- nvme_sanitize_status_page_swapbytes(
- (struct nvme_sanitize_status_page *)aer->log_page_buffer);
- break;
- default:
- break;
- }
-
- if (aer->log_page_id == NVME_LOG_HEALTH_INFORMATION) {
- health_info = (struct nvme_health_information_page *)
- aer->log_page_buffer;
- nvme_ctrlr_log_critical_warnings(aer->ctrlr,
- health_info->critical_warning);
- /*
- * Critical warnings reported through the
- * SMART/health log page are persistent, so
- * clear the associated bits in the async event
- * config so that we do not receive repeated
- * notifications for the same event.
- */
- aer->ctrlr->async_event_config &=
- ~health_info->critical_warning;
- nvme_ctrlr_cmd_set_async_event_config(aer->ctrlr,
- aer->ctrlr->async_event_config, NULL, NULL);
- } else if (aer->log_page_id == NVME_LOG_CHANGED_NAMESPACE &&
- !nvme_use_nvd) {
- nsl = (struct nvme_ns_list *)aer->log_page_buffer;
- for (i = 0; i < nitems(nsl->ns) && nsl->ns[i] != 0; i++) {
- if (nsl->ns[i] > NVME_MAX_NAMESPACES)
- break;
- nvme_notify_ns(aer->ctrlr, nsl->ns[i]);
- }
- }
-
- /*
- * Pass the cpl data from the original async event completion,
- * not the log page fetch.
- */
- nvme_notify_async_consumers(aer->ctrlr, &aer->cpl,
- aer->log_page_id, aer->log_page_buffer, aer->log_page_size);
- }
-
- /*
- * Repost another asynchronous event request to replace the one
- * that just completed.
- */
- nvme_ctrlr_construct_and_submit_aer(aer->ctrlr, aer);
-}
-
-static void
nvme_ctrlr_async_event_cb(void *arg, const struct nvme_completion *cpl)
{
struct nvme_async_event_request *aer = arg;
@@ -784,33 +694,18 @@ nvme_ctrlr_async_event_cb(void *arg, const struct nvme_completion *cpl)
return;
}
- /* Associated log page is in bits 23:16 of completion entry dw0. */
+ /*
+ * Save the completion status. The associated log page id is in bits
+ * 23:16 of completion entry dw0. Print a message and queue it for
+ * further processing.
+ */
+ memcpy(&aer->cpl, cpl, sizeof(*cpl));
aer->log_page_id = NVMEV(NVME_ASYNC_EVENT_LOG_PAGE_ID, cpl->cdw0);
-
nvme_printf(aer->ctrlr, "async event occurred (type 0x%x, info 0x%02x,"
" page 0x%02x)\n", NVMEV(NVME_ASYNC_EVENT_TYPE, cpl->cdw0),
NVMEV(NVME_ASYNC_EVENT_INFO, cpl->cdw0),
aer->log_page_id);
-
- if (is_log_page_id_valid(aer->log_page_id)) {
- aer->log_page_size = nvme_ctrlr_get_log_page_size(aer->ctrlr,
- aer->log_page_id);
- memcpy(&aer->cpl, cpl, sizeof(*cpl));
- nvme_ctrlr_cmd_get_log_page(aer->ctrlr, aer->log_page_id,
- NVME_GLOBAL_NAMESPACE_TAG, aer->log_page_buffer,
- aer->log_page_size, nvme_ctrlr_async_event_log_page_cb,
- aer);
- /* Wait to notify consumers until after log page is fetched. */
- } else {
- nvme_notify_async_consumers(aer->ctrlr, cpl, aer->log_page_id,
- NULL, 0);
-
- /*
- * Repost another asynchronous event request to replace the one
- * that just completed.
- */
- nvme_ctrlr_construct_and_submit_aer(aer->ctrlr, aer);
- }
+ taskqueue_enqueue(aer->ctrlr->taskqueue, &aer->task);
}
static void
@@ -819,15 +714,21 @@ nvme_ctrlr_construct_and_submit_aer(struct nvme_controller *ctrlr,
{
struct nvme_request *req;
- aer->ctrlr = ctrlr;
/*
- * XXX-MJ this should be M_WAITOK but we might be in a non-sleepable
- * callback context. AER completions should be handled on a dedicated
- * thread.
+ * We're racing the reset thread, so let that process submit this again.
+ * XXX does this really solve that race? And is that race even possible
+ * since we only reset when we've not heard from the card in a long
+ * time. Why would we get an AER in the middle of that just before we
+ * kick off the reset?
*/
- req = nvme_allocate_request_null(M_NOWAIT, nvme_ctrlr_async_event_cb,
+ if (ctrlr->is_resetting)
+ return;
+
+ aer->ctrlr = ctrlr;
+ req = nvme_allocate_request_null(M_WAITOK, nvme_ctrlr_async_event_cb,
aer);
aer->req = req;
+ aer->log_page_id = 0; /* Not a valid page */
/*
* Disable timeout here, since asynchronous event requests should by
@@ -1203,6 +1104,140 @@ nvme_ctrlr_reset_task(void *arg, int pending)
atomic_cmpset_32(&ctrlr->is_resetting, 1, 0);
}
+static void
+nvme_ctrlr_aer_done(void *arg, const struct nvme_completion *cpl)
+{
+ struct nvme_async_event_request *aer = arg;
+
+ mtx_lock(&aer->mtx);
+ if (nvme_completion_is_error(cpl))
+ aer->log_page_size = (uint32_t)-1;
+ else
+ aer->log_page_size = nvme_ctrlr_get_log_page_size(
+ aer->ctrlr, aer->log_page_id);
+ wakeup(aer);
+ mtx_unlock(&aer->mtx);
+}
+
+static void
+nvme_ctrlr_aer_task(void *arg, int pending)
+{
+ struct nvme_async_event_request *aer = arg;
+ struct nvme_controller *ctrlr = aer->ctrlr;
+ uint32_t len;
+
+ /*
+ * We're resetting, so just punt.
+ */
+ if (ctrlr->is_resetting)
+ return;
+
+ if (!is_log_page_id_valid(aer->log_page_id)) {
+ /*
+ * Repost another asynchronous event request to replace the one
+ * that just completed.
+ */
+ nvme_notify_async_consumers(ctrlr, &aer->cpl, aer->log_page_id,
+ NULL, 0);
+ /* The repost is done at the "out" label below; don't do it twice. */
+ goto out;
+ }
+
+ aer->log_page_size = 0;
+ len = nvme_ctrlr_get_log_page_size(aer->ctrlr, aer->log_page_id);
+ nvme_ctrlr_cmd_get_log_page(aer->ctrlr, aer->log_page_id,
+ NVME_GLOBAL_NAMESPACE_TAG, aer->log_page_buffer, len,
+ nvme_ctrlr_aer_done, aer);
+ mtx_lock(&aer->mtx);
+ while (aer->log_page_size == 0)
+ mtx_sleep(aer, &aer->mtx, PRIBIO, "nvme_pt", 0);
+ mtx_unlock(&aer->mtx);
+
+ if (aer->log_page_size == (uint32_t)-1) {
+ /*
+ * If the log page fetch for some reason completed with an
+ * error, don't pass log page data to the consumers. In
+ * practice, this case should never happen.
+ */
+ nvme_notify_async_consumers(aer->ctrlr, &aer->cpl,
+ aer->log_page_id, NULL, 0);
+ goto out;
+ }
+
+ /* Convert data to host endian */
+ switch (aer->log_page_id) {
+ case NVME_LOG_ERROR: {
+ struct nvme_error_information_entry *err =
+ (struct nvme_error_information_entry *)aer->log_page_buffer;
+ for (int i = 0; i < (aer->ctrlr->cdata.elpe + 1); i++)
+ nvme_error_information_entry_swapbytes(err++);
+ break;
+ }
+ case NVME_LOG_HEALTH_INFORMATION:
+ nvme_health_information_page_swapbytes(
+ (struct nvme_health_information_page *)aer->log_page_buffer);
+ break;
+ case NVME_LOG_CHANGED_NAMESPACE:
+ nvme_ns_list_swapbytes(
+ (struct nvme_ns_list *)aer->log_page_buffer);
+ break;
+ case NVME_LOG_COMMAND_EFFECT:
+ nvme_command_effects_page_swapbytes(
+ (struct nvme_command_effects_page *)aer->log_page_buffer);
+ break;
+ case NVME_LOG_RES_NOTIFICATION:
+ nvme_res_notification_page_swapbytes(
+ (struct nvme_res_notification_page *)aer->log_page_buffer);
+ break;
+ case NVME_LOG_SANITIZE_STATUS:
+ nvme_sanitize_status_page_swapbytes(
+ (struct nvme_sanitize_status_page *)aer->log_page_buffer);
+ break;
+ default:
+ break;
+ }
+
+ if (aer->log_page_id == NVME_LOG_HEALTH_INFORMATION) {
+ struct nvme_health_information_page *health_info =
+ (struct nvme_health_information_page *)aer->log_page_buffer;
+
+ /*
+ * Critical warnings reported through the SMART/health log page
+ * are persistent, so clear the associated bits in the async
+ * event config so that we do not receive repeated notifications
+ * for the same event.
+ */
+ nvme_ctrlr_log_critical_warnings(aer->ctrlr,
+ health_info->critical_warning);
+ aer->ctrlr->async_event_config &=
+ ~health_info->critical_warning;
+ nvme_ctrlr_cmd_set_async_event_config(aer->ctrlr,
+ aer->ctrlr->async_event_config, NULL, NULL);
+ } else if (aer->log_page_id == NVME_LOG_CHANGED_NAMESPACE) {
+ struct nvme_ns_list *nsl =
+ (struct nvme_ns_list *)aer->log_page_buffer;
+ for (int i = 0; i < nitems(nsl->ns) && nsl->ns[i] != 0; i++) {
+ if (nsl->ns[i] > NVME_MAX_NAMESPACES)
+ break;
+ nvme_notify_ns(aer->ctrlr, nsl->ns[i]);
+ }
+ }
+
+ /*
+ * Pass the cpl data from the original async event completion, not the
+ * log page fetch.
+ */
+ nvme_notify_async_consumers(aer->ctrlr, &aer->cpl,
+ aer->log_page_id, aer->log_page_buffer, aer->log_page_size);
+
+ /*
+ * Repost another asynchronous event request to replace the one
+ * that just completed.
+ */
+out:
+ nvme_ctrlr_construct_and_submit_aer(ctrlr, aer);
+}
+
/*
* Poll all the queues enabled on the device for completion.
*/
@@ -1574,13 +1609,8 @@ nvme_ctrlr_construct(struct nvme_controller *ctrlr, device_t dev)
/*
* Create 2 threads for the taskqueue. The reset thread will block when
* it detects that the controller has failed until all I/O has been
- * failed up the stack. The fail_req task needs to be able to run in
- * this case to finish the request failure for some cases.
- *
- * We could partially solve this race by draining the failed requeust
- * queue before proceding to free the sim, though nothing would stop
- * new I/O from coming in after we do that drain, but before we reach
- * cam_sim_free, so this big hammer is used instead.
+ * failed up the stack. The second thread is used for AER events, which
+ * can block, but only briefly for memory and log page fetching.
*/
ctrlr->taskqueue = taskqueue_create("nvme_taskq", M_WAITOK,
taskqueue_thread_enqueue, &ctrlr->taskqueue);
@@ -1590,7 +1620,12 @@ nvme_ctrlr_construct(struct nvme_controller *ctrlr, device_t dev)
ctrlr->is_initialized = false;
ctrlr->notification_sent = 0;
TASK_INIT(&ctrlr->reset_task, 0, nvme_ctrlr_reset_task, ctrlr);
- STAILQ_INIT(&ctrlr->fail_req);
+ for (int i = 0; i < NVME_MAX_ASYNC_EVENTS; i++) {
+ struct nvme_async_event_request *aer = &ctrlr->aer[i];
+
+ TASK_INIT(&aer->task, 0, nvme_ctrlr_aer_task, aer);
+ mtx_init(&aer->mtx, "AER mutex", NULL, MTX_DEF);
+ }
ctrlr->is_failed = false;
make_dev_args_init(&md_args);
@@ -1678,8 +1713,14 @@ nvme_ctrlr_destruct(struct nvme_controller *ctrlr, device_t dev)
}
noadminq:
- if (ctrlr->taskqueue)
+ if (ctrlr->taskqueue) {
taskqueue_free(ctrlr->taskqueue);
+ for (int i = 0; i < NVME_MAX_ASYNC_EVENTS; i++) {
+ struct nvme_async_event_request *aer = &ctrlr->aer[i];
+
+ mtx_destroy(&aer->mtx);
+ }
+ }
if (ctrlr->tag)
bus_teardown_intr(ctrlr->dev, ctrlr->res, ctrlr->tag);
diff --git a/sys/dev/nvme/nvme_private.h b/sys/dev/nvme/nvme_private.h
index 949e69ec9290..36f00fedc48e 100644
--- a/sys/dev/nvme/nvme_private.h
+++ b/sys/dev/nvme/nvme_private.h
@@ -123,6 +123,8 @@ struct nvme_request {
struct nvme_async_event_request {
struct nvme_controller *ctrlr;
struct nvme_request *req;
+ struct task task;
+ struct mtx mtx;
struct nvme_completion cpl;
uint32_t log_page_id;
uint32_t log_page_size;
@@ -307,8 +309,6 @@ struct nvme_controller {
bool isr_warned;
bool is_initialized;
- STAILQ_HEAD(, nvme_request) fail_req;
-
/* Host Memory Buffer */
int hmb_nchunks;
size_t hmb_chunk;
diff --git a/sys/dev/nvmf/controller/nvmft_subr.c b/sys/dev/nvmf/controller/nvmft_subr.c
index bb2bc0988e81..245971813854 100644
--- a/sys/dev/nvmf/controller/nvmft_subr.c
+++ b/sys/dev/nvmf/controller/nvmft_subr.c
@@ -26,46 +26,6 @@ nvmf_nqn_valid(const char *nqn)
len = strnlen(nqn, NVME_NQN_FIELD_SIZE);
if (len == 0 || len > NVMF_NQN_MAX_LEN)
return (false);
-
-#ifdef STRICT_CHECKS
- /*
- * Stricter checks from the spec. Linux does not seem to
- * require these.
- */
-
- /*
- * NVMF_NQN_MIN_LEN does not include '.', and require at least
- * one character of a domain name.
- */
- if (len < NVMF_NQN_MIN_LEN + 2)
- return (false);
- if (memcmp("nqn.", nqn, strlen("nqn.")) != 0)
- return (false);
- nqn += strlen("nqn.");
-
- /* Next 4 digits must be a year. */
- for (u_int i = 0; i < 4; i++) {
- if (!isdigit(nqn[i]))
- return (false);
- }
- nqn += 4;
-
- /* '-' between year and month. */
- if (nqn[0] != '-')
- return (false);
- nqn++;
-
- /* 2 digit month. */
- for (u_int i = 0; i < 2; i++) {
- if (!isdigit(nqn[i]))
- return (false);
- }
- nqn += 2;
-
- /* '.' between month and reverse domain name. */
- if (nqn[0] != '.')
- return (false);
-#endif
return (true);
}
diff --git a/sys/dev/pci/pci_iov.c b/sys/dev/pci/pci_iov.c
index 1f72391fb6b4..0efcfeac9eff 100644
--- a/sys/dev/pci/pci_iov.c
+++ b/sys/dev/pci/pci_iov.c
@@ -734,11 +734,18 @@ pci_iov_config(struct cdev *cdev, struct pci_iov_arg *arg)
first_rid = pci_get_rid(dev) + rid_off;
last_rid = first_rid + (num_vfs - 1) * rid_stride;
- /* We don't yet support allocating extra bus numbers for VFs. */
if (pci_get_bus(dev) != PCI_RID2BUS(last_rid)) {
- device_printf(dev, "not enough PCIe bus numbers for VFs\n");
- error = ENOSPC;
- goto out;
+ int rid = 0;
+ uint16_t last_rid_bus = PCI_RID2BUS(last_rid);
+
+ iov->iov_bus_res = bus_alloc_resource(bus, PCI_RES_BUS, &rid,
+ last_rid_bus, last_rid_bus, 1, RF_ACTIVE);
+ if (iov->iov_bus_res == NULL) {
+ device_printf(dev,
+ "failed to allocate PCIe bus number for VFs\n");
+ error = ENOSPC;
+ goto out;
+ }
}
if (!ari_enabled && PCI_RID2SLOT(last_rid) != 0) {
@@ -786,6 +793,11 @@ out:
}
}
+ if (iov->iov_bus_res != NULL) {
+ bus_release_resource(bus, iov->iov_bus_res);
+ iov->iov_bus_res = NULL;
+ }
+
if (iov->iov_flags & IOV_RMAN_INITED) {
rman_fini(&iov->rman);
iov->iov_flags &= ~IOV_RMAN_INITED;
@@ -896,6 +908,11 @@ pci_iov_delete_iov_children(struct pci_devinfo *dinfo)
}
}
+ if (iov->iov_bus_res != NULL) {
+ bus_release_resource(bus, iov->iov_bus_res);
+ iov->iov_bus_res = NULL;
+ }
+
if (iov->iov_flags & IOV_RMAN_INITED) {
rman_fini(&iov->rman);
iov->iov_flags &= ~IOV_RMAN_INITED;
diff --git a/sys/dev/pci/pci_iov_private.h b/sys/dev/pci/pci_iov_private.h
index 7ae2219b936d..ecf0a9b21be5 100644
--- a/sys/dev/pci/pci_iov_private.h
+++ b/sys/dev/pci/pci_iov_private.h
@@ -39,6 +39,8 @@ struct pcicfg_iov {
struct cdev *iov_cdev;
nvlist_t *iov_schema;
+ struct resource *iov_bus_res;
+
struct pci_iov_bar iov_bar[PCIR_MAX_BAR_0 + 1];
struct rman rman;
char rman_name[64];
diff --git a/sys/dev/qlnx/qlnxe/qlnx_os.c b/sys/dev/qlnx/qlnxe/qlnx_os.c
index 9d23d5df1d2b..4ad190374f87 100644
--- a/sys/dev/qlnx/qlnxe/qlnx_os.c
+++ b/sys/dev/qlnx/qlnxe/qlnx_os.c
@@ -2308,8 +2308,6 @@ qlnx_init_ifnet(device_t dev, qlnx_host_t *ha)
else if (device_id == QLOGIC_PCI_DEVICE_ID_1644)
if_setbaudrate(ifp, IF_Gbps(100));
- if_setcapabilities(ifp, IFCAP_LINKSTATE);
-
if_setinitfn(ifp, qlnx_init);
if_setsoftc(ifp, ha);
if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST);
@@ -2343,7 +2341,6 @@ qlnx_init_ifnet(device_t dev, qlnx_host_t *ha)
if_setcapabilities(ifp, IFCAP_HWCSUM);
if_setcapabilitiesbit(ifp, IFCAP_JUMBO_MTU, 0);
-
if_setcapabilitiesbit(ifp, IFCAP_VLAN_MTU, 0);
if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWTAGGING, 0);
if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWFILTER, 0);
@@ -2352,6 +2349,8 @@ qlnx_init_ifnet(device_t dev, qlnx_host_t *ha)
if_setcapabilitiesbit(ifp, IFCAP_TSO4, 0);
if_setcapabilitiesbit(ifp, IFCAP_TSO6, 0);
if_setcapabilitiesbit(ifp, IFCAP_LRO, 0);
+ if_setcapabilitiesbit(ifp, IFCAP_LINKSTATE, 0);
+ if_setcapabilitiesbit(ifp, IFCAP_HWSTATS, 0);
if_sethwtsomax(ifp, QLNX_MAX_TSO_FRAME_SIZE -
(ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN));
diff --git a/sys/dev/usb/input/usbhid.c b/sys/dev/usb/input/usbhid.c
index 3bb7d5e594e3..df810012b3f8 100644
--- a/sys/dev/usb/input/usbhid.c
+++ b/sys/dev/usb/input/usbhid.c
@@ -76,7 +76,7 @@
#include "hid_if.h"
static SYSCTL_NODE(_hw_usb, OID_AUTO, usbhid, CTLFLAG_RW, 0, "USB usbhid");
-static int usbhid_enable = 0;
+static int usbhid_enable = 1;
SYSCTL_INT(_hw_usb_usbhid, OID_AUTO, enable, CTLFLAG_RWTUN,
&usbhid_enable, 0, "Enable usbhid and prefer it to other USB HID drivers");
#ifdef USB_DEBUG
diff --git a/sys/dev/vmm/vmm_dev.c b/sys/dev/vmm/vmm_dev.c
index 819debadd1ac..9f2b009d02ec 100644
--- a/sys/dev/vmm/vmm_dev.c
+++ b/sys/dev/vmm/vmm_dev.c
@@ -30,7 +30,8 @@
#include <dev/vmm/vmm_mem.h>
#include <dev/vmm/vmm_stat.h>
-#if defined(__amd64__) && defined(COMPAT_FREEBSD12)
+#ifdef __amd64__
+#ifdef COMPAT_FREEBSD12
struct vm_memseg_12 {
int segid;
size_t len;
@@ -42,7 +43,22 @@ _Static_assert(sizeof(struct vm_memseg_12) == 80, "COMPAT_FREEBSD12 ABI");
_IOW('v', IOCNUM_ALLOC_MEMSEG, struct vm_memseg_12)
#define VM_GET_MEMSEG_12 \
_IOWR('v', IOCNUM_GET_MEMSEG, struct vm_memseg_12)
-#endif
+#endif /* COMPAT_FREEBSD12 */
+#ifdef COMPAT_FREEBSD14
+struct vm_memseg_14 {
+ int segid;
+ size_t len;
+ char name[VM_MAX_SUFFIXLEN + 1];
+};
+_Static_assert(sizeof(struct vm_memseg_14) == (VM_MAX_SUFFIXLEN + 1 + 16),
+ "COMPAT_FREEBSD14 ABI");
+
+#define VM_ALLOC_MEMSEG_14 \
+ _IOW('v', IOCNUM_ALLOC_MEMSEG, struct vm_memseg_14)
+#define VM_GET_MEMSEG_14 \
+ _IOWR('v', IOCNUM_GET_MEMSEG, struct vm_memseg_14)
+#endif /* COMPAT_FREEBSD14 */
+#endif /* __amd64__ */
struct devmem_softc {
int segid;
@@ -257,7 +273,8 @@ get_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg, size_t len)
}
static int
-alloc_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg, size_t len)
+alloc_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg, size_t len,
+ struct domainset *domainset)
{
char *name;
int error;
@@ -278,8 +295,7 @@ alloc_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg, size_t len)
if (error)
goto done;
}
-
- error = vm_alloc_memseg(sc->vm, mseg->segid, mseg->len, sysmem);
+ error = vm_alloc_memseg(sc->vm, mseg->segid, mseg->len, sysmem, domainset);
if (error)
goto done;
@@ -295,6 +311,20 @@ done:
return (error);
}
+#if defined(__amd64__) && \
+ (defined(COMPAT_FREEBSD14) || defined(COMPAT_FREEBSD12))
+/*
+ * Translate pre-15.0 memory segment identifiers into their 15.0 counterparts.
+ */
+static void
+adjust_segid(struct vm_memseg *mseg)
+{
+ if (mseg->segid != VM_SYSMEM) {
+ mseg->segid += (VM_BOOTROM - 1);
+ }
+}
+#endif
+
static int
vm_get_register_set(struct vcpu *vcpu, unsigned int count, int *regnum,
uint64_t *regval)
@@ -353,10 +383,16 @@ static const struct vmmdev_ioctl vmmdev_ioctls[] = {
VMMDEV_IOCTL(VM_STATS, VMMDEV_IOCTL_LOCK_ONE_VCPU),
VMMDEV_IOCTL(VM_STAT_DESC, 0),
-#if defined(__amd64__) && defined(COMPAT_FREEBSD12)
+#ifdef __amd64__
+#ifdef COMPAT_FREEBSD12
VMMDEV_IOCTL(VM_ALLOC_MEMSEG_12,
VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
#endif
+#ifdef COMPAT_FREEBSD14
+ VMMDEV_IOCTL(VM_ALLOC_MEMSEG_14,
+ VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
+#endif
+#endif /* __amd64__ */
VMMDEV_IOCTL(VM_ALLOC_MEMSEG,
VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
VMMDEV_IOCTL(VM_MMAP_MEMSEG,
@@ -366,9 +402,14 @@ static const struct vmmdev_ioctl vmmdev_ioctls[] = {
VMMDEV_IOCTL(VM_REINIT,
VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
-#if defined(__amd64__) && defined(COMPAT_FREEBSD12)
+#ifdef __amd64__
+#if defined(COMPAT_FREEBSD12)
VMMDEV_IOCTL(VM_GET_MEMSEG_12, VMMDEV_IOCTL_SLOCK_MEMSEGS),
#endif
+#ifdef COMPAT_FREEBSD14
+ VMMDEV_IOCTL(VM_GET_MEMSEG_14, VMMDEV_IOCTL_SLOCK_MEMSEGS),
+#endif
+#endif /* __amd64__ */
VMMDEV_IOCTL(VM_GET_MEMSEG, VMMDEV_IOCTL_SLOCK_MEMSEGS),
VMMDEV_IOCTL(VM_MMAP_GETNEXT, VMMDEV_IOCTL_SLOCK_MEMSEGS),
@@ -388,6 +429,7 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
struct vmmdev_softc *sc;
struct vcpu *vcpu;
const struct vmmdev_ioctl *ioctl;
+ struct vm_memseg *mseg;
int error, vcpuid;
sc = vmmdev_lookup2(cdev);
@@ -499,20 +541,77 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
error = vm_munmap_memseg(sc->vm, mu->gpa, mu->len);
break;
}
-#if defined(__amd64__) && defined(COMPAT_FREEBSD12)
+#ifdef __amd64__
+#ifdef COMPAT_FREEBSD12
case VM_ALLOC_MEMSEG_12:
- error = alloc_memseg(sc, (struct vm_memseg *)data,
- sizeof(((struct vm_memseg_12 *)0)->name));
+ mseg = (struct vm_memseg *)data;
+
+ adjust_segid(mseg);
+ error = alloc_memseg(sc, mseg,
+ sizeof(((struct vm_memseg_12 *)0)->name), NULL);
break;
case VM_GET_MEMSEG_12:
- error = get_memseg(sc, (struct vm_memseg *)data,
+ mseg = (struct vm_memseg *)data;
+
+ adjust_segid(mseg);
+ error = get_memseg(sc, mseg,
sizeof(((struct vm_memseg_12 *)0)->name));
break;
-#endif
- case VM_ALLOC_MEMSEG:
- error = alloc_memseg(sc, (struct vm_memseg *)data,
- sizeof(((struct vm_memseg *)0)->name));
+#endif /* COMPAT_FREEBSD12 */
+#ifdef COMPAT_FREEBSD14
+ case VM_ALLOC_MEMSEG_14:
+ mseg = (struct vm_memseg *)data;
+
+ adjust_segid(mseg);
+ error = alloc_memseg(sc, mseg,
+ sizeof(((struct vm_memseg_14 *)0)->name), NULL);
+ break;
+ case VM_GET_MEMSEG_14:
+ mseg = (struct vm_memseg *)data;
+
+ adjust_segid(mseg);
+ error = get_memseg(sc, mseg,
+ sizeof(((struct vm_memseg_14 *)0)->name));
+ break;
+#endif /* COMPAT_FREEBSD14 */
+#endif /* __amd64__ */
+ case VM_ALLOC_MEMSEG: {
+ domainset_t *mask;
+ struct domainset *domainset, domain;
+
+ domainset = NULL;
+ mseg = (struct vm_memseg *)data;
+ if (mseg->ds_policy != DOMAINSET_POLICY_INVALID && mseg->ds_mask != NULL) {
+ if (mseg->ds_mask_size < sizeof(domainset_t) ||
+ mseg->ds_mask_size > DOMAINSET_MAXSIZE / NBBY) {
+ error = ERANGE;
+ break;
+ }
+ memset(&domain, 0, sizeof(domain));
+ mask = malloc(mseg->ds_mask_size, M_VMMDEV, M_WAITOK);
+ error = copyin(mseg->ds_mask, mask, mseg->ds_mask_size);
+ if (error) {
+ free(mask, M_VMMDEV);
+ break;
+ }
+ error = domainset_populate(&domain, mask, mseg->ds_policy,
+ mseg->ds_mask_size);
+ if (error) {
+ free(mask, M_VMMDEV);
+ break;
+ }
+ domainset = domainset_create(&domain);
+ if (domainset == NULL) {
+ error = EINVAL;
+ free(mask, M_VMMDEV);
+ break;
+ }
+ free(mask, M_VMMDEV);
+ }
+ error = alloc_memseg(sc, mseg, sizeof(mseg->name), domainset);
+
break;
+ }
case VM_GET_MEMSEG:
error = get_memseg(sc, (struct vm_memseg *)data,
sizeof(((struct vm_memseg *)0)->name));
@@ -820,7 +919,6 @@ sysctl_vmm_destroy(SYSCTL_HANDLER_ARGS)
buflen = VM_MAX_NAMELEN + 1;
buf = malloc(buflen, M_VMMDEV, M_WAITOK | M_ZERO);
- strlcpy(buf, "beavis", buflen);
error = sysctl_handle_string(oidp, buf, buflen, req);
if (error == 0 && req->newptr != NULL)
error = vmmdev_lookup_and_destroy(buf, req->td->td_ucred);
@@ -830,7 +928,7 @@ sysctl_vmm_destroy(SYSCTL_HANDLER_ARGS)
SYSCTL_PROC(_hw_vmm, OID_AUTO, destroy,
CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
NULL, 0, sysctl_vmm_destroy, "A",
- NULL);
+ "Destroy a vmm(4) instance (legacy interface)");
static struct cdevsw vmmdevsw = {
.d_name = "vmmdev",
@@ -909,7 +1007,6 @@ sysctl_vmm_create(SYSCTL_HANDLER_ARGS)
buflen = VM_MAX_NAMELEN + 1;
buf = malloc(buflen, M_VMMDEV, M_WAITOK | M_ZERO);
- strlcpy(buf, "beavis", buflen);
error = sysctl_handle_string(oidp, buf, buflen, req);
if (error == 0 && req->newptr != NULL)
error = vmmdev_create(buf, req->td->td_ucred);
@@ -919,7 +1016,7 @@ sysctl_vmm_create(SYSCTL_HANDLER_ARGS)
SYSCTL_PROC(_hw_vmm, OID_AUTO, create,
CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
NULL, 0, sysctl_vmm_create, "A",
- NULL);
+ "Create a vmm(4) instance (legacy interface)");
static int
vmmctl_open(struct cdev *cdev, int flags, int fmt, struct thread *td)
diff --git a/sys/dev/vmm/vmm_mem.c b/sys/dev/vmm/vmm_mem.c
index c61ae2d44b96..be59e37de33d 100644
--- a/sys/dev/vmm/vmm_mem.c
+++ b/sys/dev/vmm/vmm_mem.c
@@ -7,6 +7,7 @@
#include <sys/types.h>
#include <sys/lock.h>
+#include <sys/malloc.h>
#include <sys/sx.h>
#include <sys/systm.h>
@@ -156,10 +157,11 @@ vm_mem_allocated(struct vcpu *vcpu, vm_paddr_t gpa)
}
int
-vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem)
+vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem,
+ struct domainset *obj_domainset)
{
- struct vm_mem *mem;
struct vm_mem_seg *seg;
+ struct vm_mem *mem;
vm_object_t obj;
mem = vm_mem(vm);
@@ -179,13 +181,22 @@ vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem)
return (EINVAL);
}
+ /*
+ * When given an impossible policy, signal an
+ * error to the user.
+ */
+ if (obj_domainset != NULL && domainset_empty_vm(obj_domainset))
+ return (EINVAL);
obj = vm_object_allocate(OBJT_SWAP, len >> PAGE_SHIFT);
if (obj == NULL)
return (ENOMEM);
seg->len = len;
seg->object = obj;
+ if (obj_domainset != NULL)
+ seg->object->domain.dr_policy = obj_domainset;
seg->sysmem = sysmem;
+
return (0);
}
diff --git a/sys/dev/vmm/vmm_mem.h b/sys/dev/vmm/vmm_mem.h
index a4be4c1c57aa..856470cf2590 100644
--- a/sys/dev/vmm/vmm_mem.h
+++ b/sys/dev/vmm/vmm_mem.h
@@ -8,6 +8,27 @@
#ifndef _DEV_VMM_MEM_H_
#define _DEV_VMM_MEM_H_
+/* Maximum number of NUMA domains in a guest. */
+#define VM_MAXMEMDOM 8
+#define VM_MAXSYSMEM VM_MAXMEMDOM
+
+/*
+ * Identifiers for memory segments.
+ * Each guest NUMA domain is represented by a single system
+ * memory segment from [VM_SYSMEM, VM_MAXSYSMEM).
+ * The remaining identifiers can be used to create devmem segments.
+ */
+enum {
+ VM_SYSMEM = 0,
+ VM_BOOTROM = VM_MAXSYSMEM,
+ VM_FRAMEBUFFER,
+ VM_PCIROM,
+ VM_MEMSEG_END
+};
+
+#define VM_MAX_MEMSEGS VM_MEMSEG_END
+#define VM_MAX_MEMMAPS (VM_MAX_MEMSEGS * 2)
+
#ifdef _KERNEL
#include <sys/types.h>
@@ -31,9 +52,6 @@ struct vm_mem_map {
int flags;
};
-#define VM_MAX_MEMSEGS 4
-#define VM_MAX_MEMMAPS 8
-
struct vm_mem {
struct vm_mem_map mem_maps[VM_MAX_MEMMAPS];
struct vm_mem_seg mem_segs[VM_MAX_MEMSEGS];
@@ -55,7 +73,8 @@ void vm_assert_memseg_xlocked(struct vm *vm);
int vm_mmap_memseg(struct vm *vm, vm_paddr_t gpa, int segid, vm_ooffset_t off,
size_t len, int prot, int flags);
int vm_munmap_memseg(struct vm *vm, vm_paddr_t gpa, size_t len);
-int vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem);
+int vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem,
+ struct domainset *obj_domainset);
void vm_free_memseg(struct vm *vm, int ident);
/*
diff --git a/sys/dev/vt/hw/vga/vt_vga.c b/sys/dev/vt/hw/vga/vt_vga.c
index 64039575c0ad..675c0573bd7e 100644
--- a/sys/dev/vt/hw/vga/vt_vga.c
+++ b/sys/dev/vt/hw/vga/vt_vga.c
@@ -1347,7 +1347,7 @@ vga_postswitch(struct vt_device *vd)
/* Reinit VGA mode, to restore view after app which change mode. */
vga_initialize(vd, (vd->vd_flags & VDF_TEXTMODE));
- /* Ask vt(9) to update chars on visible area. */
+ /* Ask vt(4) to update chars on visible area. */
vd->vd_flags |= VDF_INVALID;
}
diff --git a/sys/dev/vt/vt_core.c b/sys/dev/vt/vt_core.c
index b0f58b38a6f1..b51ef6766de4 100644
--- a/sys/dev/vt/vt_core.c
+++ b/sys/dev/vt/vt_core.c
@@ -125,10 +125,10 @@ static const struct terminal_class vt_termclass = {
(vw)->vw_number)
static SYSCTL_NODE(_kern, OID_AUTO, vt, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
- "vt(9) parameters");
+ "vt(4) parameters");
static VT_SYSCTL_INT(enable_altgr, 1, "Enable AltGr key (Do not assume R.Alt as Alt)");
static VT_SYSCTL_INT(enable_bell, 0, "Enable bell");
-static VT_SYSCTL_INT(debug, 0, "vt(9) debug level");
+static VT_SYSCTL_INT(debug, 0, "vt(4) debug level");
static VT_SYSCTL_INT(deadtimer, 15, "Time to wait busy process in VT_PROCESS mode");
static VT_SYSCTL_INT(suspendswitch, 1, "Switch to VT0 before suspend");