Diffstat (limited to 'sys/dev')
-rw-r--r--	sys/dev/amdsmu/amdsmu.c			466
-rw-r--r--	sys/dev/amdsmu/amdsmu.h			95
-rw-r--r--	sys/dev/amdsmu/amdsmu_reg.h		84
-rw-r--r--	sys/dev/cxgbe/tom/t4_cpl_io.c		6
-rw-r--r--	sys/dev/cxgbe/tom/t4_tls.c		4
-rw-r--r--	sys/dev/drm2/drm_fb_helper.c		2
-rw-r--r--	sys/dev/efidev/efirt.c			42
-rw-r--r--	sys/dev/ice/ice_iov.c			17
-rw-r--r--	sys/dev/md/md.c				23
-rw-r--r--	sys/dev/nvme/nvme_ctrlr.c		295
-rw-r--r--	sys/dev/nvme/nvme_private.h		4
-rw-r--r--	sys/dev/ofw/ofw_bus_subr.c		101
-rw-r--r--	sys/dev/qlnx/qlnxe/qlnx_os.c		5
-rw-r--r--	sys/dev/random/fortuna.c		7
-rw-r--r--	sys/dev/random/random_harvestq.c	232
-rw-r--r--	sys/dev/random/random_harvestq.h	2
-rw-r--r--	sys/dev/random/randomdev.c		2
-rw-r--r--	sys/dev/vmm/vmm_dev.c			135
-rw-r--r--	sys/dev/vmm/vmm_mem.c			15
-rw-r--r--	sys/dev/vmm/vmm_mem.h			27
-rw-r--r--	sys/dev/vt/hw/vga/vt_vga.c		2
-rw-r--r--	sys/dev/vt/vt_core.c			4
22 files changed, 1388 insertions, 182 deletions
diff --git a/sys/dev/amdsmu/amdsmu.c b/sys/dev/amdsmu/amdsmu.c
new file mode 100644
index 000000000000..416f875c6176
--- /dev/null
+++ b/sys/dev/amdsmu/amdsmu.c
@@ -0,0 +1,466 @@
+/*
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2025 The FreeBSD Foundation
+ *
+ * This software was developed by Aymeric Wibo <obiwac@freebsd.org>
+ * under sponsorship from the FreeBSD Foundation.
+ */
+
+#include <sys/param.h>
+#include <sys/bus.h>
+#include <sys/kernel.h>
+#include <sys/module.h>
+#include <sys/rman.h>
+#include <sys/sysctl.h>
+
+#include <dev/pci/pcivar.h>
+#include <dev/amdsmu/amdsmu.h>
+
+static bool
+amdsmu_match(device_t dev, const struct amdsmu_product **product_out)
+{
+ const uint16_t vendorid = pci_get_vendor(dev);
+ const uint16_t deviceid = pci_get_device(dev);
+
+ for (size_t i = 0; i < nitems(amdsmu_products); i++) {
+ const struct amdsmu_product *prod = &amdsmu_products[i];
+
+ if (vendorid == prod->amdsmu_vendorid &&
+ deviceid == prod->amdsmu_deviceid) {
+ if (product_out != NULL)
+ *product_out = prod;
+ return (true);
+ }
+ }
+ return (false);
+}
+
+static void
+amdsmu_identify(driver_t *driver, device_t parent)
+{
+ if (device_find_child(parent, "amdsmu", -1) != NULL)
+ return;
+
+ if (amdsmu_match(parent, NULL)) {
+ if (device_add_child(parent, "amdsmu", -1) == NULL)
+ device_printf(parent, "add amdsmu child failed\n");
+ }
+}
+
+static int
+amdsmu_probe(device_t dev)
+{
+ if (resource_disabled("amdsmu", 0))
+ return (ENXIO);
+ if (!amdsmu_match(device_get_parent(dev), NULL))
+ return (ENXIO);
+ device_set_descf(dev, "AMD System Management Unit");
+
+ return (BUS_PROBE_GENERIC);
+}
+
+static enum amdsmu_res
+amdsmu_wait_res(device_t dev)
+{
+ struct amdsmu_softc *sc = device_get_softc(dev);
+ enum amdsmu_res res;
+
+ /*
+ * The SMU has a response ready for us when the response register is
+ * set. Otherwise, we must wait.
+ */
+ for (size_t i = 0; i < SMU_RES_READ_MAX; i++) {
+ res = amdsmu_read4(sc, SMU_REG_RESPONSE);
+ if (res != SMU_RES_WAIT)
+ return (res);
+ pause_sbt("amdsmu", ustosbt(SMU_RES_READ_PERIOD_US), 0,
+ C_HARDCLOCK);
+ }
+ device_printf(dev, "timed out waiting for response from SMU\n");
+ return (SMU_RES_WAIT);
+}
+
+static int
+amdsmu_cmd(device_t dev, enum amdsmu_msg msg, uint32_t arg, uint32_t *ret)
+{
+ struct amdsmu_softc *sc = device_get_softc(dev);
+ enum amdsmu_res res;
+
+ /* Wait for SMU to be ready. */
+ if (amdsmu_wait_res(dev) == SMU_RES_WAIT)
+ return (ETIMEDOUT);
+
+ /* Clear previous response. */
+ amdsmu_write4(sc, SMU_REG_RESPONSE, SMU_RES_WAIT);
+
+ /* Write out command to registers. */
+ amdsmu_write4(sc, SMU_REG_MESSAGE, msg);
+ amdsmu_write4(sc, SMU_REG_ARGUMENT, arg);
+
+ /* Wait for SMU response and handle it. */
+ res = amdsmu_wait_res(dev);
+
+ switch (res) {
+ case SMU_RES_WAIT:
+ return (ETIMEDOUT);
+ case SMU_RES_OK:
+ if (ret != NULL)
+ *ret = amdsmu_read4(sc, SMU_REG_ARGUMENT);
+ return (0);
+ case SMU_RES_REJECT_BUSY:
+ device_printf(dev, "SMU is busy\n");
+ return (EBUSY);
+ case SMU_RES_REJECT_PREREQ:
+ case SMU_RES_UNKNOWN:
+ case SMU_RES_FAILED:
+ device_printf(dev, "SMU error: %02x\n", res);
+ return (EIO);
+ }
+
+ return (EINVAL);
+}
+
+static int
+amdsmu_get_vers(device_t dev)
+{
+ int err;
+ uint32_t smu_vers;
+ struct amdsmu_softc *sc = device_get_softc(dev);
+
+ err = amdsmu_cmd(dev, SMU_MSG_GETSMUVERSION, 0, &smu_vers);
+ if (err != 0) {
+ device_printf(dev, "failed to get SMU version\n");
+ return (err);
+ }
+ sc->smu_program = (smu_vers >> 24) & 0xFF;
+ sc->smu_maj = (smu_vers >> 16) & 0xFF;
+ sc->smu_min = (smu_vers >> 8) & 0xFF;
+ sc->smu_rev = smu_vers & 0xFF;
+ device_printf(dev, "SMU version: %d.%d.%d (program %d)\n",
+ sc->smu_maj, sc->smu_min, sc->smu_rev, sc->smu_program);
+
+ return (0);
+}
+
+static int
+amdsmu_get_ip_blocks(device_t dev)
+{
+ struct amdsmu_softc *sc = device_get_softc(dev);
+ const uint16_t deviceid = pci_get_device(dev);
+ int err;
+ struct amdsmu_metrics *m = &sc->metrics;
+ bool active;
+ char sysctl_descr[32];
+
+ /* Get IP block count. */
+ switch (deviceid) {
+ case PCI_DEVICEID_AMD_REMBRANDT_ROOT:
+ sc->ip_block_count = 12;
+ break;
+ case PCI_DEVICEID_AMD_PHOENIX_ROOT:
+ sc->ip_block_count = 21;
+ break;
+ /* TODO How many IP blocks does Strix Point (and the others) have? */
+ case PCI_DEVICEID_AMD_STRIX_POINT_ROOT:
+ default:
+ sc->ip_block_count = nitems(amdsmu_ip_blocks_names);
+ }
+ KASSERT(sc->ip_block_count <= nitems(amdsmu_ip_blocks_names),
+ ("too many IP blocks for array"));
+
+ /* Get and print out IP blocks. */
+ err = amdsmu_cmd(dev, SMU_MSG_GET_SUP_CONSTRAINTS, 0,
+ &sc->active_ip_blocks);
+ if (err != 0) {
+ device_printf(dev, "failed to get IP blocks\n");
+ return (err);
+ }
+	device_printf(dev, "Active IP blocks:");
+	for (size_t i = 0; i < sc->ip_block_count; i++) {
+		active = (sc->active_ip_blocks & (1 << i)) != 0;
+		sc->ip_blocks_active[i] = active;
+		if (!active)
+			continue;
+		printf(" %s", amdsmu_ip_blocks_names[i]);
+	}
+	printf("\n");
+
+ /* Create a sysctl node for IP blocks. */
+ sc->ip_blocks_sysctlnode = SYSCTL_ADD_NODE(sc->sysctlctx,
+ SYSCTL_CHILDREN(sc->sysctlnode), OID_AUTO, "ip_blocks",
+ CTLFLAG_RD, NULL, "SMU metrics");
+ if (sc->ip_blocks_sysctlnode == NULL) {
+ device_printf(dev, "could not add sysctl node for IP blocks\n");
+ return (ENOMEM);
+ }
+
+ /* Create a sysctl node for each IP block. */
+ for (size_t i = 0; i < sc->ip_block_count; i++) {
+ /* Create the sysctl node itself for the IP block. */
+ snprintf(sysctl_descr, sizeof sysctl_descr,
+ "Metrics about the %s AMD IP block",
+ amdsmu_ip_blocks_names[i]);
+ sc->ip_block_sysctlnodes[i] = SYSCTL_ADD_NODE(sc->sysctlctx,
+ SYSCTL_CHILDREN(sc->ip_blocks_sysctlnode), OID_AUTO,
+ amdsmu_ip_blocks_names[i], CTLFLAG_RD, NULL, sysctl_descr);
+ if (sc->ip_block_sysctlnodes[i] == NULL) {
+ device_printf(dev,
+ "could not add sysctl node for \"%s\"\n", sysctl_descr);
+ continue;
+ }
+ /*
+ * Create sysctls for if the IP block is currently active, last
+ * active time, and total active time.
+ */
+ SYSCTL_ADD_BOOL(sc->sysctlctx,
+ SYSCTL_CHILDREN(sc->ip_block_sysctlnodes[i]), OID_AUTO,
+ "active", CTLFLAG_RD, &sc->ip_blocks_active[i], 0,
+ "IP block is currently active");
+ SYSCTL_ADD_U64(sc->sysctlctx,
+ SYSCTL_CHILDREN(sc->ip_block_sysctlnodes[i]), OID_AUTO,
+ "last_time", CTLFLAG_RD, &m->ip_block_last_active_time[i],
+ 0, "How long the IP block was active for during the last"
+ " sleep (us)");
+#ifdef IP_BLOCK_TOTAL_ACTIVE_TIME
+ SYSCTL_ADD_U64(sc->sysctlctx,
+ SYSCTL_CHILDREN(sc->ip_block_sysctlnodes[i]), OID_AUTO,
+ "total_time", CTLFLAG_RD, &m->ip_block_total_active_time[i],
+ 0, "How long the IP block was active for during sleep in"
+ " total (us)");
+#endif
+ }
+ return (0);
+}
+
+static int
+amdsmu_init_metrics(device_t dev)
+{
+ struct amdsmu_softc *sc = device_get_softc(dev);
+ int err;
+ uint32_t metrics_addr_lo, metrics_addr_hi;
+ uint64_t metrics_addr;
+
+ /* Get physical address of logging buffer. */
+ err = amdsmu_cmd(dev, SMU_MSG_LOG_GETDRAM_ADDR_LO, 0, &metrics_addr_lo);
+ if (err != 0)
+ return (err);
+ err = amdsmu_cmd(dev, SMU_MSG_LOG_GETDRAM_ADDR_HI, 0, &metrics_addr_hi);
+ if (err != 0)
+ return (err);
+ metrics_addr = ((uint64_t) metrics_addr_hi << 32) | metrics_addr_lo;
+
+ /* Map memory of logging buffer. */
+ err = bus_space_map(sc->bus_tag, metrics_addr,
+ sizeof(struct amdsmu_metrics), 0, &sc->metrics_space);
+ if (err != 0) {
+ device_printf(dev, "could not map bus space for SMU metrics\n");
+ return (err);
+ }
+
+ /* Start logging for metrics. */
+ amdsmu_cmd(dev, SMU_MSG_LOG_RESET, 0, NULL);
+ amdsmu_cmd(dev, SMU_MSG_LOG_START, 0, NULL);
+ return (0);
+}
+
+static int
+amdsmu_dump_metrics(device_t dev)
+{
+ struct amdsmu_softc *sc = device_get_softc(dev);
+ int err;
+
+ err = amdsmu_cmd(dev, SMU_MSG_LOG_DUMP_DATA, 0, NULL);
+ if (err != 0) {
+ device_printf(dev, "failed to dump metrics\n");
+ return (err);
+ }
+ bus_space_read_region_4(sc->bus_tag, sc->metrics_space, 0,
+ (uint32_t *)&sc->metrics, sizeof(sc->metrics) / sizeof(uint32_t));
+
+ return (0);
+}
+
+static void
+amdsmu_fetch_idlemask(device_t dev)
+{
+ struct amdsmu_softc *sc = device_get_softc(dev);
+
+ sc->idlemask = amdsmu_read4(sc, SMU_REG_IDLEMASK);
+}
+
+static int
+amdsmu_attach(device_t dev)
+{
+ struct amdsmu_softc *sc = device_get_softc(dev);
+ int err;
+ uint32_t physbase_addr_lo, physbase_addr_hi;
+ uint64_t physbase_addr;
+ int rid = 0;
+ struct sysctl_oid *node;
+
+ /*
+ * Find physical base address for SMU.
+ * XXX I am a little confused about the masks here. I'm just copying
+ * what Linux does in the amd-pmc driver to get the base address.
+ */
+ pci_write_config(dev, SMU_INDEX_ADDRESS, SMU_PHYSBASE_ADDR_LO, 4);
+ physbase_addr_lo = pci_read_config(dev, SMU_INDEX_DATA, 4) & 0xFFF00000;
+
+ pci_write_config(dev, SMU_INDEX_ADDRESS, SMU_PHYSBASE_ADDR_HI, 4);
+ physbase_addr_hi = pci_read_config(dev, SMU_INDEX_DATA, 4) & 0x0000FFFF;
+
+ physbase_addr = (uint64_t)physbase_addr_hi << 32 | physbase_addr_lo;
+
+ /* Map memory for SMU and its registers. */
+ sc->res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE);
+ if (sc->res == NULL) {
+ device_printf(dev, "could not allocate resource\n");
+ return (ENXIO);
+ }
+
+ sc->bus_tag = rman_get_bustag(sc->res);
+
+ if (bus_space_map(sc->bus_tag, physbase_addr,
+ SMU_MEM_SIZE, 0, &sc->smu_space) != 0) {
+ device_printf(dev, "could not map bus space for SMU\n");
+ err = ENXIO;
+ goto err_smu_space;
+ }
+ if (bus_space_map(sc->bus_tag, physbase_addr + SMU_REG_SPACE_OFF,
+ SMU_MEM_SIZE, 0, &sc->reg_space) != 0) {
+ device_printf(dev, "could not map bus space for SMU regs\n");
+ err = ENXIO;
+ goto err_reg_space;
+ }
+
+ /* sysctl stuff. */
+ sc->sysctlctx = device_get_sysctl_ctx(dev);
+ sc->sysctlnode = device_get_sysctl_tree(dev);
+
+ /* Get version & add sysctls. */
+ if ((err = amdsmu_get_vers(dev)) != 0)
+ goto err_dump;
+
+ SYSCTL_ADD_U8(sc->sysctlctx, SYSCTL_CHILDREN(sc->sysctlnode), OID_AUTO,
+ "program", CTLFLAG_RD, &sc->smu_program, 0, "SMU program number");
+ SYSCTL_ADD_U8(sc->sysctlctx, SYSCTL_CHILDREN(sc->sysctlnode), OID_AUTO,
+ "version_major", CTLFLAG_RD, &sc->smu_maj, 0,
+ "SMU firmware major version number");
+ SYSCTL_ADD_U8(sc->sysctlctx, SYSCTL_CHILDREN(sc->sysctlnode), OID_AUTO,
+ "version_minor", CTLFLAG_RD, &sc->smu_min, 0,
+ "SMU firmware minor version number");
+ SYSCTL_ADD_U8(sc->sysctlctx, SYSCTL_CHILDREN(sc->sysctlnode), OID_AUTO,
+ "version_revision", CTLFLAG_RD, &sc->smu_rev, 0,
+ "SMU firmware revision number");
+
+ /* Set up for getting metrics & add sysctls. */
+ if ((err = amdsmu_init_metrics(dev)) != 0)
+ goto err_dump;
+ if ((err = amdsmu_dump_metrics(dev)) != 0)
+ goto err_dump;
+
+ node = SYSCTL_ADD_NODE(sc->sysctlctx, SYSCTL_CHILDREN(sc->sysctlnode),
+ OID_AUTO, "metrics", CTLFLAG_RD, NULL, "SMU metrics");
+ if (node == NULL) {
+ device_printf(dev, "could not add sysctl node for metrics\n");
+ err = ENOMEM;
+ goto err_dump;
+ }
+
+ SYSCTL_ADD_U32(sc->sysctlctx, SYSCTL_CHILDREN(node), OID_AUTO,
+ "table_version", CTLFLAG_RD, &sc->metrics.table_version, 0,
+ "SMU metrics table version");
+ SYSCTL_ADD_U32(sc->sysctlctx, SYSCTL_CHILDREN(node), OID_AUTO,
+ "hint_count", CTLFLAG_RD, &sc->metrics.hint_count, 0,
+ "How many times the sleep hint was set");
+ SYSCTL_ADD_U32(sc->sysctlctx, SYSCTL_CHILDREN(node), OID_AUTO,
+ "s0i3_last_entry_status", CTLFLAG_RD,
+ &sc->metrics.s0i3_last_entry_status, 0,
+ "1 if last S0i3 entry was successful");
+ SYSCTL_ADD_U32(sc->sysctlctx, SYSCTL_CHILDREN(node), OID_AUTO,
+ "time_last_in_s0i2", CTLFLAG_RD, &sc->metrics.time_last_in_s0i2, 0,
+ "Time spent in S0i2 during last sleep (us)");
+ SYSCTL_ADD_U64(sc->sysctlctx, SYSCTL_CHILDREN(node), OID_AUTO,
+ "time_last_entering_s0i3", CTLFLAG_RD,
+ &sc->metrics.time_last_entering_s0i3, 0,
+ "Time spent entering S0i3 during last sleep (us)");
+ SYSCTL_ADD_U64(sc->sysctlctx, SYSCTL_CHILDREN(node), OID_AUTO,
+ "total_time_entering_s0i3", CTLFLAG_RD,
+ &sc->metrics.total_time_entering_s0i3, 0,
+ "Total time spent entering S0i3 (us)");
+ SYSCTL_ADD_U64(sc->sysctlctx, SYSCTL_CHILDREN(node), OID_AUTO,
+ "time_last_resuming", CTLFLAG_RD, &sc->metrics.time_last_resuming,
+ 0, "Time spent resuming from last sleep (us)");
+ SYSCTL_ADD_U64(sc->sysctlctx, SYSCTL_CHILDREN(node), OID_AUTO,
+ "total_time_resuming", CTLFLAG_RD, &sc->metrics.total_time_resuming,
+ 0, "Total time spent resuming from sleep (us)");
+ SYSCTL_ADD_U64(sc->sysctlctx, SYSCTL_CHILDREN(node), OID_AUTO,
+ "time_last_in_s0i3", CTLFLAG_RD, &sc->metrics.time_last_in_s0i3, 0,
+ "Time spent in S0i3 during last sleep (us)");
+ SYSCTL_ADD_U64(sc->sysctlctx, SYSCTL_CHILDREN(node), OID_AUTO,
+ "total_time_in_s0i3", CTLFLAG_RD, &sc->metrics.total_time_in_s0i3,
+ 0, "Total time spent in S0i3 (us)");
+ SYSCTL_ADD_U64(sc->sysctlctx, SYSCTL_CHILDREN(node), OID_AUTO,
+ "time_last_in_sw_drips", CTLFLAG_RD,
+ &sc->metrics.time_last_in_sw_drips, 0,
+	    "Time spent awake during last sleep (us)");
+ SYSCTL_ADD_U64(sc->sysctlctx, SYSCTL_CHILDREN(node), OID_AUTO,
+ "total_time_in_sw_drips", CTLFLAG_RD,
+ &sc->metrics.total_time_in_sw_drips, 0,
+ "Total time spent awake (us)");
+
+ /* Get IP blocks & add sysctls. */
+ err = amdsmu_get_ip_blocks(dev);
+ if (err != 0)
+ goto err_dump;
+
+ /* Get idlemask & add sysctl. */
+ amdsmu_fetch_idlemask(dev);
+ SYSCTL_ADD_U32(sc->sysctlctx, SYSCTL_CHILDREN(sc->sysctlnode), OID_AUTO,
+ "idlemask", CTLFLAG_RD, &sc->idlemask, 0, "SMU idlemask. This "
+ "value is not documented - only used to help AMD internally debug "
+ "issues");
+
+ return (0);
+err_dump:
+ bus_space_unmap(sc->bus_tag, sc->reg_space, SMU_MEM_SIZE);
+err_reg_space:
+ bus_space_unmap(sc->bus_tag, sc->smu_space, SMU_MEM_SIZE);
+err_smu_space:
+ bus_release_resource(dev, SYS_RES_MEMORY, rid, sc->res);
+ return (err);
+}
+
+static int
+amdsmu_detach(device_t dev)
+{
+ struct amdsmu_softc *sc = device_get_softc(dev);
+ int rid = 0;
+
+ bus_space_unmap(sc->bus_tag, sc->smu_space, SMU_MEM_SIZE);
+ bus_space_unmap(sc->bus_tag, sc->reg_space, SMU_MEM_SIZE);
+
+ bus_release_resource(dev, SYS_RES_MEMORY, rid, sc->res);
+ return (0);
+}
+
+static device_method_t amdsmu_methods[] = {
+ DEVMETHOD(device_identify, amdsmu_identify),
+ DEVMETHOD(device_probe, amdsmu_probe),
+ DEVMETHOD(device_attach, amdsmu_attach),
+ DEVMETHOD(device_detach, amdsmu_detach),
+ DEVMETHOD_END
+};
+
+static driver_t amdsmu_driver = {
+ "amdsmu",
+ amdsmu_methods,
+ sizeof(struct amdsmu_softc),
+};
+
+DRIVER_MODULE(amdsmu, hostb, amdsmu_driver, NULL, NULL);
+MODULE_VERSION(amdsmu, 1);
+MODULE_DEPEND(amdsmu, amdsmn, 1, 1, 1);
+MODULE_PNP_INFO("U16:vendor;U16:device", pci, amdsmu, amdsmu_products,
+ nitems(amdsmu_products));
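
The mailbox protocol implemented by amdsmu_wait_res()/amdsmu_cmd() above is: poll SMU_REG_RESPONSE until it leaves SMU_RES_WAIT, clear it, write the message and argument registers, then poll again and read any result back from SMU_REG_ARGUMENT. With SMU_RES_READ_PERIOD_US (50 us) and up to SMU_RES_READ_MAX (20000) polls, a single command may take about one second before ETIMEDOUT. A minimal caller sketch, assuming an attached amdsmu device; this is illustrative, not part of the commit:

	uint32_t vers;
	int error;

	/* Round-trip one mailbox command; response codes map to errno. */
	error = amdsmu_cmd(dev, SMU_MSG_GETSMUVERSION, 0, &vers);
	if (error == 0)
		device_printf(dev, "SMU v%u.%u.%u (program %u)\n",
		    (vers >> 16) & 0xFF, (vers >> 8) & 0xFF, vers & 0xFF,
		    (vers >> 24) & 0xFF);
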
diff --git a/sys/dev/amdsmu/amdsmu.h b/sys/dev/amdsmu/amdsmu.h
new file mode 100644
index 000000000000..025887f7fe5a
--- /dev/null
+++ b/sys/dev/amdsmu/amdsmu.h
@@ -0,0 +1,95 @@
+/*
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2025 The FreeBSD Foundation
+ *
+ * This software was developed by Aymeric Wibo <obiwac@freebsd.org>
+ * under sponsorship from the FreeBSD Foundation.
+ */
+#ifndef _AMDSMU_H_
+#define _AMDSMU_H_
+
+#include <sys/param.h>
+#include <sys/bus.h>
+#include <sys/kernel.h>
+#include <machine/bus.h>
+#include <x86/cputypes.h>
+
+#include <dev/amdsmu/amdsmu_reg.h>
+
+#define SMU_RES_READ_PERIOD_US 50
+#define SMU_RES_READ_MAX 20000
+
+static const struct amdsmu_product {
+ uint16_t amdsmu_vendorid;
+ uint16_t amdsmu_deviceid;
+} amdsmu_products[] = {
+ { CPU_VENDOR_AMD, PCI_DEVICEID_AMD_REMBRANDT_ROOT },
+ { CPU_VENDOR_AMD, PCI_DEVICEID_AMD_PHOENIX_ROOT },
+ { CPU_VENDOR_AMD, PCI_DEVICEID_AMD_STRIX_POINT_ROOT },
+};
+
+static const char *const amdsmu_ip_blocks_names[] = {
+ "DISPLAY",
+ "CPU",
+ "GFX",
+ "VDD",
+ "ACP",
+ "VCN",
+ "ISP",
+ "NBIO",
+ "DF",
+ "USB3_0",
+ "USB3_1",
+ "LAPIC",
+ "USB3_2",
+ "USB3_3",
+ "USB3_4",
+ "USB4_0",
+ "USB4_1",
+ "MPM",
+ "JPEG",
+ "IPU",
+ "UMSCH",
+ "VPE",
+};
+
+CTASSERT(nitems(amdsmu_ip_blocks_names) <= 32);
+
+struct amdsmu_softc {
+ struct sysctl_ctx_list *sysctlctx;
+ struct sysctl_oid *sysctlnode;
+
+ struct resource *res;
+ bus_space_tag_t bus_tag;
+
+ bus_space_handle_t smu_space;
+ bus_space_handle_t reg_space;
+
+ uint8_t smu_program;
+ uint8_t smu_maj, smu_min, smu_rev;
+
+ uint32_t active_ip_blocks;
+ struct sysctl_oid *ip_blocks_sysctlnode;
+ size_t ip_block_count;
+ struct sysctl_oid *ip_block_sysctlnodes[nitems(amdsmu_ip_blocks_names)];
+ bool ip_blocks_active[nitems(amdsmu_ip_blocks_names)];
+
+ bus_space_handle_t metrics_space;
+ struct amdsmu_metrics metrics;
+ uint32_t idlemask;
+};
+
+static inline uint32_t
+amdsmu_read4(const struct amdsmu_softc *sc, bus_size_t reg)
+{
+ return (bus_space_read_4(sc->bus_tag, sc->reg_space, reg));
+}
+
+static inline void
+amdsmu_write4(const struct amdsmu_softc *sc, bus_size_t reg, uint32_t val)
+{
+ bus_space_write_4(sc->bus_tag, sc->reg_space, reg, val);
+}
+
+#endif /* _AMDSMU_H_ */
diff --git a/sys/dev/amdsmu/amdsmu_reg.h b/sys/dev/amdsmu/amdsmu_reg.h
new file mode 100644
index 000000000000..e685b34e6883
--- /dev/null
+++ b/sys/dev/amdsmu/amdsmu_reg.h
@@ -0,0 +1,84 @@
+/*
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2025 The FreeBSD Foundation
+ *
+ * This software was developed by Aymeric Wibo <obiwac@freebsd.org>
+ * under sponsorship from the FreeBSD Foundation.
+ */
+#ifndef _AMDSMU_REG_H_
+#define _AMDSMU_REG_H_
+
+#include <sys/types.h>
+
+/*
+ * TODO These are in common with amdtemp; should we find a way to factor these
+ * out? Also, there are way more of these. I couldn't find a centralized place
+ * which lists them though.
+ */
+#define PCI_DEVICEID_AMD_REMBRANDT_ROOT 0x14B5
+#define PCI_DEVICEID_AMD_PHOENIX_ROOT 0x14E8
+#define PCI_DEVICEID_AMD_STRIX_POINT_ROOT 0x14A4
+
+#define SMU_INDEX_ADDRESS 0xB8
+#define SMU_INDEX_DATA 0xBC
+
+#define SMU_PHYSBASE_ADDR_LO 0x13B102E8
+#define SMU_PHYSBASE_ADDR_HI 0x13B102EC
+
+#define SMU_MEM_SIZE 0x1000
+#define SMU_REG_SPACE_OFF 0x10000
+
+#define SMU_REG_MESSAGE 0x538
+#define SMU_REG_RESPONSE 0x980
+#define SMU_REG_ARGUMENT 0x9BC
+#define SMU_REG_IDLEMASK 0xD14
+
+enum amdsmu_res {
+ SMU_RES_WAIT = 0x00,
+ SMU_RES_OK = 0x01,
+ SMU_RES_REJECT_BUSY = 0xFC,
+ SMU_RES_REJECT_PREREQ = 0xFD,
+ SMU_RES_UNKNOWN = 0xFE,
+ SMU_RES_FAILED = 0xFF,
+};
+
+enum amdsmu_msg {
+ SMU_MSG_GETSMUVERSION = 0x02,
+ SMU_MSG_LOG_GETDRAM_ADDR_HI = 0x04,
+ SMU_MSG_LOG_GETDRAM_ADDR_LO = 0x05,
+ SMU_MSG_LOG_START = 0x06,
+ SMU_MSG_LOG_RESET = 0x07,
+ SMU_MSG_LOG_DUMP_DATA = 0x08,
+ SMU_MSG_GET_SUP_CONSTRAINTS = 0x09,
+};
+
+/* XXX Copied from Linux struct smu_metrics. */
+struct amdsmu_metrics {
+ uint32_t table_version;
+ uint32_t hint_count;
+ uint32_t s0i3_last_entry_status;
+ uint32_t time_last_in_s0i2;
+ uint64_t time_last_entering_s0i3;
+ uint64_t total_time_entering_s0i3;
+ uint64_t time_last_resuming;
+ uint64_t total_time_resuming;
+ uint64_t time_last_in_s0i3;
+ uint64_t total_time_in_s0i3;
+ uint64_t time_last_in_sw_drips;
+ uint64_t total_time_in_sw_drips;
+ /*
+ * This is how long each IP block was active for (us), i.e., blocking
+ * entry to S0i3. In Linux, these are called "timecondition_notmet_*".
+ *
+ * XXX Total active time for IP blocks seems to be buggy and reporting
+ * garbage (at least on Phoenix), so it's disabled for now. The last
+ * active time for the USB4_0 IP block also seems to be buggy.
+ */
+ uint64_t ip_block_last_active_time[32];
+#ifdef IP_BLOCK_TOTAL_ACTIVE_TIME
+ uint64_t ip_block_total_active_time[32];
+#endif
+} __attribute__((packed));
+
+#endif /* _AMDSMU_REG_H_ */
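
SMU_INDEX_ADDRESS/SMU_INDEX_DATA form a classic index/data window in PCI config space: writing an internal SMU register address at offset 0xB8 makes its value readable at offset 0xBC, which is how amdsmu_attach() fetches SMU_PHYSBASE_ADDR_LO/HI. A hedged helper sketch (the driver open-codes this; the function name is invented):

	static uint32_t
	amdsmu_indirect_read(device_t dev, uint32_t addr)
	{
		/* Select the internal register, then read it back. */
		pci_write_config(dev, SMU_INDEX_ADDRESS, addr, 4);
		return (pci_read_config(dev, SMU_INDEX_DATA, 4));
	}
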
diff --git a/sys/dev/cxgbe/tom/t4_cpl_io.c b/sys/dev/cxgbe/tom/t4_cpl_io.c
index 8547f21586e1..7a6b1cbdd736 100644
--- a/sys/dev/cxgbe/tom/t4_cpl_io.c
+++ b/sys/dev/cxgbe/tom/t4_cpl_io.c
@@ -703,7 +703,7 @@ t4_push_frames(struct adapter *sc, struct toepcb *toep, int drop)
for (m = sndptr; m != NULL; m = m->m_next) {
int n;
- if ((m->m_flags & M_NOTAVAIL) != 0)
+ if ((m->m_flags & M_NOTREADY) != 0)
break;
if (m->m_flags & M_EXTPG) {
#ifdef KERN_TLS
@@ -787,7 +787,7 @@ t4_push_frames(struct adapter *sc, struct toepcb *toep, int drop)
/* nothing to send */
if (plen == 0) {
- KASSERT(m == NULL || (m->m_flags & M_NOTAVAIL) != 0,
+ KASSERT(m == NULL || (m->m_flags & M_NOTREADY) != 0,
("%s: nothing to send, but m != NULL is ready",
__func__));
break;
@@ -880,7 +880,7 @@ t4_push_frames(struct adapter *sc, struct toepcb *toep, int drop)
toep->txsd_avail--;
t4_l2t_send(sc, wr, toep->l2te);
- } while (m != NULL && (m->m_flags & M_NOTAVAIL) == 0);
+ } while (m != NULL && (m->m_flags & M_NOTREADY) == 0);
/* Send a FIN if requested, but only if there's no more data to send */
if (m == NULL && toep->flags & TPF_SEND_FIN)
diff --git a/sys/dev/cxgbe/tom/t4_tls.c b/sys/dev/cxgbe/tom/t4_tls.c
index c6377980fca9..27c16b9988ae 100644
--- a/sys/dev/cxgbe/tom/t4_tls.c
+++ b/sys/dev/cxgbe/tom/t4_tls.c
@@ -563,7 +563,7 @@ t4_push_ktls(struct adapter *sc, struct toepcb *toep, int drop)
* If there is no ready data to send, wait until more
* data arrives.
*/
- if (m == NULL || (m->m_flags & M_NOTAVAIL) != 0) {
+ if (m == NULL || (m->m_flags & M_NOTREADY) != 0) {
if (sowwakeup)
sowwakeup_locked(so);
else
@@ -614,7 +614,7 @@ t4_push_ktls(struct adapter *sc, struct toepcb *toep, int drop)
/* Shove if there is no additional data pending. */
shove = ((m->m_next == NULL ||
- (m->m_next->m_flags & M_NOTAVAIL) != 0)) &&
+ (m->m_next->m_flags & M_NOTREADY) != 0)) &&
(tp->t_flags & TF_MORETOCOME) == 0;
if (sb->sb_flags & SB_AUTOSIZE &&
diff --git a/sys/dev/drm2/drm_fb_helper.c b/sys/dev/drm2/drm_fb_helper.c
index f67cc9f60d02..1f4abd255690 100644
--- a/sys/dev/drm2/drm_fb_helper.c
+++ b/sys/dev/drm2/drm_fb_helper.c
@@ -51,7 +51,7 @@ struct vt_kms_softc {
struct task fb_mode_task;
};
-/* Call restore out of vt(9) locks. */
+/* Call restore out of vt(4) locks. */
static void
vt_restore_fbdev_mode(void *arg, int pending)
{
diff --git a/sys/dev/efidev/efirt.c b/sys/dev/efidev/efirt.c
index b0fa33daeca7..b55c1c191077 100644
--- a/sys/dev/efidev/efirt.c
+++ b/sys/dev/efidev/efirt.c
@@ -107,7 +107,8 @@ static int efi_status2err[25] = {
enum efi_table_type {
TYPE_ESRT = 0,
- TYPE_PROP
+ TYPE_PROP,
+ TYPE_MEMORY_ATTR
};
static int efi_enter(void);
@@ -445,6 +446,42 @@ get_table_length(enum efi_table_type type, size_t *table_len, void **taddr)
free(buf, M_TEMP);
return (0);
}
+ case TYPE_MEMORY_ATTR:
+ {
+ efi_guid_t guid = EFI_MEMORY_ATTRIBUTES_TABLE;
+ struct efi_memory_attribute_table *tbl_addr, *mem_addr;
+ int error;
+ void *buf;
+ size_t len = sizeof(struct efi_memory_attribute_table);
+
+ error = efi_get_table(&guid, (void **)&tbl_addr);
+ if (error)
+ return (error);
+
+ buf = malloc(len, M_TEMP, M_WAITOK);
+ error = physcopyout((vm_paddr_t)tbl_addr, buf, len);
+ if (error) {
+ free(buf, M_TEMP);
+ return (error);
+ }
+
+ mem_addr = (struct efi_memory_attribute_table *)buf;
+ if (mem_addr->version != 2) {
+ free(buf, M_TEMP);
+ return (EINVAL);
+ }
+ len += mem_addr->descriptor_size * mem_addr->num_ents;
+ if (len > EFI_TABLE_ALLOC_MAX) {
+ free(buf, M_TEMP);
+ return (ENOMEM);
+ }
+
+ *table_len = len;
+ if (taddr != NULL)
+ *taddr = tbl_addr;
+ free(buf, M_TEMP);
+ return (0);
+ }
}
return (ENOENT);
}
@@ -457,7 +494,8 @@ copy_table(efi_guid_t *guid, void **buf, size_t buf_len, size_t *table_len)
enum efi_table_type type;
} tables[] = {
{ EFI_TABLE_ESRT, TYPE_ESRT },
- { EFI_PROPERTIES_TABLE, TYPE_PROP }
+ { EFI_PROPERTIES_TABLE, TYPE_PROP },
+ { EFI_MEMORY_ATTRIBUTES_TABLE, TYPE_MEMORY_ATTR }
};
size_t table_idx;
void *taddr;
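
The TYPE_MEMORY_ATTR length computed above is the fixed header plus num_ents entries of descriptor_size bytes each, where descriptor_size may exceed the size of the C descriptor type, so a consumer walking the copied table must step by the advertised stride rather than sizeof(). A sketch under the assumption that entries immediately follow the header (illustrative only):

	static void
	walk_memattr(struct efi_memory_attribute_table *tbl)
	{
		char *p = (char *)(tbl + 1);	/* assumed entry layout */

		for (uint32_t i = 0; i < tbl->num_ents; i++) {
			struct efi_md *md =
			    (struct efi_md *)(p + i * tbl->descriptor_size);
			/* md describes one runtime range and its attributes. */
			(void)md;
		}
	}
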
diff --git a/sys/dev/ice/ice_iov.c b/sys/dev/ice/ice_iov.c
index e06c7eb56f7a..c5a3e1060e44 100644
--- a/sys/dev/ice/ice_iov.c
+++ b/sys/dev/ice/ice_iov.c
@@ -345,7 +345,7 @@ ice_iov_add_vf(struct ice_softc *sc, uint16_t vfnum, const nvlist_t *params)
if (nvlist_exists_binary(params, "mac-addr")) {
mac = nvlist_get_binary(params, "mac-addr", &size);
- bcopy(mac, vf->mac, ETHER_ADDR_LEN);
+ memcpy(vf->mac, mac, ETHER_ADDR_LEN);
if (nvlist_get_bool(params, "allow-set-mac"))
vf->vf_flags |= VF_FLAG_SET_MAC_CAP;
@@ -617,12 +617,14 @@ ice_vc_get_vf_res_msg(struct ice_softc *sc, struct ice_vf *vf, u8 *msg_buf)
{
struct ice_hw *hw = &sc->hw;
struct virtchnl_vf_resource *vf_res;
+ struct virtchnl_vsi_resource *vsi_res;
u16 vf_res_len;
u32 vf_caps;
/* XXX: Only support one VSI per VF, so this size doesn't need adjusting */
vf_res_len = sizeof(struct virtchnl_vf_resource);
- vf_res = (struct virtchnl_vf_resource *)malloc(vf_res_len, M_ICE, M_WAITOK | M_ZERO);
+ vf_res = (struct virtchnl_vf_resource *)malloc(vf_res_len, M_ICE,
+ M_WAITOK | M_ZERO);
vf_res->num_vsis = 1;
vf_res->num_queue_pairs = vf->vsi->num_tx_queues;
@@ -643,10 +645,13 @@ ice_vc_get_vf_res_msg(struct ice_softc *sc, struct ice_vf *vf, u8 *msg_buf)
vf_res->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_WB_ON_ITR;
}
- vf_res->vsi_res[0].vsi_id = vf->vsi->idx;
- vf_res->vsi_res[0].num_queue_pairs = vf->vsi->num_tx_queues;
- vf_res->vsi_res[0].vsi_type = VIRTCHNL_VSI_SRIOV;
- vf_res->vsi_res[0].qset_handle = 0;
+ vsi_res = &vf_res->vsi_res[0];
+ vsi_res->vsi_id = vf->vsi->idx;
+ vsi_res->num_queue_pairs = vf->vsi->num_tx_queues;
+ vsi_res->vsi_type = VIRTCHNL_VSI_SRIOV;
+ vsi_res->qset_handle = 0;
+ if (!ETHER_IS_ZERO(vf->mac))
+ memcpy(vsi_res->default_mac_addr, vf->mac, ETHER_ADDR_LEN);
ice_aq_send_msg_to_vf(hw, vf->vf_num, VIRTCHNL_OP_GET_VF_RESOURCES,
VIRTCHNL_STATUS_SUCCESS, (u8 *)vf_res, vf_res_len, NULL);
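
The "size doesn't need adjusting" comment above holds because virtchnl_vf_resource ends in a one-element vsi_res[] array and the driver reports exactly one VSI. If more than one VSI were ever reported, the allocation would follow the usual trailing-array sizing; a sketch, not part of the commit:

	vf_res_len = sizeof(struct virtchnl_vf_resource) +
	    (num_vsis - 1) * sizeof(struct virtchnl_vsi_resource);
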
diff --git a/sys/dev/md/md.c b/sys/dev/md/md.c
index 29dc0c880e3a..ec1664fac701 100644
--- a/sys/dev/md/md.c
+++ b/sys/dev/md/md.c
@@ -89,6 +89,8 @@
#include <sys/unistd.h>
#include <sys/vnode.h>
#include <sys/disk.h>
+#include <sys/param.h>
+#include <sys/bus.h>
#include <geom/geom.h>
#include <geom/geom_int.h>
@@ -2082,8 +2084,10 @@ g_md_init(struct g_class *mp __unused)
{
caddr_t mod;
u_char *ptr, *name, *type;
+ u_char scratch[40];
unsigned len;
int i;
+ vm_offset_t paddr;
/* figure out log2(NINDIR) */
for (i = NINDIR, nshift = -1; i; nshift++)
@@ -2123,6 +2127,25 @@ g_md_init(struct g_class *mp __unused)
sx_xunlock(&md_sx);
}
}
+
+ /*
+ * Load up to 32 pre-loaded disks
+ */
+ for (int i = 0; i < 32; i++) {
+ if (resource_long_value("md", i, "physaddr",
+ (long *) &paddr) != 0 ||
+ resource_int_value("md", i, "len", &len) != 0)
+ break;
+		ptr = (u_char *)pmap_map(NULL, paddr, paddr + len, VM_PROT_READ);
+ if (ptr != NULL && len != 0) {
+ sprintf(scratch, "preload%d 0x%016jx", i,
+ (uintmax_t)paddr);
+ sx_xlock(&md_sx);
+ md_preloaded(ptr, len, scratch);
+ sx_xunlock(&md_sx);
+ }
+ }
+
status_dev = make_dev(&mdctl_cdevsw, INT_MAX, UID_ROOT, GID_WHEEL,
0600, MDCTL_NAME);
g_topology_lock();
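
The new loop consumes kernel-environment hints of the form hint.md.<unit>.*, since resource_long_value("md", i, "physaddr") and friends read that namespace. A boot loader that has placed a disk image in physical memory could hand it to md(4) with entries such as the following (values illustrative only):

	hint.md.0.physaddr="0x90000000"
	hint.md.0.len="8388608"
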
diff --git a/sys/dev/nvme/nvme_ctrlr.c b/sys/dev/nvme/nvme_ctrlr.c
index 73a7cee4aad0..fd7f00ced14b 100644
--- a/sys/dev/nvme/nvme_ctrlr.c
+++ b/sys/dev/nvme/nvme_ctrlr.c
@@ -48,7 +48,7 @@
#define B4_CHK_RDY_DELAY_MS 2300 /* work around controller bug */
static void nvme_ctrlr_construct_and_submit_aer(struct nvme_controller *ctrlr,
- struct nvme_async_event_request *aer);
+ struct nvme_async_event_request *aer);
static void
nvme_ctrlr_barrier(struct nvme_controller *ctrlr, int flags)
@@ -680,96 +680,6 @@ nvme_ctrlr_log_critical_warnings(struct nvme_controller *ctrlr,
}
static void
-nvme_ctrlr_async_event_log_page_cb(void *arg, const struct nvme_completion *cpl)
-{
- struct nvme_async_event_request *aer = arg;
- struct nvme_health_information_page *health_info;
- struct nvme_ns_list *nsl;
- struct nvme_error_information_entry *err;
- int i;
-
- /*
- * If the log page fetch for some reason completed with an error,
- * don't pass log page data to the consumers. In practice, this case
- * should never happen.
- */
- if (nvme_completion_is_error(cpl))
- nvme_notify_async_consumers(aer->ctrlr, &aer->cpl,
- aer->log_page_id, NULL, 0);
- else {
- /* Convert data to host endian */
- switch (aer->log_page_id) {
- case NVME_LOG_ERROR:
- err = (struct nvme_error_information_entry *)aer->log_page_buffer;
- for (i = 0; i < (aer->ctrlr->cdata.elpe + 1); i++)
- nvme_error_information_entry_swapbytes(err++);
- break;
- case NVME_LOG_HEALTH_INFORMATION:
- nvme_health_information_page_swapbytes(
- (struct nvme_health_information_page *)aer->log_page_buffer);
- break;
- case NVME_LOG_CHANGED_NAMESPACE:
- nvme_ns_list_swapbytes(
- (struct nvme_ns_list *)aer->log_page_buffer);
- break;
- case NVME_LOG_COMMAND_EFFECT:
- nvme_command_effects_page_swapbytes(
- (struct nvme_command_effects_page *)aer->log_page_buffer);
- break;
- case NVME_LOG_RES_NOTIFICATION:
- nvme_res_notification_page_swapbytes(
- (struct nvme_res_notification_page *)aer->log_page_buffer);
- break;
- case NVME_LOG_SANITIZE_STATUS:
- nvme_sanitize_status_page_swapbytes(
- (struct nvme_sanitize_status_page *)aer->log_page_buffer);
- break;
- default:
- break;
- }
-
- if (aer->log_page_id == NVME_LOG_HEALTH_INFORMATION) {
- health_info = (struct nvme_health_information_page *)
- aer->log_page_buffer;
- nvme_ctrlr_log_critical_warnings(aer->ctrlr,
- health_info->critical_warning);
- /*
- * Critical warnings reported through the
- * SMART/health log page are persistent, so
- * clear the associated bits in the async event
- * config so that we do not receive repeated
- * notifications for the same event.
- */
- aer->ctrlr->async_event_config &=
- ~health_info->critical_warning;
- nvme_ctrlr_cmd_set_async_event_config(aer->ctrlr,
- aer->ctrlr->async_event_config, NULL, NULL);
- } else if (aer->log_page_id == NVME_LOG_CHANGED_NAMESPACE &&
- !nvme_use_nvd) {
- nsl = (struct nvme_ns_list *)aer->log_page_buffer;
- for (i = 0; i < nitems(nsl->ns) && nsl->ns[i] != 0; i++) {
- if (nsl->ns[i] > NVME_MAX_NAMESPACES)
- break;
- nvme_notify_ns(aer->ctrlr, nsl->ns[i]);
- }
- }
-
- /*
- * Pass the cpl data from the original async event completion,
- * not the log page fetch.
- */
- nvme_notify_async_consumers(aer->ctrlr, &aer->cpl,
- aer->log_page_id, aer->log_page_buffer, aer->log_page_size);
- }
-
- /*
- * Repost another asynchronous event request to replace the one
- * that just completed.
- */
- nvme_ctrlr_construct_and_submit_aer(aer->ctrlr, aer);
-}
-
-static void
nvme_ctrlr_async_event_cb(void *arg, const struct nvme_completion *cpl)
{
struct nvme_async_event_request *aer = arg;
@@ -784,33 +694,18 @@ nvme_ctrlr_async_event_cb(void *arg, const struct nvme_completion *cpl)
return;
}
- /* Associated log page is in bits 23:16 of completion entry dw0. */
+	/*
+	 * Save the completion status; the associated log page id is in bits
+	 * 23:16 of completion entry dw0.  Print a message and queue it for
+	 * further processing.
+	 */
+ memcpy(&aer->cpl, cpl, sizeof(*cpl));
aer->log_page_id = NVMEV(NVME_ASYNC_EVENT_LOG_PAGE_ID, cpl->cdw0);
-
nvme_printf(aer->ctrlr, "async event occurred (type 0x%x, info 0x%02x,"
" page 0x%02x)\n", NVMEV(NVME_ASYNC_EVENT_TYPE, cpl->cdw0),
NVMEV(NVME_ASYNC_EVENT_INFO, cpl->cdw0),
aer->log_page_id);
-
- if (is_log_page_id_valid(aer->log_page_id)) {
- aer->log_page_size = nvme_ctrlr_get_log_page_size(aer->ctrlr,
- aer->log_page_id);
- memcpy(&aer->cpl, cpl, sizeof(*cpl));
- nvme_ctrlr_cmd_get_log_page(aer->ctrlr, aer->log_page_id,
- NVME_GLOBAL_NAMESPACE_TAG, aer->log_page_buffer,
- aer->log_page_size, nvme_ctrlr_async_event_log_page_cb,
- aer);
- /* Wait to notify consumers until after log page is fetched. */
- } else {
- nvme_notify_async_consumers(aer->ctrlr, cpl, aer->log_page_id,
- NULL, 0);
-
- /*
- * Repost another asynchronous event request to replace the one
- * that just completed.
- */
- nvme_ctrlr_construct_and_submit_aer(aer->ctrlr, aer);
- }
+ taskqueue_enqueue(aer->ctrlr->taskqueue, &aer->task);
}
static void
@@ -819,15 +714,21 @@ nvme_ctrlr_construct_and_submit_aer(struct nvme_controller *ctrlr,
{
struct nvme_request *req;
- aer->ctrlr = ctrlr;
/*
- * XXX-MJ this should be M_WAITOK but we might be in a non-sleepable
- * callback context. AER completions should be handled on a dedicated
- * thread.
+ * We're racing the reset thread, so let that process submit this again.
+ * XXX does this really solve that race? And is that race even possible
+	 * since we only reset when we've not heard from the card in a long
+ * time. Why would we get an AER in the middle of that just before we
+ * kick off the reset?
*/
- req = nvme_allocate_request_null(M_NOWAIT, nvme_ctrlr_async_event_cb,
+ if (ctrlr->is_resetting)
+ return;
+
+ aer->ctrlr = ctrlr;
+ req = nvme_allocate_request_null(M_WAITOK, nvme_ctrlr_async_event_cb,
aer);
aer->req = req;
+ aer->log_page_id = 0; /* Not a valid page */
/*
* Disable timeout here, since asynchronous event requests should by
@@ -1203,6 +1104,140 @@ nvme_ctrlr_reset_task(void *arg, int pending)
atomic_cmpset_32(&ctrlr->is_resetting, 1, 0);
}
+static void
+nvme_ctrlr_aer_done(void *arg, const struct nvme_completion *cpl)
+{
+ struct nvme_async_event_request *aer = arg;
+
+ mtx_lock(&aer->mtx);
+ if (nvme_completion_is_error(cpl))
+ aer->log_page_size = (uint32_t)-1;
+ else
+ aer->log_page_size = nvme_ctrlr_get_log_page_size(
+ aer->ctrlr, aer->log_page_id);
+ wakeup(aer);
+ mtx_unlock(&aer->mtx);
+}
+
+static void
+nvme_ctrlr_aer_task(void *arg, int pending)
+{
+ struct nvme_async_event_request *aer = arg;
+ struct nvme_controller *ctrlr = aer->ctrlr;
+ uint32_t len;
+
+ /*
+ * We're resetting, so just punt.
+ */
+ if (ctrlr->is_resetting)
+ return;
+
+ if (!is_log_page_id_valid(aer->log_page_id)) {
+		/*
+		 * There is no log page to fetch: notify consumers with the
+		 * original completion only, then repost via the common path
+		 * below (the "out" label reposts the AER).
+		 */
+		nvme_notify_async_consumers(ctrlr, &aer->cpl, aer->log_page_id,
+		    NULL, 0);
+ goto out;
+ }
+
+ aer->log_page_size = 0;
+ len = nvme_ctrlr_get_log_page_size(aer->ctrlr, aer->log_page_id);
+ nvme_ctrlr_cmd_get_log_page(aer->ctrlr, aer->log_page_id,
+ NVME_GLOBAL_NAMESPACE_TAG, aer->log_page_buffer, len,
+ nvme_ctrlr_aer_done, aer);
+ mtx_lock(&aer->mtx);
+ while (aer->log_page_size == 0)
+ mtx_sleep(aer, &aer->mtx, PRIBIO, "nvme_pt", 0);
+ mtx_unlock(&aer->mtx);
+
+	if (aer->log_page_size == (uint32_t)-1) {
+ /*
+ * If the log page fetch for some reason completed with an
+ * error, don't pass log page data to the consumers. In
+ * practice, this case should never happen.
+ */
+ nvme_notify_async_consumers(aer->ctrlr, &aer->cpl,
+ aer->log_page_id, NULL, 0);
+ goto out;
+ }
+
+ /* Convert data to host endian */
+ switch (aer->log_page_id) {
+ case NVME_LOG_ERROR: {
+ struct nvme_error_information_entry *err =
+ (struct nvme_error_information_entry *)aer->log_page_buffer;
+ for (int i = 0; i < (aer->ctrlr->cdata.elpe + 1); i++)
+ nvme_error_information_entry_swapbytes(err++);
+ break;
+ }
+ case NVME_LOG_HEALTH_INFORMATION:
+ nvme_health_information_page_swapbytes(
+ (struct nvme_health_information_page *)aer->log_page_buffer);
+ break;
+ case NVME_LOG_CHANGED_NAMESPACE:
+ nvme_ns_list_swapbytes(
+ (struct nvme_ns_list *)aer->log_page_buffer);
+ break;
+ case NVME_LOG_COMMAND_EFFECT:
+ nvme_command_effects_page_swapbytes(
+ (struct nvme_command_effects_page *)aer->log_page_buffer);
+ break;
+ case NVME_LOG_RES_NOTIFICATION:
+ nvme_res_notification_page_swapbytes(
+ (struct nvme_res_notification_page *)aer->log_page_buffer);
+ break;
+ case NVME_LOG_SANITIZE_STATUS:
+ nvme_sanitize_status_page_swapbytes(
+ (struct nvme_sanitize_status_page *)aer->log_page_buffer);
+ break;
+ default:
+ break;
+ }
+
+ if (aer->log_page_id == NVME_LOG_HEALTH_INFORMATION) {
+ struct nvme_health_information_page *health_info =
+ (struct nvme_health_information_page *)aer->log_page_buffer;
+
+ /*
+ * Critical warnings reported through the SMART/health log page
+ * are persistent, so clear the associated bits in the async
+ * event config so that we do not receive repeated notifications
+ * for the same event.
+ */
+ nvme_ctrlr_log_critical_warnings(aer->ctrlr,
+ health_info->critical_warning);
+ aer->ctrlr->async_event_config &=
+ ~health_info->critical_warning;
+ nvme_ctrlr_cmd_set_async_event_config(aer->ctrlr,
+ aer->ctrlr->async_event_config, NULL, NULL);
+ } else if (aer->log_page_id == NVME_LOG_CHANGED_NAMESPACE) {
+ struct nvme_ns_list *nsl =
+ (struct nvme_ns_list *)aer->log_page_buffer;
+ for (int i = 0; i < nitems(nsl->ns) && nsl->ns[i] != 0; i++) {
+ if (nsl->ns[i] > NVME_MAX_NAMESPACES)
+ break;
+ nvme_notify_ns(aer->ctrlr, nsl->ns[i]);
+ }
+ }
+
+ /*
+ * Pass the cpl data from the original async event completion, not the
+ * log page fetch.
+ */
+ nvme_notify_async_consumers(aer->ctrlr, &aer->cpl,
+ aer->log_page_id, aer->log_page_buffer, aer->log_page_size);
+
+ /*
+ * Repost another asynchronous event request to replace the one
+ * that just completed.
+ */
+out:
+ nvme_ctrlr_construct_and_submit_aer(ctrlr, aer);
+}
+
/*
* Poll all the queues enabled on the device for completion.
*/
@@ -1574,13 +1609,8 @@ nvme_ctrlr_construct(struct nvme_controller *ctrlr, device_t dev)
/*
* Create 2 threads for the taskqueue. The reset thread will block when
* it detects that the controller has failed until all I/O has been
- * failed up the stack. The fail_req task needs to be able to run in
- * this case to finish the request failure for some cases.
- *
- * We could partially solve this race by draining the failed requeust
- * queue before proceding to free the sim, though nothing would stop
- * new I/O from coming in after we do that drain, but before we reach
- * cam_sim_free, so this big hammer is used instead.
+ * failed up the stack. The second thread is used for AER events, which
+ * can block, but only briefly for memory and log page fetching.
*/
ctrlr->taskqueue = taskqueue_create("nvme_taskq", M_WAITOK,
taskqueue_thread_enqueue, &ctrlr->taskqueue);
@@ -1590,7 +1620,12 @@ nvme_ctrlr_construct(struct nvme_controller *ctrlr, device_t dev)
ctrlr->is_initialized = false;
ctrlr->notification_sent = 0;
TASK_INIT(&ctrlr->reset_task, 0, nvme_ctrlr_reset_task, ctrlr);
- STAILQ_INIT(&ctrlr->fail_req);
+ for (int i = 0; i < NVME_MAX_ASYNC_EVENTS; i++) {
+ struct nvme_async_event_request *aer = &ctrlr->aer[i];
+
+ TASK_INIT(&aer->task, 0, nvme_ctrlr_aer_task, aer);
+ mtx_init(&aer->mtx, "AER mutex", NULL, MTX_DEF);
+ }
ctrlr->is_failed = false;
make_dev_args_init(&md_args);
@@ -1678,8 +1713,14 @@ nvme_ctrlr_destruct(struct nvme_controller *ctrlr, device_t dev)
}
noadminq:
- if (ctrlr->taskqueue)
+ if (ctrlr->taskqueue) {
taskqueue_free(ctrlr->taskqueue);
+ for (int i = 0; i < NVME_MAX_ASYNC_EVENTS; i++) {
+ struct nvme_async_event_request *aer = &ctrlr->aer[i];
+
+ mtx_destroy(&aer->mtx);
+ }
+ }
if (ctrlr->tag)
bus_teardown_intr(ctrlr->dev, ctrlr->res, ctrlr->tag);
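
The AER rework above hinges on one synchronization idiom: the interrupt-time callback only snapshots the completion and enqueues a task, and the taskqueue thread then performs the log-page fetch synchronously, sleeping on aer->mtx until nvme_ctrlr_aer_done() posts a result (0 meaning still in flight, (uint32_t)-1 an error, anything else the page size). A reduced, self-contained sketch of that pattern, not the driver's literal code:

	#include <sys/param.h>
	#include <sys/systm.h>
	#include <sys/lock.h>
	#include <sys/mutex.h>

	struct fetch_state {
		struct mtx	mtx;
		uint32_t	result;		/* 0 = still pending */
	};

	static void
	fetch_done(struct fetch_state *fs, bool error, uint32_t len)
	{
		mtx_lock(&fs->mtx);
		fs->result = error ? (uint32_t)-1 : len;
		wakeup(fs);			/* rouse fetch_wait() */
		mtx_unlock(&fs->mtx);
	}

	static uint32_t
	fetch_wait(struct fetch_state *fs)
	{
		mtx_lock(&fs->mtx);
		while (fs->result == 0)
			mtx_sleep(fs, &fs->mtx, PRIBIO, "fetch", 0);
		mtx_unlock(&fs->mtx);
		return (fs->result);
	}
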
diff --git a/sys/dev/nvme/nvme_private.h b/sys/dev/nvme/nvme_private.h
index 949e69ec9290..36f00fedc48e 100644
--- a/sys/dev/nvme/nvme_private.h
+++ b/sys/dev/nvme/nvme_private.h
@@ -123,6 +123,8 @@ struct nvme_request {
struct nvme_async_event_request {
struct nvme_controller *ctrlr;
struct nvme_request *req;
+ struct task task;
+ struct mtx mtx;
struct nvme_completion cpl;
uint32_t log_page_id;
uint32_t log_page_size;
@@ -307,8 +309,6 @@ struct nvme_controller {
bool isr_warned;
bool is_initialized;
- STAILQ_HEAD(, nvme_request) fail_req;
-
/* Host Memory Buffer */
int hmb_nchunks;
size_t hmb_chunk;
diff --git a/sys/dev/ofw/ofw_bus_subr.c b/sys/dev/ofw/ofw_bus_subr.c
index 4d0479dfb957..b99d784929bc 100644
--- a/sys/dev/ofw/ofw_bus_subr.c
+++ b/sys/dev/ofw/ofw_bus_subr.c
@@ -634,11 +634,89 @@ ofw_bus_find_iparent(phandle_t node)
return (iparent);
}
+static phandle_t
+ofw_bus_search_iparent(phandle_t node)
+{
+ phandle_t iparent;
+
+ do {
+ if (OF_getencprop(node, "interrupt-parent", &iparent,
+ sizeof(iparent)) > 0) {
+ node = OF_node_from_xref(iparent);
+ } else {
+ node = OF_parent(node);
+ }
+ if (node == 0)
+ return (0);
+ } while (!OF_hasprop(node, "#interrupt-cells"));
+
+ return (OF_xref_from_node(node));
+}
+
+static int
+ofw_bus_traverse_imap(phandle_t inode, phandle_t node, uint32_t *intr,
+ int intrsz, pcell_t *res, int ressz, phandle_t *iparentp)
+{
+ struct ofw_bus_iinfo ii;
+ void *reg;
+ uint32_t *intrp;
+ phandle_t iparent;
+ int rv = 0;
+
+ /* We already have an interrupt controller */
+ if (OF_hasprop(node, "interrupt-controller"))
+ return (0);
+
+ intrp = malloc(intrsz, M_OFWPROP, M_WAITOK);
+ memcpy(intrp, intr, intrsz);
+
+ while (true) {
+ /* There is no interrupt-map to follow */
+ if (!OF_hasprop(inode, "interrupt-map")) {
+ free(intrp, M_OFWPROP);
+ return (0);
+ }
+
+ memset(&ii, 0, sizeof(ii));
+ ofw_bus_setup_iinfo(inode, &ii, sizeof(cell_t));
+
+ reg = NULL;
+ if (ii.opi_addrc > 0)
+ reg = malloc(ii.opi_addrc, M_OFWPROP, M_WAITOK);
+
+ rv = ofw_bus_lookup_imap(node, &ii, reg, ii.opi_addrc, intrp,
+ intrsz, res, ressz, &iparent);
+
+ free(reg, M_OFWPROP);
+ free(ii.opi_imap, M_OFWPROP);
+ free(ii.opi_imapmsk, M_OFWPROP);
+ free(intrp, M_OFWPROP);
+
+ if (rv == 0)
+ return (0);
+
+ node = inode;
+ inode = OF_node_from_xref(iparent);
+
+ /* Stop when we have an interrupt controller */
+ if (OF_hasprop(inode, "interrupt-controller")) {
+ *iparentp = iparent;
+ return (rv);
+ }
+
+ intrsz = rv * sizeof(pcell_t);
+ intrp = malloc(intrsz, M_OFWPROP, M_WAITOK);
+ memcpy(intrp, res, intrsz);
+ }
+}
+
int
ofw_bus_intr_to_rl(device_t dev, phandle_t node,
struct resource_list *rl, int *rlen)
{
- phandle_t iparent;
+ phandle_t iparent, iparent_node;
+ uint32_t result[16];
+ uint32_t intrpcells, *intrp;
uint32_t icells, *intr;
int err, i, irqnum, nintr, rid;
bool extended;
@@ -646,15 +724,16 @@ ofw_bus_intr_to_rl(device_t dev, phandle_t node,
nintr = OF_getencprop_alloc_multi(node, "interrupts", sizeof(*intr),
(void **)&intr);
if (nintr > 0) {
- iparent = ofw_bus_find_iparent(node);
+ iparent = ofw_bus_search_iparent(node);
if (iparent == 0) {
device_printf(dev, "No interrupt-parent found, "
"assuming direct parent\n");
iparent = OF_parent(node);
iparent = OF_xref_from_node(iparent);
}
- if (OF_searchencprop(OF_node_from_xref(iparent),
- "#interrupt-cells", &icells, sizeof(icells)) == -1) {
+ iparent_node = OF_node_from_xref(iparent);
+ if (OF_searchencprop(iparent_node, "#interrupt-cells", &icells,
+ sizeof(icells)) == -1) {
device_printf(dev, "Missing #interrupt-cells "
"property, assuming <1>\n");
icells = 1;
@@ -677,7 +756,8 @@ ofw_bus_intr_to_rl(device_t dev, phandle_t node,
for (i = 0; i < nintr; i += icells) {
if (extended) {
iparent = intr[i++];
- if (OF_searchencprop(OF_node_from_xref(iparent),
+ iparent_node = OF_node_from_xref(iparent);
+ if (OF_searchencprop(iparent_node,
"#interrupt-cells", &icells, sizeof(icells)) == -1) {
device_printf(dev, "Missing #interrupt-cells "
"property\n");
@@ -691,7 +771,16 @@ ofw_bus_intr_to_rl(device_t dev, phandle_t node,
break;
}
}
- irqnum = ofw_bus_map_intr(dev, iparent, icells, &intr[i]);
+
+ intrp = &intr[i];
+ intrpcells = ofw_bus_traverse_imap(iparent_node, node, intrp,
+ icells * sizeof(intr[0]), result, sizeof(result), &iparent);
+ if (intrpcells > 0)
+ intrp = result;
+ else
+ intrpcells = icells;
+
+ irqnum = ofw_bus_map_intr(dev, iparent, intrpcells, intrp);
resource_list_add(rl, SYS_RES_IRQ, rid++, irqnum, irqnum, 1);
}
if (rlen != NULL)
diff --git a/sys/dev/qlnx/qlnxe/qlnx_os.c b/sys/dev/qlnx/qlnxe/qlnx_os.c
index 9d23d5df1d2b..4ad190374f87 100644
--- a/sys/dev/qlnx/qlnxe/qlnx_os.c
+++ b/sys/dev/qlnx/qlnxe/qlnx_os.c
@@ -2308,8 +2308,6 @@ qlnx_init_ifnet(device_t dev, qlnx_host_t *ha)
else if (device_id == QLOGIC_PCI_DEVICE_ID_1644)
if_setbaudrate(ifp, IF_Gbps(100));
- if_setcapabilities(ifp, IFCAP_LINKSTATE);
-
if_setinitfn(ifp, qlnx_init);
if_setsoftc(ifp, ha);
if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST);
@@ -2343,7 +2341,6 @@ qlnx_init_ifnet(device_t dev, qlnx_host_t *ha)
if_setcapabilities(ifp, IFCAP_HWCSUM);
if_setcapabilitiesbit(ifp, IFCAP_JUMBO_MTU, 0);
-
if_setcapabilitiesbit(ifp, IFCAP_VLAN_MTU, 0);
if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWTAGGING, 0);
if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWFILTER, 0);
@@ -2352,6 +2349,8 @@ qlnx_init_ifnet(device_t dev, qlnx_host_t *ha)
if_setcapabilitiesbit(ifp, IFCAP_TSO4, 0);
if_setcapabilitiesbit(ifp, IFCAP_TSO6, 0);
if_setcapabilitiesbit(ifp, IFCAP_LRO, 0);
+ if_setcapabilitiesbit(ifp, IFCAP_LINKSTATE, 0);
+ if_setcapabilitiesbit(ifp, IFCAP_HWSTATS, 0);
if_sethwtsomax(ifp, QLNX_MAX_TSO_FRAME_SIZE -
(ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN));
diff --git a/sys/dev/random/fortuna.c b/sys/dev/random/fortuna.c
index c4282c723a44..8363de99a60a 100644
--- a/sys/dev/random/fortuna.c
+++ b/sys/dev/random/fortuna.c
@@ -341,6 +341,13 @@ random_fortuna_process_event(struct harvest_event *event)
u_int pl;
RANDOM_RESEED_LOCK();
+ /*
+ * Run SP 800-90B health tests on the source if so configured.
+ */
+ if (!random_harvest_healthtest(event)) {
+ RANDOM_RESEED_UNLOCK();
+ return;
+ }
/*-
* FS&K - P_i = P_i|<harvested stuff>
* Accumulate the event into the appropriate pool
diff --git a/sys/dev/random/random_harvestq.c b/sys/dev/random/random_harvestq.c
index 395310b115fb..c7762967c4fb 100644
--- a/sys/dev/random/random_harvestq.c
+++ b/sys/dev/random/random_harvestq.c
@@ -88,6 +88,8 @@ static void random_sources_feed(void);
static __read_mostly bool epoch_inited;
static __read_mostly epoch_t rs_epoch;
+static const char *random_source_descr[ENTROPYSOURCE];
+
/*
* How many events to queue up. We create this many items in
* an 'empty' queue, then transfer them to the 'harvest' queue with
@@ -299,6 +301,230 @@ random_sources_feed(void)
explicit_bzero(entropy, sizeof(entropy));
}
+/*
+ * State used for conducting NIST SP 800-90B health tests on entropy sources.
+ */
+static struct health_test_softc {
+ uint32_t ht_rct_value[HARVESTSIZE + 1];
+ u_int ht_rct_count; /* number of samples with the same value */
+ u_int ht_rct_limit; /* constant after init */
+
+ uint32_t ht_apt_value[HARVESTSIZE + 1];
+ u_int ht_apt_count; /* number of samples with the same value */
+ u_int ht_apt_seq; /* sequence number of the last sample */
+ u_int ht_apt_cutoff; /* constant after init */
+
+ uint64_t ht_total_samples;
+ bool ondemand; /* Set to true to restart the state machine */
+ enum {
+ INIT = 0, /* initial state */
+ DISABLED, /* health checking is disabled */
+ STARTUP, /* doing startup tests, samples are discarded */
+ STEADY, /* steady-state operation */
+ FAILED, /* health check failed, discard samples */
+ } ht_state;
+} healthtest[ENTROPYSOURCE];
+
+#define RANDOM_SELFTEST_STARTUP_SAMPLES 1024 /* 4.3, requirement 4 */
+#define RANDOM_SELFTEST_APT_WINDOW 512 /* 4.4.2 */
+
+static void
+copy_event(uint32_t dst[static HARVESTSIZE + 1],
+ const struct harvest_event *event)
+{
+ memset(dst, 0, sizeof(uint32_t) * (HARVESTSIZE + 1));
+ memcpy(dst, event->he_entropy, event->he_size);
+ dst[HARVESTSIZE] = event->he_somecounter;
+}
+
+static void
+random_healthtest_rct_init(struct health_test_softc *ht,
+ const struct harvest_event *event)
+{
+ ht->ht_rct_count = 1;
+ copy_event(ht->ht_rct_value, event);
+}
+
+/*
+ * Apply the repetition count test to a sample.
+ *
+ * Return false if the test failed, i.e., we observed >= C consecutive samples
+ * with the same value, and true otherwise.
+ */
+static bool
+random_healthtest_rct_next(struct health_test_softc *ht,
+ const struct harvest_event *event)
+{
+ uint32_t val[HARVESTSIZE + 1];
+
+ copy_event(val, event);
+ if (memcmp(val, ht->ht_rct_value, sizeof(ht->ht_rct_value)) != 0) {
+ ht->ht_rct_count = 1;
+ memcpy(ht->ht_rct_value, val, sizeof(ht->ht_rct_value));
+ return (true);
+ } else {
+ ht->ht_rct_count++;
+ return (ht->ht_rct_count < ht->ht_rct_limit);
+ }
+}
+
+static void
+random_healthtest_apt_init(struct health_test_softc *ht,
+ const struct harvest_event *event)
+{
+ ht->ht_apt_count = 1;
+ ht->ht_apt_seq = 1;
+ copy_event(ht->ht_apt_value, event);
+}
+
+static bool
+random_healthtest_apt_next(struct health_test_softc *ht,
+ const struct harvest_event *event)
+{
+ uint32_t val[HARVESTSIZE + 1];
+
+ if (ht->ht_apt_seq == 0) {
+ random_healthtest_apt_init(ht, event);
+ return (true);
+ }
+
+ copy_event(val, event);
+ if (memcmp(val, ht->ht_apt_value, sizeof(ht->ht_apt_value)) == 0) {
+ ht->ht_apt_count++;
+ if (ht->ht_apt_count >= ht->ht_apt_cutoff)
+ return (false);
+ }
+
+ ht->ht_apt_seq++;
+ if (ht->ht_apt_seq == RANDOM_SELFTEST_APT_WINDOW)
+ ht->ht_apt_seq = 0;
+
+ return (true);
+}
+
+/*
+ * Run the health tests for the given event. This is assumed to be called from
+ * a serialized context.
+ */
+bool
+random_harvest_healthtest(const struct harvest_event *event)
+{
+ struct health_test_softc *ht;
+
+ ht = &healthtest[event->he_source];
+
+ /*
+	 * Was on-demand testing requested?  If so, restart the state machine,
+	 * which also re-runs the startup tests.
+ */
+ if (atomic_load_bool(&ht->ondemand)) {
+ atomic_store_bool(&ht->ondemand, false);
+ ht->ht_state = INIT;
+ }
+
+ switch (ht->ht_state) {
+ case __predict_false(INIT):
+ /* Store the first sample and initialize test state. */
+ random_healthtest_rct_init(ht, event);
+ random_healthtest_apt_init(ht, event);
+ ht->ht_total_samples = 0;
+ ht->ht_state = STARTUP;
+ return (false);
+ case DISABLED:
+ /* No health testing for this source. */
+ return (true);
+ case STEADY:
+ case STARTUP:
+ ht->ht_total_samples++;
+ if (random_healthtest_rct_next(ht, event) &&
+ random_healthtest_apt_next(ht, event)) {
+ if (ht->ht_state == STARTUP &&
+ ht->ht_total_samples >=
+ RANDOM_SELFTEST_STARTUP_SAMPLES) {
+ printf(
+ "random: health test passed for source %s\n",
+ random_source_descr[event->he_source]);
+ ht->ht_state = STEADY;
+ }
+ return (ht->ht_state == STEADY);
+ }
+ ht->ht_state = FAILED;
+ printf(
+ "random: health test failed for source %s, discarding samples\n",
+ random_source_descr[event->he_source]);
+ /* FALLTHROUGH */
+ case FAILED:
+ return (false);
+	}
+	__assert_unreachable();
+}
+
+static bool nist_healthtest_enabled = false;
+SYSCTL_BOOL(_kern_random, OID_AUTO, nist_healthtest_enabled,
+ CTLFLAG_RDTUN, &nist_healthtest_enabled, 0,
+ "Enable NIST SP 800-90B health tests for noise sources");
+
+static void
+random_healthtest_init(enum random_entropy_source source)
+{
+ struct health_test_softc *ht;
+
+ ht = &healthtest[source];
+ KASSERT(ht->ht_state == INIT,
+ ("%s: health test state is %d for source %d",
+ __func__, ht->ht_state, source));
+
+ /*
+ * If health-testing is enabled, validate all sources except CACHED and
+ * VMGENID: they are deterministic sources used only a small, fixed
+ * number of times, so statistical testing is not applicable.
+ */
+ if (!nist_healthtest_enabled ||
+ source == RANDOM_CACHED || source == RANDOM_PURE_VMGENID) {
+ ht->ht_state = DISABLED;
+ return;
+ }
+
+ /*
+ * Set cutoff values for the two tests, assuming that each sample has
+ * min-entropy of 1 bit and allowing for an error rate of 1 in 2^{34}.
+	 * With a sample rate of RANDOM_KTHREAD_HZ, we expect to see a false
+ * positive once in ~54.5 years.
+ *
+ * The RCT limit comes from the formula in section 4.4.1.
+ *
+ * The APT cutoff is calculated using the formula in section 4.4.2
+ * footnote 10 with the window size changed from 512 to 511, since the
+ * test as written counts the number of samples equal to the first
+ * sample in the window, and thus tests W-1 samples.
+ */
+ ht->ht_rct_limit = 35;
+ ht->ht_apt_cutoff = 330;
+}
+
+static int
+random_healthtest_ondemand(SYSCTL_HANDLER_ARGS)
+{
+ u_int mask, source;
+ int error;
+
+ mask = 0;
+ error = sysctl_handle_int(oidp, &mask, 0, req);
+ if (error != 0 || req->newptr == NULL)
+ return (error);
+
+ while (mask != 0) {
+ source = ffs(mask) - 1;
+ if (source < nitems(healthtest))
+ atomic_store_bool(&healthtest[source].ondemand, true);
+ mask &= ~(1u << source);
+ }
+ return (0);
+}
+SYSCTL_PROC(_kern_random, OID_AUTO, nist_healthtest_ondemand,
+ CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, 0,
+ random_healthtest_ondemand, "I",
+ "Re-run NIST SP 800-90B startup health tests for a noise source");
+
static int
random_check_uint_harvestmask(SYSCTL_HANDLER_ARGS)
{
@@ -362,7 +588,8 @@ static const char *random_source_descr[ENTROPYSOURCE] = {
[RANDOM_SWI] = "SWI",
[RANDOM_FS_ATIME] = "FS_ATIME",
[RANDOM_UMA] = "UMA",
- [RANDOM_CALLOUT] = "CALLOUT", /* ENVIRONMENTAL_END */
+ [RANDOM_CALLOUT] = "CALLOUT",
+ [RANDOM_RANDOMDEV] = "RANDOMDEV", /* ENVIRONMENTAL_END */
[RANDOM_PURE_OCTEON] = "PURE_OCTEON", /* PURE_START */
[RANDOM_PURE_SAFE] = "PURE_SAFE",
[RANDOM_PURE_GLXSB] = "PURE_GLXSB",
@@ -424,6 +651,9 @@ random_harvestq_init(void *unused __unused)
hc_source_mask = almost_everything_mask;
RANDOM_HARVEST_INIT_LOCK();
harvest_context.hc_active_buf = 0;
+
+ for (int i = 0; i < ENTROPYSOURCE; i++)
+ random_healthtest_init(i);
}
SYSINIT(random_device_h_init, SI_SUB_RANDOM, SI_ORDER_THIRD, random_harvestq_init, NULL);
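
The cutoff constants in random_healthtest_init() follow directly from the cited SP 800-90B formulas under the stated assumptions H = 1 bit of min-entropy per sample and false-positive rate alpha = 2^-34; a worked check (annotation, not part of the commit):

	/*
	 * RCT (4.4.1): C = 1 + ceil(-log2(alpha) / H) = 1 + 34/1 = 35.
	 * APT (4.4.2, footnote 10): the cutoff 330 is the 1 - alpha
	 * quantile of a Binomial(W - 1 = 511, 2^-H = 1/2) count.
	 * At ~2^34 samples between false positives, a source sampled at
	 * RANDOM_KTHREAD_HZ (about 10/s, per the ~54.5-year figure in
	 * the comment above) trips spuriously once in 2^34 / 10 seconds,
	 * i.e. roughly every 54.5 years.
	 */
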
diff --git a/sys/dev/random/random_harvestq.h b/sys/dev/random/random_harvestq.h
index 7804bf52aa4f..1d462500df85 100644
--- a/sys/dev/random/random_harvestq.h
+++ b/sys/dev/random/random_harvestq.h
@@ -49,4 +49,6 @@ random_get_cyclecount(void)
return ((uint32_t)get_cyclecount());
}
+bool random_harvest_healthtest(const struct harvest_event *event);
+
#endif /* SYS_DEV_RANDOM_RANDOM_HARVESTQ_H_INCLUDED */
diff --git a/sys/dev/random/randomdev.c b/sys/dev/random/randomdev.c
index 9d1c7b1167c8..ced4dd8067d9 100644
--- a/sys/dev/random/randomdev.c
+++ b/sys/dev/random/randomdev.c
@@ -312,7 +312,7 @@ randomdev_accumulate(uint8_t *buf, u_int count)
for (i = 0; i < RANDOM_KEYSIZE_WORDS; i += sizeof(event.he_entropy)/sizeof(event.he_entropy[0])) {
event.he_somecounter = random_get_cyclecount();
event.he_size = sizeof(event.he_entropy);
- event.he_source = RANDOM_CACHED;
+ event.he_source = RANDOM_RANDOMDEV;
event.he_destination = destination++; /* Harmless cheating */
memcpy(event.he_entropy, entropy_data + i, sizeof(event.he_entropy));
p_random_alg_context->ra_event_processor(&event);
diff --git a/sys/dev/vmm/vmm_dev.c b/sys/dev/vmm/vmm_dev.c
index 819debadd1ac..9f2b009d02ec 100644
--- a/sys/dev/vmm/vmm_dev.c
+++ b/sys/dev/vmm/vmm_dev.c
@@ -30,7 +30,8 @@
#include <dev/vmm/vmm_mem.h>
#include <dev/vmm/vmm_stat.h>
-#if defined(__amd64__) && defined(COMPAT_FREEBSD12)
+#ifdef __amd64__
+#ifdef COMPAT_FREEBSD12
struct vm_memseg_12 {
int segid;
size_t len;
@@ -42,7 +43,22 @@ _Static_assert(sizeof(struct vm_memseg_12) == 80, "COMPAT_FREEBSD12 ABI");
_IOW('v', IOCNUM_ALLOC_MEMSEG, struct vm_memseg_12)
#define VM_GET_MEMSEG_12 \
_IOWR('v', IOCNUM_GET_MEMSEG, struct vm_memseg_12)
-#endif
+#endif /* COMPAT_FREEBSD12 */
+#ifdef COMPAT_FREEBSD14
+struct vm_memseg_14 {
+ int segid;
+ size_t len;
+ char name[VM_MAX_SUFFIXLEN + 1];
+};
+_Static_assert(sizeof(struct vm_memseg_14) == (VM_MAX_SUFFIXLEN + 1 + 16),
+ "COMPAT_FREEBSD14 ABI");
+
+#define VM_ALLOC_MEMSEG_14 \
+ _IOW('v', IOCNUM_ALLOC_MEMSEG, struct vm_memseg_14)
+#define VM_GET_MEMSEG_14 \
+ _IOWR('v', IOCNUM_GET_MEMSEG, struct vm_memseg_14)
+#endif /* COMPAT_FREEBSD14 */
+#endif /* __amd64__ */
struct devmem_softc {
int segid;
@@ -257,7 +273,8 @@ get_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg, size_t len)
}
static int
-alloc_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg, size_t len)
+alloc_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg, size_t len,
+ struct domainset *domainset)
{
char *name;
int error;
@@ -278,8 +295,7 @@ alloc_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg, size_t len)
if (error)
goto done;
}
-
- error = vm_alloc_memseg(sc->vm, mseg->segid, mseg->len, sysmem);
+	error = vm_alloc_memseg(sc->vm, mseg->segid, mseg->len, sysmem,
+	    domainset);
if (error)
goto done;
@@ -295,6 +311,20 @@ done:
return (error);
}
+#if defined(__amd64__) && \
+ (defined(COMPAT_FREEBSD14) || defined(COMPAT_FREEBSD12))
+/*
+ * Translate pre-15.0 memory segment identifiers into their 15.0 counterparts.
+ */
+static void
+adjust_segid(struct vm_memseg *mseg)
+{
+ if (mseg->segid != VM_SYSMEM) {
+ mseg->segid += (VM_BOOTROM - 1);
+ }
+}
+#endif
+
static int
vm_get_register_set(struct vcpu *vcpu, unsigned int count, int *regnum,
uint64_t *regval)
@@ -353,10 +383,16 @@ static const struct vmmdev_ioctl vmmdev_ioctls[] = {
VMMDEV_IOCTL(VM_STATS, VMMDEV_IOCTL_LOCK_ONE_VCPU),
VMMDEV_IOCTL(VM_STAT_DESC, 0),
-#if defined(__amd64__) && defined(COMPAT_FREEBSD12)
+#ifdef __amd64__
+#ifdef COMPAT_FREEBSD12
VMMDEV_IOCTL(VM_ALLOC_MEMSEG_12,
VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
#endif
+#ifdef COMPAT_FREEBSD14
+ VMMDEV_IOCTL(VM_ALLOC_MEMSEG_14,
+ VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
+#endif
+#endif /* __amd64__ */
VMMDEV_IOCTL(VM_ALLOC_MEMSEG,
VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
VMMDEV_IOCTL(VM_MMAP_MEMSEG,
@@ -366,9 +402,14 @@ static const struct vmmdev_ioctl vmmdev_ioctls[] = {
VMMDEV_IOCTL(VM_REINIT,
VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
-#if defined(__amd64__) && defined(COMPAT_FREEBSD12)
+#ifdef __amd64__
+#ifdef COMPAT_FREEBSD12
VMMDEV_IOCTL(VM_GET_MEMSEG_12, VMMDEV_IOCTL_SLOCK_MEMSEGS),
#endif
+#ifdef COMPAT_FREEBSD14
+ VMMDEV_IOCTL(VM_GET_MEMSEG_14, VMMDEV_IOCTL_SLOCK_MEMSEGS),
+#endif
+#endif /* __amd64__ */
VMMDEV_IOCTL(VM_GET_MEMSEG, VMMDEV_IOCTL_SLOCK_MEMSEGS),
VMMDEV_IOCTL(VM_MMAP_GETNEXT, VMMDEV_IOCTL_SLOCK_MEMSEGS),
@@ -388,6 +429,7 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
struct vmmdev_softc *sc;
struct vcpu *vcpu;
const struct vmmdev_ioctl *ioctl;
+ struct vm_memseg *mseg;
int error, vcpuid;
sc = vmmdev_lookup2(cdev);
@@ -499,20 +541,77 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
error = vm_munmap_memseg(sc->vm, mu->gpa, mu->len);
break;
}
-#if defined(__amd64__) && defined(COMPAT_FREEBSD12)
+#ifdef __amd64__
+#ifdef COMPAT_FREEBSD12
case VM_ALLOC_MEMSEG_12:
- error = alloc_memseg(sc, (struct vm_memseg *)data,
- sizeof(((struct vm_memseg_12 *)0)->name));
+ mseg = (struct vm_memseg *)data;
+
+ adjust_segid(mseg);
+ error = alloc_memseg(sc, mseg,
+ sizeof(((struct vm_memseg_12 *)0)->name), NULL);
break;
case VM_GET_MEMSEG_12:
- error = get_memseg(sc, (struct vm_memseg *)data,
+ mseg = (struct vm_memseg *)data;
+
+ adjust_segid(mseg);
+ error = get_memseg(sc, mseg,
sizeof(((struct vm_memseg_12 *)0)->name));
break;
-#endif
- case VM_ALLOC_MEMSEG:
- error = alloc_memseg(sc, (struct vm_memseg *)data,
- sizeof(((struct vm_memseg *)0)->name));
+#endif /* COMPAT_FREEBSD12 */
+#ifdef COMPAT_FREEBSD14
+ case VM_ALLOC_MEMSEG_14:
+ mseg = (struct vm_memseg *)data;
+
+ adjust_segid(mseg);
+ error = alloc_memseg(sc, mseg,
+ sizeof(((struct vm_memseg_14 *)0)->name), NULL);
+ break;
+ case VM_GET_MEMSEG_14:
+ mseg = (struct vm_memseg *)data;
+
+ adjust_segid(mseg);
+ error = get_memseg(sc, mseg,
+ sizeof(((struct vm_memseg_14 *)0)->name));
+ break;
+#endif /* COMPAT_FREEBSD14 */
+#endif /* __amd64__ */
+ case VM_ALLOC_MEMSEG: {
+ domainset_t *mask;
+ struct domainset *domainset, domain;
+
+ domainset = NULL;
+ mseg = (struct vm_memseg *)data;
+ if (mseg->ds_policy != DOMAINSET_POLICY_INVALID &&
+     mseg->ds_mask != NULL) {
+ if (mseg->ds_mask_size < sizeof(domainset_t) ||
+ mseg->ds_mask_size > DOMAINSET_MAXSIZE / NBBY) {
+ error = ERANGE;
+ break;
+ }
+ memset(&domain, 0, sizeof(domain));
+ mask = malloc(mseg->ds_mask_size, M_VMMDEV, M_WAITOK);
+ error = copyin(mseg->ds_mask, mask, mseg->ds_mask_size);
+ if (error) {
+ free(mask, M_VMMDEV);
+ break;
+ }
+ error = domainset_populate(&domain, mask, mseg->ds_policy,
+ mseg->ds_mask_size);
+ if (error) {
+ free(mask, M_VMMDEV);
+ break;
+ }
+ domainset = domainset_create(&domain);
+ if (domainset == NULL) {
+ error = EINVAL;
+ free(mask, M_VMMDEV);
+ break;
+ }
+ free(mask, M_VMMDEV);
+ }
+ error = alloc_memseg(sc, mseg, sizeof(mseg->name), domainset);
break;
+ }
case VM_GET_MEMSEG:
error = get_memseg(sc, (struct vm_memseg *)data,
sizeof(((struct vm_memseg *)0)->name));
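
The new VM_ALLOC_MEMSEG path reads an optional NUMA policy from the request. A hypothetical userland caller — assuming the updated struct vm_memseg exposes the ds_policy, ds_mask, and ds_mask_size fields consumed above, and that the usual domainset(9) macros apply — might pin a guest's system memory to one host domain like this:

#include <sys/param.h>
#include <sys/domainset.h>
#include <sys/ioctl.h>
#include <string.h>
#include <machine/vmm.h>
#include <machine/vmm_dev.h>

static int
alloc_sysmem_on_domain(int vmfd, size_t len, int domain)
{
	struct vm_memseg memseg;
	domainset_t mask;

	memset(&memseg, 0, sizeof(memseg));
	memseg.segid = VM_SYSMEM;
	memseg.len = len;
	/* Restrict the backing object to a single host NUMA domain. */
	DOMAINSET_ZERO(&mask);
	DOMAINSET_SET(domain, &mask);
	memseg.ds_policy = DOMAINSET_POLICY_ROUNDROBIN;
	memseg.ds_mask = &mask;
	memseg.ds_mask_size = sizeof(mask);
	return (ioctl(vmfd, VM_ALLOC_MEMSEG, &memseg));
}

Per the bounds check above, ds_mask_size must be at least sizeof(domainset_t) and no larger than DOMAINSET_MAXSIZE / NBBY, or the kernel returns ERANGE; once domainset_create() has interned the policy, the temporary mask buffer is freed on every path.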
@@ -820,7 +919,6 @@ sysctl_vmm_destroy(SYSCTL_HANDLER_ARGS)
buflen = VM_MAX_NAMELEN + 1;
buf = malloc(buflen, M_VMMDEV, M_WAITOK | M_ZERO);
- strlcpy(buf, "beavis", buflen);
error = sysctl_handle_string(oidp, buf, buflen, req);
if (error == 0 && req->newptr != NULL)
error = vmmdev_lookup_and_destroy(buf, req->td->td_ucred);
@@ -830,7 +928,7 @@ sysctl_vmm_destroy(SYSCTL_HANDLER_ARGS)
SYSCTL_PROC(_hw_vmm, OID_AUTO, destroy,
CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
NULL, 0, sysctl_vmm_destroy, "A",
- NULL);
+ "Destroy a vmm(4) instance (legacy interface)");
static struct cdevsw vmmdevsw = {
.d_name = "vmmdev",
@@ -909,7 +1007,6 @@ sysctl_vmm_create(SYSCTL_HANDLER_ARGS)
buflen = VM_MAX_NAMELEN + 1;
buf = malloc(buflen, M_VMMDEV, M_WAITOK | M_ZERO);
- strlcpy(buf, "beavis", buflen);
error = sysctl_handle_string(oidp, buf, buflen, req);
if (error == 0 && req->newptr != NULL)
error = vmmdev_create(buf, req->td->td_ucred);
@@ -919,7 +1016,7 @@ sysctl_vmm_create(SYSCTL_HANDLER_ARGS)
SYSCTL_PROC(_hw_vmm, OID_AUTO, create,
CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
NULL, 0, sysctl_vmm_create, "A",
- NULL);
+ "Create a vmm(4) instance (legacy interface)");
static int
vmmctl_open(struct cdev *cdev, int flags, int fmt, struct thread *td)
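
With the placeholder default gone, reading hw.vmm.create or hw.vmm.destroy now returns an empty string rather than "beavis", and the new descriptions make both nodes self-documenting in sysctl -d output; creating a VM through the legacy interface remains, e.g., sysctl hw.vmm.create=testvm.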
diff --git a/sys/dev/vmm/vmm_mem.c b/sys/dev/vmm/vmm_mem.c
index c61ae2d44b96..be59e37de33d 100644
--- a/sys/dev/vmm/vmm_mem.c
+++ b/sys/dev/vmm/vmm_mem.c
@@ -7,6 +7,7 @@
#include <sys/types.h>
#include <sys/lock.h>
+#include <sys/malloc.h>
#include <sys/sx.h>
#include <sys/systm.h>
@@ -156,10 +157,11 @@ vm_mem_allocated(struct vcpu *vcpu, vm_paddr_t gpa)
}
int
-vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem)
+vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem,
+ struct domainset *obj_domainset)
{
- struct vm_mem *mem;
struct vm_mem_seg *seg;
+ struct vm_mem *mem;
vm_object_t obj;
mem = vm_mem(vm);
@@ -179,13 +181,22 @@ vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem)
return (EINVAL);
}
+ /* When given an impossible policy, signal an error to the user. */
+ if (obj_domainset != NULL && domainset_empty_vm(obj_domainset))
+ return (EINVAL);
obj = vm_object_allocate(OBJT_SWAP, len >> PAGE_SHIFT);
if (obj == NULL)
return (ENOMEM);
seg->len = len;
seg->object = obj;
+ if (obj_domainset != NULL)
+ seg->object->domain.dr_policy = obj_domainset;
seg->sysmem = sysmem;
return (0);
}
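
Two properties of the new parameter are worth noting: passing a NULL obj_domainset preserves the old behaviour (the swap object keeps the system default policy), and an unsatisfiable policy is rejected with EINVAL before the backing object is allocated, so no cleanup is needed on that path.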
diff --git a/sys/dev/vmm/vmm_mem.h b/sys/dev/vmm/vmm_mem.h
index a4be4c1c57aa..856470cf2590 100644
--- a/sys/dev/vmm/vmm_mem.h
+++ b/sys/dev/vmm/vmm_mem.h
@@ -8,6 +8,27 @@
#ifndef _DEV_VMM_MEM_H_
#define _DEV_VMM_MEM_H_
+/* Maximum number of NUMA domains in a guest. */
+#define VM_MAXMEMDOM 8
+#define VM_MAXSYSMEM VM_MAXMEMDOM
+
+/*
+ * Identifiers for memory segments.
+ * Each guest NUMA domain is represented by a single system memory
+ * segment with an identifier in [VM_SYSMEM, VM_MAXSYSMEM).
+ * The remaining identifiers can be used to create devmem segments.
+ */
+enum {
+ VM_SYSMEM = 0,
+ VM_BOOTROM = VM_MAXSYSMEM,
+ VM_FRAMEBUFFER,
+ VM_PCIROM,
+ VM_MEMSEG_END
+};
+
+#define VM_MAX_MEMSEGS VM_MEMSEG_END
+#define VM_MAX_MEMMAPS (VM_MAX_MEMSEGS * 2)
+
#ifdef _KERNEL
#include <sys/types.h>
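
The arithmetic implied by these definitions: identifiers 0 through 7 cover the eight system-memory segments, VM_BOOTROM lands on 8, VM_FRAMEBUFFER on 9, VM_PCIROM on 10, and VM_MEMSEG_END on 11, so the per-VM tables grow to 11 segments and 22 mappings. A compile-time check — illustrative only, not part of the change — would read:

_Static_assert(VM_BOOTROM == 8 && VM_FRAMEBUFFER == 9 && VM_PCIROM == 10,
    "devmem identifier layout");
_Static_assert(VM_MAX_MEMSEGS == 11 && VM_MAX_MEMMAPS == 22,
    "segment/mapping table sizes");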
@@ -31,9 +52,6 @@ struct vm_mem_map {
int flags;
};
-#define VM_MAX_MEMSEGS 4
-#define VM_MAX_MEMMAPS 8
-
struct vm_mem {
struct vm_mem_map mem_maps[VM_MAX_MEMMAPS];
struct vm_mem_seg mem_segs[VM_MAX_MEMSEGS];
@@ -55,7 +73,8 @@ void vm_assert_memseg_xlocked(struct vm *vm);
int vm_mmap_memseg(struct vm *vm, vm_paddr_t gpa, int segid, vm_ooffset_t off,
size_t len, int prot, int flags);
int vm_munmap_memseg(struct vm *vm, vm_paddr_t gpa, size_t len);
-int vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem);
+int vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem,
+ struct domainset *obj_domainset);
void vm_free_memseg(struct vm *vm, int ident);
/*
diff --git a/sys/dev/vt/hw/vga/vt_vga.c b/sys/dev/vt/hw/vga/vt_vga.c
index 64039575c0ad..675c0573bd7e 100644
--- a/sys/dev/vt/hw/vga/vt_vga.c
+++ b/sys/dev/vt/hw/vga/vt_vga.c
@@ -1347,7 +1347,7 @@ vga_postswitch(struct vt_device *vd)
/* Reinit VGA mode, to restore view after app which change mode. */
vga_initialize(vd, (vd->vd_flags & VDF_TEXTMODE));
- /* Ask vt(9) to update chars on visible area. */
+ /* Ask vt(4) to update chars on visible area. */
vd->vd_flags |= VDF_INVALID;
}
diff --git a/sys/dev/vt/vt_core.c b/sys/dev/vt/vt_core.c
index b0f58b38a6f1..b51ef6766de4 100644
--- a/sys/dev/vt/vt_core.c
+++ b/sys/dev/vt/vt_core.c
@@ -125,10 +125,10 @@ static const struct terminal_class vt_termclass = {
(vw)->vw_number)
static SYSCTL_NODE(_kern, OID_AUTO, vt, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
- "vt(9) parameters");
+ "vt(4) parameters");
static VT_SYSCTL_INT(enable_altgr, 1, "Enable AltGr key (Do not assume R.Alt as Alt)");
static VT_SYSCTL_INT(enable_bell, 0, "Enable bell");
-static VT_SYSCTL_INT(debug, 0, "vt(9) debug level");
+static VT_SYSCTL_INT(debug, 0, "vt(4) debug level");
static VT_SYSCTL_INT(deadtimer, 15, "Time to wait busy process in VT_PROCESS mode");
static VT_SYSCTL_INT(suspendswitch, 1, "Switch to VT0 before suspend");