38 files changed, 3610 insertions, 198 deletions
diff --git a/sys/dev/drm2/drm_fb_helper.c b/sys/dev/drm2/drm_fb_helper.c
index f67cc9f60d02..1f4abd255690 100644
--- a/sys/dev/drm2/drm_fb_helper.c
+++ b/sys/dev/drm2/drm_fb_helper.c
@@ -51,7 +51,7 @@ struct vt_kms_softc {
 	struct task		 fb_mode_task;
 };
 
-/* Call restore out of vt(9) locks. */
+/* Call restore out of vt(4) locks. */
 static void
 vt_restore_fbdev_mode(void *arg, int pending)
 {
diff --git a/sys/dev/efidev/efirt.c b/sys/dev/efidev/efirt.c
index b0fa33daeca7..b55c1c191077 100644
--- a/sys/dev/efidev/efirt.c
+++ b/sys/dev/efidev/efirt.c
@@ -107,7 +107,8 @@ static int efi_status2err[25] = {
 
 enum efi_table_type {
 	TYPE_ESRT = 0,
-	TYPE_PROP
+	TYPE_PROP,
+	TYPE_MEMORY_ATTR
 };
 
 static int efi_enter(void);
@@ -445,6 +446,42 @@ get_table_length(enum efi_table_type type, size_t *table_len, void **taddr)
 		free(buf, M_TEMP);
 		return (0);
 	}
+	case TYPE_MEMORY_ATTR:
+	{
+		efi_guid_t guid = EFI_MEMORY_ATTRIBUTES_TABLE;
+		struct efi_memory_attribute_table *tbl_addr, *mem_addr;
+		int error;
+		void *buf;
+		size_t len = sizeof(struct efi_memory_attribute_table);
+
+		error = efi_get_table(&guid, (void **)&tbl_addr);
+		if (error)
+			return (error);
+
+		buf = malloc(len, M_TEMP, M_WAITOK);
+		error = physcopyout((vm_paddr_t)tbl_addr, buf, len);
+		if (error) {
+			free(buf, M_TEMP);
+			return (error);
+		}
+
+		mem_addr = (struct efi_memory_attribute_table *)buf;
+		if (mem_addr->version != 2) {
+			free(buf, M_TEMP);
+			return (EINVAL);
+		}
+		len += mem_addr->descriptor_size * mem_addr->num_ents;
+		if (len > EFI_TABLE_ALLOC_MAX) {
+			free(buf, M_TEMP);
+			return (ENOMEM);
+		}
+
+		*table_len = len;
+		if (taddr != NULL)
+			*taddr = tbl_addr;
+		free(buf, M_TEMP);
+		return (0);
+	}
 	}
 	return (ENOENT);
 }
@@ -457,7 +494,8 @@ copy_table(efi_guid_t *guid, void **buf, size_t buf_len, size_t *table_len)
 		enum efi_table_type type;
 	} tables[] = {
 		{ EFI_TABLE_ESRT,       TYPE_ESRT },
-		{ EFI_PROPERTIES_TABLE, TYPE_PROP }
+		{ EFI_PROPERTIES_TABLE, TYPE_PROP },
+		{ EFI_MEMORY_ATTRIBUTES_TABLE, TYPE_MEMORY_ATTR }
 	};
 	size_t table_idx;
 	void *taddr;
diff --git a/sys/dev/gpio/acpi_gpiobus.c b/sys/dev/gpio/acpi_gpiobus.c
index f9468e0deda0..94f4e5771266 100644
--- a/sys/dev/gpio/acpi_gpiobus.c
+++ b/sys/dev/gpio/acpi_gpiobus.c
@@ -357,7 +357,7 @@ acpi_gpiobus_attach(device_t dev)
 	status = AcpiWalkResources(handle, "_AEI", acpi_gpiobus_enumerate_aei,
 	    &ctx);
 
-	if (ACPI_FAILURE(status))
+	if (ACPI_FAILURE(status) && status != AE_NOT_FOUND)
 		device_printf(dev, "Failed to enumerate AEI resources\n");
 
 	return (0);
diff --git a/sys/dev/gpio/gpiobus.c b/sys/dev/gpio/gpiobus.c
index ab7f13177969..764bcb7e6ee8 100644
--- a/sys/dev/gpio/gpiobus.c
+++ b/sys/dev/gpio/gpiobus.c
@@ -110,10 +110,9 @@ gpio_alloc_intr_resource(device_t consumer_dev, int *rid, u_int alloc_flags,
 	res = bus_alloc_resource(consumer_dev, SYS_RES_IRQ, rid, irq, irq, 1,
 	    alloc_flags);
 	if (res == NULL) {
-		intr_free_intr_map_data((struct intr_map_data *)gpio_data);
+		intr_unmap_irq(irq);
 		return (NULL);
 	}
-	rman_set_virtual(res, gpio_data);
 	return (res);
 }
 #else
@@ -866,6 +865,25 @@ gpiobus_alloc_resource(device_t bus, device_t child, int type, int *rid,
 	    end, count, flags));
 }
 
+static int
+gpiobus_release_resource(device_t dev, device_t child, struct resource *r)
+{
+	int err;
+#ifdef INTRNG
+	u_int irq;
+
+	irq = rman_get_start(r);	/* saved now; r is released below */
+	MPASS(irq == rman_get_end(r));	/* resource must span a single IRQ */
+#endif
+	err = bus_generic_rman_release_resource(dev, child, r);
+	if (err != 0)
+		return (err);		/* release failed: keep the IRQ mapped */
+#ifdef INTRNG
+	intr_unmap_irq(irq);		/* unmap only after a successful release */
+#endif
+	return (0);
+}
+
 static struct resource_list *
 gpiobus_get_resource_list(device_t bus __unused, device_t child)
 {
@@ -1060,7 +1078,7 @@ static device_method_t gpiobus_methods[] = {
 	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
 	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
 	DEVMETHOD(bus_alloc_resource,	gpiobus_alloc_resource),
-	DEVMETHOD(bus_release_resource,	bus_generic_rman_release_resource),
+	DEVMETHOD(bus_release_resource,	gpiobus_release_resource),
 	DEVMETHOD(bus_activate_resource,	bus_generic_rman_activate_resource),
 	DEVMETHOD(bus_deactivate_resource,	bus_generic_rman_deactivate_resource),
 	DEVMETHOD(bus_get_resource_list,	gpiobus_get_resource_list),
diff --git a/sys/dev/hyperv/vmbus/vmbus_chan.c b/sys/dev/hyperv/vmbus/vmbus_chan.c
index 189a3e66a039..7ea60a499c72 100644
--- a/sys/dev/hyperv/vmbus/vmbus_chan.c
+++ b/sys/dev/hyperv/vmbus/vmbus_chan.c
@@ -1555,7 +1555,7 @@ vmbus_event_flags_proc(struct vmbus_softc *sc, volatile u_long *event_flags,
 			continue;
 
 		flags = atomic_swap_long(&event_flags[f], 0);
-		chid_base = f << VMBUS_EVTFLAG_SHIFT;
+		chid_base = f * VMBUS_EVTFLAG_LEN;
 
 		while ((chid_ofs = ffsl(flags)) != 0) {
 			struct vmbus_channel *chan;
@@ -1599,7 +1599,7 @@ vmbus_event_proc_compat(struct vmbus_softc *sc, int cpu)
 	eventf = VMBUS_PCPU_GET(sc, event_flags, cpu) + VMBUS_SINT_MESSAGE;
 	if (atomic_testandclear_long(&eventf->evt_flags[0], 0)) {
 		vmbus_event_flags_proc(sc, sc->vmbus_rx_evtflags,
-		    VMBUS_CHAN_MAX_COMPAT >> VMBUS_EVTFLAG_SHIFT);
+		    VMBUS_CHAN_MAX_COMPAT / VMBUS_EVTFLAG_LEN);
 	}
 }
 
@@ -1903,7 +1903,7 @@ vmbus_chan_msgproc_choffer(struct vmbus_softc *sc,
 	 * Setup event flag.
 	 */
 	chan->ch_evtflag =
-	    &sc->vmbus_tx_evtflags[chan->ch_id >> VMBUS_EVTFLAG_SHIFT];
+	    &sc->vmbus_tx_evtflags[chan->ch_id / VMBUS_EVTFLAG_LEN];
 	chan->ch_evtflag_mask = 1UL << (chan->ch_id & VMBUS_EVTFLAG_MASK);
 
 	/*
diff --git a/sys/dev/hyperv/vmbus/vmbus_reg.h b/sys/dev/hyperv/vmbus/vmbus_reg.h
index 4aa729475b5d..76cdca0ebeb2 100644
--- a/sys/dev/hyperv/vmbus/vmbus_reg.h
+++ b/sys/dev/hyperv/vmbus/vmbus_reg.h
@@ -60,16 +60,10 @@ CTASSERT(sizeof(struct vmbus_message) == VMBUS_MSG_SIZE);
  * Hyper-V SynIC event flags
  */
 
-#ifdef __LP64__
-#define VMBUS_EVTFLAGS_MAX	32
-#define VMBUS_EVTFLAG_SHIFT	6
-#else
-#define VMBUS_EVTFLAGS_MAX	64
-#define VMBUS_EVTFLAG_SHIFT	5
-#endif
-#define VMBUS_EVTFLAG_LEN	(1 << VMBUS_EVTFLAG_SHIFT)
+#define VMBUS_EVTFLAG_LEN	(sizeof(u_long) * 8)
 #define VMBUS_EVTFLAG_MASK	(VMBUS_EVTFLAG_LEN - 1)
 #define VMBUS_EVTFLAGS_SIZE	256
+#define VMBUS_EVTFLAGS_MAX	(VMBUS_EVTFLAGS_SIZE / sizeof(u_long))
 
 struct vmbus_evtflags {
 	u_long		evt_flags[VMBUS_EVTFLAGS_MAX];
diff --git a/sys/dev/ice/ice_features.h b/sys/dev/ice/ice_features.h
index 821abe4806ca..5b23757b1c98 100644
--- a/sys/dev/ice/ice_features.h
+++ b/sys/dev/ice/ice_features.h
@@ -91,7 +91,9 @@ enum feat_list {
 static inline void
 ice_disable_unsupported_features(ice_bitmap_t __unused *bitmap)
 {
+#ifndef PCI_IOV
 	ice_clear_bit(ICE_FEATURE_SRIOV, bitmap);
+#endif
 #ifndef DEV_NETMAP
 	ice_clear_bit(ICE_FEATURE_NETMAP, bitmap);
 #endif
diff --git a/sys/dev/ice/ice_iflib.h b/sys/dev/ice/ice_iflib.h
index 3a5dc201189a..e1d5307a9516 100644
--- a/sys/dev/ice/ice_iflib.h
+++ b/sys/dev/ice/ice_iflib.h
@@ -139,6 +139,9 @@ struct ice_irq_vector {
  * @tc: traffic class queue belongs to
  * @q_handle: qidx in tc; used in TXQ enable functions
  *
+ * ice_iov.c requires the following parameters (when PCI_IOV is defined):
+ * @itr_idx: ITR index to use for this queue
+ *
  * Other parameters may be iflib driver specific
  */
 struct ice_tx_queue {
@@ -153,6 +156,9 @@ struct ice_tx_queue {
 	u32			me;
 	u16			q_handle;
 	u8			tc;
+#ifdef PCI_IOV
+	u8			itr_idx;
+#endif
 
 	/* descriptor writeback status */
 	qidx_t			*tx_rsq;
@@ -175,6 +181,9 @@ struct ice_tx_queue {
  * @stats: queue statistics
  * @tc: traffic class queue belongs to
  *
+ * ice_iov.c requires the following parameters (when PCI_IOV is defined):
+ * @itr_idx: ITR index to use for this queue
+ *
  * Other parameters may be iflib driver specific
  */
 struct ice_rx_queue {
@@ -187,6 +196,9 @@ struct ice_rx_queue {
 	struct ice_irq_vector		*irqv;
 	u32				me;
 	u8				tc;
+#ifdef PCI_IOV
+	u8				itr_idx;
+#endif
 
 	struct if_irq			que_irq;
 };
@@ -332,6 +344,10 @@ struct ice_softc {
 	ice_declare_bitmap(feat_cap, ICE_FEATURE_COUNT);
 	ice_declare_bitmap(feat_en, ICE_FEATURE_COUNT);
 
+#ifdef PCI_IOV
+	struct ice_vf *vfs;
+	u16 num_vfs;
+#endif
 	struct ice_resmgr os_imgr;
 	/* For mirror interface */
 	struct ice_mirr_if *mirr_if;
diff --git a/sys/dev/ice/ice_iov.c b/sys/dev/ice/ice_iov.c
new file mode 100644
index 000000000000..c5a3e1060e44
--- /dev/null
+++ b/sys/dev/ice/ice_iov.c
@@ -0,0 +1,1856 @@
+/* SPDX-License-Identifier: BSD-3-Clause */
+/*  Copyright (c) 2025, Intel Corporation
+ *  All rights reserved.
+ *
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions are met:
+ *
+ *   1. Redistributions of source code must retain the above copyright notice,
+ *      this list of conditions and the following disclaimer.
+ *
+ *   2. Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in the
+ *      documentation and/or other materials provided with the distribution.
+ *
+ *   3. Neither the name of the Intel Corporation nor the names of its
+ *      contributors may be used to endorse or promote products derived from
+ *      this software without specific prior written permission.
+ *
+ *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ *  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ *  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ *  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ *  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ *  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ *  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ *  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ *  POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * @file ice_iov.c
+ * @brief Virtualization support functions
+ *
+ * Contains functions for enabling and managing PCIe virtual function devices,
+ * including enabling new VFs, and managing VFs over the virtchnl interface.
+ */
+
+#include "ice_iov.h"
+
+static struct ice_vf *ice_iov_get_vf(struct ice_softc *sc, int vf_num);
+static void ice_iov_ready_vf(struct ice_softc *sc, struct ice_vf *vf);
+static void ice_reset_vf(struct ice_softc *sc, struct ice_vf *vf,
+			 bool trigger_vflr);
+static void ice_iov_setup_intr_mapping(struct ice_softc *sc, struct ice_vf *vf);
+
+static void ice_vc_version_msg(struct ice_softc *sc, struct ice_vf *vf,
+			       u8 *msg_buf);
+static void ice_vc_get_vf_res_msg(struct ice_softc *sc, struct ice_vf *vf,
+				  u8 *msg_buf);
+static void ice_vc_add_eth_addr_msg(struct ice_softc *sc, struct ice_vf *vf,
+				    u8 *msg_buf);
+static void ice_vc_del_eth_addr_msg(struct ice_softc *sc, struct ice_vf *vf,
+				    u8 *msg_buf);
+static bool ice_vc_isvalid_ring_len(u16 ring_len);
+static void ice_vc_cfg_vsi_qs_msg(struct ice_softc *sc, struct ice_vf *vf,
+				  u8 *msg_buf);
+static void ice_vc_cfg_rss_key_msg(struct ice_softc *sc, struct ice_vf *vf,
+				   u8 *msg_buf);
+static void ice_vc_set_rss_hena_msg(struct ice_softc *sc, struct ice_vf *vf,
+				    u8 *msg_buf);
+static void ice_vc_enable_queues_msg(struct ice_softc *sc, struct ice_vf *vf,
+				     u8 *msg_buf);
+static void ice_vc_notify_vf_link_state(struct ice_softc *sc, struct ice_vf *vf);
+static void ice_vc_disable_queues_msg(struct ice_softc *sc, struct ice_vf *vf,
+				      u8 *msg_buf);
+static void ice_vc_cfg_irq_map_msg(struct ice_softc *sc, struct ice_vf *vf,
+				   u8 *msg_buf);
+static void ice_vc_get_stats_msg(struct ice_softc *sc, struct ice_vf *vf,
+				 u8 *msg_buf);
+static void ice_eth_stats_to_virtchnl_eth_stats(struct ice_eth_stats *istats,
+     struct virtchnl_eth_stats *vstats);
+static void ice_vc_cfg_rss_lut_msg(struct ice_softc *sc, struct ice_vf *vf,
+				   u8 *msg_buf);
+static void ice_vc_cfg_promisc_mode_msg(struct ice_softc *sc, struct ice_vf *vf,
+				        u8 *msg_buf);
+static void ice_vc_add_vlan_msg(struct ice_softc *sc, struct ice_vf *vf,
+				u8 *msg_buf);
+static void ice_vc_del_vlan_msg(struct ice_softc *sc, struct ice_vf *vf,
+				u8 *msg_buf);
+static enum virtchnl_status_code ice_iov_err_to_virt_err(int ice_err);
+static int ice_vf_validate_mac(struct ice_vf *vf, const uint8_t *addr);
+
+/**
+ * ice_iov_attach - Register the PF with the PCI SR-IOV framework
+ * @sc: device softc structure
+ *
+ * Builds the PF and VF configuration schemas and hands them to
+ * pci_iov_attach() at the end of driver attach. ICE_FEATURE_SRIOV in
+ * sc->feat_en is set on success and cleared on failure.
+ *
+ * @pre Must be called from sleepable context (calls malloc() w/ M_WAITOK)
+ *
+ * @returns 0 on success, otherwise the pci_iov_attach() error, e.g.
+ * ENOMEM (no memory for the schemas or iov device), ENXIO (device is not
+ * PCI-E or its SR-IOV version is unsupported) or ENOENT (no SR-IOV cap)
+ */
+int
+ice_iov_attach(struct ice_softc *sc)
+{
+	nvlist_t *pf_cfg, *vf_cfg;
+	device_t dev = sc->dev;
+	int err;
+
+	pf_cfg = pci_iov_schema_alloc_node();
+	vf_cfg = pci_iov_schema_alloc_node();
+
+	/* Per-VF knobs; everything except "mac-addr" carries a default */
+	pci_iov_schema_add_unicast_mac(vf_cfg, "mac-addr", 0, NULL);
+	pci_iov_schema_add_bool(vf_cfg, "mac-anti-spoof",
+	    IOV_SCHEMA_HASDEFAULT, TRUE);
+	pci_iov_schema_add_bool(vf_cfg, "allow-set-mac",
+	    IOV_SCHEMA_HASDEFAULT, FALSE);
+	pci_iov_schema_add_bool(vf_cfg, "allow-promisc",
+	    IOV_SCHEMA_HASDEFAULT, FALSE);
+	pci_iov_schema_add_uint16(vf_cfg, "num-queues",
+	    IOV_SCHEMA_HASDEFAULT, ICE_DEFAULT_VF_QUEUES);
+	pci_iov_schema_add_uint16(vf_cfg, "mirror-src-vsi",
+	    IOV_SCHEMA_HASDEFAULT, ICE_INVALID_MIRROR_VSI);
+	pci_iov_schema_add_uint16(vf_cfg, "max-vlan-allowed",
+	    IOV_SCHEMA_HASDEFAULT, ICE_DEFAULT_VF_VLAN_LIMIT);
+	pci_iov_schema_add_uint16(vf_cfg, "max-mac-filters",
+	    IOV_SCHEMA_HASDEFAULT, ICE_DEFAULT_VF_FILTER_LIMIT);
+
+	err = pci_iov_attach(dev, pf_cfg, vf_cfg);
+	if (err == 0) {
+		ice_set_bit(ICE_FEATURE_SRIOV, sc->feat_en);
+		return (0);
+	}
+	device_printf(dev, "pci_iov_attach failed (error=%s)\n",
+	    ice_err_str(err));
+	ice_clear_bit(ICE_FEATURE_SRIOV, sc->feat_en);
+	return (err);
+}
+
+/**
+ * ice_iov_detach - Unregister the PF from the PCI SR-IOV framework
+ * @sc: device softc structure
+ *
+ * Called at the start of driver detach; pci_iov_detach() failures are logged.
+ *
+ * @returns 0 if successful or IOV support hasn't been setup, or
+ * - EBUSY if VFs still exist
+ */
+int
+ice_iov_detach(struct ice_softc *sc)
+{
+	int err;
+
+	err = pci_iov_detach(sc->dev);
+	if (err == 0)
+		return (0);
+
+	/* Report why it failed; the caller decides what to do about it */
+	device_printf(sc->dev, "pci_iov_detach failed (error=%s)\n",
+	    ice_err_str(err));
+
+	return (err);
+}
+
+/**
+ * ice_iov_init - Allocate VF tracking state before the first VF is created
+ * @sc: device softc structure
+ * @num_vfs: number of VFs to setup resources for
+ * @params: configuration parameters for the PF (currently unused)
+ *
+ * @returns 0 if successful, or ENOMEM if the VF array cannot be allocated
+ */
+int
+ice_iov_init(struct ice_softc *sc, uint16_t num_vfs, const nvlist_t *params __unused)
+{
+	uint16_t idx;
+
+	/* One zeroed tracking entry per requested VF */
+	sc->vfs = malloc(sizeof(*sc->vfs) * num_vfs, M_ICE, M_NOWAIT | M_ZERO);
+	if (sc->vfs == NULL)
+		return (ENOMEM);
+
+	/* For now each entry only needs to know its own index */
+	for (idx = 0; idx < num_vfs; idx++)
+		sc->vfs[idx].vf_num = idx;
+
+	/* Record how many entries the array holds */
+	sc->num_vfs = num_vfs;
+	return (0);
+}
+
+/**
+ * ice_iov_get_vf - Get pointer to VF at given index
+ * @sc: device softc structure
+ * @vf_num: Index of VF to retrieve; must satisfy 0 <= vf_num < sc->num_vfs
+ *
+ * @remark asserts (MPASS) that vf_num lies within the allocated VF array;
+ * vf_num is a signed int, so the lower bound is checked as well
+ *
+ * @returns a pointer to the VF structure at the given index
+ */
+static struct ice_vf *
+ice_iov_get_vf(struct ice_softc *sc, int vf_num)
+{
+	MPASS(vf_num >= 0 && vf_num < sc->num_vfs);
+
+	return (&sc->vfs[vf_num]);
+}
+
+/**
+ * ice_iov_add_vf - Called by the OS for each VF to create
+ * @sc: device softc structure
+ * @vfnum: index of VF to configure
+ * @params: per-VF configuration (keys match the ice_iov_attach() VF schema)
+ *
+ * @returns 0 if successful, or an error code after running the cleanup labels
+ */
+int
+ice_iov_add_vf(struct ice_softc *sc, uint16_t vfnum, const nvlist_t *params)
+{
+	struct ice_tx_queue *txq;
+	struct ice_rx_queue *rxq;
+	device_t dev = sc->dev;
+	struct ice_vsi *vsi;
+	struct ice_vf *vf;
+	int vf_num_queues;
+	const void *mac;
+	size_t size;
+	int error;
+	int i;
+
+	vf = ice_iov_get_vf(sc, vfnum);
+	vf->vf_flags = VF_FLAG_ENABLED;	/* NOTE(review): stays set on error paths */
+
+	/* This VF needs at least one VSI */
+	vsi = ice_alloc_vsi(sc, ICE_VSI_VF);
+	if (vsi == NULL)
+		return (ENOMEM);
+	vf->vsi = vsi;
+	vsi->vf_num = vfnum;
+
+	vf_num_queues = nvlist_get_number(params, "num-queues");
+	/* Report an out-of-range value, then clamp it into range */
+	if (vf_num_queues < 1 || vf_num_queues > ICE_MAX_SCATTERED_QUEUES)
+		device_printf(dev, "Invalid num-queues (%d) for VF %d\n",
+		    vf_num_queues, vf->vf_num);
+	if (vf_num_queues < 1) {
+		device_printf(dev, "Setting VF %d num-queues to 1\n", vf->vf_num);
+		vf_num_queues = 1;
+	} else if (vf_num_queues > ICE_MAX_SCATTERED_QUEUES) {
+		device_printf(dev, "Setting VF %d num-queues to %d\n",
+		    vf->vf_num, ICE_MAX_SCATTERED_QUEUES);
+		vf_num_queues = ICE_MAX_SCATTERED_QUEUES;
+	}
+	vsi->qmap_type = ICE_RESMGR_ALLOC_SCATTERED;
+
+	/* Reserve VF queue allocation from PF queues (same count for Tx and Rx) */
+	ice_alloc_vsi_qmap(vsi, vf_num_queues, vf_num_queues);
+	vsi->num_tx_queues = vsi->num_rx_queues = vf_num_queues;
+
+	/* Assign Tx queues from PF space */
+	error = ice_resmgr_assign_scattered(&sc->tx_qmgr, vsi->tx_qmap,
+					     vsi->num_tx_queues);
+	if (error) {
+		device_printf(sc->dev, "Unable to assign VF Tx queues: %s\n",
+			      ice_err_str(error));
+		goto release_vsi;
+	}
+
+	/* Assign Rx queues from PF space */
+	error = ice_resmgr_assign_scattered(&sc->rx_qmgr, vsi->rx_qmap,
+					     vsi->num_rx_queues);
+	if (error) {
+		device_printf(sc->dev, "Unable to assign VF Rx queues: %s\n",
+			      ice_err_str(error));
+		goto release_vsi;
+	}
+
+	vsi->max_frame_size = ICE_MAX_FRAME_SIZE;
+
+	/* Allocate Tx queue structure memory */
+	vsi->tx_queues = (struct ice_tx_queue *)
+	    malloc(sizeof(struct ice_tx_queue) * vsi->num_tx_queues, M_ICE,
+		   M_NOWAIT | M_ZERO);
+	if (!vsi->tx_queues) {
+		device_printf(sc->dev, "VF-%d: Unable to allocate Tx queue memory\n",
+			      vfnum);
+		error = ENOMEM;
+		goto release_vsi;
+	}
+	for (i = 0, txq = vsi->tx_queues; i < vsi->num_tx_queues; i++, txq++) {
+		txq->me = i;
+		txq->vsi = vsi;
+	}
+
+	/* Allocate Rx queue structure memory */
+	vsi->rx_queues = (struct ice_rx_queue *)
+	    malloc(sizeof(struct ice_rx_queue) * vsi->num_rx_queues, M_ICE,
+		   M_NOWAIT | M_ZERO);
+	if (!vsi->rx_queues) {
+		device_printf(sc->dev, "VF-%d: Unable to allocate Rx queue memory\n",
+			      vfnum);
+		error = ENOMEM;
+		goto free_txqs;
+	}
+	for (i = 0, rxq = vsi->rx_queues; i < vsi->num_rx_queues; i++, rxq++) {
+		rxq->me = i;
+		rxq->vsi = vsi;
+	}
+
+	/* IRQ vector trackers: one per queue pair, plus one more */
+	vf->num_irq_vectors = vf_num_queues + 1;
+	vf->tx_irqvs = (struct ice_irq_vector *)
+	    malloc(sizeof(struct ice_irq_vector) * (vf->num_irq_vectors),
+		   M_ICE, M_NOWAIT);	/* NOTE(review): not zeroed */
+	if (!vf->tx_irqvs) {
+		device_printf(sc->dev,
+			      "Unable to allocate TX irqv memory for VF-%d's %d vectors\n",
+			      vfnum, vf->num_irq_vectors);
+		error = ENOMEM;
+		goto free_rxqs;
+	}
+	vf->rx_irqvs = (struct ice_irq_vector *)
+	    malloc(sizeof(struct ice_irq_vector) * (vf->num_irq_vectors),
+		   M_ICE, M_NOWAIT);	/* NOTE(review): not zeroed */
+	if (!vf->rx_irqvs) {
+		device_printf(sc->dev,
+			      "Unable to allocate RX irqv memory for VF-%d's %d vectors\n",
+			      vfnum, vf->num_irq_vectors);
+		error = ENOMEM;
+		goto free_txirqvs;
+	}
+
+	/* Assign VF interrupts from PF space (a contiguous range) */
+	if (!(vf->vf_imap =
+	      (u16 *)malloc(sizeof(u16) * vf->num_irq_vectors,
+	      M_ICE, M_NOWAIT))) {
+		device_printf(dev, "Unable to allocate VF-%d imap memory\n", vfnum);
+		error = ENOMEM;
+		goto free_rxirqvs;
+	}
+	error = ice_resmgr_assign_contiguous(&sc->dev_imgr, vf->vf_imap, vf->num_irq_vectors);
+	if (error) {
+		device_printf(dev, "Unable to assign VF-%d interrupt mapping: %s\n",
+			      vfnum, ice_err_str(error));
+		goto free_imap;
+	}
+
+	if (nvlist_exists_binary(params, "mac-addr")) {
+		mac = nvlist_get_binary(params, "mac-addr", &size);
+		memcpy(vf->mac, mac, ETHER_ADDR_LEN);	/* NOTE(review): size unchecked */
+
+		if (nvlist_get_bool(params, "allow-set-mac"))
+			vf->vf_flags |= VF_FLAG_SET_MAC_CAP;
+	} else
+		/*
+		 * If the administrator has not specified a MAC address then
+		 * we must allow the VF to choose one.
+		 */
+		vf->vf_flags |= VF_FLAG_SET_MAC_CAP;
+
+	if (nvlist_get_bool(params, "mac-anti-spoof"))
+		vf->vf_flags |= VF_FLAG_MAC_ANTI_SPOOF;
+
+	if (nvlist_get_bool(params, "allow-promisc"))
+		vf->vf_flags |= VF_FLAG_PROMISC_CAP;
+
+	vsi->mirror_src_vsi = nvlist_get_number(params, "mirror-src-vsi");
+
+	vf->vlan_limit = nvlist_get_number(params, "max-vlan-allowed");
+	vf->mac_filter_limit = nvlist_get_number(params, "max-mac-filters");
+
+	vf->vf_flags |= VF_FLAG_VLAN_CAP;	/* set for every VF, no parameter */
+
+	/* Create and setup VSI in HW */
+	error = ice_initialize_vsi(vsi);
+	if (error) {
+		device_printf(sc->dev, "Unable to initialize VF %d VSI: %s\n",
+			      vfnum, ice_err_str(error));
+		goto release_imap;
+	}
+
+	/* Add the broadcast address */
+	error = ice_add_vsi_mac_filter(vsi, broadcastaddr);
+	if (error) {
+		device_printf(sc->dev, "Unable to add broadcast filter VF %d VSI: %s\n",
+			      vfnum, ice_err_str(error));
+		goto release_imap;
+	}
+
+	ice_iov_ready_vf(sc, vf);
+
+	return (0);
+
+release_imap:	/* interrupt range was assigned: hand it back first */
+	ice_resmgr_release_map(&sc->dev_imgr, vf->vf_imap,
+			       vf->num_irq_vectors);
+free_imap:
+	free(vf->vf_imap, M_ICE);
+	vf->vf_imap = NULL;
+free_rxirqvs:
+	free(vf->rx_irqvs, M_ICE);
+	vf->rx_irqvs = NULL;
+free_txirqvs:
+	free(vf->tx_irqvs, M_ICE);
+	vf->tx_irqvs = NULL;
+free_rxqs:
+	free(vsi->rx_queues, M_ICE);
+	vsi->rx_queues = NULL;
+free_txqs:
+	free(vsi->tx_queues, M_ICE);
+	vsi->tx_queues = NULL;
+release_vsi:	/* earliest failure point: only the VSI exists */
+	ice_release_vsi(vsi);
+	vf->vsi = NULL;
+	return (error);
+}
+
+/**
+ * ice_iov_uninit - Called by the OS when VFs are destroyed
+ * @sc: device softc structure
+ *
+ * Walks every entry of sc->vfs and frees the per-VF interrupt map and the
+ * Tx/Rx IRQ vector trackers. For entries that own a VSI it also frees the
+ * Tx/Rx queue arrays and releases the VSI.
+ *
+ * Finally the tracking array itself is freed and sc->num_vfs is reset to 0.
+ *
+ * Every pointer is cleared after being freed, and free(9) accepts NULL, so
+ * entries that were never (or only partly) set up are handled without any
+ * per-pointer checks.
+ */
+void
+ice_iov_uninit(struct ice_softc *sc)
+{
+	struct ice_vsi *vsi;
+	struct ice_vf *vf;
+	int idx;
+
+	/* Release per-VF resources */
+	for (idx = 0; idx < sc->num_vfs; idx++) {
+		vf = &sc->vfs[idx];
+		vsi = vf->vsi;
+
+		/* VF interrupt reservation */
+		free(vf->vf_imap, M_ICE);
+		vf->vf_imap = NULL;
+
+		/* Queue interrupt mapping trackers */
+		free(vf->tx_irqvs, M_ICE);
+		vf->tx_irqvs = NULL;
+		free(vf->rx_irqvs, M_ICE);
+		vf->rx_irqvs = NULL;
+
+		/* Nothing more to do for a VF that has no VSI */
+		if (vsi == NULL)
+			continue;
+
+		/* VSI queue arrays */
+		free(vsi->tx_queues, M_ICE);
+		vsi->tx_queues = NULL;
+		free(vsi->rx_queues, M_ICE);
+		vsi->rx_queues = NULL;
+
+		ice_release_vsi(vsi);
+		vf->vsi = NULL;
+	}
+
+	/* Release memory used for VF tracking */
+	free(sc->vfs, M_ICE);
+	sc->vfs = NULL;
+	sc->num_vfs = 0;
+}
+
+/**
+ * ice_iov_handle_vflr - Process VFLR event
+ * @sc: device softc structure
+ *
+ * Checks each VF's bit in GLGEN_VFLRSTAT and calls ice_reset_vf() (without
+ * triggering a new reset) for every VF whose bit is set.
+ */
+void
+ice_iov_handle_vflr(struct ice_softc *sc)
+{
+	struct ice_hw *hw = &sc->hw;
+	struct ice_vf *vf;
+	u32 reg, reg_idx, bit_idx;
+
+	for (int i = 0; i < sc->num_vfs; i++) {
+		vf = &sc->vfs[i];
+
+		/* vf_base_id + vf_num selects the bit; 32 bits per register */
+		reg_idx = (hw->func_caps.vf_base_id + vf->vf_num) / 32;
+		bit_idx = (hw->func_caps.vf_base_id + vf->vf_num) % 32;
+		reg = rd32(hw, GLGEN_VFLRSTAT(reg_idx));
+		if (reg & BIT(bit_idx))
+			ice_reset_vf(sc, vf, false);
+	}
+}
+
+/**
+ * ice_iov_ready_vf - Setup VF interrupts and mark it as ready
+ * @sc: device softc structure
+ * @vf: driver's VF structure for the VF to update
+ *
+ * Clears VF reset triggering bit, sets up the PF<->VF interrupt
+ * mapping and marks the VF as active in the HW so that the VF
+ * driver can use it. Called at the end of both VF creation and VF reset.
+ */
+static void
+ice_iov_ready_vf(struct ice_softc *sc, struct ice_vf *vf)
+{
+	struct ice_hw *hw = &sc->hw;
+	u32 reg;
+
+	/* Clear the reset trigger bit (VFSWR), leaving other bits untouched */
+	reg = rd32(hw, VPGEN_VFRTRIG(vf->vf_num));
+	reg &= ~VPGEN_VFRTRIG_VFSWR_M;
+	wr32(hw, VPGEN_VFRTRIG(vf->vf_num), reg);
+
+	/* Setup VF interrupt allocation and mapping */
+	ice_iov_setup_intr_mapping(sc, vf);
+
+	/* Indicate to the VF that reset is done */
+	wr32(hw, VFGEN_RSTAT(vf->vf_num), VIRTCHNL_VFR_VFACTIVE);
+
+	ice_flush(hw);	/* presumably pushes the writes above out - TODO confirm */
+}
+
+/**
+ * ice_reset_vf - Perform a hardware reset (VFR) on a VF
+ * @sc: device softc structure
+ * @vf: driver's VF structure for VF to be reset
+ * @trigger_vflr: trigger a reset or only handle already executed reset
+ *
+ * Performs a VFR for the given VF. Polls (with DELAY(), up to a fixed
+ * number of tries each) for pending PCI transactions to drain and for the
+ * HW to report the reset done; a timeout is only logged, not returned.
+ *
+ * @remark Finishes by calling ice_iov_ready_vf(), which sets up the PF<->VF
+ * interrupt mapping and marks the VF active again.
+ * NOTE(review): dereferences vf->vsi unchecked - confirm it is never NULL.
+ */
+static void
+ice_reset_vf(struct ice_softc *sc, struct ice_vf *vf, bool trigger_vflr)
+{
+	u16 global_vf_num, reg_idx, bit_idx;
+	struct ice_hw *hw = &sc->hw;
+	int status;
+	u32 reg;
+	int i;
+
+	global_vf_num = vf->vf_num + hw->func_caps.vf_base_id;
+
+	if (trigger_vflr) {
+		reg = rd32(hw, VPGEN_VFRTRIG(vf->vf_num));
+		reg |= VPGEN_VFRTRIG_VFSWR_M;
+		wr32(hw, VPGEN_VFRTRIG(vf->vf_num), reg);
+	}
+
+	/* clear the VFLR bit for the VF in a GLGEN_VFLRSTAT register */
+	reg_idx = (global_vf_num) / 32;
+	bit_idx = (global_vf_num) % 32;
+	wr32(hw, GLGEN_VFLRSTAT(reg_idx), BIT(bit_idx));
+	ice_flush(hw);
+
+	/* Wait until there are no pending PCI transactions */
+	wr32(hw, PF_PCI_CIAA,
+	     ICE_PCIE_DEV_STATUS | (global_vf_num << PF_PCI_CIAA_VF_NUM_S));
+
+	for (i = 0; i < ICE_PCI_CIAD_WAIT_COUNT; i++) {
+		reg = rd32(hw, PF_PCI_CIAD);
+		if (!(reg & PCIEM_STA_TRANSACTION_PND))
+			break;
+
+		DELAY(ICE_PCI_CIAD_WAIT_DELAY_US);
+	}
+	if (i == ICE_PCI_CIAD_WAIT_COUNT)	/* never saw the pending bit clear */
+		device_printf(sc->dev,
+			"VF-%d PCI transactions stuck\n", vf->vf_num);
+
+	/* Disable TX queues, which is required during VF reset */
+	status = ice_dis_vsi_txq(hw->port_info, vf->vsi->idx, 0, 0, NULL, NULL,
+			NULL, ICE_VF_RESET, vf->vf_num, NULL);
+	if (status)	/* logged only; the reset sequence continues */
+		device_printf(sc->dev,
+			      "%s: Failed to disable LAN Tx queues: err %s aq_err %s\n",
+			      __func__, ice_status_str(status),
+			      ice_aq_str(hw->adminq.sq_last_status));
+
+	/* Then check for the VF reset to finish in HW */
+	for (i = 0; i < ICE_VPGEN_VFRSTAT_WAIT_COUNT; i++) {
+		reg = rd32(hw, VPGEN_VFRSTAT(vf->vf_num));
+		if ((reg & VPGEN_VFRSTAT_VFRD_M))
+			break;
+
+		DELAY(ICE_VPGEN_VFRSTAT_WAIT_DELAY_US);
+	}
+	if (i == ICE_VPGEN_VFRSTAT_WAIT_COUNT)	/* VFRD bit never came up */
+		device_printf(sc->dev,
+			"VF-%d Reset is stuck\n", vf->vf_num);
+
+	ice_iov_ready_vf(sc, vf);	/* runs even if a wait above timed out */
+}
+
+/**
+ * ice_vc_get_vf_res_msg - Handle VIRTCHNL_OP_GET_VF_RESOURCES msg from VF
+ * @sc: device private structure
+ * @vf: VF tracking structure
+ * @msg_buf: raw message buffer from the VF; may be NULL (base offloads only)
+ *
+ * Reads the capability word the VF sent (if any) and replies with the
+ * resources the PF has allocated for the VF: one VSI, its queue pairs, one
+ * vector per queue pair plus one, RSS sizes and the assigned MAC (if set).
+ *
+ * @remark This always replies to the VF with a success status; it does not
+ * fail. It's up to the VF driver to reject or complain about the PF's response.
+ */
+static void
+ice_vc_get_vf_res_msg(struct ice_softc *sc, struct ice_vf *vf, u8 *msg_buf)
+{
+	struct ice_hw *hw = &sc->hw;
+	struct virtchnl_vf_resource *vf_res;
+	struct virtchnl_vsi_resource *vsi_res;
+	u16 vf_res_len;
+	u32 vf_caps;
+
+	/* XXX: Only support one VSI per VF, so this size doesn't need adjusting */
+	vf_res_len = sizeof(struct virtchnl_vf_resource);
+	vf_res = (struct virtchnl_vf_resource *)malloc(vf_res_len, M_ICE,
+	    M_WAITOK | M_ZERO);
+
+	vf_res->num_vsis = 1;
+	vf_res->num_queue_pairs = vf->vsi->num_tx_queues;
+	vf_res->max_vectors = vf_res->num_queue_pairs + 1;
+
+	vf_res->rss_key_size = ICE_GET_SET_RSS_KEY_EXTEND_KEY_SIZE;
+	vf_res->rss_lut_size = ICE_VSIQF_HLUT_ARRAY_SIZE;
+	vf_res->max_mtu = 0;
+
+	vf_res->vf_cap_flags = VF_BASE_MODE_OFFLOADS;
+	if (msg_buf != NULL) {
+		/* Copy the word out: msg_buf is a byte buffer, not a u32 */
+		memcpy(&vf_caps, msg_buf, sizeof(vf_caps));
+		if (vf_caps & VIRTCHNL_VF_CAP_ADV_LINK_SPEED)
+			vf_res->vf_cap_flags |= VIRTCHNL_VF_CAP_ADV_LINK_SPEED;
+
+		if (vf_caps & VIRTCHNL_VF_OFFLOAD_WB_ON_ITR)
+			vf_res->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_WB_ON_ITR;
+	}
+
+	vsi_res = &vf_res->vsi_res[0];
+	vsi_res->vsi_id = vf->vsi->idx;
+	vsi_res->num_queue_pairs = vf->vsi->num_tx_queues;
+	vsi_res->vsi_type = VIRTCHNL_VSI_SRIOV;
+	vsi_res->qset_handle = 0;
+	if (!ETHER_IS_ZERO(vf->mac))
+		memcpy(vsi_res->default_mac_addr, vf->mac, ETHER_ADDR_LEN);
+
+	ice_aq_send_msg_to_vf(hw, vf->vf_num, VIRTCHNL_OP_GET_VF_RESOURCES,
+	    VIRTCHNL_STATUS_SUCCESS, (u8 *)vf_res, vf_res_len, NULL);
+
+	free(vf_res, M_ICE);
+}
+
+/**
+ * ice_vc_version_msg - Handle VIRTCHNL_OP_VERSION msg from VF
+ * @sc: device private structure
+ * @vf: VF tracking structure
+ * @msg_buf: raw message buffer from the VF
+ *
+ * Records in vf->version the virtchnl version the PF will use with this VF
+ * (1.0 for a 1.0 VF, otherwise the PF's own) and sends it back to the VF.
+ *
+ * @remark This always replies to the VF with a success status; it does not
+ * fail.
+ */
+static void
+ice_vc_version_msg(struct ice_softc *sc, struct ice_vf *vf, u8 *msg_buf)
+{
+	struct virtchnl_version_info *req;
+
+	req = (struct virtchnl_version_info *)msg_buf;
+
+	if (!VF_IS_V10(req)) {
+		/* Anything other than 1.0 is answered with the PF's version */
+		vf->version.major = VIRTCHNL_VERSION_MAJOR;
+		vf->version.minor = VIRTCHNL_VERSION_MINOR;
+
+		/* Log a mismatch; the reply is sent regardless */
+		if (req->major != VIRTCHNL_VERSION_MAJOR ||
+		    req->minor != VIRTCHNL_VERSION_MINOR)
+			device_printf(sc->dev,
+			    "%s: VF-%d requested version (%d.%d) differs from PF version (%d.%d)\n",
+			    __func__, vf->vf_num,
+			    req->major, req->minor,
+			    VIRTCHNL_VERSION_MAJOR, VIRTCHNL_VERSION_MINOR);
+	} else {
+		/* VFs running the 1.0 API expect to get 1.0 back */
+		vf->version.major = 1;
+		vf->version.minor = VIRTCHNL_VERSION_MINOR_NO_VF_CAPS;
+	}
+
+	ice_aq_send_msg_to_vf(&sc->hw, vf->vf_num, VIRTCHNL_OP_VERSION,
+	    VIRTCHNL_STATUS_SUCCESS, (u8 *)&vf->version, sizeof(vf->version),
+	    NULL);
+}
+
+/**
+ * ice_vf_validate_mac - Validate MAC address before adding it
+ * @vf: VF tracking structure
+ * @addr: MAC address to validate
+ *
+ * Used while handling VIRTCHNL_OP_ADD_ETH_ADDR to decide whether the VF may
+ * install a filter for this address.
+ *
+ * Returns 0 if the address is acceptable, EINVAL if it is all-zero or
+ * broadcast, EPERM if the VF lacks permission to use it
+ */
+static int
+ice_vf_validate_mac(struct ice_vf *vf, const uint8_t *addr)
+{
+
+	/* The all-zero and broadcast addresses are never accepted */
+	if (ETHER_IS_ZERO(addr) || ETHER_IS_BROADCAST(addr))
+		return (EINVAL);
+
+	/* A VF allowed to set its own MAC may add any remaining address */
+	if ((vf->vf_flags & VF_FLAG_SET_MAC_CAP) != 0)
+		return (0);
+
+	/* Otherwise only multicast addresses and the assigned MAC pass */
+	if (ETHER_IS_MULTICAST(addr) || !bcmp(addr, vf->mac, ETHER_ADDR_LEN))
+		return (0);
+
+	return (EPERM);
+}
+
+/**
+ * ice_vc_add_eth_addr_msg - Handle VIRTCHNL_OP_ADD_ETH_ADDR msg from VF
+ * @sc: device private structure
+ * @vf: VF tracking structure
+ * @msg_buf: raw message buffer from the VF
+ *
+ * Receives a list of MAC addresses from the VF and adds those addresses
+ * to the VSI's filter list.
+ *
+ * A request with more elements than the VF's remaining filter budget is
+ * answered with VIRTCHNL_STATUS_ERR_NO_MEMORY and nothing is added.
+ * Otherwise every address is tried; one that fails validation or cannot be
+ * added is logged and skipped, and the reply becomes
+ * VIRTCHNL_STATUS_ERR_PARAM even though other addresses may have been added.
+ */
+static void
+ice_vc_add_eth_addr_msg(struct ice_softc *sc, struct ice_vf *vf, u8 *msg_buf)
+{
+	enum virtchnl_status_code reply = VIRTCHNL_STATUS_SUCCESS;
+	struct virtchnl_ether_addr_list *req;
+	u16 counted = 0;
+	u8 *mac;
+	int err;
+
+	req = (struct virtchnl_ether_addr_list *)msg_buf;
+
+	/* Refuse the whole request if it would exceed the filter budget */
+	if (req->num_elements > (vf->mac_filter_limit - vf->mac_filter_cnt)) {
+		reply = VIRTCHNL_STATUS_ERR_NO_MEMORY;
+		goto respond;
+	}
+
+	for (int idx = 0; idx < req->num_elements; idx++) {
+		mac = req->list[idx].addr;
+
+		/*
+		 * The type flag is currently ignored; every MAC address is
+		 * treated as the LEGACY type
+		 */
+		err = ice_vf_validate_mac(vf, mac);
+		if (err == EPERM) {
+			device_printf(sc->dev,
+			    "%s: VF-%d: Not permitted to add MAC addr for VSI %d\n",
+			    __func__, vf->vf_num, vf->vsi->idx);
+			reply = VIRTCHNL_STATUS_ERR_PARAM;
+		} else if (err != 0) {
+			device_printf(sc->dev,
+			    "%s: VF-%d: Did not add invalid MAC addr for VSI %d\n",
+			    __func__, vf->vf_num, vf->vsi->idx);
+			reply = VIRTCHNL_STATUS_ERR_PARAM;
+		} else if (ice_add_vsi_mac_filter(vf->vsi, mac) != 0) {
+			device_printf(sc->dev,
+			    "%s: VF-%d: Error adding MAC addr for VSI %d\n",
+			    __func__, vf->vf_num, vf->vsi->idx);
+			reply = VIRTCHNL_STATUS_ERR_PARAM;
+		} else if (memcmp(mac, vf->mac, ETHER_ADDR_LEN) != 0) {
+			/* Don't count VF's MAC against its MAC filter limit */
+			counted++;
+		}
+	}
+
+	vf->mac_filter_cnt += counted;
+
+respond:
+	ice_aq_send_msg_to_vf(&sc->hw, vf->vf_num, VIRTCHNL_OP_ADD_ETH_ADDR,
+	    reply, NULL, 0, NULL);
+}
+
+/**
+ * ice_vc_del_eth_addr_msg - Handle VIRTCHNL_OP_DEL_ETH_ADDR msg from VF
+ * @sc: device private structure
+ * @vf: VF tracking structure
+ * @msg_buf: raw message buffer from the VF
+ *
+ * Removes each MAC address listed by the VF from the VF VSI's filter list,
+ * lowers the VF's filter count accordingly and replies with the status.
+ */
+static void
+ice_vc_del_eth_addr_msg(struct ice_softc *sc, struct ice_vf *vf, u8 *msg_buf)
+{
+	struct virtchnl_ether_addr_list *macs =
+	    (struct virtchnl_ether_addr_list *)msg_buf;
+	enum virtchnl_status_code reply = VIRTCHNL_STATUS_SUCCESS;
+	u16 removed = 0;
+
+	for (int idx = 0; idx < macs->num_elements; idx++) {
+		u8 *mac = macs->list[idx].addr;
+
+		if (ice_remove_vsi_mac_filter(vf->vsi, mac) != 0) {
+			device_printf(sc->dev,
+			    "%s: VF-%d: Error removing MAC addr for VSI %d\n",
+			    __func__, vf->vf_num, vf->vsi->idx);
+			reply = VIRTCHNL_STATUS_ERR_PARAM;
+			continue;
+		}
+
+		/* Don't count VF's MAC against its MAC filter limit */
+		if (memcmp(mac, vf->mac, ETHER_ADDR_LEN) != 0)
+			removed++;
+	}
+
+	/* Clamp the tracked count at zero instead of going below it */
+	if (removed < vf->mac_filter_cnt)
+		vf->mac_filter_cnt -= removed;
+	else
+		vf->mac_filter_cnt = 0;
+
+	ice_aq_send_msg_to_vf(&sc->hw, vf->vf_num, VIRTCHNL_OP_DEL_ETH_ADDR,
+	    reply, NULL, 0, NULL);
+}
+
+/**
+ * ice_vc_add_vlan_msg - Handle VIRTCHNL_OP_ADD_VLAN msg from VF
+ * @sc: PF's softc structure
+ * @vf: VF tracking structure
+ * @msg_buf: message buffer from VF
+ *
+ * Adds the VLANs in msg_buf to the VF's VLAN filter list.
+ *
+ * The request is rejected with VIRTCHNL_STATUS_ERR_PARAM if it names a VSI
+ * other than the VF's, and with VIRTCHNL_STATUS_ERR_NO_MEMORY if it holds
+ * more VLANs than remain under vf->vlan_limit. The VF's VLAN count is only
+ * updated when the hardware filters were added successfully.
+ */
+static void
+ice_vc_add_vlan_msg(struct ice_softc *sc, struct ice_vf *vf, u8 *msg_buf)
+{
+	struct virtchnl_vlan_filter_list *req =
+	    (struct virtchnl_vlan_filter_list *)msg_buf;
+	enum virtchnl_status_code reply = VIRTCHNL_STATUS_SUCCESS;
+	struct ice_vsi *vf_vsi = vf->vsi;
+	struct ice_hw *hw = &sc->hw;
+	int err;
+
+	if (req->vsi_id != vf_vsi->idx) {
+		/* The request must name the VF's own VSI */
+		device_printf(sc->dev,
+		    "VF-%d: Message has invalid VSI ID (expected %d, got %d)\n",
+		    vf->vf_num, vf_vsi->idx, req->vsi_id);
+		reply = VIRTCHNL_STATUS_ERR_PARAM;
+	} else if (req->num_elements > (vf->vlan_limit - vf->vlan_cnt)) {
+		/* Adding these VLANs would exceed the VF's VLAN limit */
+		reply = VIRTCHNL_STATUS_ERR_NO_MEMORY;
+	} else {
+		err = ice_add_vlan_hw_filters(vf_vsi, req->vlan_id,
+		    req->num_elements);
+		if (err == 0) {
+			vf->vlan_cnt += req->num_elements;
+		} else {
+			device_printf(sc->dev,
+			    "VF-%d: Failure adding VLANs to VSI %d, err %s aq_err %s\n",
+			    vf->vf_num, vf_vsi->idx, ice_status_str(err),
+			    ice_aq_str(sc->hw.adminq.sq_last_status));
+			reply = ice_iov_err_to_virt_err(err);
+		}
+	}
+
+	ice_aq_send_msg_to_vf(hw, vf->vf_num, VIRTCHNL_OP_ADD_VLAN,
+	    reply, NULL, 0, NULL);
+}
+
+/**
+ * ice_vc_del_vlan_msg - Handle VIRTCHNL_OP_DEL_VLAN msg from VF
+ * @sc: PF's softc structure
+ * @vf: VF tracking structure
+ * @msg_buf: message buffer from VF
+ *
+ * Removes the VLANs in msg_buf from the VF's VLAN filter list.
+ */
+static void
+ice_vc_del_vlan_msg(struct ice_softc *sc, struct ice_vf *vf, u8 *msg_buf)
+{
+	struct virtchnl_vlan_filter_list *req =
+	    (struct virtchnl_vlan_filter_list *)msg_buf;
+	enum virtchnl_status_code reply = VIRTCHNL_STATUS_SUCCESS;
+	struct ice_vsi *vf_vsi = vf->vsi;
+	struct ice_hw *hw = &sc->hw;
+	int err;
+
+	if (req->vsi_id != vf_vsi->idx) {
+		device_printf(sc->dev,
+		    "VF-%d: Message has invalid VSI ID (expected %d, got %d)\n",
+		    vf->vf_num, vf_vsi->idx, req->vsi_id);
+		reply = VIRTCHNL_STATUS_ERR_PARAM;
+		goto reply_to_vf;
+	}
+
+	err = ice_remove_vlan_hw_filters(vf_vsi, req->vlan_id,
+	    req->num_elements);
+	if (err != 0) {
+		device_printf(sc->dev,
+		    "VF-%d: Failure deleting VLANs from VSI %d, err %s aq_err %s\n",
+		    vf->vf_num, vf_vsi->idx, ice_status_str(err),
+		    ice_aq_str(sc->hw.adminq.sq_last_status));
+		reply = ice_iov_err_to_virt_err(err);
+		goto reply_to_vf;
+	}
+
+	/* Clamp the tracked VLAN count at zero instead of going below it */
+	if (req->num_elements < vf->vlan_cnt)
+		vf->vlan_cnt -= req->num_elements;
+	else
+		vf->vlan_cnt = 0;
+
+reply_to_vf:
+	ice_aq_send_msg_to_vf(hw, vf->vf_num, VIRTCHNL_OP_DEL_VLAN,
+	    reply, NULL, 0, NULL);
+}
+
+/**
+ * ice_vc_isvalid_ring_len - Check to see if a descriptor ring length is valid
+ * @ring_len: length of ring
+ *
+ * A length is valid when it lies within ICE_MIN_DESC_COUNT and
+ * ICE_MAX_DESC_COUNT (inclusive) and is a multiple of ICE_DESC_COUNT_INCR.
+ * @returns true if given ring size is valid
+ */
+static bool
+ice_vc_isvalid_ring_len(u16 ring_len)
+{
+	if (ring_len < ICE_MIN_DESC_COUNT || ring_len > ICE_MAX_DESC_COUNT)
+		return (false);
+	return ((ring_len % ICE_DESC_COUNT_INCR) == 0);
+}
+
+/**
+ * ice_vc_cfg_vsi_qs_msg - Handle VIRTCHNL_OP_CONFIG_VSI_QUEUES msg from VF
+ * @sc: PF's softc structure
+ * @vf: VF tracking structure
+ * @msg_buf: message buffer from VF
+ *
+ * Stops and clears the VF's queues, then applies the queue pair settings
+ * from msg_buf to the VF's VSI and to hardware, and replies with the status.
+ */
+static void
+ice_vc_cfg_vsi_qs_msg(struct ice_softc *sc, struct ice_vf *vf, u8 *msg_buf)
+{
+	device_t dev = sc->dev;
+	struct ice_hw *hw = &sc->hw;
+	struct virtchnl_vsi_queue_config_info *vqci =
+	    (struct virtchnl_vsi_queue_config_info *)msg_buf;
+	struct virtchnl_queue_pair_info *vqpi;
+	enum virtchnl_status_code status = VIRTCHNL_STATUS_SUCCESS;
+	struct ice_vsi *vsi = vf->vsi;
+	struct ice_tx_queue *txq;
+	struct ice_rx_queue *rxq;
+	int i, error = 0;
+
+	/* Every pair needs both a Tx and an Rx queue on this VSI */
+	if (vqci->num_queue_pairs > vsi->num_tx_queues ||
+	    vqci->num_queue_pairs > vsi->num_rx_queues) {
+		status = VIRTCHNL_STATUS_ERR_PARAM;
+		goto done;
+	}
+
+	ice_vsi_disable_tx(vsi);
+	ice_control_all_rx_queues(vsi, false);
+
+	/*
+	 * Clear TX and RX queues config in case VF
+	 * requests different number of queues.
+	 */
+	for (i = 0; i < vsi->num_tx_queues; i++) {
+		txq = &vsi->tx_queues[i];
+		txq->desc_count = 0;
+		txq->tx_paddr = 0;
+		txq->tc = 0;
+	}
+	for (i = 0; i < vsi->num_rx_queues; i++) {
+		rxq = &vsi->rx_queues[i];
+		rxq->desc_count = 0;
+		rxq->rx_paddr = 0;
+	}
+
+	vqpi = vqci->qpair;
+	for (i = 0; i < vqci->num_queue_pairs; i++, vqpi++) {
+		/* Initial parameter validation. Queue IDs come from the VF
+		 * and index the queue arrays below, so range check them.
+		 */
+		if (vqpi->txq.vsi_id != vsi->idx ||
+		    vqpi->rxq.vsi_id != vsi->idx ||
+		    vqpi->txq.queue_id != vqpi->rxq.queue_id ||
+		    vqpi->txq.queue_id >= vsi->num_tx_queues ||
+		    vqpi->rxq.queue_id >= vsi->num_rx_queues ||
+		    vqpi->txq.headwb_enabled ||
+		    vqpi->rxq.splithdr_enabled ||
+		    vqpi->rxq.crc_disable ||
+		    !(ice_vc_isvalid_ring_len(vqpi->txq.ring_len)) ||
+		    !(ice_vc_isvalid_ring_len(vqpi->rxq.ring_len))) {
+			status = VIRTCHNL_STATUS_ERR_PARAM;
+			goto done;
+		}
+
+		/* Copy parameters into VF's queue/VSI structs */
+		txq = &vsi->tx_queues[vqpi->txq.queue_id];
+		txq->desc_count = vqpi->txq.ring_len;
+		txq->tx_paddr = vqpi->txq.dma_ring_addr;
+		txq->q_handle = vqpi->txq.queue_id;
+		txq->tc = 0;
+
+		rxq = &vsi->rx_queues[vqpi->rxq.queue_id];
+		rxq->desc_count = vqpi->rxq.ring_len;
+		rxq->rx_paddr = vqpi->rxq.dma_ring_addr;
+		vsi->mbuf_sz = vqpi->rxq.databuffer_size;
+	}
+
+	/* Configure TX queues in HW */
+	error = ice_cfg_vsi_for_tx(vsi);
+	if (error) {
+		device_printf(dev, "VF-%d: Unable to configure VSI for Tx: %s\n",
+		    vf->vf_num, ice_err_str(error));
+		status = VIRTCHNL_STATUS_ERR_ADMIN_QUEUE_ERROR;
+		goto done;
+	}
+
+	/* Configure RX queues in HW */
+	error = ice_cfg_vsi_for_rx(vsi);
+	if (error) {
+		device_printf(dev, "VF-%d: Unable to configure VSI for Rx: %s\n",
+		    vf->vf_num, ice_err_str(error));
+		status = VIRTCHNL_STATUS_ERR_ADMIN_QUEUE_ERROR;
+		ice_vsi_disable_tx(vsi);
+		goto done;
+	}
+
+done:
+	ice_aq_send_msg_to_vf(hw, vf->vf_num, VIRTCHNL_OP_CONFIG_VSI_QUEUES,
+	    status, NULL, 0, NULL);
+}
+
+/**
+ * ice_vc_cfg_rss_key_msg - Handle VIRTCHNL_OP_CONFIG_RSS_KEY msg from VF
+ * @sc: PF's softc structure
+ * @vf: VF tracking structure
+ * @msg_buf: message buffer from VF
+ *
+ * Sets the RSS key for the given VF, using the contents of msg_buf. The
+ * request is rejected if it names the wrong VSI or if the key length is
+ * zero or larger than the standard and extended key sizes combined.
+ */
+static void
+ice_vc_cfg_rss_key_msg(struct ice_softc *sc, struct ice_vf *vf, u8 *msg_buf)
+{
+	struct virtchnl_rss_key *req = (struct virtchnl_rss_key *)msg_buf;
+	enum virtchnl_status_code reply = VIRTCHNL_STATUS_SUCCESS;
+	struct ice_aqc_get_set_rss_keys keydata =
+	    { .standard_rss_key = {0}, .extended_hash_key = {0} };
+	struct ice_vsi *vf_vsi = vf->vsi;
+	struct ice_hw *hw = &sc->hw;
+	int err;
+
+	if (req->vsi_id != vf_vsi->idx) {
+		device_printf(sc->dev,
+		    "VF-%d: Message has invalid VSI ID (expected %d, got %d)\n",
+		    vf->vf_num, vf_vsi->idx, req->vsi_id);
+		reply = VIRTCHNL_STATUS_ERR_PARAM;
+		goto send_reply;
+	}
+
+	/* The key must be non-empty and no longer than both keys combined */
+	if (req->key_len == 0 ||
+	    req->key_len > (ICE_AQC_GET_SET_RSS_KEY_DATA_RSS_KEY_SIZE +
+	    ICE_AQC_GET_SET_RSS_KEY_DATA_HASH_KEY_SIZE)) {
+		reply = VIRTCHNL_STATUS_ERR_PARAM;
+		goto send_reply;
+	}
+
+	/* Bytes beyond key_len keep the zero fill from the initializer */
+	memcpy(&keydata, req->key, req->key_len);
+
+	err = ice_aq_set_rss_key(hw, vf_vsi->idx, &keydata);
+	if (err != 0) {
+		device_printf(sc->dev,
+		    "ice_aq_set_rss_key status %s, error %s\n",
+		    ice_status_str(err), ice_aq_str(hw->adminq.sq_last_status));
+		reply = ice_iov_err_to_virt_err(err);
+	}
+
+send_reply:
+	ice_aq_send_msg_to_vf(hw, vf->vf_num, VIRTCHNL_OP_CONFIG_RSS_KEY,
+	    reply, NULL, 0, NULL);
+}
+
+/**
+ * ice_vc_cfg_rss_lut_msg - Handle VIRTCHNL_OP_CONFIG_RSS_LUT msg from VF
+ * @sc: PF's softc structure
+ * @vf: VF tracking structure
+ * @msg_buf: message buffer from VF
+ *
+ * Adds the LUT from the VF in msg_buf to the PF via an admin queue call.
+ */
+static void
+ice_vc_cfg_rss_lut_msg(struct ice_softc *sc, struct ice_vf *vf, u8 *msg_buf)
+{
+	struct virtchnl_rss_lut *req = (struct virtchnl_rss_lut *)msg_buf;
+	enum virtchnl_status_code reply = VIRTCHNL_STATUS_SUCCESS;
+	struct ice_vsi *vf_vsi = vf->vsi;
+	struct ice_hw *hw = &sc->hw;
+	struct ice_aq_get_set_rss_lut_params lut_params = {
+		.vsi_handle = vf_vsi->idx,
+		.lut_size = vf_vsi->rss_table_size,
+		.lut_type = vf_vsi->rss_lut_type,
+		.lut = req->lut,
+	};	/* members not named, such as global_lut_id, are zero */
+	int err;
+
+	if (req->vsi_id != vf_vsi->idx) {
+		device_printf(sc->dev,
+		    "VF-%d: Message has invalid VSI ID (expected %d, got %d)\n",
+		    vf->vf_num, vf_vsi->idx, req->vsi_id);
+		reply = VIRTCHNL_STATUS_ERR_PARAM;
+		goto send_reply;
+	}
+
+	if (req->lut_entries > ICE_VSIQF_HLUT_ARRAY_SIZE) {
+		reply = VIRTCHNL_STATUS_ERR_PARAM;
+		goto send_reply;
+	}
+
+	/* NOTE(review): lut_size is the VSI's rss_table_size rather than
+	 * req->lut_entries; confirm the VF always sends that many entries.
+	 */
+	err = ice_aq_set_rss_lut(hw, &lut_params);
+	if (err != 0) {
+		device_printf(sc->dev,
+		    "VF-%d: Cannot set RSS lut, err %s aq_err %s\n",
+		    vf->vf_num, ice_status_str(err),
+		    ice_aq_str(hw->adminq.sq_last_status));
+		reply = ice_iov_err_to_virt_err(err);
+	}
+
+send_reply:
+	ice_aq_send_msg_to_vf(hw, vf->vf_num, VIRTCHNL_OP_CONFIG_RSS_LUT,
+	    reply, NULL, 0, NULL);
+}
+
+/**
+ * ice_vc_set_rss_hena_msg - Handle VIRTCHNL_OP_SET_RSS_HENA msg from VF
+ * @sc: PF's softc structure
+ * @vf: VF tracking structure
+ * @msg_buf: message buffer from VF
+ *
+ * Replaces the RSS flow configuration of the VF's VSI with the one described
+ * by the hena (hash enable) bits in msg_buf. A hena value of zero only
+ * removes the existing configuration.
+ */
+static void
+ice_vc_set_rss_hena_msg(struct ice_softc *sc, struct ice_vf *vf, u8 *msg_buf)
+{
+	struct virtchnl_rss_hena *req = (struct virtchnl_rss_hena *)msg_buf;
+	struct ice_vsi *vsi = vf->vsi;
+	struct ice_hw *hw = &sc->hw;
+	int err;
+
+	MPASS(vsi != NULL);
+	/*
+	 * Remove existing configuration to make sure only requested
+	 * config is applied and allow VFs to disable RSS completely.
+	 */
+	err = ice_rem_vsi_rss_cfg(hw, vsi->idx);
+	if (req->hena == 0)
+		goto send_reply;
+
+	/*
+	 * Problem with removing config is not fatal, when new one
+	 * is requested. Warn about it but try to apply new config
+	 * anyway.
+	 */
+	if (err != 0)
+		device_printf(sc->dev,
+		    "ice_rem_vsi_rss_cfg status %s, error %s\n",
+		    ice_status_str(err),
+		    ice_aq_str(hw->adminq.sq_last_status));
+
+	err = ice_add_avf_rss_cfg(hw, vsi->idx, req->hena);
+	if (err != 0)
+		device_printf(sc->dev,
+		    "ice_add_avf_rss_cfg status %s, error %s\n",
+		    ice_status_str(err),
+		    ice_aq_str(hw->adminq.sq_last_status));
+
+send_reply:
+	ice_aq_send_msg_to_vf(hw, vf->vf_num, VIRTCHNL_OP_SET_RSS_HENA,
+	    ice_iov_err_to_virt_err(err), NULL, 0, NULL);
+}
+
+/**
+ * ice_vc_enable_queues_msg - Handle VIRTCHNL_OP_ENABLE_QUEUES msg from VF
+ * @sc: PF's softc structure
+ * @vf: VF tracking structure
+ * @msg_buf: message buffer from VF
+ *
+ * Enables VF queues selected in msg_buf for Tx/Rx traffic. The request is
+ * refused if it names the wrong VSI, selects no queues at all, or selects
+ * an Rx queue the VSI does not have.
+ *
+ * @remark Only actually operates on Rx queues; Tx queues are enabled in
+ * CONFIG_VSI_QUEUES message handler.
+ */
+static void
+ice_vc_enable_queues_msg(struct ice_softc *sc, struct ice_vf *vf, u8 *msg_buf)
+{
+	struct virtchnl_queue_select *sel =
+	    (struct virtchnl_queue_select *)msg_buf;
+	enum virtchnl_status_code reply = VIRTCHNL_STATUS_SUCCESS;
+	struct ice_vsi *vf_vsi = vf->vsi;
+	int qbit, err;
+
+	if (sel->vsi_id != vf_vsi->idx) {
+		device_printf(sc->dev,
+		    "%s: VF-%d: Message has invalid VSI ID (expected %d, got %d)\n",
+		    __func__, vf->vf_num, vf_vsi->idx, sel->vsi_id);
+		reply = VIRTCHNL_STATUS_ERR_PARAM;
+		goto send_reply;
+	}
+
+	if (sel->rx_queues == 0 && sel->tx_queues == 0) {
+		device_printf(sc->dev,
+		    "%s: VF-%d: message queue masks are empty\n",
+		    __func__, vf->vf_num);
+		reply = VIRTCHNL_STATUS_ERR_PARAM;
+		goto send_reply;
+	}
+
+	/* Validate rx_queue mask: its highest set bit must be a real queue */
+	qbit = fls(sel->rx_queues);
+	if (qbit > vf_vsi->num_rx_queues) {
+		device_printf(sc->dev,
+		    "%s: VF-%d: message's rx_queues map (0x%08x) has invalid bit set (%d)\n",
+		    __func__, vf->vf_num, sel->rx_queues, qbit);
+		reply = VIRTCHNL_STATUS_ERR_PARAM;
+		goto send_reply;
+	}
+
+	/* Tx ring enable is handled in an earlier message. */
+	for_each_set_bit(qbit, &sel->rx_queues, 32) {
+		err = ice_control_rx_queue(vf_vsi, qbit, true);
+		if (err != 0) {
+			device_printf(sc->dev,
+			    "Unable to enable Rx ring %d for receive: %s\n",
+			    qbit, ice_err_str(err));
+			reply = VIRTCHNL_STATUS_ERR_PARAM;
+			goto send_reply;
+		}
+	}
+
+send_reply:
+	ice_aq_send_msg_to_vf(&sc->hw, vf->vf_num, VIRTCHNL_OP_ENABLE_QUEUES,
+	    reply, NULL, 0, NULL);
+}
+
+/**
+ * ice_vc_disable_queues_msg - Handle VIRTCHNL_OP_DISABLE_QUEUES msg
+ * @sc: PF's softc structure
+ * @vf: VF tracking structure
+ * @msg_buf: message buffer from VF (unused)
+ *
+ * Disables all VF queues for the VF's VSI, regardless of which queues the
+ * message selects. The Rx queues are disabled first; if that fails the Tx
+ * queues are left untouched. Either failure is reported to the VF as
+ * VIRTCHNL_STATUS_ERR_PARAM.
+ *
+ * @remark Unlike the ENABLE_QUEUES handler, this operates on both
+ * Tx and Rx queues
+ */
+static void
+ice_vc_disable_queues_msg(struct ice_softc *sc, struct ice_vf *vf,
+			  u8 *msg_buf __unused)
+{
+	enum virtchnl_status_code reply = VIRTCHNL_STATUS_SUCCESS;
+	struct ice_vsi *vf_vsi = vf->vsi;
+	int err;
+
+	err = ice_control_all_rx_queues(vf_vsi, false);
+	if (err != 0) {
+		device_printf(sc->dev,
+		    "Unable to disable Rx rings for transmit: %s\n",
+		    ice_err_str(err));
+		reply = VIRTCHNL_STATUS_ERR_PARAM;
+		goto send_reply;
+	}
+
+	/* ice_vsi_disable_tx() already prints an error message */
+	if (ice_vsi_disable_tx(vf_vsi) != 0)
+		reply = VIRTCHNL_STATUS_ERR_PARAM;
+
+send_reply:
+	ice_aq_send_msg_to_vf(&sc->hw, vf->vf_num, VIRTCHNL_OP_DISABLE_QUEUES,
+	    reply, NULL, 0, NULL);
+}
+
+/**
+ * ice_vc_cfg_irq_map_msg - Handle VIRTCHNL_OP_CONFIG_IRQ_MAP msg from VF
+ * @sc: PF's softc structure
+ * @vf: VF tracking structure
+ * @msg_buf: message buffer from VF
+ *
+ * Records, then programs into HW, the interrupt vector of each queue named
+ * in the message in msg_buf. The VF needs to send this message during init,
+ * so that queues can be allowed to generate interrupts.
+ */
+static void
+ice_vc_cfg_irq_map_msg(struct ice_softc *sc, struct ice_vf *vf, u8 *msg_buf)
+{
+#define ICE_VIRTCHNL_QUEUE_MAP_SIZE	16	/* bits scanned in each queue map */
+	struct ice_hw *hw = &sc->hw;
+	struct virtchnl_irq_map_info *vimi;
+	struct virtchnl_vector_map *vvm;
+	enum virtchnl_status_code v_status = VIRTCHNL_STATUS_SUCCESS;
+	struct ice_vsi *vsi = vf->vsi;
+	u16 vector;
+
+	vimi = (struct virtchnl_irq_map_info *)msg_buf;
+
+	if (vimi->num_vectors > vf->num_irq_vectors) {
+		device_printf(sc->dev,
+		    "%s: VF-%d: message has more vectors (%d) than configured for VF (%d)\n",
+		    __func__, vf->vf_num, vimi->num_vectors, vf->num_irq_vectors);
+		v_status = VIRTCHNL_STATUS_ERR_PARAM;
+		goto done;
+	}
+
+	vvm = vimi->vecmap;
+	/* Validate each vector map and save its settings in the queues */
+	for (int i = 0; i < vimi->num_vectors; i++, vvm++) {
+		struct ice_tx_queue *txq;
+		struct ice_rx_queue *rxq;
+		int bit;
+
+		if (vvm->vsi_id != vf->vsi->idx) {
+			device_printf(sc->dev,
+			    "%s: VF-%d: message's VSI ID (%d) does not match VF's (%d) for vector %d\n",
+			    __func__, vf->vf_num, vvm->vsi_id, vf->vsi->idx, i);
+			v_status = VIRTCHNL_STATUS_ERR_PARAM;
+			goto done;
+		}
+
+		/* vvm->vector_id is relative to VF space */
+		vector = vvm->vector_id;
+
+		if (vector >= vf->num_irq_vectors) {
+			device_printf(sc->dev,
+			    "%s: VF-%d: message's vector ID (%d) is greater than VF's max ID (%d)\n",
+			    __func__, vf->vf_num, vector, vf->num_irq_vectors - 1);
+			v_status = VIRTCHNL_STATUS_ERR_PARAM;
+			goto done;
+		}
+
+		/* The Misc/Admin Queue vector doesn't need mapping */
+		if (vector == 0)
+			continue;
+
+		/* coverity[address_of] */
+		for_each_set_bit(bit, &vvm->txq_map, ICE_VIRTCHNL_QUEUE_MAP_SIZE) {
+			if (bit >= vsi->num_tx_queues) {
+				device_printf(sc->dev,
+				    "%s: VF-%d: txq map has invalid bit set\n",
+				    __func__, vf->vf_num);
+				v_status = VIRTCHNL_STATUS_ERR_PARAM;
+				goto done;
+			}
+
+			vf->tx_irqvs[vector].me = vector;
+
+			txq = &vsi->tx_queues[bit];
+			txq->irqv = &vf->tx_irqvs[vector];
+			txq->itr_idx = vvm->txitr_idx;
+		}
+		/* coverity[address_of] */
+		for_each_set_bit(bit, &vvm->rxq_map, ICE_VIRTCHNL_QUEUE_MAP_SIZE) {
+			if (bit >= vsi->num_rx_queues) {
+				device_printf(sc->dev,
+				    "%s: VF-%d: rxq map has invalid bit set\n",
+				    __func__, vf->vf_num);
+				v_status = VIRTCHNL_STATUS_ERR_PARAM;
+				goto done;
+			}
+			vf->rx_irqvs[vector].me = vector;
+
+			rxq = &vsi->rx_queues[bit];
+			rxq->irqv = &vf->rx_irqvs[vector];
+			rxq->itr_idx = vvm->rxitr_idx;
+		}
+	}
+
+	/* Write to T/RQCTL registers to actually map vectors to queues */
+	for (int i = 0; i < vf->vsi->num_rx_queues; i++)
+		if (vsi->rx_queues[i].irqv != NULL)
+			ice_configure_rxq_interrupt(hw, vsi->rx_qmap[i],
+			    vsi->rx_queues[i].irqv->me, vsi->rx_queues[i].itr_idx);
+
+	for (int i = 0; i < vf->vsi->num_tx_queues; i++)
+		if (vsi->tx_queues[i].irqv != NULL)
+			ice_configure_txq_interrupt(hw, vsi->tx_qmap[i],
+			    vsi->tx_queues[i].irqv->me, vsi->tx_queues[i].itr_idx);
+
+	ice_flush(hw);
+
+done:
+	ice_aq_send_msg_to_vf(hw, vf->vf_num, VIRTCHNL_OP_CONFIG_IRQ_MAP,
+	    v_status, NULL, 0, NULL);
+}
+
+/**
+ * ice_eth_stats_to_virtchnl_eth_stats - Convert stats for virtchnl
+ * @istats: VSI stats from HW to convert
+ * @vstats: stats struct to copy to
+ *
+ * This function copies all known stats in struct virtchnl_eth_stats from the
+ * input struct ice_eth_stats to an output struct virtchnl_eth_stats.
+ *
+ * @remark These two structure types currently have the same definition up to
+ * the size of struct virtchnl_eth_stats (on FreeBSD), but that could change
+ * in the future.
+ */
+static void
+ice_eth_stats_to_virtchnl_eth_stats(struct ice_eth_stats *istats,
+				    struct virtchnl_eth_stats *vstats)
+{
+	vstats->rx_bytes = istats->rx_bytes;
+	vstats->rx_unicast = istats->rx_unicast;
+	vstats->rx_multicast = istats->rx_multicast;
+	vstats->rx_broadcast = istats->rx_broadcast;
+	vstats->rx_discards = istats->rx_discards;
+	vstats->rx_unknown_protocol = istats->rx_unknown_protocol;
+	vstats->tx_bytes = istats->tx_bytes;
+	vstats->tx_unicast = istats->tx_unicast;
+	vstats->tx_multicast = istats->tx_multicast;
+	vstats->tx_broadcast = istats->tx_broadcast;
+	vstats->tx_discards = istats->tx_discards;
+	vstats->tx_errors = istats->tx_errors;
+}
+
+/**
+ * ice_vc_get_stats_msg - Handle VIRTCHNL_OP_GET_STATS msg
+ * @sc: device private structure
+ * @vf: VF tracking structure
+ * @msg_buf: raw message buffer from the VF
+ *
+ * Updates the VF's VSI stats and sends those stats back to the VF. A request
+ * naming the wrong VSI gets only a VIRTCHNL_STATUS_ERR_PARAM reply.
+ */
+static void
+ice_vc_get_stats_msg(struct ice_softc *sc, struct ice_vf *vf, u8 *msg_buf)
+{
+	struct virtchnl_queue_select *vqs =
+	    (struct virtchnl_queue_select *)msg_buf;
+	struct virtchnl_eth_stats stats;
+	struct ice_vsi *vsi = vf->vsi;
+
+	if (vqs->vsi_id != vsi->idx) {
+		device_printf(sc->dev,
+		    "%s: VF-%d: message has invalid VSI ID %d (VF has VSI ID %d)\n",
+		    __func__, vf->vf_num, vqs->vsi_id, vsi->idx);
+		ice_aq_send_msg_to_vf(&sc->hw, vf->vf_num, VIRTCHNL_OP_GET_STATS,
+		    VIRTCHNL_STATUS_ERR_PARAM, NULL, 0, NULL);
+		return;	/* the error reply above is the only reply */
+	}
+
+	ice_update_vsi_hw_stats(vsi);
+	ice_eth_stats_to_virtchnl_eth_stats(&vsi->hw_stats.cur, &stats);
+
+	ice_aq_send_msg_to_vf(&sc->hw, vf->vf_num, VIRTCHNL_OP_GET_STATS,
+	    VIRTCHNL_STATUS_SUCCESS, (u8 *)&stats,
+	    sizeof(struct virtchnl_eth_stats), NULL);
+}
+
+/**
+ * ice_vc_cfg_promisc_mode_msg - Handle VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE
+ * @sc: PF's softc structure
+ * @vf: VF tracking structure
+ * @msg_buf: message buffer from VF
+ *
+ * Configures the promiscuous modes for the given VSI in msg_buf. Only the
+ * difference between the VSI's current promiscuous mask and the requested
+ * one is sent to hardware. A VF lacking VF_FLAG_PROMISC_CAP is answered
+ * with success without anything being changed.
+ */
+static void
+ice_vc_cfg_promisc_mode_msg(struct ice_softc *sc, struct ice_vf *vf, u8 *msg_buf)
+{
+	struct virtchnl_promisc_info *req =
+	    (struct virtchnl_promisc_info *)msg_buf;
+	enum virtchnl_status_code reply = VIRTCHNL_STATUS_SUCCESS;
+	struct ice_vsi *vf_vsi = vf->vsi;
+	struct ice_hw *hw = &sc->hw;
+	ice_declare_bitmap(cur_mask, ICE_PROMISC_MAX);
+	ice_declare_bitmap(want_mask, ICE_PROMISC_MAX);
+	ice_declare_bitmap(diff_mask, ICE_PROMISC_MAX);
+	ice_declare_bitmap(drop_mask, ICE_PROMISC_MAX);
+	ice_declare_bitmap(add_mask, ICE_PROMISC_MAX);
+	u16 vid;
+	int err;
+
+	/* Check to see if VF has permission to configure promiscuous mode */
+	if ((vf->vf_flags & VF_FLAG_PROMISC_CAP) == 0) {
+		device_printf(sc->dev,
+		    "VF-%d: attempted to configure promiscuous mode\n",
+		    vf->vf_num);
+		/* Don't reply to VF with an error */
+		goto send_reply;
+	}
+
+	if (req->vsi_id != vf_vsi->idx) {
+		device_printf(sc->dev,
+		    "VF-%d: Message has invalid VSI ID (expected %d, got %d)\n",
+		    vf->vf_num, vf_vsi->idx, req->vsi_id);
+		reply = VIRTCHNL_STATUS_ERR_PARAM;
+		goto send_reply;
+	}
+
+	if ((req->flags & ~ICE_VIRTCHNL_VALID_PROMISC_FLAGS) != 0) {
+		device_printf(sc->dev,
+		    "VF-%d: Message has invalid promiscuous flags set (valid 0x%02x, got 0x%02x)\n",
+		    vf->vf_num, ICE_VIRTCHNL_VALID_PROMISC_FLAGS,
+		    req->flags);
+		reply = VIRTCHNL_STATUS_ERR_PARAM;
+		goto send_reply;
+	}
+
+	/* Convert virtchnl flags to ice AQ promiscuous mode flags */
+	ice_zero_bitmap(want_mask, ICE_PROMISC_MAX);
+	if (req->flags & FLAG_VF_UNICAST_PROMISC) {
+		ice_set_bit(ICE_PROMISC_UCAST_TX, want_mask);
+		ice_set_bit(ICE_PROMISC_UCAST_RX, want_mask);
+	}
+	if (req->flags & FLAG_VF_MULTICAST_PROMISC) {
+		ice_set_bit(ICE_PROMISC_MCAST_TX, want_mask);
+		ice_set_bit(ICE_PROMISC_MCAST_RX, want_mask);
+	}
+
+	err = ice_get_vsi_promisc(hw, vf_vsi->idx, cur_mask, &vid);
+	if (err != 0) {
+		device_printf(sc->dev,
+		    "VF-%d: Failed to get promiscuous mode mask for VSI %d, err %s aq_err %s\n",
+		    vf->vf_num, vf_vsi->idx, ice_status_str(err),
+		    ice_aq_str(hw->adminq.sq_last_status));
+		reply = ice_iov_err_to_virt_err(err);
+		goto send_reply;
+	}
+
+	/*
+	 * Figure out what got added and what got removed: bits that differ
+	 * and are currently set must be cleared, bits that differ and are
+	 * requested must be set.
+	 */
+	ice_zero_bitmap(diff_mask, ICE_PROMISC_MAX);
+	ice_xor_bitmap(diff_mask, cur_mask, want_mask, ICE_PROMISC_MAX);
+	ice_and_bitmap(drop_mask, diff_mask, cur_mask, ICE_PROMISC_MAX);
+	ice_and_bitmap(add_mask, diff_mask, want_mask, ICE_PROMISC_MAX);
+
+	if (ice_is_any_bit_set(drop_mask, ICE_PROMISC_MAX)) {
+		err = ice_clear_vsi_promisc(hw, vf_vsi->idx, drop_mask, 0);
+		if (err != 0) {
+			device_printf(sc->dev,
+			    "VF-%d: Failed to clear promiscuous mode for VSI %d, err %s aq_err %s\n",
+			    vf->vf_num, vf_vsi->idx, ice_status_str(err),
+			    ice_aq_str(hw->adminq.sq_last_status));
+			reply = ice_iov_err_to_virt_err(err);
+			goto send_reply;
+		}
+	}
+
+	if (ice_is_any_bit_set(add_mask, ICE_PROMISC_MAX)) {
+		err = ice_set_vsi_promisc(hw, vf_vsi->idx, add_mask, 0);
+		if (err != 0) {
+			device_printf(sc->dev,
+			    "VF-%d: Failed to set promiscuous mode for VSI %d, err %s aq_err %s\n",
+			    vf->vf_num, vf_vsi->idx, ice_status_str(err),
+			    ice_aq_str(hw->adminq.sq_last_status));
+			reply = ice_iov_err_to_virt_err(err);
+		}
+	}
+
+send_reply:
+	ice_aq_send_msg_to_vf(hw, vf->vf_num, VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE,
+	    reply, NULL, 0, NULL);
+}
+
+/**
+ * ice_vc_notify_all_vfs_link_state - Notify all VFs of PF link state
+ * @sc: device private structure
+ *
+ * Calls ice_vc_notify_vf_link_state() for each of the sc->num_vfs entries
+ * in sc->vfs, so every VF is told the PF's current link state.
+ */
+void
+ice_vc_notify_all_vfs_link_state(struct ice_softc *sc)
+{
+	for (int vf_idx = 0; vf_idx < sc->num_vfs; vf_idx++)
+		ice_vc_notify_vf_link_state(sc, sc->vfs + vf_idx);
+}
+
+/**
+ * ice_vc_notify_vf_link_state - Notify VF of PF link state
+ * @sc: device private structure
+ * @vf: VF tracking structure
+ *
+ * Sends a VIRTCHNL_EVENT_LINK_CHANGE event to the specified VF carrying
+ * the PF's current link status (sc->link_up) and the port's link speed
+ * converted by ice_conv_link_speed_to_virtchnl().
+ */
+static void
+ice_vc_notify_vf_link_state(struct ice_softc *sc, struct ice_vf *vf)
+{
+	struct virtchnl_pf_event link_ev = {};
+	u32 vc_speed;
+
+	vc_speed = (u32)ice_conv_link_speed_to_virtchnl(true,
+	    sc->hw.port_info->phy.link_info.link_speed);
+	link_ev.event = VIRTCHNL_EVENT_LINK_CHANGE;
+	link_ev.severity = PF_EVENT_SEVERITY_INFO;
+	link_ev.event_data.link_event_adv.link_status = sc->link_up;
+	link_ev.event_data.link_event_adv.link_speed = vc_speed;
+
+	ice_aq_send_msg_to_vf(&sc->hw, vf->vf_num, VIRTCHNL_OP_EVENT,
+	    VIRTCHNL_STATUS_SUCCESS, (u8 *)&link_ev, sizeof(link_ev), NULL);
+}
+
+/**
+ * ice_vc_handle_vf_msg - Handle a message from a VF
+ * @sc: device private structure
+ * @event: event received from the HW MBX queue
+ *
+ * Entry point for every VF message taken off the HW mailbox queue. Checks
+ * the sending VF's ID and the message itself, then passes the message to
+ * the handler for its opcode; unknown opcodes get a NOT_SUPPORTED reply.
+ */
+void
+ice_vc_handle_vf_msg(struct ice_softc *sc, struct ice_rq_event_info *event)
+{
+	u32 opcode = event->desc.cookie_high;
+	u16 vf_id = event->desc.retval;
+	u16 len = event->msg_len;
+	u8 *payload = event->msg_buf;
+	struct ice_hw *hw = &sc->hw;
+	device_t dev = sc->dev;
+	struct ice_vf *vf;
+	int err;
+
+	/* The VF ID indexes sc->vfs, so drop anything out of range */
+	if (vf_id >= sc->num_vfs) {
+		device_printf(dev, "%s: Received msg from invalid VF-%d: opcode %d, len %d\n",
+		    __func__, vf_id, opcode, len);
+		return;
+	}
+
+	vf = &sc->vfs[vf_id];
+
+	/* Perform basic checks on the msg */
+	err = virtchnl_vc_validate_vf_msg(&vf->version, opcode, payload, len);
+	if (err != 0) {
+		device_printf(dev, "%s: Received invalid msg from VF-%d: opcode %d, len %d, error %d\n",
+		    __func__, vf->vf_num, opcode, len, err);
+		ice_aq_send_msg_to_vf(hw, vf_id, opcode, VIRTCHNL_STATUS_ERR_PARAM, NULL, 0, NULL);
+		return;
+	}
+
+	switch (opcode) {
+	case VIRTCHNL_OP_VERSION:
+		ice_vc_version_msg(sc, vf, payload);
+		break;
+	case VIRTCHNL_OP_RESET_VF:
+		ice_reset_vf(sc, vf, true);
+		break;
+	case VIRTCHNL_OP_GET_VF_RESOURCES:
+		ice_vc_get_vf_res_msg(sc, vf, payload);
+		break;
+	case VIRTCHNL_OP_ADD_ETH_ADDR:
+		ice_vc_add_eth_addr_msg(sc, vf, payload);
+		break;
+	case VIRTCHNL_OP_DEL_ETH_ADDR:
+		ice_vc_del_eth_addr_msg(sc, vf, payload);
+		break;
+	case VIRTCHNL_OP_ADD_VLAN:
+		ice_vc_add_vlan_msg(sc, vf, payload);
+		break;
+	case VIRTCHNL_OP_DEL_VLAN:
+		ice_vc_del_vlan_msg(sc, vf, payload);
+		break;
+	case VIRTCHNL_OP_CONFIG_VSI_QUEUES:
+		ice_vc_cfg_vsi_qs_msg(sc, vf, payload);
+		break;
+	case VIRTCHNL_OP_CONFIG_RSS_KEY:
+		ice_vc_cfg_rss_key_msg(sc, vf, payload);
+		break;
+	case VIRTCHNL_OP_CONFIG_RSS_LUT:
+		ice_vc_cfg_rss_lut_msg(sc, vf, payload);
+		break;
+	case VIRTCHNL_OP_SET_RSS_HENA:
+		ice_vc_set_rss_hena_msg(sc, vf, payload);
+		break;
+	case VIRTCHNL_OP_ENABLE_QUEUES:
+		ice_vc_enable_queues_msg(sc, vf, payload);
+		ice_vc_notify_vf_link_state(sc, vf);
+		break;
+	case VIRTCHNL_OP_DISABLE_QUEUES:
+		ice_vc_disable_queues_msg(sc, vf, payload);
+		break;
+	case VIRTCHNL_OP_CONFIG_IRQ_MAP:
+		ice_vc_cfg_irq_map_msg(sc, vf, payload);
+		break;
+	case VIRTCHNL_OP_GET_STATS:
+		ice_vc_get_stats_msg(sc, vf, payload);
+		break;
+	case VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE:
+		ice_vc_cfg_promisc_mode_msg(sc, vf, payload);
+		break;
+	default:
+		device_printf(dev, "%s: Received unknown msg from VF-%d: opcode %d, len %d\n",
+		    __func__, vf->vf_num, opcode, len);
+		ice_aq_send_msg_to_vf(hw, vf_id, opcode,
+		    VIRTCHNL_STATUS_ERR_NOT_SUPPORTED, NULL, 0, NULL);
+		break;
+	}
+}
+
+/**
+ * ice_iov_setup_intr_mapping - Setup interrupt config for a VF
+ * @sc: device softc structure
+ * @vf: driver's VF structure for VF to be configured
+ *
+ * Before a VF can be used, and after a VF reset, the PF must configure
+ * the VF's interrupt allocation registers. This includes allocating
+ * interrupts from the PF's interrupt pool to the VF using the
+ * VPINT_ALLOC(_PCI) registers, and setting up a mapping from PF vectors
+ * to VF vectors in GLINT_VECT2FUNC.
+ *
+ * As well, this sets up queue allocation registers and maps the mailbox
+ * interrupt for the VF.
+ */
+static void
+ice_iov_setup_intr_mapping(struct ice_softc *sc, struct ice_vf *vf)
+{
+	struct ice_hw *hw = &sc->hw;
+	struct ice_vsi *vsi = vf->vsi;
+	u16 v;
+
+	/* Calculate indices for register ops; abs_* add the func_caps base */
+	u16 vf_first_irq_idx = vf->vf_imap[0];
+	u16 vf_last_irq_idx = (vf_first_irq_idx + vf->num_irq_vectors) - 1;
+	u16 abs_vf_first_irq_idx = hw->func_caps.common_cap.msix_vector_first_id +
+	    vf_first_irq_idx;
+	u16 abs_vf_last_irq_idx = (abs_vf_first_irq_idx + vf->num_irq_vectors) - 1;
+	u16 abs_vf_num = vf->vf_num + hw->func_caps.vf_base_id;
+
+	/* Map out VF interrupt allocation in global device space. Both
+	 * VPINT_ALLOC and VPINT_ALLOC_PCI use the same values.
+	 */
+	wr32(hw, VPINT_ALLOC(vf->vf_num),
+	    (((abs_vf_first_irq_idx << VPINT_ALLOC_FIRST_S) & VPINT_ALLOC_FIRST_M) |
+	    ((abs_vf_last_irq_idx << VPINT_ALLOC_LAST_S) & VPINT_ALLOC_LAST_M) |
+	    VPINT_ALLOC_VALID_M));
+	wr32(hw, VPINT_ALLOC_PCI(vf->vf_num),
+	    (((abs_vf_first_irq_idx << VPINT_ALLOC_PCI_FIRST_S) & VPINT_ALLOC_PCI_FIRST_M) |
+	    ((abs_vf_last_irq_idx << VPINT_ALLOC_PCI_LAST_S) & VPINT_ALLOC_PCI_LAST_M) |
+	    VPINT_ALLOC_PCI_VALID_M));
+
+	/* Create inverse mapping of vectors to PF/VF combinations */
+	for (v = vf_first_irq_idx; v <= vf_last_irq_idx; v++)
+	{
+		wr32(hw, GLINT_VECT2FUNC(v),
+		    (((abs_vf_num << GLINT_VECT2FUNC_VF_NUM_S) & GLINT_VECT2FUNC_VF_NUM_M) |
+		     ((hw->pf_id << GLINT_VECT2FUNC_PF_NUM_S) & GLINT_VECT2FUNC_PF_NUM_M)));
+	}
+
+	/* Map mailbox interrupt to MSI-X index 0. Disable ITR for it, too. */
+	wr32(hw, VPINT_MBX_CTL(abs_vf_num),
+	    ((0 << VPINT_MBX_CTL_MSIX_INDX_S) & VPINT_MBX_CTL_MSIX_INDX_M) |
+	    ((0x3 << VPINT_MBX_CTL_ITR_INDX_S) & VPINT_MBX_CTL_ITR_INDX_M) |
+	    VPINT_MBX_CTL_CAUSE_ENA_M);
+
+	/* Mark the TX queue mapping registers as valid */
+	wr32(hw, VPLAN_TXQ_MAPENA(vf->vf_num), VPLAN_TXQ_MAPENA_TX_ENA_M);
+
+	/* Indicate to HW that VF has scattered queue allocation */
+	wr32(hw, VPLAN_TX_QBASE(vf->vf_num), VPLAN_TX_QBASE_VFQTABLE_ENA_M);
+	for (int i = 0; i < vsi->num_tx_queues; i++) {
+		wr32(hw, VPLAN_TX_QTABLE(i, vf->vf_num),
+		    (vsi->tx_qmap[i] << VPLAN_TX_QTABLE_QINDEX_S) & VPLAN_TX_QTABLE_QINDEX_M);
+	}
+
+	/* Mark the RX queue mapping valid, then fill its table as for TX */
+	wr32(hw, VPLAN_RXQ_MAPENA(vf->vf_num), VPLAN_RXQ_MAPENA_RX_ENA_M);
+	wr32(hw, VPLAN_RX_QBASE(vf->vf_num), VPLAN_RX_QBASE_VFQTABLE_ENA_M);
+	for (int i = 0; i < vsi->num_rx_queues; i++) {
+		wr32(hw, VPLAN_RX_QTABLE(i, vf->vf_num),
+		    (vsi->rx_qmap[i] << VPLAN_RX_QTABLE_QINDEX_S) & VPLAN_RX_QTABLE_QINDEX_M);
+	}
+}
+
+/**
+ * ice_iov_err_to_virt_err - translate ice errors into virtchnl errors
+ * @ice_err: status returned from ice function
+ */
+static enum virtchnl_status_code
+ice_iov_err_to_virt_err(int ice_err)
+{
+	switch (ice_err) {
+	case 0:
+		return (VIRTCHNL_STATUS_SUCCESS);
+	case ICE_ERR_PARAM:
+	case ICE_ERR_CFG:
+	case ICE_ERR_BAD_PTR:
+	case ICE_ERR_INVAL_SIZE:
+	case ICE_ERR_DEVICE_NOT_SUPPORTED:
+		return (VIRTCHNL_STATUS_ERR_PARAM);
+	case ICE_ERR_NO_MEMORY:
+		return (VIRTCHNL_STATUS_ERR_NO_MEMORY);
+	case ICE_ERR_AQ_ERROR:
+	case ICE_ERR_AQ_TIMEOUT:
+	case ICE_ERR_AQ_FULL:
+	case ICE_ERR_AQ_NO_WORK:
+	case ICE_ERR_AQ_EMPTY:
+	case ICE_ERR_NOT_READY:
+	case ICE_ERR_RESET_FAILED:
+	case ICE_ERR_FW_API_VER:
+		return (VIRTCHNL_STATUS_ERR_ADMIN_QUEUE_ERROR);
+	default:	/* any status not listed above */
+		return (VIRTCHNL_STATUS_ERR_NOT_SUPPORTED);
+	}
+}
diff --git a/sys/dev/ice/ice_iov.h b/sys/dev/ice/ice_iov.h
new file mode 100644
index 000000000000..c4fb3e932e3f
--- /dev/null
+++ b/sys/dev/ice/ice_iov.h
@@ -0,0 +1,125 @@
+/* SPDX-License-Identifier: BSD-3-Clause */
+/*  Copyright (c) 2025, Intel Corporation
+ *  All rights reserved.
+ *
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions are met:
+ *
+ *   1. Redistributions of source code must retain the above copyright notice,
+ *      this list of conditions and the following disclaimer.
+ *
+ *   2. Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in the
+ *      documentation and/or other materials provided with the distribution.
+ *
+ *   3. Neither the name of the Intel Corporation nor the names of its
+ *      contributors may be used to endorse or promote products derived from
+ *      this software without specific prior written permission.
+ *
+ *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ *  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ *  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ *  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ *  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ *  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ *  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ *  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ *  POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * @file ice_iov.h
+ * @brief header for IOV functionality
+ *
+ * This header includes definitions used to implement device Virtual Functions
+ * for the ice driver.
+ */
+
+#ifndef _ICE_IOV_H_
+#define _ICE_IOV_H_
+
+#include <sys/types.h>
+#include <sys/bus.h>
+#include <sys/nv.h>
+#include <sys/iov_schema.h>
+#include <sys/param.h>
+#include <dev/pci/pcivar.h>
+#include <dev/pci/pcireg.h>
+
+#include <dev/pci/pci_iov.h>
+
+#include "ice_iflib.h"
+#include "ice_vf_mbx.h"
+
+/**
+ * @enum ice_vf_flags
+ * @brief VF state flags
+ *
+ * Used to indicate the status of a PF's VF, as well as indicating what each VF
+ * is capable of. Intended to be modified only using atomic operations, so
+ * they can be read and modified in places that aren't locked.
+ *
+ * Used in struct ice_vf's vf_flags field.
+ */
+enum ice_vf_flags {
+	VF_FLAG_ENABLED			= BIT(0),
+	VF_FLAG_SET_MAC_CAP		= BIT(1),
+	VF_FLAG_VLAN_CAP		= BIT(2),
+	VF_FLAG_PROMISC_CAP		= BIT(3),
+	VF_FLAG_MAC_ANTI_SPOOF		= BIT(4),
+};
+
+/**
+ * @struct ice_vf
+ * @brief PF's VF software context
+ *
+ * Represents the state and options for a VF spawned from a PF.
+ */
+struct ice_vf {
+	struct ice_vsi *vsi;
+	u32 vf_flags;		/* bitmask of enum ice_vf_flags values */
+
+	u8 mac[ETHER_ADDR_LEN];
+	u16 vf_num;		/* VF index; passed to per-VF VPLAN_* macros */
+	struct virtchnl_version_info version;
+
+	u16 mac_filter_limit;
+	u16 mac_filter_cnt;
+	u16 vlan_limit;
+	u16 vlan_cnt;
+
+	u16 num_irq_vectors;
+	u16 *vf_imap;
+	struct ice_irq_vector *tx_irqvs;
+	struct ice_irq_vector *rx_irqvs;
+};
+
+#define ICE_PCIE_DEV_STATUS			0xAA
+
+#define ICE_PCI_CIAD_WAIT_COUNT			100
+#define ICE_PCI_CIAD_WAIT_DELAY_US		1
+#define ICE_VPGEN_VFRSTAT_WAIT_COUNT		100
+#define ICE_VPGEN_VFRSTAT_WAIT_DELAY_US		20
+
+#define ICE_VIRTCHNL_VALID_PROMISC_FLAGS	(FLAG_VF_UNICAST_PROMISC | \
+						 FLAG_VF_MULTICAST_PROMISC)
+
+#define ICE_DEFAULT_VF_VLAN_LIMIT			64
+#define ICE_DEFAULT_VF_FILTER_LIMIT			16
+
+int ice_iov_attach(struct ice_softc *sc);
+int ice_iov_detach(struct ice_softc *sc);
+
+int ice_iov_init(struct ice_softc *sc, uint16_t num_vfs, const nvlist_t *params);
+int ice_iov_add_vf(struct ice_softc *sc, uint16_t vfnum, const nvlist_t *params);
+void ice_iov_uninit(struct ice_softc *sc);
+
+void ice_iov_handle_vflr(struct ice_softc *sc);
+
+void ice_vc_handle_vf_msg(struct ice_softc *sc, struct ice_rq_event_info *event);
+void ice_vc_notify_all_vfs_link_state(struct ice_softc *sc);
+
+#endif /* _ICE_IOV_H_ */
+
diff --git a/sys/dev/ice/ice_lib.c b/sys/dev/ice/ice_lib.c
index d44ae5f37750..442111e5ffaf 100644
--- a/sys/dev/ice/ice_lib.c
+++ b/sys/dev/ice/ice_lib.c
@@ -42,6 +42,9 @@
 
 #include "ice_lib.h"
 #include "ice_iflib.h"
+#ifdef PCI_IOV
+#include "ice_iov.h"
+#endif
 #include <dev/pci/pcivar.h>
 #include <dev/pci/pcireg.h>
 #include <machine/resource.h>
@@ -741,6 +744,12 @@ ice_initialize_vsi(struct ice_vsi *vsi)
 	case ICE_VSI_VMDQ2:
 		ctx.flags = ICE_AQ_VSI_TYPE_VMDQ2;
 		break;
+#ifdef PCI_IOV
+	case ICE_VSI_VF:
+		ctx.flags = ICE_AQ_VSI_TYPE_VF;
+		ctx.vf_num = vsi->vf_num;
+		break;
+#endif
 	default:
 		return (ENODEV);
 	}
@@ -1607,6 +1616,12 @@ ice_setup_tx_ctx(struct ice_tx_queue *txq, struct ice_tlan_ctx *tlan_ctx, u16 pf
 	case ICE_VSI_VMDQ2:
 		tlan_ctx->vmvf_type = ICE_TLAN_CTX_VMVF_TYPE_VMQ;
 		break;
+#ifdef PCI_IOV
+	case ICE_VSI_VF:
+		tlan_ctx->vmvf_type = ICE_TLAN_CTX_VMVF_TYPE_VF;
+		tlan_ctx->vmvf_num = hw->func_caps.vf_base_id + vsi->vf_num;
+		break;
+#endif
 	default:
 		return (ENODEV);
 	}
@@ -1660,6 +1675,10 @@ ice_cfg_vsi_for_tx(struct ice_vsi *vsi)
 		struct ice_tlan_ctx tlan_ctx = { 0 };
 		struct ice_tx_queue *txq = &vsi->tx_queues[i];
 
+		/* Last configured queue */
+		if (txq->desc_count == 0)
+			break;
+
 		pf_q = vsi->tx_qmap[txq->me];
 		qg->txqs[0].txq_id = htole16(pf_q);
 
@@ -1788,6 +1807,10 @@ ice_cfg_vsi_for_rx(struct ice_vsi *vsi)
 
 	for (i = 0; i < vsi->num_rx_queues; i++) {
 		MPASS(vsi->mbuf_sz > 0);
+		/* Last configured queue */
+		if (vsi->rx_queues[i].desc_count == 0)
+			break;
+
 		err = ice_setup_rx_ctx(&vsi->rx_queues[i]);
 		if (err)
 			return err;
@@ -2257,6 +2280,11 @@ ice_process_ctrlq_event(struct ice_softc *sc, const char *qname,
 	case ice_aqc_opc_get_link_status:
 		ice_process_link_event(sc, event);
 		break;
+#ifdef PCI_IOV
+	case ice_mbx_opc_send_msg_to_pf:
+		ice_vc_handle_vf_msg(sc, event);
+		break;
+#endif
 	case ice_aqc_opc_fw_logs_event:
 		ice_handle_fw_log_event(sc, &event->desc, event->msg_buf);
 		break;
diff --git a/sys/dev/ice/ice_lib.h b/sys/dev/ice/ice_lib.h
index b6b23ec82161..308b2bda2790 100644
--- a/sys/dev/ice/ice_lib.h
+++ b/sys/dev/ice/ice_lib.h
@@ -611,6 +611,10 @@ struct ice_vsi {
 	u16 mirror_src_vsi;
 	u16 rule_mir_ingress;
 	u16 rule_mir_egress;
+
+#ifdef PCI_IOV
+	u8 vf_num;		/* Index of owning VF, if applicable */
+#endif
 };
 
 /**
diff --git a/sys/dev/ice/ice_vf_mbx.c b/sys/dev/ice/ice_vf_mbx.c
new file mode 100644
index 000000000000..387a1c6739a6
--- /dev/null
+++ b/sys/dev/ice/ice_vf_mbx.c
@@ -0,0 +1,471 @@
+/* SPDX-License-Identifier: BSD-3-Clause */
+/*  Copyright (c) 2025, Intel Corporation
+ *  All rights reserved.
+ *
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions are met:
+ *
+ *   1. Redistributions of source code must retain the above copyright notice,
+ *      this list of conditions and the following disclaimer.
+ *
+ *   2. Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in the
+ *      documentation and/or other materials provided with the distribution.
+ *
+ *   3. Neither the name of the Intel Corporation nor the names of its
+ *      contributors may be used to endorse or promote products derived from
+ *      this software without specific prior written permission.
+ *
+ *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ *  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ *  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ *  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ *  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ *  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ *  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ *  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ *  POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "ice_common.h"
+#include "ice_hw_autogen.h"
+#include "ice_vf_mbx.h"
+
+/**
+ * ice_aq_send_msg_to_vf - post a virtchnl message to a VF
+ * @hw: pointer to the hardware structure
+ * @vfid: VF ID to send msg
+ * @v_opcode: opcodes for VF-PF communication (stored in cookie_high)
+ * @v_retval: return error code (stored in cookie_low)
+ * @msg: pointer to the msg buffer
+ * @msglen: msg length
+ * @cd: pointer to command details
+ *
+ * Send message to VF driver (0x0802) on the mailbox queue through
+ * ice_sq_send_cmd(), whose status is returned. ICE_AQ_FLAG_RD is set
+ * on the descriptor only when msglen is non-zero.
+ */
+int
+ice_aq_send_msg_to_vf(struct ice_hw *hw, u16 vfid, u32 v_opcode, u32 v_retval,
+		      u8 *msg, u16 msglen, struct ice_sq_cd *cd)
+{
+	struct ice_aq_desc mbx_desc;
+	struct ice_aqc_pf_vf_msg *vf_msg = &mbx_desc.params.virt;
+
+	ice_fill_dflt_direct_cmd_desc(&mbx_desc, ice_mbx_opc_send_msg_to_vf);
+
+	/* Destination VF plus the virtchnl opcode/return value pair */
+	vf_msg->id = CPU_TO_LE32(vfid);
+	mbx_desc.cookie_high = CPU_TO_LE32(v_opcode);
+	mbx_desc.cookie_low = CPU_TO_LE32(v_retval);
+
+	/* Flag the descriptor when a payload accompanies it */
+	if (msglen != 0)
+		mbx_desc.flags |= CPU_TO_LE16(ICE_AQ_FLAG_RD);
+
+	return ice_sq_send_cmd(hw, &hw->mailboxq, &mbx_desc, msg, msglen, cd);
+}
+
+/**
+ * ice_aq_send_msg_to_pf - post a virtchnl message to the PF
+ * @hw: pointer to the hardware structure
+ * @v_opcode: opcodes for VF-PF communication (stored in cookie_high)
+ * @v_retval: return error code (stored in cookie_low)
+ * @msg: pointer to the msg buffer
+ * @msglen: msg length
+ * @cd: pointer to command details
+ *
+ * Submit a message for the PF driver on the mailbox queue. By default the
+ * send is asynchronous, i.e. ice_sq_send_cmd() does not wait for completion
+ * before returning; its status is handed back to the caller.
+ */
+int
+ice_aq_send_msg_to_pf(struct ice_hw *hw, enum virtchnl_ops v_opcode,
+		      int v_retval, u8 *msg, u16 msglen,
+		      struct ice_sq_cd *cd)
+{
+	struct ice_aq_desc mbx_desc;
+
+	ice_fill_dflt_direct_cmd_desc(&mbx_desc, ice_mbx_opc_send_msg_to_pf);
+	if (msglen != 0)
+		mbx_desc.flags |= CPU_TO_LE16(ICE_AQ_FLAG_RD);
+
+	mbx_desc.cookie_high = CPU_TO_LE32(v_opcode);
+	mbx_desc.cookie_low = CPU_TO_LE32(v_retval);
+
+	return ice_sq_send_cmd(hw, &hw->mailboxq, &mbx_desc, msg, msglen, cd);
+}
+
+static const u32 ice_legacy_aq_to_vc_speed[] = {
+	VIRTCHNL_LINK_SPEED_100MB,	/* BIT(0) */
+	VIRTCHNL_LINK_SPEED_100MB,	/* BIT(1) */
+	VIRTCHNL_LINK_SPEED_1GB,	/* BIT(2) */
+	VIRTCHNL_LINK_SPEED_1GB,	/* BIT(3) */
+	VIRTCHNL_LINK_SPEED_1GB,	/* BIT(4) */
+	VIRTCHNL_LINK_SPEED_10GB,	/* BIT(5) */
+	VIRTCHNL_LINK_SPEED_20GB,	/* BIT(6) */
+	VIRTCHNL_LINK_SPEED_25GB,	/* BIT(7) */
+	VIRTCHNL_LINK_SPEED_40GB,	/* BIT(8) */
+	VIRTCHNL_LINK_SPEED_40GB,	/* BIT(9) */
+	VIRTCHNL_LINK_SPEED_40GB,	/* BIT(10) */
+};
+
+/**
+ * ice_conv_link_speed_to_virtchnl - convert a HW link speed for virtchnl
+ * @adv_link_support: determines the format of the returned link speed
+ * @link_speed: variable containing the link_speed to be converted
+ *
+ * Convert link speed supported by HW to link speed supported by virtchnl.
+ * With adv_link_support set, the result is what ice_get_link_speed() yields
+ * for the speed's bit index (the speed in Mbps). Otherwise the result is a
+ * VIRTCHNL_LINK_SPEED_* value returned as a u32, which the caller needs to
+ * cast back to an enum virtchnl_link_speed; VIRTCHNL_LINK_SPEED_UNKNOWN is
+ * returned when the bit index lies outside ice_legacy_aq_to_vc_speed.
+ */
+u32 ice_conv_link_speed_to_virtchnl(bool adv_link_support, u16 link_speed)
+{
+	/* convert a BIT() value into an array index */
+	u16 speed_idx = (u16)(ice_fls(link_speed) - 1);
+
+	if (adv_link_support)
+		return ice_get_link_speed(speed_idx);
+
+	/* Virtchnl speeds are not defined for every speed supported in the
+	 * hardware. To stay compatible with older AVF drivers, newer speeds
+	 * are resolved to the closest known virtchnl speed via the table.
+	 */
+	if (speed_idx >= ARRAY_SIZE(ice_legacy_aq_to_vc_speed))
+		return VIRTCHNL_LINK_SPEED_UNKNOWN;
+
+	return ice_legacy_aq_to_vc_speed[speed_idx];
+}
+
+/* The mailbox overflow detection algorithm helps to check if there
+ * is a possibility of a malicious VF transmitting too many MBX messages to the
+ * PF.
+ * 1. The mailbox snapshot structure, ice_mbx_snapshot, is initialized during
+ * driver initialization in ice_init_hw() using ice_mbx_init_snapshot().
+ * The struct ice_mbx_snapshot helps to track and traverse a static window of
+ * messages within the mailbox queue while looking for a malicious VF.
+ *
+ * 2. When the caller starts processing its mailbox queue in response to an
+ * interrupt, the structure ice_mbx_snapshot is expected to be cleared before
+ * the algorithm can be run for the first time for that interrupt. This
+ * requires calling ice_mbx_reset_snapshot() as well as calling
+ * ice_mbx_reset_vf_info() for each VF tracking structure.
+ *
+ * 3. For every message read by the caller from the MBX Queue, the caller must
+ * call the detection algorithm's entry function ice_mbx_vf_state_handler().
+ * Before every call to ice_mbx_vf_state_handler() the struct ice_mbx_data is
+ * filled as it is required to be passed to the algorithm.
+ *
+ * 4. Every time a message is read from the MBX queue, a tracking structure
+ * for the VF must be passed to the state handler. The boolean output
+ * report_malvf from ice_mbx_vf_state_handler() serves as an indicator to the
+ * caller whether it must report this VF as malicious or not.
+ *
+ * 5. When a VF is identified to be malicious, the caller can send a message
+ * to the system administrator.
+ *
+ * 6. The PF is responsible for maintaining the struct ice_mbx_vf_info
+ * structure for each VF. The PF should clear the VF tracking structure if the
+ * VF is reset. When a VF is shut down and brought back up, we will then
+ * assume that the new VF is not malicious and may report it again if we
+ * detect it again.
+ *
+ * 7. The function ice_mbx_reset_snapshot() is called to reset the information
+ * in ice_mbx_snapshot for every new mailbox interrupt handled.
+ */
+#define ICE_RQ_DATA_MASK(rq_data) ((rq_data) & PF_MBX_ARQH_ARQH_M)
+/* Using the highest value for an unsigned 16-bit value 0xFFFF to indicate that
+ * the max messages check must be ignored in the algorithm
+ */
+#define ICE_IGNORE_MAX_MSG_CNT	0xFFFF
+
+/**
+ * ice_mbx_reset_snapshot - Reset mailbox snapshot for a new capture
+ * @snap: pointer to the mailbox snapshot
+ */
+static void ice_mbx_reset_snapshot(struct ice_mbx_snapshot *snap)
+{
+	struct ice_mbx_vf_info *entry;
+
+	/* Every tracked VF starts the new capture with a zero message count */
+	LIST_FOR_EACH_ENTRY(entry, &snap->mbx_vf, ice_mbx_vf_info, list_entry)
+		entry->msg_count = 0;
+
+	/* Wipe mbx_buf in the mailbox snapshot structure, then mark the
+	 * snapshot state as waiting for a new capture.
+	 */
+	ice_memset(&snap->mbx_buf, 0, sizeof(snap->mbx_buf), ICE_NONDMA_MEM);
+	snap->mbx_buf.state = ICE_MAL_VF_DETECT_STATE_NEW_SNAPSHOT;
+}
+
+/**
+ * ice_mbx_traverse - Pass through mailbox snapshot
+ * @hw: pointer to the HW struct
+ * @new_state: in/out algorithm state; only written when the snapshot ends
+ *
+ * Advance one step through the static mailbox snapshot without checking for
+ * malicious VFs, setting @new_state to NEW_SNAPSHOT once it is exhausted.
+ */
+static void
+ice_mbx_traverse(struct ice_hw *hw,
+		 enum ice_mbx_snapshot_state *new_state)
+{
+	struct ice_mbx_snap_buffer_data *snap_buf;
+	u32 num_iterations;
+
+	snap_buf = &hw->mbx_snapshot.mbx_buf;
+
+	/* As mailbox buffer is circular, applying a mask
+	 * on the incremented iteration count.
+	 */
+	num_iterations = ICE_RQ_DATA_MASK(++snap_buf->num_iterations);
+
+	/* Checking either of the below conditions to exit snapshot traversal:
+	 * Condition-1: If the number of iterations in the mailbox is equal to
+	 * the mailbox head which would indicate that we have reached the end
+	 * of the static snapshot.
+	 * Condition-2: If the maximum messages serviced in the mailbox for a
+	 * given interrupt is not below ICE_IGNORE_MAX_MSG_CNT the count check
+	 * is skipped. Otherwise num_msg_proc is incremented and compared with
+	 * that maximum. Because of short-circuit evaluation the increment only
+	 * happens when Condition-1 is false.
+	 */
+	if (num_iterations == snap_buf->head ||
+	    (snap_buf->max_num_msgs_mbx < ICE_IGNORE_MAX_MSG_CNT &&
+	     ++snap_buf->num_msg_proc >= snap_buf->max_num_msgs_mbx))
+		*new_state = ICE_MAL_VF_DETECT_STATE_NEW_SNAPSHOT;
+}
+
+/**
+ * ice_mbx_detect_malvf - Detect malicious VF in snapshot
+ * @hw: pointer to the HW struct
+ * @vf_info: mailbox tracking structure for a VF
+ * @new_state: new algorithm state
+ * @is_malvf: boolean output to indicate if VF is malicious
+ *
+ * Charges one more asynchronous message to the VF and sets @is_malvf when
+ * the running count reaches ICE_ASYNC_VF_MSG_THRESHOLD (it is never cleared
+ * here), then steps the snapshot traversal. Always returns 0.
+ */
+static int
+ice_mbx_detect_malvf(struct ice_hw *hw, struct ice_mbx_vf_info *vf_info,
+		     enum ice_mbx_snapshot_state *new_state,
+		     bool *is_malvf)
+{
+	/* Count this message against the VF, then test the new total
+	 * against the permitted number of asynchronous messages.
+	 */
+	if (++vf_info->msg_count >= ICE_ASYNC_VF_MSG_THRESHOLD)
+		*is_malvf = true;
+
+	/* continue to iterate through the mailbox snapshot */
+	ice_mbx_traverse(hw, new_state);
+
+	return 0;
+}
+
+/**
+ * ice_e830_mbx_vf_dec_trig - Decrements the VF mailbox queue counter
+ * @hw: pointer to the HW struct
+ * @event: pointer to the control queue receive event
+ *
+ * Triggers a decrement of MBX_VF_IN_FLIGHT_MSGS_AT_PF_CNT for the VF read
+ * from the event's retval, when the driver replenishes PF mailbox buffers.
+ */
+void
+ice_e830_mbx_vf_dec_trig(struct ice_hw *hw, struct ice_rq_event_info *event)
+{
+	u16 vf_idx;
+
+	vf_idx = LE16_TO_CPU(event->desc.retval);
+	wr32(hw, E830_MBX_VF_DEC_TRIG(vf_idx), 1);
+}
+
+/**
+ * ice_mbx_vf_clear_cnt_e830 - Clear the VF mailbox queue count
+ * @hw: pointer to the HW struct
+ * @vf_id: VF ID in the PF space
+ *
+ * Clears MBX_VF_IN_FLIGHT_MSGS_AT_PF_CNT; call on VF creation and VF reset.
+ */
+void ice_mbx_vf_clear_cnt_e830(struct ice_hw *hw, u16 vf_id)
+{
+	u32 in_flight;
+
+	in_flight = rd32(hw, E830_MBX_VF_IN_FLIGHT_MSGS_AT_PF_CNT(vf_id));
+	wr32(hw, E830_MBX_VF_DEC_TRIG(vf_id), in_flight);
+}
+
+/**
+ * ice_mbx_vf_state_handler - Handle states of the overflow algorithm
+ * @hw: pointer to the HW struct
+ * @mbx_data: pointer to structure containing mailbox data
+ * @vf_info: mailbox tracking structure for the VF in question
+ * @report_malvf: boolean output to indicate whether VF should be reported
+ *
+ * Entry point for the malicious VF detection algorithm; handles the states
+ * and state transitions of the algorithm:
+ * New snapshot: entered when creating a new static snapshot. Data from any
+ * previous snapshot is cleared and a new capture of the mailbox head and
+ * tail is logged. Depending on whether the number of pending messages in
+ * that snapshot reaches the watermark value, the state machine enters the
+ * detect or traverse state.
+ * Traverse: pending message count is below the watermark; iterate through
+ * the snapshot without any action on the VF.
+ * Detect: pending message count reached the watermark; traverse the static
+ * snapshot and look for a malicious VF.
+ *
+ * Return: ICE_ERR_BAD_PTR if mbx_data, vf_info or report_malvf is NULL,
+ * ICE_ERR_INVAL_SIZE or ICE_ERR_PARAM if the mbx_data limits fail validation,
+ * ICE_ERR_CFG if the snapshot state is not recognized, otherwise 0.
+ */
+int
+ice_mbx_vf_state_handler(struct ice_hw *hw, struct ice_mbx_data *mbx_data,
+			 struct ice_mbx_vf_info *vf_info, bool *report_malvf)
+{
+	struct ice_mbx_snapshot *snap = &hw->mbx_snapshot;
+	struct ice_mbx_snap_buffer_data *snap_buf;
+	struct ice_ctl_q_info *cq = &hw->mailboxq;
+	enum ice_mbx_snapshot_state new_state;
+	int status = 0;
+	bool is_malvf = false;
+
+	if (!report_malvf || !mbx_data || !vf_info)
+		return ICE_ERR_BAD_PTR;
+
+	/* When entering the mailbox state machine assume that the VF
+	 * is not malicious until detected.
+	 */
+	*report_malvf = false;
+
+	/* The maximum number of messages serviced for the current interrupt
+	 * must be strictly greater than the defined AVF message threshold.
+	 */
+	if (mbx_data->max_num_msgs_mbx <= ICE_ASYNC_VF_MSG_THRESHOLD)
+		return ICE_ERR_INVAL_SIZE;
+
+	/* The watermark value should not be lesser than the threshold limit
+	 * set for the number of asynchronous messages a VF can send to mailbox
+	 * nor should it be greater than the maximum number of messages in the
+	 * mailbox serviced in current interrupt.
+	 */
+	if (mbx_data->async_watermark_val < ICE_ASYNC_VF_MSG_THRESHOLD ||
+	    mbx_data->async_watermark_val > mbx_data->max_num_msgs_mbx)
+		return ICE_ERR_PARAM;
+
+	new_state = ICE_MAL_VF_DETECT_STATE_INVALID;
+	snap_buf = &snap->mbx_buf;
+
+	switch (snap_buf->state) {
+	case ICE_MAL_VF_DETECT_STATE_NEW_SNAPSHOT:
+		/* Clear any previously held data in mailbox snapshot structure. */
+		ice_mbx_reset_snapshot(snap);
+
+		/* Collect the pending ARQ count, number of messages processed and
+		 * the maximum number of messages allowed to be processed from the
+		 * Mailbox for current interrupt.
+		 */
+		snap_buf->num_pending_arq = mbx_data->num_pending_arq;
+		snap_buf->num_msg_proc = mbx_data->num_msg_proc;
+		snap_buf->max_num_msgs_mbx = mbx_data->max_num_msgs_mbx;
+
+		/* Capture a new static snapshot of the mailbox by logging the
+		 * head and tail of snapshot and set num_iterations to the tail
+		 * value to mark the start of the iteration through the snapshot.
+		 */
+		snap_buf->head = ICE_RQ_DATA_MASK(cq->rq.next_to_clean +
+						  mbx_data->num_pending_arq);
+		snap_buf->tail = ICE_RQ_DATA_MASK(cq->rq.next_to_clean - 1);
+		snap_buf->num_iterations = snap_buf->tail;
+
+		/* Pending ARQ messages returned by ice_clean_rq_elem
+		 * is the difference between the head and tail of the
+		 * mailbox queue. Comparing this value against the watermark
+		 * helps to check if we potentially have malicious VFs.
+		 */
+		if (snap_buf->num_pending_arq >=
+		    mbx_data->async_watermark_val) {
+			new_state = ICE_MAL_VF_DETECT_STATE_DETECT;
+			status = ice_mbx_detect_malvf(hw, vf_info, &new_state, &is_malvf);
+		} else {
+			new_state = ICE_MAL_VF_DETECT_STATE_TRAVERSE;
+			ice_mbx_traverse(hw, &new_state);
+		}
+		break;
+
+	case ICE_MAL_VF_DETECT_STATE_TRAVERSE:
+		new_state = ICE_MAL_VF_DETECT_STATE_TRAVERSE;
+		ice_mbx_traverse(hw, &new_state);
+		break;
+
+	case ICE_MAL_VF_DETECT_STATE_DETECT:
+		new_state = ICE_MAL_VF_DETECT_STATE_DETECT;
+		status = ice_mbx_detect_malvf(hw, vf_info, &new_state, &is_malvf);
+		break;
+
+	default:
+		new_state = ICE_MAL_VF_DETECT_STATE_INVALID;
+		status = ICE_ERR_CFG;
+	}
+
+	snap_buf->state = new_state;
+
+	/* Only report VFs as malicious the first time we detect it */
+	if (is_malvf && !vf_info->malicious) {
+		vf_info->malicious = 1;
+		*report_malvf = true;
+	}
+
+	return status;
+}
+
+/**
+ * ice_mbx_clear_malvf - Clear VF mailbox info
+ * @vf_info: the mailbox tracking structure for a VF
+ *
+ * Forgets the VF's message count and its malicious marking. This shall be
+ * called in case of a VF reset.
+ */
+void ice_mbx_clear_malvf(struct ice_mbx_vf_info *vf_info)
+{
+	vf_info->msg_count = 0;
+	vf_info->malicious = 0;
+}
+
+/**
+ * ice_mbx_init_vf_info - Initialize a new VF mailbox tracking info
+ * @hw: pointer to the hardware structure
+ * @vf_info: the mailbox tracking info structure for a VF
+ *
+ * Clears the VF's tracking state and links the structure into the mailbox
+ * snapshot's VF list.
+ *
+ * If you remove the VF, you must also delete the associated VF info structure
+ * from the linked list.
+ */
+void ice_mbx_init_vf_info(struct ice_hw *hw, struct ice_mbx_vf_info *vf_info)
+{
+	/* Start from a clean, non-malicious state */
+	ice_mbx_clear_malvf(vf_info);
+	/* Make the VF visible to snapshot resets */
+	LIST_ADD(&vf_info->list_entry, &hw->mbx_snapshot.mbx_vf);
+}
+
+/**
+ * ice_mbx_init_snapshot - Initialize mailbox snapshot data
+ * @hw: pointer to the hardware structure
+ *
+ * Set up the VF mailbox list, then reset the mailbox snapshot structure.
+ */
+void ice_mbx_init_snapshot(struct ice_hw *hw)
+{
+	/* The list head must be valid before the reset walks the list */
+	INIT_LIST_HEAD(&hw->mbx_snapshot.mbx_vf);
+	/* Put the snapshot buffer into the new-capture state */
+	ice_mbx_reset_snapshot(&hw->mbx_snapshot);
+}
diff --git a/sys/dev/ice/ice_vf_mbx.h b/sys/dev/ice/ice_vf_mbx.h
new file mode 100644
index 000000000000..3b185ac89c11
--- /dev/null
+++ b/sys/dev/ice/ice_vf_mbx.h
@@ -0,0 +1,67 @@
+/* SPDX-License-Identifier: BSD-3-Clause */
+/*  Copyright (c) 2025, Intel Corporation
+ *  All rights reserved.
+ *
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions are met:
+ *
+ *   1. Redistributions of source code must retain the above copyright notice,
+ *      this list of conditions and the following disclaimer.
+ *
+ *   2. Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in the
+ *      documentation and/or other materials provided with the distribution.
+ *
+ *   3. Neither the name of the Intel Corporation nor the names of its
+ *      contributors may be used to endorse or promote products derived from
+ *      this software without specific prior written permission.
+ *
+ *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ *  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ *  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ *  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ *  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ *  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ *  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ *  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ *  POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _ICE_VF_MBX_H_
+#define _ICE_VF_MBX_H_
+
+#include "ice_type.h"
+#include "ice_controlq.h"
+
+/* Defining the mailbox message threshold as 63 asynchronous
+ * pending messages. Normal VF functionality does not require
+ * sending more than 63 asynchronous pending messages.
+ */
+
+/* Threshold value should be used to initialize
+ * MBX_VF_IN_FLIGHT_MSGS_AT_PF_CNT register.
+ */
+#define ICE_ASYNC_VF_MSG_THRESHOLD	63
+
+int
+ice_aq_send_msg_to_pf(struct ice_hw *hw, enum virtchnl_ops v_opcode,
+		      int v_retval, u8 *msg, u16 msglen,
+		      struct ice_sq_cd *cd);
+int
+ice_aq_send_msg_to_vf(struct ice_hw *hw, u16 vfid, u32 v_opcode, u32 v_retval,
+		      u8 *msg, u16 msglen, struct ice_sq_cd *cd);
+
+u32 ice_conv_link_speed_to_virtchnl(bool adv_link_support, u16 link_speed);
+
+void ice_e830_mbx_vf_dec_trig(struct ice_hw *hw,
+			      struct ice_rq_event_info *event);
+void ice_mbx_vf_clear_cnt_e830(struct ice_hw *hw, u16 vf_id);
+int
+ice_mbx_vf_state_handler(struct ice_hw *hw, struct ice_mbx_data *mbx_data,
+			 struct ice_mbx_vf_info *vf_info, bool *report_malvf);
+void ice_mbx_clear_malvf(struct ice_mbx_vf_info *vf_info);
+void ice_mbx_init_vf_info(struct ice_hw *hw, struct ice_mbx_vf_info *vf_info);
+void ice_mbx_init_snapshot(struct ice_hw *hw);
+#endif /* _ICE_VF_MBX_H_ */
diff --git a/sys/dev/ice/if_ice_iflib.c b/sys/dev/ice/if_ice_iflib.c
index e60ee0f1c5c3..1469d2916465 100644
--- a/sys/dev/ice/if_ice_iflib.c
+++ b/sys/dev/ice/if_ice_iflib.c
@@ -42,6 +42,9 @@
 #include "ice_drv_info.h"
 #include "ice_switch.h"
 #include "ice_sched.h"
+#ifdef PCI_IOV
+#include "ice_iov.h"
+#endif
 
 #include <sys/module.h>
 #include <sys/sockio.h>
@@ -85,6 +88,12 @@ static int ice_if_suspend(if_ctx_t ctx);
 static int ice_if_resume(if_ctx_t ctx);
 static bool ice_if_needs_restart(if_ctx_t ctx, enum iflib_restart_event event);
 static void ice_init_link(struct ice_softc *sc);
+#ifdef PCI_IOV
+static int ice_if_iov_init(if_ctx_t ctx, uint16_t num_vfs, const nvlist_t *params);
+static void ice_if_iov_uninit(if_ctx_t ctx);
+static int ice_if_iov_vf_add(if_ctx_t ctx, uint16_t vfnum, const nvlist_t *params);
+static void ice_if_vflr_handle(if_ctx_t ctx);
+#endif
 static int ice_setup_mirror_vsi(struct ice_mirr_if *mif);
 static int ice_wire_mirror_intrs(struct ice_mirr_if *mif);
 static void ice_free_irqvs_subif(struct ice_mirr_if *mif);
@@ -158,6 +167,11 @@ static device_method_t ice_methods[] = {
 	DEVMETHOD(device_shutdown, iflib_device_shutdown),
 	DEVMETHOD(device_suspend,  iflib_device_suspend),
 	DEVMETHOD(device_resume,   iflib_device_resume),
+#ifdef PCI_IOV
+	DEVMETHOD(pci_iov_init, iflib_device_iov_init),
+	DEVMETHOD(pci_iov_uninit, iflib_device_iov_uninit),
+	DEVMETHOD(pci_iov_add_vf, iflib_device_iov_add_vf),
+#endif
 	DEVMETHOD_END
 };
 
@@ -198,6 +212,12 @@ static device_method_t ice_iflib_methods[] = {
 	DEVMETHOD(ifdi_suspend, ice_if_suspend),
 	DEVMETHOD(ifdi_resume, ice_if_resume),
 	DEVMETHOD(ifdi_needs_restart, ice_if_needs_restart),
+#ifdef PCI_IOV
+	DEVMETHOD(ifdi_iov_vf_add, ice_if_iov_vf_add),
+	DEVMETHOD(ifdi_iov_init, ice_if_iov_init),
+	DEVMETHOD(ifdi_iov_uninit, ice_if_iov_uninit),
+	DEVMETHOD(ifdi_vflr_handle, ice_if_vflr_handle),
+#endif
 	DEVMETHOD_END
 };
 
@@ -733,6 +753,9 @@ ice_update_link_status(struct ice_softc *sc, bool update_media)
 			iflib_link_state_change(sc->ctx, LINK_STATE_DOWN, 0);
 			ice_rdma_link_change(sc, LINK_STATE_DOWN, 0);
 		}
+#ifdef PCI_IOV
+		ice_vc_notify_all_vfs_link_state(sc);
+#endif
 		update_media = true;
 	}
 
@@ -831,6 +854,14 @@ ice_if_attach_post(if_ctx_t ctx)
 
 	ice_add_device_sysctls(sc);
 
+#ifdef PCI_IOV
+	if (ice_is_bit_set(sc->feat_cap, ICE_FEATURE_SRIOV)) {
+		err = ice_iov_attach(sc);
+		if (err == ENOMEM)
+			return (err);
+	}
+#endif /* PCI_IOV */
+
 	/* Get DCBX/LLDP state and start DCBX agent */
 	ice_init_dcb_setup(sc);
 
@@ -953,6 +984,11 @@ ice_if_detach(if_ctx_t ctx)
 		ice_destroy_mirror_interface(sc);
 	ice_rdma_pf_detach(sc);
 
+#ifdef PCI_IOV
+	if (ice_is_bit_set(sc->feat_cap, ICE_FEATURE_SRIOV))
+		ice_iov_detach(sc);
+#endif /* PCI_IOV */
+
 	/* Free allocated media types */
 	ifmedia_removeall(sc->media);
 
@@ -1676,6 +1712,11 @@ ice_if_msix_intr_assign(if_ctx_t ctx, int msix)
 	/* For future interrupt assignments */
 	sc->last_rid = rid + sc->irdma_vectors;
 
+#ifdef PCI_IOV
+	/* Create soft IRQ for handling VF resets */
+	iflib_softirq_alloc_generic(ctx, NULL, IFLIB_INTR_IOV, sc, 0, "iov");
+#endif
+
 	return (0);
 fail:
 	for (; i >= 0; i--, vector--)
@@ -2277,7 +2318,12 @@ ice_transition_recovery_mode(struct ice_softc *sc)
 	ice_rdma_pf_detach(sc);
 	ice_clear_bit(ICE_FEATURE_RDMA, sc->feat_cap);
 
+#ifdef PCI_IOV
+	if (ice_test_and_clear_bit(ICE_FEATURE_SRIOV, sc->feat_en))
+		 ice_iov_detach(sc);
+#else
 	ice_clear_bit(ICE_FEATURE_SRIOV, sc->feat_en);
+#endif /* PCI_IOV */
 	ice_clear_bit(ICE_FEATURE_SRIOV, sc->feat_cap);
 
 	ice_vsi_del_txqs_ctx(vsi);
@@ -2325,7 +2371,12 @@ ice_transition_safe_mode(struct ice_softc *sc)
 	ice_rdma_pf_detach(sc);
 	ice_clear_bit(ICE_FEATURE_RDMA, sc->feat_cap);
 
+#ifdef PCI_IOV
+	if (ice_test_and_clear_bit(ICE_FEATURE_SRIOV, sc->feat_en))
+		 ice_iov_detach(sc);
+#else
 	ice_clear_bit(ICE_FEATURE_SRIOV, sc->feat_en);
+#endif /* PCI_IOV */
 	ice_clear_bit(ICE_FEATURE_SRIOV, sc->feat_cap);
 
 	ice_clear_bit(ICE_FEATURE_RSS, sc->feat_cap);
@@ -2410,6 +2461,15 @@ ice_if_update_admin_status(if_ctx_t ctx)
 	/* Check and update link status */
 	ice_update_link_status(sc, false);
 
+#ifdef PCI_IOV
+	/*
+	 * Schedule VFs' reset handler after global resets
+	 * and other events were processed.
+	 */
+	if (ice_testandclear_state(&sc->state, ICE_STATE_VFLR_PENDING))
+		iflib_iov_intr_deferred(ctx);
+#endif
+
 	/*
 	 * If there are still messages to process, we need to reschedule
 	 * ourselves. Otherwise, we can just re-enable the interrupt. We'll be
@@ -3349,6 +3409,78 @@ ice_init_link(struct ice_softc *sc)
 
 }
 
+#ifdef PCI_IOV
+/**
+ * ice_if_iov_init - iov init handler for iflib
+ * @ctx: iflib context pointer
+ * @num_vfs: number of VFs to create
+ * @params: configuration parameters for the PF
+ *
+ * Configure the driver for SR-IOV mode. Used to set up things like memory
+ * before any VFs are created.
+ *
+ * @remark Wrapper for ice_iov_init(); its return value is passed through
+ */
+static int
+ice_if_iov_init(if_ctx_t ctx, uint16_t num_vfs, const nvlist_t *params)
+{
+	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
+
+	return (ice_iov_init(sc, num_vfs, params));
+}
+
+/**
+ * ice_if_iov_uninit - iov uninit handler for iflib
+ * @ctx: iflib context pointer
+ *
+ * Destroys VFs and frees their memory and resources.
+ *
+ * @remark Wrapper: looks up the softc from @ctx and calls ice_iov_uninit()
+ */
+static void
+ice_if_iov_uninit(if_ctx_t ctx)
+{
+	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
+
+	ice_iov_uninit(sc);
+}
+
+/**
+ * ice_if_iov_vf_add - iov add vf handler for iflib
+ * @ctx: iflib context pointer
+ * @vfnum: index of VF to configure
+ * @params: configuration parameters for the VF
+ *
+ * Sets up the VF given by the vfnum index. This is called by the OS
+ * for each VF created by the PF driver after it is spawned.
+ *
+ * @remark Wrapper for ice_iov_add_vf(); its return value is passed through
+ */
+static int
+ice_if_iov_vf_add(if_ctx_t ctx, uint16_t vfnum, const nvlist_t *params)
+{
+	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
+
+	return (ice_iov_add_vf(sc, vfnum, params));
+}
+
+/**
+ * ice_if_vflr_handle - iov VFLR handler
+ * @ctx: iflib context pointer
+ *
+ * Performs the necessary teardown or setup required for a VF after
+ * a VFLR is initiated.
+ *
+ * @remark This is a wrapper for ice_iov_handle_vflr
+ */
+static void
+ice_if_vflr_handle(if_ctx_t ctx)
+{
+	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
+	ice_iov_handle_vflr(sc);
+}
+#endif /* PCI_IOV */
+
 extern struct if_txrx ice_subif_txrx;
 
 /**
diff --git a/sys/dev/ichiic/ig4_pci.c b/sys/dev/ichiic/ig4_pci.c
index 0195466150eb..3a49e220e335 100644
--- a/sys/dev/ichiic/ig4_pci.c
+++ b/sys/dev/ichiic/ig4_pci.c
@@ -186,6 +186,12 @@ static int ig4iic_pci_detach(device_t dev);
 #define PCI_CHIP_METEORLAKE_M_I2C_3	0x7e518086
 #define PCI_CHIP_METEORLAKE_M_I2C_4	0x7e7a8086
 #define PCI_CHIP_METEORLAKE_M_I2C_5	0x7e7b8086
+#define PCI_CHIP_ARROWLAKE_U_I2C_0      0x77788086
+#define PCI_CHIP_ARROWLAKE_U_I2C_1      0x77798086
+#define PCI_CHIP_ARROWLAKE_U_I2C_2      0x777a8086
+#define PCI_CHIP_ARROWLAKE_U_I2C_3      0x777b8086
+#define PCI_CHIP_ARROWLAKE_U_I2C_4      0x77508086
+#define PCI_CHIP_ARROWLAKE_U_I2C_5      0x77518086
 
 struct ig4iic_pci_device {
 	uint32_t	devid;
@@ -316,6 +322,12 @@ static struct ig4iic_pci_device ig4iic_pci_devices[] = {
 	{ PCI_CHIP_METEORLAKE_M_I2C_3, "Intel Meteor Lake-M I2C Controller-3", IG4_TIGERLAKE},
 	{ PCI_CHIP_METEORLAKE_M_I2C_4, "Intel Meteor Lake-M I2C Controller-4", IG4_TIGERLAKE},
 	{ PCI_CHIP_METEORLAKE_M_I2C_5, "Intel Meteor Lake-M I2C Controller-5", IG4_TIGERLAKE},
+	{ PCI_CHIP_ARROWLAKE_U_I2C_0, "Intel Arrow Lake-H/U I2C Controller-0", IG4_TIGERLAKE},
+	{ PCI_CHIP_ARROWLAKE_U_I2C_1, "Intel Arrow Lake-H/U I2C Controller-1", IG4_TIGERLAKE},
+	{ PCI_CHIP_ARROWLAKE_U_I2C_2, "Intel Arrow Lake-H/U I2C Controller-2", IG4_TIGERLAKE},
+	{ PCI_CHIP_ARROWLAKE_U_I2C_3, "Intel Arrow Lake-H/U I2C Controller-3", IG4_TIGERLAKE},
+	{ PCI_CHIP_ARROWLAKE_U_I2C_4, "Intel Arrow Lake-H/U I2C Controller-4", IG4_TIGERLAKE},
+	{ PCI_CHIP_ARROWLAKE_U_I2C_5, "Intel Arrow Lake-H/U I2C Controller-5", IG4_TIGERLAKE},
 };
 
 static int
diff --git a/sys/dev/md/md.c b/sys/dev/md/md.c
index 741a7c013f7d..ec1664fac701 100644
--- a/sys/dev/md/md.c
+++ b/sys/dev/md/md.c
@@ -11,9 +11,9 @@
  */
 
 /*-
- * The following functions are based on the vn(4) driver: mdstart_swap(),
- * mdstart_vnode(), mdcreate_swap(), mdcreate_vnode() and mddestroy(),
- * and as such under the following copyright:
+ * The following functions are based on the historical vn(4) driver:
+ * mdstart_swap(), mdstart_vnode(), mdcreate_swap(), mdcreate_vnode()
+ * and mddestroy(), and as such under the following copyright:
  *
  * Copyright (c) 1988 University of Utah.
  * Copyright (c) 1990, 1993
@@ -89,6 +89,8 @@
 #include <sys/unistd.h>
 #include <sys/vnode.h>
 #include <sys/disk.h>
+#include <sys/param.h>
+#include <sys/bus.h>
 
 #include <geom/geom.h>
 #include <geom/geom_int.h>
@@ -2082,8 +2084,10 @@ g_md_init(struct g_class *mp __unused)
 {
 	caddr_t mod;
 	u_char *ptr, *name, *type;
+	u_char scratch[40];
 	unsigned len;
 	int i;
+	vm_offset_t paddr;
 
 	/* figure out log2(NINDIR) */
 	for (i = NINDIR, nshift = -1; i; nshift++)
@@ -2123,6 +2127,25 @@ g_md_init(struct g_class *mp __unused)
 			sx_xunlock(&md_sx);
 		}
 	}
+
+	/*
+	 * Load up to 32 pre-loaded disks
+	 */
+	for (int i = 0; i < 32; i++) {
+		if (resource_long_value("md", i, "physaddr",
+			(long *) &paddr) != 0 ||
+		    resource_int_value("md", i, "len", &len) != 0)
+		        break;
+		ptr = (char *)pmap_map(NULL, paddr, paddr + len, VM_PROT_READ);
+		if (ptr != NULL && len != 0) {
+			sprintf(scratch, "preload%d 0x%016jx", i,
+			    (uintmax_t)paddr);
+			sx_xlock(&md_sx);
+			md_preloaded(ptr, len, scratch);
+			sx_xunlock(&md_sx);
+		}
+	}
+
 	status_dev = make_dev(&mdctl_cdevsw, INT_MAX, UID_ROOT, GID_WHEEL,
 	    0600, MDCTL_NAME);
 	g_topology_lock();
diff --git a/sys/dev/mgb/if_mgb.c b/sys/dev/mgb/if_mgb.c
index 1240d0f84415..409f34167df0 100644
--- a/sys/dev/mgb/if_mgb.c
+++ b/sys/dev/mgb/if_mgb.c
@@ -1435,7 +1435,7 @@ mgb_hw_teardown(struct mgb_softc *sc)
 
 	/* Stop MAC */
 	CSR_CLEAR_REG(sc, MGB_MAC_RX, MGB_MAC_ENBL);
-	CSR_WRITE_REG(sc, MGB_MAC_TX, MGB_MAC_ENBL);
+	CSR_CLEAR_REG(sc, MGB_MAC_TX, MGB_MAC_ENBL);
 	if ((err = mgb_wait_for_bits(sc, MGB_MAC_RX, MGB_MAC_DSBL, 0)))
 		return (err);
 	if ((err = mgb_wait_for_bits(sc, MGB_MAC_TX, MGB_MAC_DSBL, 0)))
diff --git a/sys/dev/mlx5/mlx5_accel/ipsec.h b/sys/dev/mlx5/mlx5_accel/ipsec.h
index 361b9f72d873..c3f3a2372482 100644
--- a/sys/dev/mlx5/mlx5_accel/ipsec.h
+++ b/sys/dev/mlx5/mlx5_accel/ipsec.h
@@ -260,8 +260,8 @@ int mlx5e_accel_ipsec_fs_rx_tables_create(struct mlx5e_priv *priv);
 void mlx5e_accel_ipsec_fs_rx_catchall_rules_destroy(struct mlx5e_priv *priv);
 int mlx5e_accel_ipsec_fs_rx_catchall_rules(struct mlx5e_priv *priv);
 int mlx5_accel_ipsec_rx_tag_add(if_t ifp, struct mlx5e_rq_mbuf *mr);
-void mlx5e_accel_ipsec_handle_rx_cqe(struct mbuf *mb, struct mlx5_cqe64 *cqe,
-    struct mlx5e_rq_mbuf *mr);
+void mlx5e_accel_ipsec_handle_rx_cqe(if_t ifp, struct mbuf *mb,
+    struct mlx5_cqe64 *cqe, struct mlx5e_rq_mbuf *mr);
 
 static inline int mlx5e_accel_ipsec_flow(struct mlx5_cqe64 *cqe)
 {
@@ -269,12 +269,12 @@ static inline int mlx5e_accel_ipsec_flow(struct mlx5_cqe64 *cqe)
 }
 
 static inline void
-mlx5e_accel_ipsec_handle_rx(struct mbuf *mb, struct mlx5_cqe64 *cqe,
+mlx5e_accel_ipsec_handle_rx(if_t ifp, struct mbuf *mb, struct mlx5_cqe64 *cqe,
     struct mlx5e_rq_mbuf *mr)
 {
 	u32 ipsec_meta_data = be32_to_cpu(cqe->ft_metadata);
 
 	if (MLX5_IPSEC_METADATA_MARKER(ipsec_meta_data))
-		mlx5e_accel_ipsec_handle_rx_cqe(mb, cqe, mr);
+		mlx5e_accel_ipsec_handle_rx_cqe(ifp, mb, cqe, mr);
 }
 #endif	/* __MLX5_ACCEL_IPSEC_H__ */
diff --git a/sys/dev/mlx5/mlx5_accel/mlx5_ipsec_rxtx.c b/sys/dev/mlx5/mlx5_accel/mlx5_ipsec_rxtx.c
index 0883cfb2d510..5dccb8bc2b87 100644
--- a/sys/dev/mlx5/mlx5_accel/mlx5_ipsec_rxtx.c
+++ b/sys/dev/mlx5/mlx5_accel/mlx5_ipsec_rxtx.c
@@ -24,11 +24,14 @@
  *
  */
 
+#include "opt_ipsec.h"
+
 #include <sys/mbuf.h>
 #include <sys/socket.h>
 #include <netinet/in.h>
 #include <netipsec/keydb.h>
 #include <netipsec/ipsec_offload.h>
+#include <netipsec/xform.h>
 #include <dev/mlx5/qp.h>
 #include <dev/mlx5/mlx5_en/en.h>
 #include <dev/mlx5/mlx5_accel/ipsec.h>
@@ -48,7 +51,8 @@ mlx5_accel_ipsec_rx_tag_add(if_t ifp, struct mlx5e_rq_mbuf *mr)
 		return (0);
 
 	mtag = (struct ipsec_accel_in_tag *)m_tag_get(
-	    PACKET_TAG_IPSEC_ACCEL_IN, sizeof(*mtag), M_NOWAIT);
+	    PACKET_TAG_IPSEC_ACCEL_IN, sizeof(struct ipsec_accel_in_tag) -
+	    __offsetof(struct ipsec_accel_in_tag, xh), M_NOWAIT);
 	if (mtag == NULL)
 		return (-ENOMEM);
 	mr->ipsec_mtag = mtag;
@@ -56,8 +60,8 @@ mlx5_accel_ipsec_rx_tag_add(if_t ifp, struct mlx5e_rq_mbuf *mr)
 }
 
 void
-mlx5e_accel_ipsec_handle_rx_cqe(struct mbuf *mb, struct mlx5_cqe64 *cqe,
-    struct mlx5e_rq_mbuf *mr)
+mlx5e_accel_ipsec_handle_rx_cqe(if_t ifp, struct mbuf *mb,
+    struct mlx5_cqe64 *cqe, struct mlx5e_rq_mbuf *mr)
 {
 	struct ipsec_accel_in_tag *mtag;
 	u32 drv_spi;
@@ -65,10 +69,12 @@ mlx5e_accel_ipsec_handle_rx_cqe(struct mbuf *mb, struct mlx5_cqe64 *cqe,
 	drv_spi = MLX5_IPSEC_METADATA_HANDLE(be32_to_cpu(cqe->ft_metadata));
 	mtag = mr->ipsec_mtag;
 	WARN_ON(mtag == NULL);
-	mr->ipsec_mtag = NULL;
 	if (mtag != NULL) {
 		mtag->drv_spi = drv_spi;
-		m_tag_prepend(mb, &mtag->tag);
+		if (ipsec_accel_fill_xh(ifp, drv_spi, &mtag->xh)) {
+			m_tag_prepend(mb, &mtag->tag);
+			mr->ipsec_mtag = NULL;
+		}
 	}
 }
 
diff --git a/sys/dev/mlx5/mlx5_en/mlx5_en_hw_tls_rx.c b/sys/dev/mlx5/mlx5_en/mlx5_en_hw_tls_rx.c
index 8b8f2e570245..89d2010656c5 100644
--- a/sys/dev/mlx5/mlx5_en/mlx5_en_hw_tls_rx.c
+++ b/sys/dev/mlx5/mlx5_en/mlx5_en_hw_tls_rx.c
@@ -42,13 +42,30 @@
 
 static if_snd_tag_free_t mlx5e_tls_rx_snd_tag_free;
 static if_snd_tag_modify_t mlx5e_tls_rx_snd_tag_modify;
+static if_snd_tag_status_str_t mlx5e_tls_rx_snd_tag_status_str;
 
 static const struct if_snd_tag_sw mlx5e_tls_rx_snd_tag_sw = {
 	.snd_tag_modify = mlx5e_tls_rx_snd_tag_modify,
 	.snd_tag_free = mlx5e_tls_rx_snd_tag_free,
+	.snd_tag_status_str = mlx5e_tls_rx_snd_tag_status_str,
 	.type = IF_SND_TAG_TYPE_TLS_RX
 };
 
+static const char *mlx5e_tls_rx_progress_params_auth_state_str[] = {
+	[MLX5E_TLS_RX_PROGRESS_PARAMS_AUTH_STATE_NO_OFFLOAD] = "no_offload",
+	[MLX5E_TLS_RX_PROGRESS_PARAMS_AUTH_STATE_OFFLOAD] = "offload",
+	[MLX5E_TLS_RX_PROGRESS_PARAMS_AUTH_STATE_AUTHENTICATION] =
+	    "authentication",
+};
+
+static const char *mlx5e_tls_rx_progress_params_record_tracker_state_str[] = {
+	[MLX5E_TLS_RX_PROGRESS_PARAMS_RECORD_TRACKER_STATE_START] = "start",
+	[MLX5E_TLS_RX_PROGRESS_PARAMS_RECORD_TRACKER_STATE_TRACKING] =
+	    "tracking",
+	[MLX5E_TLS_RX_PROGRESS_PARAMS_RECORD_TRACKER_STATE_SEARCHING] =
+	    "searching",
+};
+
 MALLOC_DEFINE(M_MLX5E_TLS_RX, "MLX5E_TLS_RX", "MLX5 ethernet HW TLS RX");
 
 /* software TLS RX context */
@@ -250,7 +267,8 @@ mlx5e_tls_rx_send_progress_parameters_sync(struct mlx5e_iq *iq,
 	mtx_unlock(&iq->lock);
 
 	while (1) {
-		if (wait_for_completion_timeout(&ptag->progress_complete, hz) != 0)
+		if (wait_for_completion_timeout(&ptag->progress_complete,
+		    msecs_to_jiffies(1000)) != 0)
 			break;
 		priv = container_of(iq, struct mlx5e_channel, iq)->priv;
 		if (priv->mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR ||
@@ -331,7 +349,8 @@ done:
  * Zero is returned upon success, else some error happened.
  */
 static int
-mlx5e_tls_rx_receive_progress_parameters(struct mlx5e_iq *iq, struct mlx5e_tls_rx_tag *ptag)
+mlx5e_tls_rx_receive_progress_parameters(struct mlx5e_iq *iq,
+    struct mlx5e_tls_rx_tag *ptag, mlx5e_iq_callback_t *cb)
 {
 	struct mlx5e_get_tls_progress_params_wqe *wqe;
 	const u32 ds_cnt = DIV_ROUND_UP(sizeof(*wqe), MLX5_SEND_WQE_DS);
@@ -367,7 +386,7 @@ mlx5e_tls_rx_receive_progress_parameters(struct mlx5e_iq *iq, struct mlx5e_tls_r
 	memcpy(iq->doorbell.d32, &wqe->ctrl, sizeof(iq->doorbell.d32));
 
 	iq->data[pi].num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
-	iq->data[pi].callback = &mlx5e_tls_rx_receive_progress_parameters_cb;
+	iq->data[pi].callback = cb;
 	iq->data[pi].arg = ptag;
 
 	m_snd_tag_ref(&ptag->tag);
@@ -640,7 +659,8 @@ mlx5e_tls_rx_set_params(void *ctx, struct inpcb *inp, const struct tls_session_p
 		return (EINVAL);
 
 	MLX5_SET64(sw_tls_rx_cntx, ctx, param.initial_record_number, tls_sn_he);
-	MLX5_SET(sw_tls_rx_cntx, ctx, param.resync_tcp_sn, tcp_sn_he);
+	MLX5_SET(sw_tls_rx_cntx, ctx, param.resync_tcp_sn, 0);
+	MLX5_SET(sw_tls_rx_cntx, ctx, progress.next_record_tcp_sn, tcp_sn_he);
 
 	return (0);
 }
@@ -819,6 +839,7 @@ mlx5e_tls_rx_snd_tag_alloc(if_t ifp,
 	}
 
 	ptag->flow_rule = flow_rule;
+	init_completion(&ptag->progress_complete);
 
 	return (0);
 
@@ -968,7 +989,8 @@ mlx5e_tls_rx_snd_tag_modify(struct m_snd_tag *pmt, union if_snd_tag_modify_param
 	    params->tls_rx.tls_rec_length,
 	    params->tls_rx.tls_seq_number) &&
 	    ptag->tcp_resync_pending == 0) {
-		err = mlx5e_tls_rx_receive_progress_parameters(iq, ptag);
+		err = mlx5e_tls_rx_receive_progress_parameters(iq, ptag,
+		    &mlx5e_tls_rx_receive_progress_parameters_cb);
 		if (err != 0) {
 			MLX5E_TLS_RX_STAT_INC(ptag, rx_resync_err, 1);
 		} else {
@@ -1001,6 +1023,74 @@ mlx5e_tls_rx_snd_tag_free(struct m_snd_tag *pmt)
 	queue_work(priv->tls_rx.wq, &ptag->work);
 }
 
+static void
+mlx5e_tls_rx_str_status_cb(void *arg)
+{
+	struct mlx5e_tls_rx_tag *ptag;
+
+	ptag = (struct mlx5e_tls_rx_tag *)arg;
+	complete_all(&ptag->progress_complete);	/* wake status_str waiter */
+	m_snd_tag_rele(&ptag->tag);	/* pairs with ref taken at post */
+}
+
+static int
+mlx5e_tls_rx_snd_tag_status_str(struct m_snd_tag *pmt, char *buf, size_t *sz)
+{
+	int err, out_size;
+	struct mlx5e_iq *iq;
+	void *buffer;
+	uint32_t tracker_state_val;
+	uint32_t auth_state_val;
+	struct mlx5e_priv *priv;
+	struct mlx5e_tls_rx_tag *ptag = 
+	    container_of(pmt, struct mlx5e_tls_rx_tag, tag);
+
+	if (buf == NULL)
+		return (0);	/* no buffer: nothing is queried or written */
+
+	MLX5E_TLS_RX_TAG_LOCK(ptag);
+	priv = container_of(ptag->tls_rx, struct mlx5e_priv, tls_rx);
+	iq = mlx5e_tls_rx_get_iq(priv, ptag->flowid, ptag->flowtype);
+	reinit_completion(&ptag->progress_complete);
+	err = mlx5e_tls_rx_receive_progress_parameters(iq, ptag,
+	    &mlx5e_tls_rx_str_status_cb);
+	MLX5E_TLS_RX_TAG_UNLOCK(ptag);
+	if (err != 0)
+		return (err);
+
+	for (;;) {
+		if (wait_for_completion_timeout(&ptag->progress_complete,
+		    msecs_to_jiffies(1000)) != 0)
+			break;		/* completion signalled */
+		if (priv->mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR ||
+		    pci_channel_offline(priv->mdev->pdev) != 0)
+			return (ENXIO);	/* device failed or went offline */
+	}
+	buffer = mlx5e_tls_rx_get_progress_buffer(ptag);
+	tracker_state_val = MLX5_GET(tls_progress_params, buffer,
+	    record_tracker_state);
+	auth_state_val = MLX5_GET(tls_progress_params, buffer, auth_state);
+
+	/* The tracker state indexes a name table below; bound it first. */
+	if (tracker_state_val >
+	    MLX5E_TLS_RX_PROGRESS_PARAMS_RECORD_TRACKER_STATE_SEARCHING)
+		return (EINVAL);
+
+	/* Likewise bound the auth state before it indexes its table. */
+	if (auth_state_val >
+	    MLX5E_TLS_RX_PROGRESS_PARAMS_AUTH_STATE_AUTHENTICATION)
+		return (EINVAL);
+
+	out_size = snprintf(buf, *sz, "tracker_state: %s, auth_state: %s",
+	    mlx5e_tls_rx_progress_params_record_tracker_state_str[
+		tracker_state_val],
+	    mlx5e_tls_rx_progress_params_auth_state_str[auth_state_val]);
+
+	if (out_size <= *sz)
+		*sz = out_size;	/* report the formatted length */
+	return (0);
+}
+
 #else
 
 int
diff --git a/sys/dev/mlx5/mlx5_en/mlx5_en_rx.c b/sys/dev/mlx5/mlx5_en/mlx5_en_rx.c
index 6b53db6fea23..eb569488631a 100644
--- a/sys/dev/mlx5/mlx5_en/mlx5_en_rx.c
+++ b/sys/dev/mlx5/mlx5_en/mlx5_en_rx.c
@@ -467,7 +467,7 @@ mlx5e_build_rx_mbuf(struct mlx5_cqe64 *cqe, struct mlx5e_rq *rq,
 		break;
 	}
 
-	mlx5e_accel_ipsec_handle_rx(mb, cqe, mr);
+	mlx5e_accel_ipsec_handle_rx(ifp, mb, cqe, mr);
 }
 
 static inline void
diff --git a/sys/dev/nvme/nvme_ctrlr.c b/sys/dev/nvme/nvme_ctrlr.c
index 73a7cee4aad0..fd7f00ced14b 100644
--- a/sys/dev/nvme/nvme_ctrlr.c
+++ b/sys/dev/nvme/nvme_ctrlr.c
@@ -48,7 +48,7 @@
 #define B4_CHK_RDY_DELAY_MS	2300		/* work around controller bug */
 
 static void nvme_ctrlr_construct_and_submit_aer(struct nvme_controller *ctrlr,
-						struct nvme_async_event_request *aer);
+    struct nvme_async_event_request *aer);
 
 static void
 nvme_ctrlr_barrier(struct nvme_controller *ctrlr, int flags)
@@ -680,96 +680,6 @@ nvme_ctrlr_log_critical_warnings(struct nvme_controller *ctrlr,
 }
 
 static void
-nvme_ctrlr_async_event_log_page_cb(void *arg, const struct nvme_completion *cpl)
-{
-	struct nvme_async_event_request		*aer = arg;
-	struct nvme_health_information_page	*health_info;
-	struct nvme_ns_list			*nsl;
-	struct nvme_error_information_entry	*err;
-	int i;
-
-	/*
-	 * If the log page fetch for some reason completed with an error,
-	 *  don't pass log page data to the consumers.  In practice, this case
-	 *  should never happen.
-	 */
-	if (nvme_completion_is_error(cpl))
-		nvme_notify_async_consumers(aer->ctrlr, &aer->cpl,
-		    aer->log_page_id, NULL, 0);
-	else {
-		/* Convert data to host endian */
-		switch (aer->log_page_id) {
-		case NVME_LOG_ERROR:
-			err = (struct nvme_error_information_entry *)aer->log_page_buffer;
-			for (i = 0; i < (aer->ctrlr->cdata.elpe + 1); i++)
-				nvme_error_information_entry_swapbytes(err++);
-			break;
-		case NVME_LOG_HEALTH_INFORMATION:
-			nvme_health_information_page_swapbytes(
-			    (struct nvme_health_information_page *)aer->log_page_buffer);
-			break;
-		case NVME_LOG_CHANGED_NAMESPACE:
-			nvme_ns_list_swapbytes(
-			    (struct nvme_ns_list *)aer->log_page_buffer);
-			break;
-		case NVME_LOG_COMMAND_EFFECT:
-			nvme_command_effects_page_swapbytes(
-			    (struct nvme_command_effects_page *)aer->log_page_buffer);
-			break;
-		case NVME_LOG_RES_NOTIFICATION:
-			nvme_res_notification_page_swapbytes(
-			    (struct nvme_res_notification_page *)aer->log_page_buffer);
-			break;
-		case NVME_LOG_SANITIZE_STATUS:
-			nvme_sanitize_status_page_swapbytes(
-			    (struct nvme_sanitize_status_page *)aer->log_page_buffer);
-			break;
-		default:
-			break;
-		}
-
-		if (aer->log_page_id == NVME_LOG_HEALTH_INFORMATION) {
-			health_info = (struct nvme_health_information_page *)
-			    aer->log_page_buffer;
-			nvme_ctrlr_log_critical_warnings(aer->ctrlr,
-			    health_info->critical_warning);
-			/*
-			 * Critical warnings reported through the
-			 *  SMART/health log page are persistent, so
-			 *  clear the associated bits in the async event
-			 *  config so that we do not receive repeated
-			 *  notifications for the same event.
-			 */
-			aer->ctrlr->async_event_config &=
-			    ~health_info->critical_warning;
-			nvme_ctrlr_cmd_set_async_event_config(aer->ctrlr,
-			    aer->ctrlr->async_event_config, NULL, NULL);
-		} else if (aer->log_page_id == NVME_LOG_CHANGED_NAMESPACE &&
-		    !nvme_use_nvd) {
-			nsl = (struct nvme_ns_list *)aer->log_page_buffer;
-			for (i = 0; i < nitems(nsl->ns) && nsl->ns[i] != 0; i++) {
-				if (nsl->ns[i] > NVME_MAX_NAMESPACES)
-					break;
-				nvme_notify_ns(aer->ctrlr, nsl->ns[i]);
-			}
-		}
-
-		/*
-		 * Pass the cpl data from the original async event completion,
-		 *  not the log page fetch.
-		 */
-		nvme_notify_async_consumers(aer->ctrlr, &aer->cpl,
-		    aer->log_page_id, aer->log_page_buffer, aer->log_page_size);
-	}
-
-	/*
-	 * Repost another asynchronous event request to replace the one
-	 *  that just completed.
-	 */
-	nvme_ctrlr_construct_and_submit_aer(aer->ctrlr, aer);
-}
-
-static void
 nvme_ctrlr_async_event_cb(void *arg, const struct nvme_completion *cpl)
 {
 	struct nvme_async_event_request	*aer = arg;
@@ -784,33 +694,18 @@ nvme_ctrlr_async_event_cb(void *arg, const struct nvme_completion *cpl)
 		return;
 	}
 
-	/* Associated log page is in bits 23:16 of completion entry dw0. */
+	/*
+	 * Save the completion status. The associated log page ID is in bits
+	 * 23:16 of completion entry dw0. Print a message and queue the AER
+	 * task for further processing.
+	 */
+	memcpy(&aer->cpl, cpl, sizeof(*cpl));
 	aer->log_page_id = NVMEV(NVME_ASYNC_EVENT_LOG_PAGE_ID, cpl->cdw0);
-
 	nvme_printf(aer->ctrlr, "async event occurred (type 0x%x, info 0x%02x,"
 	    " page 0x%02x)\n", NVMEV(NVME_ASYNC_EVENT_TYPE, cpl->cdw0),
 	    NVMEV(NVME_ASYNC_EVENT_INFO, cpl->cdw0),
 	    aer->log_page_id);
-
-	if (is_log_page_id_valid(aer->log_page_id)) {
-		aer->log_page_size = nvme_ctrlr_get_log_page_size(aer->ctrlr,
-		    aer->log_page_id);
-		memcpy(&aer->cpl, cpl, sizeof(*cpl));
-		nvme_ctrlr_cmd_get_log_page(aer->ctrlr, aer->log_page_id,
-		    NVME_GLOBAL_NAMESPACE_TAG, aer->log_page_buffer,
-		    aer->log_page_size, nvme_ctrlr_async_event_log_page_cb,
-		    aer);
-		/* Wait to notify consumers until after log page is fetched. */
-	} else {
-		nvme_notify_async_consumers(aer->ctrlr, cpl, aer->log_page_id,
-		    NULL, 0);
-
-		/*
-		 * Repost another asynchronous event request to replace the one
-		 *  that just completed.
-		 */
-		nvme_ctrlr_construct_and_submit_aer(aer->ctrlr, aer);
-	}
+	taskqueue_enqueue(aer->ctrlr->taskqueue, &aer->task);
 }
 
 static void
@@ -819,15 +714,21 @@ nvme_ctrlr_construct_and_submit_aer(struct nvme_controller *ctrlr,
 {
 	struct nvme_request *req;
 
-	aer->ctrlr = ctrlr;
 	/*
-	 * XXX-MJ this should be M_WAITOK but we might be in a non-sleepable
-	 * callback context.  AER completions should be handled on a dedicated
-	 * thread.
+	 * We're racing the reset thread, so let that process submit this again.
+	 * XXX does this really solve that race? And is that race even possible
+	 * since we only reset when we've not heard from the card in a long
+	 * time. Why would we get an AER in the middle of that just before we
+	 * kick off the reset?
 	 */
-	req = nvme_allocate_request_null(M_NOWAIT, nvme_ctrlr_async_event_cb,
+	if (ctrlr->is_resetting)
+		return;
+
+	aer->ctrlr = ctrlr;
+	req = nvme_allocate_request_null(M_WAITOK, nvme_ctrlr_async_event_cb,
 	    aer);
 	aer->req = req;
+	aer->log_page_id = 0;		/* Not a valid page */
 
 	/*
 	 * Disable timeout here, since asynchronous event requests should by
@@ -1203,6 +1104,140 @@ nvme_ctrlr_reset_task(void *arg, int pending)
 	atomic_cmpset_32(&ctrlr->is_resetting, 1, 0);
 }
 
+static void
+nvme_ctrlr_aer_done(void *arg,  const struct nvme_completion *cpl)
+{
+	struct nvme_async_event_request	*aer = arg;
+
+	mtx_lock(&aer->mtx);
+	if (nvme_completion_is_error(cpl))
+		aer->log_page_size = (uint32_t)-1;	/* failure sentinel */
+	else
+		aer->log_page_size = nvme_ctrlr_get_log_page_size(
+		    aer->ctrlr, aer->log_page_id);
+	wakeup(aer);		/* nvme_ctrlr_aer_task sleeps on aer */
+	mtx_unlock(&aer->mtx);
+}
+
+/*
+ * Taskqueue handler for a completed AER.  It may sleep, so it can fetch the
+ * associated log page (when there is one) before notifying consumers, and it
+ * finally reposts the AER.
+ */
+static void
+nvme_ctrlr_aer_task(void *arg, int pending)
+{
+	struct nvme_async_event_request	*aer = arg;
+	struct nvme_controller	*ctrlr = aer->ctrlr;
+	uint32_t len;
+
+	/* We're resetting, so just punt. */
+	if (ctrlr->is_resetting)
+		return;
+
+	if (!is_log_page_id_valid(aer->log_page_id)) {
+		/*
+		 * There is no log page to fetch, so just notify the consumers.
+		 * The AER is reposted exactly once, at "out", so it must not
+		 * also be submitted here.
+		 */
+		nvme_notify_async_consumers(ctrlr, &aer->cpl, aer->log_page_id,
+		    NULL, 0);
+		goto out;
+	}
+
+	aer->log_page_size = 0;
+	len = nvme_ctrlr_get_log_page_size(aer->ctrlr, aer->log_page_id);
+	nvme_ctrlr_cmd_get_log_page(aer->ctrlr, aer->log_page_id,
+	    NVME_GLOBAL_NAMESPACE_TAG, aer->log_page_buffer, len,
+	    nvme_ctrlr_aer_done, aer);
+	mtx_lock(&aer->mtx);
+	while (aer->log_page_size == 0)
+		mtx_sleep(aer, &aer->mtx, PRIBIO, "nvme_pt", 0);
+	mtx_unlock(&aer->mtx);
+
+	if (aer->log_page_size == (uint32_t)-1) {
+		/*
+		 * If the log page fetch for some reason completed with an
+		 * error, don't pass log page data to the consumers.  In
+		 * practice, this case should never happen.
+		 */
+		nvme_notify_async_consumers(aer->ctrlr, &aer->cpl,
+		    aer->log_page_id, NULL, 0);
+		goto out;
+	}
+
+	/* Convert data to host endian */
+	switch (aer->log_page_id) {
+	case NVME_LOG_ERROR: {
+		struct nvme_error_information_entry *err =
+		    (struct nvme_error_information_entry *)aer->log_page_buffer;
+		for (int i = 0; i < (aer->ctrlr->cdata.elpe + 1); i++)
+			nvme_error_information_entry_swapbytes(err++);
+		break;
+	}
+	case NVME_LOG_HEALTH_INFORMATION:
+		nvme_health_information_page_swapbytes(
+			(struct nvme_health_information_page *)aer->log_page_buffer);
+		break;
+	case NVME_LOG_CHANGED_NAMESPACE:
+		nvme_ns_list_swapbytes(
+			(struct nvme_ns_list *)aer->log_page_buffer);
+		break;
+	case NVME_LOG_COMMAND_EFFECT:
+		nvme_command_effects_page_swapbytes(
+			(struct nvme_command_effects_page *)aer->log_page_buffer);
+		break;
+	case NVME_LOG_RES_NOTIFICATION:
+		nvme_res_notification_page_swapbytes(
+			(struct nvme_res_notification_page *)aer->log_page_buffer);
+		break;
+	case NVME_LOG_SANITIZE_STATUS:
+		nvme_sanitize_status_page_swapbytes(
+			(struct nvme_sanitize_status_page *)aer->log_page_buffer);
+		break;
+	default:
+		break;
+	}
+
+	if (aer->log_page_id == NVME_LOG_HEALTH_INFORMATION) {
+		struct nvme_health_information_page *health_info =
+		    (struct nvme_health_information_page *)aer->log_page_buffer;
+
+		/*
+		 * Critical warnings reported through the SMART/health log page
+		 * are persistent, so clear the associated bits in the async
+		 * event config so that we do not receive repeated notifications
+		 * for the same event.
+		 */
+		nvme_ctrlr_log_critical_warnings(aer->ctrlr,
+		    health_info->critical_warning);
+		aer->ctrlr->async_event_config &=
+		    ~health_info->critical_warning;
+		nvme_ctrlr_cmd_set_async_event_config(aer->ctrlr,
+		    aer->ctrlr->async_event_config, NULL, NULL);
+	} else if (aer->log_page_id == NVME_LOG_CHANGED_NAMESPACE) {
+		struct nvme_ns_list *nsl =
+		    (struct nvme_ns_list *)aer->log_page_buffer;
+		for (int i = 0; i < nitems(nsl->ns) && nsl->ns[i] != 0; i++) {
+			if (nsl->ns[i] > NVME_MAX_NAMESPACES)
+				break;
+			nvme_notify_ns(aer->ctrlr, nsl->ns[i]);
+		}
+	}
+
+	/*
+	 * Pass the cpl data from the original async event completion, not the
+	 * log page fetch.
+	 */
+	nvme_notify_async_consumers(aer->ctrlr, &aer->cpl,
+	    aer->log_page_id, aer->log_page_buffer, aer->log_page_size);
+
+out:
+	/* Repost an AER to replace the one that just completed. */
+	nvme_ctrlr_construct_and_submit_aer(ctrlr, aer);
+}
+
 /*
  * Poll all the queues enabled on the device for completion.
  */
@@ -1574,13 +1609,8 @@ nvme_ctrlr_construct(struct nvme_controller *ctrlr, device_t dev)
 	/*
 	 * Create 2 threads for the taskqueue. The reset thread will block when
 	 * it detects that the controller has failed until all I/O has been
-	 * failed up the stack. The fail_req task needs to be able to run in
-	 * this case to finish the request failure for some cases.
-	 *
-	 * We could partially solve this race by draining the failed requeust
-	 * queue before proceding to free the sim, though nothing would stop
-	 * new I/O from coming in after we do that drain, but before we reach
-	 * cam_sim_free, so this big hammer is used instead.
+	 * failed up the stack. The second thread is used for AER events, which
+	 * can block, but only briefly for memory and log page fetching.
 	 */
 	ctrlr->taskqueue = taskqueue_create("nvme_taskq", M_WAITOK,
 	    taskqueue_thread_enqueue, &ctrlr->taskqueue);
@@ -1590,7 +1620,12 @@ nvme_ctrlr_construct(struct nvme_controller *ctrlr, device_t dev)
 	ctrlr->is_initialized = false;
 	ctrlr->notification_sent = 0;
 	TASK_INIT(&ctrlr->reset_task, 0, nvme_ctrlr_reset_task, ctrlr);
-	STAILQ_INIT(&ctrlr->fail_req);
+	for (int i = 0; i < NVME_MAX_ASYNC_EVENTS; i++) {
+		struct nvme_async_event_request *aer = &ctrlr->aer[i];
+
+		TASK_INIT(&aer->task, 0, nvme_ctrlr_aer_task, aer);
+		mtx_init(&aer->mtx, "AER mutex", NULL, MTX_DEF);
+	}
 	ctrlr->is_failed = false;
 
 	make_dev_args_init(&md_args);
@@ -1678,8 +1713,14 @@ nvme_ctrlr_destruct(struct nvme_controller *ctrlr, device_t dev)
 	}
 
 noadminq:
-	if (ctrlr->taskqueue)
+	if (ctrlr->taskqueue) {
 		taskqueue_free(ctrlr->taskqueue);
+		for (int i = 0; i < NVME_MAX_ASYNC_EVENTS; i++) {
+			struct nvme_async_event_request *aer = &ctrlr->aer[i];
+
+			mtx_destroy(&aer->mtx);
+		}
+	}
 
 	if (ctrlr->tag)
 		bus_teardown_intr(ctrlr->dev, ctrlr->res, ctrlr->tag);
diff --git a/sys/dev/nvme/nvme_private.h b/sys/dev/nvme/nvme_private.h
index 949e69ec9290..36f00fedc48e 100644
--- a/sys/dev/nvme/nvme_private.h
+++ b/sys/dev/nvme/nvme_private.h
@@ -123,6 +123,8 @@ struct nvme_request {
 struct nvme_async_event_request {
 	struct nvme_controller		*ctrlr;
 	struct nvme_request		*req;
+	struct task			task;
+	struct mtx			mtx;
 	struct nvme_completion		cpl;
 	uint32_t			log_page_id;
 	uint32_t			log_page_size;
@@ -307,8 +309,6 @@ struct nvme_controller {
 	bool				isr_warned;
 	bool				is_initialized;
 
-	STAILQ_HEAD(, nvme_request)	fail_req;
-
 	/* Host Memory Buffer */
 	int				hmb_nchunks;
 	size_t				hmb_chunk;
diff --git a/sys/dev/nvmf/host/nvmf.c b/sys/dev/nvmf/host/nvmf.c
index dbdd4568bdf1..1ac0d142443b 100644
--- a/sys/dev/nvmf/host/nvmf.c
+++ b/sys/dev/nvmf/host/nvmf.c
@@ -27,6 +27,7 @@
 #include <dev/nvmf/host/nvmf_var.h>
 
 static struct cdevsw nvmf_cdevsw;
+static struct taskqueue *nvmf_tq;
 
 bool nvmf_fail_disconnect = false;
 SYSCTL_BOOL(_kern_nvmf, OID_AUTO, fail_on_disconnection, CTLFLAG_RWTUN,
@@ -34,7 +35,10 @@ SYSCTL_BOOL(_kern_nvmf, OID_AUTO, fail_on_disconnection, CTLFLAG_RWTUN,
 
 MALLOC_DEFINE(M_NVMF, "nvmf", "NVMe over Fabrics host");
 
+static void	nvmf_controller_loss_task(void *arg, int pending);
 static void	nvmf_disconnect_task(void *arg, int pending);
+static void	nvmf_request_reconnect(struct nvmf_softc *sc);
+static void	nvmf_request_reconnect_task(void *arg, int pending);
 static void	nvmf_shutdown_pre_sync(void *arg, int howto);
 static void	nvmf_shutdown_post_sync(void *arg, int howto);
 
@@ -294,6 +298,9 @@ nvmf_establish_connection(struct nvmf_softc *sc, nvlist_t *nvl)
 	admin = nvlist_get_nvlist(nvl, "admin");
 	io = nvlist_get_nvlist_array(nvl, "io", &num_io_queues);
 	kato = dnvlist_get_number(nvl, "kato", 0);
+	sc->reconnect_delay = dnvlist_get_number(nvl, "reconnect_delay", 0);
+	sc->controller_loss_timeout = dnvlist_get_number(nvl,
+	    "controller_loss_timeout", 0);
 
 	/* Setup the admin queue. */
 	sc->admin = nvmf_init_qp(sc, trtype, admin, "admin queue", 0);
@@ -504,6 +511,10 @@ nvmf_attach(device_t dev)
 	callout_init(&sc->ka_tx_timer, 1);
 	sx_init(&sc->connection_lock, "nvmf connection");
 	TASK_INIT(&sc->disconnect_task, 0, nvmf_disconnect_task, sc);
+	TIMEOUT_TASK_INIT(nvmf_tq, &sc->controller_loss_task, 0,
+	    nvmf_controller_loss_task, sc);
+	TIMEOUT_TASK_INIT(nvmf_tq, &sc->request_reconnect_task, 0,
+	    nvmf_request_reconnect_task, sc);
 
 	oid = SYSCTL_ADD_NODE(device_get_sysctl_ctx(dev),
 	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "ioq",
@@ -603,7 +614,9 @@ out:
 
 	nvmf_destroy_aer(sc);
 
-	taskqueue_drain(taskqueue_thread, &sc->disconnect_task);
+	taskqueue_drain_timeout(nvmf_tq, &sc->request_reconnect_task);
+	taskqueue_drain_timeout(nvmf_tq, &sc->controller_loss_task);
+	taskqueue_drain(nvmf_tq, &sc->disconnect_task);
 	sx_destroy(&sc->connection_lock);
 	nvlist_destroy(sc->rparams);
 	free(sc->cdata, M_NVMF);
@@ -613,7 +626,7 @@ out:
 void
 nvmf_disconnect(struct nvmf_softc *sc)
 {
-	taskqueue_enqueue(taskqueue_thread, &sc->disconnect_task);
+	taskqueue_enqueue(nvmf_tq, &sc->disconnect_task);
 }
 
 static void
@@ -676,6 +689,74 @@ nvmf_disconnect_task(void *arg, int pending __unused)
 	nvmf_destroy_qp(sc->admin);
 	sc->admin = NULL;
 
+	if (sc->reconnect_delay != 0)
+		nvmf_request_reconnect(sc);
+	if (sc->controller_loss_timeout != 0)
+		taskqueue_enqueue_timeout(nvmf_tq,
+		    &sc->controller_loss_task, sc->controller_loss_timeout *
+		    hz);
+
+	sx_xunlock(&sc->connection_lock);
+}
+
+static void
+nvmf_controller_loss_task(void *arg, int pending)
+{
+	struct nvmf_softc *sc = arg;
+	device_t dev;
+	int error;
+
+	bus_topo_lock();
+	sx_xlock(&sc->connection_lock);
+	if (sc->admin != NULL || sc->detaching) {
+		/* Reconnected or already detaching. */
+		sx_xunlock(&sc->connection_lock);
+		bus_topo_unlock();
+		return;
+	}
+
+	sc->controller_timedout = true;	/* later reconnects get EBUSY */
+	sx_xunlock(&sc->connection_lock);
+
+	/*
+	 * XXX: Doing this from here is a bit ugly.  We don't have an
+	 * extra reference on `dev` but bus_topo_lock should block any
+	 * concurrent device_delete_child invocations.
+	 */
+	dev = sc->dev;
+	error = device_delete_child(root_bus, dev);
+	if (error != 0)
+		device_printf(dev,
+		    "failed to detach after controller loss: %d\n", error);
+	bus_topo_unlock();
+}
+
+static void
+nvmf_request_reconnect(struct nvmf_softc *sc)
+{
+	char buf[64];
+
+	sx_assert(&sc->connection_lock, SX_LOCKED);
+
+	snprintf(buf, sizeof(buf), "name=\"%s\"", device_get_nameunit(sc->dev));
+	devctl_notify("nvme", "controller", "RECONNECT", buf);
+	taskqueue_enqueue_timeout(nvmf_tq, &sc->request_reconnect_task,
+	    sc->reconnect_delay * hz);	/* task re-notifies after the delay */
+}
+
+static void
+nvmf_request_reconnect_task(void *arg, int pending)
+{
+	struct nvmf_softc *sc = arg;
+
+	sx_xlock(&sc->connection_lock);
+	if (sc->admin != NULL || sc->detaching || sc->controller_timedout) {
+		/* Reconnected, detaching, or controller loss fired. */
+		sx_xunlock(&sc->connection_lock);
+		return;
+	}
+
+	nvmf_request_reconnect(sc);
 	sx_xunlock(&sc->connection_lock);
 }
 
@@ -699,7 +780,7 @@ nvmf_reconnect_host(struct nvmf_softc *sc, struct nvmf_ioc_nv *nv)
 	}
 
 	sx_xlock(&sc->connection_lock);
-	if (sc->admin != NULL || sc->detaching) {
+	if (sc->admin != NULL || sc->detaching || sc->controller_timedout) {
 		error = EBUSY;
 		goto out;
 	}
@@ -745,6 +826,9 @@ nvmf_reconnect_host(struct nvmf_softc *sc, struct nvmf_ioc_nv *nv)
 	nvmf_reconnect_sim(sc);
 
 	nvmf_rescan_all_ns(sc);
+
+	taskqueue_cancel_timeout(nvmf_tq, &sc->request_reconnect_task, NULL);
+	taskqueue_cancel_timeout(nvmf_tq, &sc->controller_loss_task, NULL);
 out:
 	sx_xunlock(&sc->connection_lock);
 	nvlist_destroy(nvl);
@@ -852,7 +936,21 @@ nvmf_detach(device_t dev)
 	}
 	free(sc->io, M_NVMF);
 
-	taskqueue_drain(taskqueue_thread, &sc->disconnect_task);
+	taskqueue_drain(nvmf_tq, &sc->disconnect_task);
+	if (taskqueue_cancel_timeout(nvmf_tq, &sc->request_reconnect_task,
+	    NULL) != 0)
+		taskqueue_drain_timeout(nvmf_tq, &sc->request_reconnect_task);
+
+	/*
+	 * Don't cancel/drain the controller loss task if that task
+	 * has fired and is triggering the detach.
+	 */
+	if (!sc->controller_timedout) {
+		if (taskqueue_cancel_timeout(nvmf_tq, &sc->controller_loss_task,
+		    NULL) != 0)
+			taskqueue_drain_timeout(nvmf_tq,
+			    &sc->controller_loss_task);
+	}
 
 	if (sc->admin != NULL)
 		nvmf_destroy_qp(sc->admin);
@@ -1154,14 +1252,25 @@ static struct cdevsw nvmf_cdevsw = {
 static int
 nvmf_modevent(module_t mod, int what, void *arg)
 {
+	int error;
+
 	switch (what) {
 	case MOD_LOAD:
-		return (nvmf_ctl_load());
+		error = nvmf_ctl_load();
+		if (error != 0)
+			return (error);
+
+		nvmf_tq = taskqueue_create("nvmf", M_WAITOK | M_ZERO,
+		    taskqueue_thread_enqueue, &nvmf_tq);
+		taskqueue_start_threads(&nvmf_tq, 1, PWAIT, "nvmf taskq");
+		return (0);
 	case MOD_QUIESCE:
 		return (0);
 	case MOD_UNLOAD:
 		nvmf_ctl_unload();
 		destroy_dev_drain(&nvmf_cdevsw);
+		if (nvmf_tq != NULL)
+			taskqueue_free(nvmf_tq);
 		return (0);
 	default:
 		return (EOPNOTSUPP);
diff --git a/sys/dev/nvmf/host/nvmf_var.h b/sys/dev/nvmf/host/nvmf_var.h
index e45a31f413a4..606245b3969c 100644
--- a/sys/dev/nvmf/host/nvmf_var.h
+++ b/sys/dev/nvmf/host/nvmf_var.h
@@ -75,9 +75,15 @@ struct nvmf_softc {
 	struct callout ka_rx_timer;
 	sbintime_t ka_rx_sbt;
 
+	struct timeout_task request_reconnect_task;
+	struct timeout_task controller_loss_task;
+	uint32_t reconnect_delay;
+	uint32_t controller_loss_timeout;
+
 	struct sx connection_lock;
 	struct task disconnect_task;
 	bool detaching;
+	bool controller_timedout;
 
 	u_int num_aer;
 	struct nvmf_aer *aer;
diff --git a/sys/dev/nvmf/nvmf.h b/sys/dev/nvmf/nvmf.h
index d4e7b1511e9d..9b2b4c1dea40 100644
--- a/sys/dev/nvmf/nvmf.h
+++ b/sys/dev/nvmf/nvmf.h
@@ -27,6 +27,13 @@
 #define	NVMF_NN			(1024)
 
 /*
+ * Default timeouts for Fabrics hosts.  These match values used by
+ * Linux.
+ */
+#define	NVMF_DEFAULT_RECONNECT_DELAY	10
+#define	NVMF_DEFAULT_CONTROLLER_LOSS	600
+
+/*
  * (data, size) is the userspace buffer for a packed nvlist.
  *
  * For requests that copyout an nvlist, len is the amount of data
@@ -68,6 +75,8 @@ struct nvmf_ioc_nv {
  *
  * number			trtype
  * number			kato	(optional)
+ * number			reconnect_delay (optional)
+ * number			controller_loss_timeout (optional)
  * qpair handoff nvlist		admin
  * qpair handoff nvlist array	io
  * binary			cdata	struct nvme_controller_data
@@ -81,6 +90,8 @@ struct nvmf_ioc_nv {
  * string			hostnqn
  * number			num_io_queues
  * number			kato	(optional)
+ * number			reconnect_delay (optional)
+ * number			controller_loss_timeout (optional)
  * number			io_qsize
  * bool				sq_flow_control
  *
diff --git a/sys/dev/ofw/ofw_bus_subr.c b/sys/dev/ofw/ofw_bus_subr.c
index 4d0479dfb957..b99d784929bc 100644
--- a/sys/dev/ofw/ofw_bus_subr.c
+++ b/sys/dev/ofw/ofw_bus_subr.c
@@ -634,11 +634,89 @@ ofw_bus_find_iparent(phandle_t node)
 	return (iparent);
 }
 
+static phandle_t
+ofw_bus_search_iparent(phandle_t node)
+{
+	phandle_t iparent;
+
+	do {
+		if (OF_getencprop(node, "interrupt-parent", &iparent,
+		    sizeof(iparent)) > 0) {
+			node = OF_node_from_xref(iparent);
+		} else {
+			node = OF_parent(node);
+		}
+		if (node == 0)
+			return (0);
+	} while (!OF_hasprop(node, "#interrupt-cells"));
+
+	return (OF_xref_from_node(node));
+}
+
+static int
+ofw_bus_traverse_imap(phandle_t inode, phandle_t node, uint32_t *intr,
+    int intrsz, pcell_t *res, int ressz, phandle_t *iparentp)
+{
+	struct ofw_bus_iinfo ii;
+	void *reg;
+	uint32_t *intrp;
+	phandle_t iparent;
+	int rv = 0;
+
+	/* We already have an interrupt controller */
+	if (OF_hasprop(node, "interrupt-controller"))
+		return (0);
+
+	intrp = malloc(intrsz, M_OFWPROP, M_WAITOK);
+	memcpy(intrp, intr, intrsz);
+
+	while (true) {
+		/* There is no interrupt-map to follow */
+		if (!OF_hasprop(inode, "interrupt-map")) {
+			free(intrp, M_OFWPROP);
+			return (0);
+		}
+
+		memset(&ii, 0, sizeof(ii));
+		ofw_bus_setup_iinfo(inode, &ii, sizeof(cell_t));
+
+		reg = NULL;
+		if (ii.opi_addrc > 0)
+			reg = malloc(ii.opi_addrc, M_OFWPROP, M_WAITOK);
+
+		rv = ofw_bus_lookup_imap(node, &ii, reg, ii.opi_addrc, intrp,
+		    intrsz, res, ressz, &iparent);
+
+		free(reg, M_OFWPROP);
+		free(ii.opi_imap, M_OFWPROP);
+		free(ii.opi_imapmsk, M_OFWPROP);
+		free(intrp, M_OFWPROP);
+
+		if (rv == 0)
+			return (0);
+
+		node = inode;
+		inode = OF_node_from_xref(iparent);
+
+		/* Stop when we have an interrupt controller */
+		if (OF_hasprop(inode, "interrupt-controller")) {
+			*iparentp = iparent;
+			return (rv);
+		}
+
+		intrsz = rv * sizeof(pcell_t);
+		intrp = malloc(intrsz, M_OFWPROP, M_WAITOK);
+		memcpy(intrp, res, intrsz);
+	}
+}
+
 int
 ofw_bus_intr_to_rl(device_t dev, phandle_t node,
     struct resource_list *rl, int *rlen)
 {
-	phandle_t iparent;
+	phandle_t iparent, iparent_node;
+	uint32_t result[16];
+	uint32_t intrpcells, *intrp;
 	uint32_t icells, *intr;
 	int err, i, irqnum, nintr, rid;
 	bool extended;
@@ -646,15 +724,16 @@ ofw_bus_intr_to_rl(device_t dev, phandle_t node,
 	nintr = OF_getencprop_alloc_multi(node, "interrupts",  sizeof(*intr),
 	    (void **)&intr);
 	if (nintr > 0) {
-		iparent = ofw_bus_find_iparent(node);
+		iparent = ofw_bus_search_iparent(node);
 		if (iparent == 0) {
 			device_printf(dev, "No interrupt-parent found, "
 			    "assuming direct parent\n");
 			iparent = OF_parent(node);
 			iparent = OF_xref_from_node(iparent);
 		}
-		if (OF_searchencprop(OF_node_from_xref(iparent), 
-		    "#interrupt-cells", &icells, sizeof(icells)) == -1) {
+		iparent_node = OF_node_from_xref(iparent);
+		if (OF_searchencprop(iparent_node, "#interrupt-cells", &icells,
+		    sizeof(icells)) == -1) {
 			device_printf(dev, "Missing #interrupt-cells "
 			    "property, assuming <1>\n");
 			icells = 1;
@@ -677,7 +756,8 @@ ofw_bus_intr_to_rl(device_t dev, phandle_t node,
 	for (i = 0; i < nintr; i += icells) {
 		if (extended) {
 			iparent = intr[i++];
-			if (OF_searchencprop(OF_node_from_xref(iparent), 
+			iparent_node = OF_node_from_xref(iparent);
+			if (OF_searchencprop(iparent_node,
 			    "#interrupt-cells", &icells, sizeof(icells)) == -1) {
 				device_printf(dev, "Missing #interrupt-cells "
 				    "property\n");
@@ -691,7 +771,16 @@ ofw_bus_intr_to_rl(device_t dev, phandle_t node,
 				break;
 			}
 		}
-		irqnum = ofw_bus_map_intr(dev, iparent, icells, &intr[i]);
+
+		intrp = &intr[i];
+		intrpcells = ofw_bus_traverse_imap(iparent_node, node, intrp,
+		    icells * sizeof(intr[0]), result, sizeof(result), &iparent);
+		if (intrpcells > 0)
+			intrp = result;
+		else
+			intrpcells = icells;
+
+		irqnum = ofw_bus_map_intr(dev, iparent, intrpcells, intrp);
 		resource_list_add(rl, SYS_RES_IRQ, rid++, irqnum, irqnum, 1);
 	}
 	if (rlen != NULL)
diff --git a/sys/dev/qlnx/qlnxe/qlnx_os.c b/sys/dev/qlnx/qlnxe/qlnx_os.c
index 05ec69a70dfe..4ad190374f87 100644
--- a/sys/dev/qlnx/qlnxe/qlnx_os.c
+++ b/sys/dev/qlnx/qlnxe/qlnx_os.c
@@ -30,6 +30,8 @@
  * Author : David C Somayajulu, Cavium, Inc., San Jose, CA 95131.
  */
 
+#include "opt_inet.h"
+
 #include <sys/cdefs.h>
 #include "qlnx_os.h"
 #include "bcm_osal.h"
@@ -2306,8 +2308,6 @@ qlnx_init_ifnet(device_t dev, qlnx_host_t *ha)
         else if (device_id == QLOGIC_PCI_DEVICE_ID_1644)
 		if_setbaudrate(ifp, IF_Gbps(100));
 
-        if_setcapabilities(ifp, IFCAP_LINKSTATE);
-
         if_setinitfn(ifp, qlnx_init);
         if_setsoftc(ifp, ha);
         if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST);
@@ -2341,7 +2341,6 @@ qlnx_init_ifnet(device_t dev, qlnx_host_t *ha)
 
 	if_setcapabilities(ifp, IFCAP_HWCSUM);
 	if_setcapabilitiesbit(ifp, IFCAP_JUMBO_MTU, 0);
-
 	if_setcapabilitiesbit(ifp, IFCAP_VLAN_MTU, 0);
 	if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWTAGGING, 0);
 	if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWFILTER, 0);
@@ -2350,6 +2349,8 @@ qlnx_init_ifnet(device_t dev, qlnx_host_t *ha)
 	if_setcapabilitiesbit(ifp, IFCAP_TSO4, 0);
 	if_setcapabilitiesbit(ifp, IFCAP_TSO6, 0);
 	if_setcapabilitiesbit(ifp, IFCAP_LRO, 0);
+	if_setcapabilitiesbit(ifp, IFCAP_LINKSTATE, 0);
+	if_setcapabilitiesbit(ifp, IFCAP_HWSTATS, 0);
 
 	if_sethwtsomax(ifp,  QLNX_MAX_TSO_FRAME_SIZE -
 				(ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN));
@@ -2778,7 +2779,7 @@ qlnx_ioctl(if_t ifp, u_long cmd, caddr_t data)
 
 		if (!p_ptt) {
 			QL_DPRINT1(ha, "ecore_ptt_acquire failed\n");
-			ret = -1;
+			ret = ERESTART;
 			break;
 		}
 
@@ -2789,7 +2790,7 @@ qlnx_ioctl(if_t ifp, u_long cmd, caddr_t data)
 		ecore_ptt_release(p_hwfn, p_ptt);
 
 		if (ret) {
-			ret = -1;
+			ret = ENODEV;
 			break;
 		}
 
diff --git a/sys/dev/random/fortuna.c b/sys/dev/random/fortuna.c
index c4282c723a44..8363de99a60a 100644
--- a/sys/dev/random/fortuna.c
+++ b/sys/dev/random/fortuna.c
@@ -341,6 +341,13 @@ random_fortuna_process_event(struct harvest_event *event)
 	u_int pl;
 
 	RANDOM_RESEED_LOCK();
+	/*
+	 * Run SP 800-90B health tests on the source if so configured.
+	 */
+	if (!random_harvest_healthtest(event)) {
+		RANDOM_RESEED_UNLOCK();
+		return;
+	}
 	/*-
 	 * FS&K - P_i = P_i|<harvested stuff>
 	 * Accumulate the event into the appropriate pool
diff --git a/sys/dev/random/random_harvestq.c b/sys/dev/random/random_harvestq.c
index 395310b115fb..c7762967c4fb 100644
--- a/sys/dev/random/random_harvestq.c
+++ b/sys/dev/random/random_harvestq.c
@@ -88,6 +88,8 @@ static void random_sources_feed(void);
 static __read_mostly bool epoch_inited;
 static __read_mostly epoch_t rs_epoch;
 
+static const char *random_source_descr[ENTROPYSOURCE];
+
 /*
  * How many events to queue up. We create this many items in
  * an 'empty' queue, then transfer them to the 'harvest' queue with
@@ -299,6 +301,230 @@ random_sources_feed(void)
 	explicit_bzero(entropy, sizeof(entropy));
 }
 
+/*
+ * State used for conducting NIST SP 800-90B health tests on entropy sources.
+ */
+static struct health_test_softc {
+	uint32_t ht_rct_value[HARVESTSIZE + 1];
+	u_int ht_rct_count;	/* number of samples with the same value */
+	u_int ht_rct_limit;	/* constant after init */
+
+	uint32_t ht_apt_value[HARVESTSIZE + 1];
+	u_int ht_apt_count;	/* number of samples with the same value */
+	u_int ht_apt_seq;	/* sequence number of the last sample */
+	u_int ht_apt_cutoff;	/* constant after init */
+
+	uint64_t ht_total_samples;
+	bool ondemand;		/* Set to true to restart the state machine */
+	enum {
+		INIT = 0,	/* initial state */
+		DISABLED,	/* health checking is disabled */
+		STARTUP,	/* doing startup tests, samples are discarded */
+		STEADY,		/* steady-state operation */
+		FAILED,		/* health check failed, discard samples */
+	} ht_state;
+} healthtest[ENTROPYSOURCE];
+
+#define	RANDOM_SELFTEST_STARTUP_SAMPLES	1024	/* 4.3, requirement 4 */
+#define	RANDOM_SELFTEST_APT_WINDOW	512	/* 4.4.2 */
+
+static void
+copy_event(uint32_t dst[static HARVESTSIZE + 1],
+    const struct harvest_event *event)
+{
+	memset(dst, 0, sizeof(uint32_t) * (HARVESTSIZE + 1));
+	memcpy(dst, event->he_entropy, event->he_size);
+	dst[HARVESTSIZE] = event->he_somecounter;
+}
+
+static void
+random_healthtest_rct_init(struct health_test_softc *ht,
+    const struct harvest_event *event)
+{
+	ht->ht_rct_count = 1;
+	copy_event(ht->ht_rct_value, event);
+}
+
+/*
+ * Apply the repetition count test to a sample.
+ *
+ * Return false if the test failed, i.e., we observed >= C consecutive samples
+ * with the same value, and true otherwise.
+ */
+static bool
+random_healthtest_rct_next(struct health_test_softc *ht,
+    const struct harvest_event *event)
+{
+	uint32_t val[HARVESTSIZE + 1];
+
+	copy_event(val, event);
+	if (memcmp(val, ht->ht_rct_value, sizeof(ht->ht_rct_value)) != 0) {
+		ht->ht_rct_count = 1;
+		memcpy(ht->ht_rct_value, val, sizeof(ht->ht_rct_value));
+		return (true);
+	} else {
+		ht->ht_rct_count++;
+		return (ht->ht_rct_count < ht->ht_rct_limit);
+	}
+}
+
+static void
+random_healthtest_apt_init(struct health_test_softc *ht,
+    const struct harvest_event *event)
+{
+	ht->ht_apt_count = 1;
+	ht->ht_apt_seq = 1;
+	copy_event(ht->ht_apt_value, event);
+}
+
+static bool
+random_healthtest_apt_next(struct health_test_softc *ht,
+    const struct harvest_event *event)
+{
+	uint32_t val[HARVESTSIZE + 1];
+
+	if (ht->ht_apt_seq == 0) {
+		random_healthtest_apt_init(ht, event);
+		return (true);
+	}
+
+	copy_event(val, event);
+	if (memcmp(val, ht->ht_apt_value, sizeof(ht->ht_apt_value)) == 0) {
+		ht->ht_apt_count++;
+		if (ht->ht_apt_count >= ht->ht_apt_cutoff)
+			return (false);
+	}
+
+	ht->ht_apt_seq++;
+	if (ht->ht_apt_seq == RANDOM_SELFTEST_APT_WINDOW)
+		ht->ht_apt_seq = 0;
+
+	return (true);
+}
+
+/*
+ * Run the health tests for the given event.  This is assumed to be called from
+ * a serialized context.
+ */
+bool
+random_harvest_healthtest(const struct harvest_event *event)
+{
+	struct health_test_softc *ht;
+
+	ht = &healthtest[event->he_source];
+
+	/*
+	 * Was on-demand testing requested?  Restart the state machine if so,
+	 * restarting the startup tests.
+	 */
+	if (atomic_load_bool(&ht->ondemand)) {
+		atomic_store_bool(&ht->ondemand, false);
+		ht->ht_state = INIT;
+	}
+
+	switch (ht->ht_state) {
+	case __predict_false(INIT):
+		/* Store the first sample and initialize test state. */
+		random_healthtest_rct_init(ht, event);
+		random_healthtest_apt_init(ht, event);
+		ht->ht_total_samples = 0;
+		ht->ht_state = STARTUP;
+		return (false);
+	case DISABLED:
+		/* No health testing for this source. */
+		return (true);
+	case STEADY:
+	case STARTUP:
+		ht->ht_total_samples++;
+		if (random_healthtest_rct_next(ht, event) &&
+		    random_healthtest_apt_next(ht, event)) {
+			if (ht->ht_state == STARTUP &&
+			    ht->ht_total_samples >=
+			    RANDOM_SELFTEST_STARTUP_SAMPLES) {
+				printf(
+			    "random: health test passed for source %s\n",
+				    random_source_descr[event->he_source]);
+				ht->ht_state = STEADY;
+			}
+			return (ht->ht_state == STEADY);
+		}
+		ht->ht_state = FAILED;
+		printf(
+	    "random: health test failed for source %s, discarding samples\n",
+		    random_source_descr[event->he_source]);
+		/* FALLTHROUGH */
+	case FAILED:
+		return (false);
+	}
+}
+
+static bool nist_healthtest_enabled = false;
+SYSCTL_BOOL(_kern_random, OID_AUTO, nist_healthtest_enabled,
+    CTLFLAG_RDTUN, &nist_healthtest_enabled, 0,
+    "Enable NIST SP 800-90B health tests for noise sources");
+
+static void
+random_healthtest_init(enum random_entropy_source source)
+{
+	struct health_test_softc *ht;
+
+	ht = &healthtest[source];
+	KASSERT(ht->ht_state == INIT,
+	    ("%s: health test state is %d for source %d",
+	    __func__, ht->ht_state, source));
+
+	/*
+	 * If health-testing is enabled, validate all sources except CACHED and
+	 * VMGENID: they are deterministic sources used only a small, fixed
+	 * number of times, so statistical testing is not applicable.
+	 */
+	if (!nist_healthtest_enabled ||
+	    source == RANDOM_CACHED || source == RANDOM_PURE_VMGENID) {
+		ht->ht_state = DISABLED;
+		return;
+	}
+
+	/*
+	 * Set cutoff values for the two tests, assuming that each sample has
+	 * min-entropy of 1 bit and allowing for an error rate of 1 in 2^{34}.
+	 * With a sample rate of RANDOM_KTHREAD_HZ, we expect to see a false
+	 * positive once in ~54.5 years.
+	 *
+	 * The RCT limit comes from the formula in section 4.4.1.
+	 *
+	 * The APT cutoff is calculated using the formula in section 4.4.2
+	 * footnote 10 with the window size changed from 512 to 511, since the
+	 * test as written counts the number of samples equal to the first
+	 * sample in the window, and thus tests W-1 samples.
+	 */
+	ht->ht_rct_limit = 35;
+	ht->ht_apt_cutoff = 330;
+}
+
+static int
+random_healthtest_ondemand(SYSCTL_HANDLER_ARGS)
+{
+	u_int mask, source;
+	int error;
+
+	mask = 0;
+	error = sysctl_handle_int(oidp, &mask, 0, req);
+	if (error != 0 || req->newptr == NULL)
+		return (error);
+
+	while (mask != 0) {
+		source = ffs(mask) - 1;
+		if (source < nitems(healthtest))
+			atomic_store_bool(&healthtest[source].ondemand, true);
+		mask &= ~(1u << source);
+	}
+	return (0);
+}
+SYSCTL_PROC(_kern_random, OID_AUTO, nist_healthtest_ondemand,
+    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, 0,
+    random_healthtest_ondemand, "I",
+    "Re-run NIST SP 800-90B startup health tests for a noise source");
+
 static int
 random_check_uint_harvestmask(SYSCTL_HANDLER_ARGS)
 {
@@ -362,7 +588,8 @@ static const char *random_source_descr[ENTROPYSOURCE] = {
 	[RANDOM_SWI] = "SWI",
 	[RANDOM_FS_ATIME] = "FS_ATIME",
 	[RANDOM_UMA] = "UMA",
-	[RANDOM_CALLOUT] = "CALLOUT", /* ENVIRONMENTAL_END */
+	[RANDOM_CALLOUT] = "CALLOUT",
+	[RANDOM_RANDOMDEV] = "RANDOMDEV", /* ENVIRONMENTAL_END */
 	[RANDOM_PURE_OCTEON] = "PURE_OCTEON", /* PURE_START */
 	[RANDOM_PURE_SAFE] = "PURE_SAFE",
 	[RANDOM_PURE_GLXSB] = "PURE_GLXSB",
@@ -424,6 +651,9 @@ random_harvestq_init(void *unused __unused)
 	hc_source_mask = almost_everything_mask;
 	RANDOM_HARVEST_INIT_LOCK();
 	harvest_context.hc_active_buf = 0;
+
+	for (int i = 0; i < ENTROPYSOURCE; i++)
+		random_healthtest_init(i);
 }
 SYSINIT(random_device_h_init, SI_SUB_RANDOM, SI_ORDER_THIRD, random_harvestq_init, NULL);
 
diff --git a/sys/dev/random/random_harvestq.h b/sys/dev/random/random_harvestq.h
index 7804bf52aa4f..1d462500df85 100644
--- a/sys/dev/random/random_harvestq.h
+++ b/sys/dev/random/random_harvestq.h
@@ -49,4 +49,6 @@ random_get_cyclecount(void)
 	return ((uint32_t)get_cyclecount());
 }
 
+bool random_harvest_healthtest(const struct harvest_event *event);
+
 #endif /* SYS_DEV_RANDOM_RANDOM_HARVESTQ_H_INCLUDED */
diff --git a/sys/dev/random/randomdev.c b/sys/dev/random/randomdev.c
index 9d1c7b1167c8..ced4dd8067d9 100644
--- a/sys/dev/random/randomdev.c
+++ b/sys/dev/random/randomdev.c
@@ -312,7 +312,7 @@ randomdev_accumulate(uint8_t *buf, u_int count)
 	for (i = 0; i < RANDOM_KEYSIZE_WORDS; i += sizeof(event.he_entropy)/sizeof(event.he_entropy[0])) {
 		event.he_somecounter = random_get_cyclecount();
 		event.he_size = sizeof(event.he_entropy);
-		event.he_source = RANDOM_CACHED;
+		event.he_source = RANDOM_RANDOMDEV;
 		event.he_destination = destination++; /* Harmless cheating */
 		memcpy(event.he_entropy, entropy_data + i, sizeof(event.he_entropy));
 		p_random_alg_context->ra_event_processor(&event);
diff --git a/sys/dev/ufshci/ufshci_private.h b/sys/dev/ufshci/ufshci_private.h
index cac743884ee6..ac58d44102a0 100644
--- a/sys/dev/ufshci/ufshci_private.h
+++ b/sys/dev/ufshci/ufshci_private.h
@@ -149,6 +149,8 @@ struct ufshci_hw_queue {
 	bus_dmamap_t queuemem_map;
 	bus_addr_t req_queue_addr;
 
+	bus_addr_t *ucd_bus_addr;
+
 	uint32_t num_entries;
 	uint32_t num_trackers;
 
@@ -198,8 +200,6 @@ struct ufshci_req_queue {
 	bus_dma_tag_t dma_tag_payload;
 
 	bus_dmamap_t ucdmem_map;
-
-	bus_addr_t ucd_addr;
 };
 
 struct ufshci_device {
diff --git a/sys/dev/ufshci/ufshci_req_sdb.c b/sys/dev/ufshci/ufshci_req_sdb.c
index 4670281d367a..b1f303afaef5 100644
--- a/sys/dev/ufshci/ufshci_req_sdb.c
+++ b/sys/dev/ufshci/ufshci_req_sdb.c
@@ -48,6 +48,29 @@ ufshci_req_sdb_cmd_desc_destroy(struct ufshci_req_queue *req_queue)
 	}
 }
 
+static void
+ufshci_ucd_map(void *arg, bus_dma_segment_t *seg, int nseg, int error)
+{
+	struct ufshci_hw_queue *hwq = arg;
+	int i;
+
+	if (error != 0) {
+		printf("ufshci: Failed to map UCD, error = %d\n", error);
+		return;
+	}
+
+	if (hwq->num_trackers != nseg) {
+		printf(
+		    "ufshci: Failed to map UCD, num_trackers = %d, nseg = %d\n",
+		    hwq->num_trackers, nseg);
+		return;
+	}
+
+	for (i = 0; i < nseg; i++) {
+		hwq->ucd_bus_addr[i] = seg[i].ds_addr;
+	}
+}
+
 static int
 ufshci_req_sdb_cmd_desc_construct(struct ufshci_req_queue *req_queue,
     uint32_t num_entries, struct ufshci_controller *ctrlr)
@@ -55,7 +78,6 @@ ufshci_req_sdb_cmd_desc_construct(struct ufshci_req_queue *req_queue,
 	struct ufshci_hw_queue *hwq = &req_queue->hwq[UFSHCI_SDB_Q];
 	struct ufshci_tracker *tr;
 	size_t ucd_allocsz, payload_allocsz;
-	uint64_t ucdmem_phys;
 	uint8_t *ucdmem;
 	int i, error;
 
@@ -71,10 +93,11 @@ ufshci_req_sdb_cmd_desc_construct(struct ufshci_req_queue *req_queue,
 	 * Allocate physical memory for UTP Command Descriptor (UCD)
 	 * Note: UFSHCI UCD format is restricted to 128-byte alignment.
 	 */
-	error = bus_dma_tag_create(bus_get_dma_tag(ctrlr->dev), 128,
-	    ctrlr->page_size, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL,
-	    ucd_allocsz, howmany(ucd_allocsz, ctrlr->page_size),
-	    ctrlr->page_size, 0, NULL, NULL, &req_queue->dma_tag_ucd);
+	error = bus_dma_tag_create(bus_get_dma_tag(ctrlr->dev), 128, 0,
+	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, ucd_allocsz,
+	    howmany(ucd_allocsz, sizeof(struct ufshci_utp_cmd_desc)),
+	    sizeof(struct ufshci_utp_cmd_desc), 0, NULL, NULL,
+	    &req_queue->dma_tag_ucd);
 	if (error != 0) {
 		ufshci_printf(ctrlr, "request cmd desc tag create failed %d\n",
 		    error);
@@ -88,7 +111,7 @@ ufshci_req_sdb_cmd_desc_construct(struct ufshci_req_queue *req_queue,
 	}
 
 	if (bus_dmamap_load(req_queue->dma_tag_ucd, req_queue->ucdmem_map,
-		ucdmem, ucd_allocsz, ufshci_single_map, &ucdmem_phys, 0) != 0) {
+		ucdmem, ucd_allocsz, ufshci_ucd_map, hwq, 0) != 0) {
 		ufshci_printf(ctrlr, "failed to load cmd desc memory\n");
 		bus_dmamem_free(req_queue->dma_tag_ucd, req_queue->ucd,
 		    req_queue->ucdmem_map);
@@ -96,7 +119,6 @@ ufshci_req_sdb_cmd_desc_construct(struct ufshci_req_queue *req_queue,
 	}
 
 	req_queue->ucd = (struct ufshci_utp_cmd_desc *)ucdmem;
-	req_queue->ucd_addr = ucdmem_phys;
 
 	/*
 	 * Allocate physical memory for PRDT
@@ -128,10 +150,9 @@ ufshci_req_sdb_cmd_desc_construct(struct ufshci_req_queue *req_queue,
 		tr->slot_state = UFSHCI_SLOT_STATE_FREE;
 
 		tr->ucd = (struct ufshci_utp_cmd_desc *)ucdmem;
-		tr->ucd_bus_addr = ucdmem_phys;
+		tr->ucd_bus_addr = hwq->ucd_bus_addr[i];
 
 		ucdmem += sizeof(struct ufshci_utp_cmd_desc);
-		ucdmem_phys += sizeof(struct ufshci_utp_cmd_desc);
 
 		hwq->act_tr[i] = tr;
 	}
@@ -175,6 +196,11 @@ ufshci_req_sdb_construct(struct ufshci_controller *ctrlr,
 	req_queue->hwq = malloc(sizeof(struct ufshci_hw_queue), M_UFSHCI,
 	    M_ZERO | M_NOWAIT);
 	hwq = &req_queue->hwq[UFSHCI_SDB_Q];
+	hwq->num_entries = req_queue->num_entries;
+	hwq->num_trackers = req_queue->num_trackers;
+	req_queue->hwq->ucd_bus_addr = malloc(sizeof(bus_addr_t) *
+		req_queue->num_trackers,
+	    M_UFSHCI, M_ZERO | M_NOWAIT);
 
 	mtx_init(&hwq->qlock, "ufshci req_queue lock", NULL, MTX_DEF);
 
@@ -277,6 +303,7 @@ ufshci_req_sdb_destroy(struct ufshci_controller *ctrlr,
 	if (mtx_initialized(&hwq->qlock))
 		mtx_destroy(&hwq->qlock);
 
+	free(req_queue->hwq->ucd_bus_addr, M_UFSHCI);
 	free(req_queue->hwq, M_UFSHCI);
 }
 
diff --git a/sys/dev/usb/controller/xhci_pci.c b/sys/dev/usb/controller/xhci_pci.c
index b50e33ea36ce..d5cfd228a429 100644
--- a/sys/dev/usb/controller/xhci_pci.c
+++ b/sys/dev/usb/controller/xhci_pci.c
@@ -99,6 +99,11 @@ xhci_pci_match(device_t self)
 		return ("AMD Starship USB 3.0 controller");
 	case 0x149c1022:
 		return ("AMD Matisse USB 3.0 controller");
+	case 0x15b61022:
+	case 0x15b71022:
+		return ("AMD Raphael/Granite Ridge USB 3.1 controller");
+	case 0x15b81022:
+		return ("AMD Raphael/Granite Ridge USB 2.0 controller");
 	case 0x15e01022:
 	case 0x15e11022:
 		return ("AMD Raven USB 3.1 controller");
@@ -109,6 +114,8 @@ xhci_pci_match(device_t self)
 		return ("AMD 300 Series USB 3.1 controller");
 	case 0x43d51022:
 		return ("AMD 400 Series USB 3.1 controller");
+	case 0x43f71022:
+		return ("AMD 600 Series USB 3.2 controller");
 	case 0x78121022:
 	case 0x78141022:
 	case 0x79141022:
diff --git a/sys/dev/vt/hw/vga/vt_vga.c b/sys/dev/vt/hw/vga/vt_vga.c
index 64039575c0ad..675c0573bd7e 100644
--- a/sys/dev/vt/hw/vga/vt_vga.c
+++ b/sys/dev/vt/hw/vga/vt_vga.c
@@ -1347,7 +1347,7 @@ vga_postswitch(struct vt_device *vd)
 
 	/* Reinit VGA mode, to restore view after app which change mode. */
 	vga_initialize(vd, (vd->vd_flags & VDF_TEXTMODE));
-	/* Ask vt(9) to update chars on visible area. */
+	/* Ask vt(4) to update chars on visible area. */
 	vd->vd_flags |= VDF_INVALID;
 }
 
diff --git a/sys/dev/vt/vt_core.c b/sys/dev/vt/vt_core.c
index b0f58b38a6f1..b51ef6766de4 100644
--- a/sys/dev/vt/vt_core.c
+++ b/sys/dev/vt/vt_core.c
@@ -125,10 +125,10 @@ static const struct terminal_class vt_termclass = {
 			(vw)->vw_number)
 
 static SYSCTL_NODE(_kern, OID_AUTO, vt, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
-    "vt(9) parameters");
+    "vt(4) parameters");
 static VT_SYSCTL_INT(enable_altgr, 1, "Enable AltGr key (Do not assume R.Alt as Alt)");
 static VT_SYSCTL_INT(enable_bell, 0, "Enable bell");
-static VT_SYSCTL_INT(debug, 0, "vt(9) debug level");
+static VT_SYSCTL_INT(debug, 0, "vt(4) debug level");
 static VT_SYSCTL_INT(deadtimer, 15, "Time to wait busy process in VT_PROCESS mode");
 static VT_SYSCTL_INT(suspendswitch, 1, "Switch to VT0 before suspend");