author    Eric Joyner <erj@FreeBSD.org>  2021-12-02 00:50:06 +0000
committer Eric Joyner <erj@FreeBSD.org>  2022-05-23 23:23:49 +0000
commit    8a13362d49bf07dfc654e25976d057adbe0ac9c1 (patch)
tree      a657c7681b0f7f1ab8730f6b68deca4dff803fbc /sys/dev/ice
parent    f16e38162c75015036b1c5e220b2f82a4bd94af1 (diff)
Diffstat (limited to 'sys/dev/ice')
-rw-r--r--  sys/dev/ice/ice_adminq_cmd.h       54
-rw-r--r--  sys/dev/ice/ice_common.c          219
-rw-r--r--  sys/dev/ice/ice_common.h           14
-rw-r--r--  sys/dev/ice/ice_common_sysctls.h   24
-rw-r--r--  sys/dev/ice/ice_iflib.h             5
-rw-r--r--  sys/dev/ice/ice_lib.c              27
-rw-r--r--  sys/dev/ice/ice_lib.h               2
-rw-r--r--  sys/dev/ice/ice_rdma.c            859
-rw-r--r--  sys/dev/ice/ice_rdma.h            311
-rw-r--r--  sys/dev/ice/ice_rdma_internal.h   102
-rw-r--r--  sys/dev/ice/ice_sched.c            80
-rw-r--r--  sys/dev/ice/ice_sched.h             1
-rw-r--r--  sys/dev/ice/ice_switch.c           45
-rw-r--r--  sys/dev/ice/ice_switch.h            4
-rw-r--r--  sys/dev/ice/ice_type.h              3
-rw-r--r--  sys/dev/ice/if_ice_iflib.c         74
-rw-r--r--  sys/dev/ice/irdma_di_if.m          97
-rw-r--r--  sys/dev/ice/irdma_if.m            106
18 files changed, 2019 insertions(+), 8 deletions(-)
diff --git a/sys/dev/ice/ice_adminq_cmd.h b/sys/dev/ice/ice_adminq_cmd.h
index eae8a412d1cc..a07ca6780a3c 100644
--- a/sys/dev/ice/ice_adminq_cmd.h
+++ b/sys/dev/ice/ice_adminq_cmd.h
@@ -2547,6 +2547,57 @@ struct ice_aqc_move_txqs_data {
struct ice_aqc_move_txqs_elem txqs[STRUCT_HACK_VAR_LEN];
};
+/* Add Tx RDMA Queue Set (indirect 0x0C33) */
+struct ice_aqc_add_rdma_qset {
+ u8 num_qset_grps;
+ u8 reserved[7];
+ __le32 addr_high;
+ __le32 addr_low;
+};
+
+/* This is the descriptor of each qset entry for the Add Tx RDMA Queue Set
+ * command (0x0C33). Only used within struct ice_aqc_add_rdma_qset_data.
+ */
+struct ice_aqc_add_tx_rdma_qset_entry {
+ __le16 tx_qset_id;
+ u8 rsvd[2];
+ __le32 qset_teid;
+ struct ice_aqc_txsched_elem info;
+};
+
+/* The format of the command buffer for Add Tx RDMA Queue Set (0x0C33)
+ * is an array of the following structs. Please note that the length of
+ * each struct ice_aqc_add_rdma_qset_data is variable due to the variable
+ * number of queues in each group!
+ */
+struct ice_aqc_add_rdma_qset_data {
+ __le32 parent_teid;
+ __le16 num_qsets;
+ u8 rsvd[2];
+ struct ice_aqc_add_tx_rdma_qset_entry rdma_qsets[STRUCT_HACK_VAR_LEN];
+};
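[Editorial aside, not part of the patch: because each group carries a flexible rdma_qsets array, the total size of a command buffer built from this layout has to be computed by walking the groups. A minimal sketch, mirroring the validation loop that ice_aq_add_rdma_qsets() performs in ice_common.c below; the function name is illustrative, and ice_struct_size() is assumed to return sizeof(*grp) plus room for 'n' flexible-array entries, as it is used elsewhere in this driver.]

	/* Sketch only: sum the sizes of a chain of variable-length groups. */
	static u16
	example_rdma_qset_buf_size(struct ice_aqc_add_rdma_qset_data *grp,
				   u8 num_qset_grps)
	{
		u16 size = 0;
		u8 i;

		for (i = 0; i < num_qset_grps; i++) {
			u16 n = LE16_TO_CPU(grp->num_qsets);

			size += ice_struct_size(grp, rdma_qsets, n);
			/* step over this group's flexible array to reach the next group */
			grp = (struct ice_aqc_add_rdma_qset_data *)
				(grp->rdma_qsets + n);
		}
		return (size);
	}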
+
+/* Move RDMA Queue Set (indirect 0x0C34) */
+struct ice_aqc_move_rdma_qset_cmd {
+ u8 num_rdma_qset; /* Used by commands and response */
+ u8 flags;
+ u8 reserved[6];
+ __le32 addr_high;
+ __le32 addr_low;
+};
+
+/* Buffer */
+struct ice_aqc_move_rdma_qset_buffer_desc {
+ __le16 tx_qset_id;
+ __le16 qset_teid;
+};
+
+struct ice_aqc_move_rdma_qset_buffer {
+ __le32 src_parent_teid;
+ __le32 dest_parent_teid;
+ struct ice_aqc_move_rdma_qset_buffer_desc descs[STRUCT_HACK_VAR_LEN];
+};
+
/* Download Package (indirect 0x0C40) */
/* Also used for Update Package (indirect 0x0C42 and 0x0C41) */
struct ice_aqc_download_pkg {
@@ -2897,6 +2948,7 @@ struct ice_aq_desc {
struct ice_aqc_add_txqs add_txqs;
struct ice_aqc_dis_txqs dis_txqs;
struct ice_aqc_move_txqs move_txqs;
+ struct ice_aqc_add_rdma_qset add_rdma_qset;
struct ice_aqc_txqs_cleanup txqs_cleanup;
struct ice_aqc_add_get_update_free_vsi vsi_cmd;
struct ice_aqc_add_update_free_vsi_resp add_update_free_vsi_res;
@@ -3156,6 +3208,8 @@ enum ice_adminq_opc {
ice_aqc_opc_dis_txqs = 0x0C31,
ice_aqc_opc_txqs_cleanup = 0x0C31,
ice_aqc_opc_move_recfg_txqs = 0x0C32,
+ ice_aqc_opc_add_rdma_qset = 0x0C33,
+ ice_aqc_opc_move_rdma_qset = 0x0C34,
/* package commands */
ice_aqc_opc_download_pkg = 0x0C40,
diff --git a/sys/dev/ice/ice_common.c b/sys/dev/ice/ice_common.c
index 80aa3557bf75..3ae266b72d1f 100644
--- a/sys/dev/ice/ice_common.c
+++ b/sys/dev/ice/ice_common.c
@@ -1198,7 +1198,8 @@ enum ice_status ice_check_reset(struct ice_hw *hw)
GLNVM_ULD_POR_DONE_1_M |\
GLNVM_ULD_PCIER_DONE_2_M)
- uld_mask = ICE_RESET_DONE_MASK;
+ uld_mask = ICE_RESET_DONE_MASK | (hw->func_caps.common_cap.iwarp ?
+ GLNVM_ULD_PE_DONE_M : 0);
/* Device is Active; check Global Reset processes are done */
for (cnt = 0; cnt < ICE_PF_RESET_WAIT_COUNT; cnt++) {
@@ -2364,6 +2365,10 @@ ice_parse_common_caps(struct ice_hw *hw, struct ice_hw_common_caps *caps,
ice_debug(hw, ICE_DBG_INIT, "%s: mgmt_cem = %d\n", prefix,
caps->mgmt_cem);
break;
+ case ICE_AQC_CAPS_IWARP:
+ caps->iwarp = (number == 1);
+ ice_debug(hw, ICE_DBG_INIT, "%s: iwarp = %d\n", prefix, caps->iwarp);
+ break;
case ICE_AQC_CAPS_LED:
if (phys_id < ICE_MAX_SUPPORTED_GPIO_LED) {
caps->led[phys_id] = true;
@@ -2481,6 +2486,16 @@ ice_recalc_port_limited_caps(struct ice_hw *hw, struct ice_hw_common_caps *caps)
caps->maxtc = 4;
ice_debug(hw, ICE_DBG_INIT, "reducing maxtc to %d (based on #ports)\n",
caps->maxtc);
+ if (caps->iwarp) {
+ ice_debug(hw, ICE_DBG_INIT, "forcing RDMA off\n");
+ caps->iwarp = 0;
+ }
+
+ /* print message only when processing device capabilities
+ * during initialization.
+ */
+ if (caps == &hw->dev_caps.common_cap)
+ ice_info(hw, "RDMA functionality is not available with the current device configuration.\n");
}
}
@@ -4338,6 +4353,56 @@ ice_aq_move_recfg_lan_txq(struct ice_hw *hw, u8 num_qs, bool is_move,
return status;
}
+/**
+ * ice_aq_add_rdma_qsets
+ * @hw: pointer to the hardware structure
+ * @num_qset_grps: Number of RDMA Qset groups
+ * @qset_list: list of qset groups to be added
+ * @buf_size: size of buffer for indirect command
+ * @cd: pointer to command details structure or NULL
+ *
+ * Add Tx RDMA Qsets (0x0C33)
+ */
+enum ice_status
+ice_aq_add_rdma_qsets(struct ice_hw *hw, u8 num_qset_grps,
+ struct ice_aqc_add_rdma_qset_data *qset_list,
+ u16 buf_size, struct ice_sq_cd *cd)
+{
+ struct ice_aqc_add_rdma_qset_data *list;
+ struct ice_aqc_add_rdma_qset *cmd;
+ struct ice_aq_desc desc;
+ u16 i, sum_size = 0;
+
+ ice_debug(hw, ICE_DBG_TRACE, "%s\n", __func__);
+
+ cmd = &desc.params.add_rdma_qset;
+
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_add_rdma_qset);
+
+ if (!qset_list)
+ return ICE_ERR_PARAM;
+
+ if (num_qset_grps > ICE_LAN_TXQ_MAX_QGRPS)
+ return ICE_ERR_PARAM;
+
+ for (i = 0, list = qset_list; i < num_qset_grps; i++) {
+ u16 num_qsets = LE16_TO_CPU(list->num_qsets);
+
+ sum_size += ice_struct_size(list, rdma_qsets, num_qsets);
+ list = (struct ice_aqc_add_rdma_qset_data *)(list->rdma_qsets +
+ num_qsets);
+ }
+
+ if (buf_size != sum_size)
+ return ICE_ERR_PARAM;
+
+ desc.flags |= CPU_TO_LE16(ICE_AQ_FLAG_RD);
+
+ cmd->num_qset_grps = num_qset_grps;
+
+ return ice_aq_send_cmd(hw, &desc, qset_list, buf_size, cd);
+}
+
/* End of FW Admin Queue command wrappers */
/**
@@ -5101,6 +5166,158 @@ ice_cfg_vsi_lan(struct ice_port_info *pi, u16 vsi_handle, u16 tc_bitmap,
}
/**
+ * ice_cfg_vsi_rdma - configure the VSI RDMA queues
+ * @pi: port information structure
+ * @vsi_handle: software VSI handle
+ * @tc_bitmap: TC bitmap
+ * @max_rdmaqs: max RDMA queues array per TC
+ *
+ * This function adds/updates the VSI RDMA queues per TC.
+ */
+enum ice_status
+ice_cfg_vsi_rdma(struct ice_port_info *pi, u16 vsi_handle, u16 tc_bitmap,
+ u16 *max_rdmaqs)
+{
+ return ice_cfg_vsi_qs(pi, vsi_handle, tc_bitmap, max_rdmaqs,
+ ICE_SCHED_NODE_OWNER_RDMA);
+}
+
+/**
+ * ice_ena_vsi_rdma_qset
+ * @pi: port information structure
+ * @vsi_handle: software VSI handle
+ * @tc: TC number
+ * @rdma_qset: pointer to RDMA qset
+ * @num_qsets: number of RDMA qsets
+ * @qset_teid: pointer to qset node teids
+ *
+ * This function adds RDMA qset
+ */
+enum ice_status
+ice_ena_vsi_rdma_qset(struct ice_port_info *pi, u16 vsi_handle, u8 tc,
+ u16 *rdma_qset, u16 num_qsets, u32 *qset_teid)
+{
+ struct ice_aqc_txsched_elem_data node = { 0 };
+ struct ice_aqc_add_rdma_qset_data *buf;
+ struct ice_sched_node *parent;
+ enum ice_status status;
+ struct ice_hw *hw;
+ u16 i, buf_size;
+
+ if (!pi || pi->port_state != ICE_SCHED_PORT_STATE_READY)
+ return ICE_ERR_CFG;
+ hw = pi->hw;
+
+ if (!ice_is_vsi_valid(hw, vsi_handle))
+ return ICE_ERR_PARAM;
+
+ buf_size = ice_struct_size(buf, rdma_qsets, num_qsets);
+ buf = (struct ice_aqc_add_rdma_qset_data *)ice_malloc(hw, buf_size);
+ if (!buf)
+ return ICE_ERR_NO_MEMORY;
+ ice_acquire_lock(&pi->sched_lock);
+
+ parent = ice_sched_get_free_qparent(pi, vsi_handle, tc,
+ ICE_SCHED_NODE_OWNER_RDMA);
+ if (!parent) {
+ status = ICE_ERR_PARAM;
+ goto rdma_error_exit;
+ }
+ buf->parent_teid = parent->info.node_teid;
+ node.parent_teid = parent->info.node_teid;
+
+ buf->num_qsets = CPU_TO_LE16(num_qsets);
+ for (i = 0; i < num_qsets; i++) {
+ buf->rdma_qsets[i].tx_qset_id = CPU_TO_LE16(rdma_qset[i]);
+ buf->rdma_qsets[i].info.valid_sections =
+ ICE_AQC_ELEM_VALID_GENERIC | ICE_AQC_ELEM_VALID_CIR |
+ ICE_AQC_ELEM_VALID_EIR;
+ buf->rdma_qsets[i].info.generic = 0;
+ buf->rdma_qsets[i].info.cir_bw.bw_profile_idx =
+ CPU_TO_LE16(ICE_SCHED_DFLT_RL_PROF_ID);
+ buf->rdma_qsets[i].info.cir_bw.bw_alloc =
+ CPU_TO_LE16(ICE_SCHED_DFLT_BW_WT);
+ buf->rdma_qsets[i].info.eir_bw.bw_profile_idx =
+ CPU_TO_LE16(ICE_SCHED_DFLT_RL_PROF_ID);
+ buf->rdma_qsets[i].info.eir_bw.bw_alloc =
+ CPU_TO_LE16(ICE_SCHED_DFLT_BW_WT);
+ }
+ status = ice_aq_add_rdma_qsets(hw, 1, buf, buf_size, NULL);
+ if (status != ICE_SUCCESS) {
+ ice_debug(hw, ICE_DBG_RDMA, "add RDMA qset failed\n");
+ goto rdma_error_exit;
+ }
+ node.data.elem_type = ICE_AQC_ELEM_TYPE_LEAF;
+ for (i = 0; i < num_qsets; i++) {
+ node.node_teid = buf->rdma_qsets[i].qset_teid;
+ status = ice_sched_add_node(pi, hw->num_tx_sched_layers - 1,
+ &node);
+ if (status)
+ break;
+ qset_teid[i] = LE32_TO_CPU(node.node_teid);
+ }
+rdma_error_exit:
+ ice_release_lock(&pi->sched_lock);
+ ice_free(hw, buf);
+ return status;
+}
+
+/**
+ * ice_dis_vsi_rdma_qset - free RDMA resources
+ * @pi: port_info struct
+ * @count: number of RDMA qsets to free
+ * @qset_teid: TEID of qset node
+ * @q_id: list of queue IDs being disabled
+ */
+enum ice_status
+ice_dis_vsi_rdma_qset(struct ice_port_info *pi, u16 count, u32 *qset_teid,
+ u16 *q_id)
+{
+ struct ice_aqc_dis_txq_item *qg_list;
+ enum ice_status status = ICE_SUCCESS;
+ struct ice_hw *hw;
+ u16 qg_size;
+ int i;
+
+ if (!pi || pi->port_state != ICE_SCHED_PORT_STATE_READY)
+ return ICE_ERR_CFG;
+
+ hw = pi->hw;
+
+ qg_size = ice_struct_size(qg_list, q_id, 1);
+ qg_list = (struct ice_aqc_dis_txq_item *)ice_malloc(hw, qg_size);
+ if (!qg_list)
+ return ICE_ERR_NO_MEMORY;
+
+ ice_acquire_lock(&pi->sched_lock);
+
+ for (i = 0; i < count; i++) {
+ struct ice_sched_node *node;
+
+ node = ice_sched_find_node_by_teid(pi->root, qset_teid[i]);
+ if (!node)
+ continue;
+
+ qg_list->parent_teid = node->info.parent_teid;
+ qg_list->num_qs = 1;
+ qg_list->q_id[0] =
+ CPU_TO_LE16(q_id[i] |
+ ICE_AQC_Q_DIS_BUF_ELEM_TYPE_RDMA_QSET);
+
+ status = ice_aq_dis_lan_txq(hw, 1, qg_list, qg_size,
+ ICE_NO_RESET, 0, NULL);
+ if (status)
+ break;
+
+ ice_free_sched_node(pi, node);
+ }
+
+ ice_release_lock(&pi->sched_lock);
+ ice_free(hw, qg_list);
+ return status;
+}
+
+/**
* ice_is_main_vsi - checks whether the VSI is main VSI
* @hw: pointer to the HW struct
* @vsi_handle: VSI handle
diff --git a/sys/dev/ice/ice_common.h b/sys/dev/ice/ice_common.h
index 48fd52cb2484..b113082b2394 100644
--- a/sys/dev/ice/ice_common.h
+++ b/sys/dev/ice/ice_common.h
@@ -147,6 +147,11 @@ ice_aq_move_recfg_lan_txq(struct ice_hw *hw, u8 num_qs, bool is_move,
struct ice_aqc_move_txqs_data *buf, u16 buf_size,
u8 *txqs_moved, struct ice_sq_cd *cd);
+enum ice_status
+ice_aq_add_rdma_qsets(struct ice_hw *hw, u8 num_qset_grps,
+ struct ice_aqc_add_rdma_qset_data *qset_list,
+ u16 buf_size, struct ice_sq_cd *cd);
+
bool ice_check_sq_alive(struct ice_hw *hw, struct ice_ctl_q_info *cq);
enum ice_status ice_aq_q_shutdown(struct ice_hw *hw, bool unloading);
void ice_fill_dflt_direct_cmd_desc(struct ice_aq_desc *desc, u16 opcode);
@@ -257,6 +262,15 @@ __ice_write_sr_word(struct ice_hw *hw, u32 offset, const u16 *data);
enum ice_status
__ice_write_sr_buf(struct ice_hw *hw, u32 offset, u16 words, const u16 *data);
enum ice_status
+ice_cfg_vsi_rdma(struct ice_port_info *pi, u16 vsi_handle, u16 tc_bitmap,
+ u16 *max_rdmaqs);
+enum ice_status
+ice_ena_vsi_rdma_qset(struct ice_port_info *pi, u16 vsi_handle, u8 tc,
+ u16 *rdma_qset, u16 num_qsets, u32 *qset_teid);
+enum ice_status
+ice_dis_vsi_rdma_qset(struct ice_port_info *pi, u16 count, u32 *qset_teid,
+ u16 *q_id);
+enum ice_status
ice_dis_vsi_txq(struct ice_port_info *pi, u16 vsi_handle, u8 tc, u8 num_queues,
u16 *q_handle, u16 *q_ids, u32 *q_teids,
enum ice_disq_rst_src rst_src, u16 vmvf_num,
diff --git a/sys/dev/ice/ice_common_sysctls.h b/sys/dev/ice/ice_common_sysctls.h
index f1a23ce828ff..0d149a5bc25c 100644
--- a/sys/dev/ice/ice_common_sysctls.h
+++ b/sys/dev/ice/ice_common_sysctls.h
@@ -46,6 +46,15 @@
#include <sys/sysctl.h>
/**
+ * @var ice_enable_irdma
+ * @brief boolean indicating if the iRDMA client interface is enabled
+ *
+ * Global sysctl variable indicating whether the RDMA client interface feature
+ * is enabled.
+ */
+bool ice_enable_irdma = true;
+
+/**
* @var ice_enable_tx_fc_filter
* @brief boolean indicating if the Tx Flow Control filter should be enabled
*
@@ -85,6 +94,15 @@ bool ice_enable_tx_lldp_filter = true;
*/
bool ice_enable_health_events = true;
+/**
+ * @var ice_rdma_max_msix
+ * @brief maximum number of MSI-X vectors to reserve for RDMA interface
+ *
+ * Global sysctl variable indicating the maximum number of MSI-X vectors to
+ * reserve for a single RDMA interface.
+ */
+static uint16_t ice_rdma_max_msix = ICE_RDMA_MAX_MSIX;
+
/* sysctls marked as tunable, (i.e. with the CTLFLAG_TUN set) will
* automatically load tunable values, without the need to manually create the
* TUNABLE definition.
@@ -105,6 +123,12 @@ SYSCTL_BOOL(_hw_ice, OID_AUTO, enable_health_events, CTLFLAG_RDTUN,
&ice_enable_health_events, 0,
"Enable FW health event reporting globally");
+SYSCTL_BOOL(_hw_ice, OID_AUTO, irdma, CTLFLAG_RDTUN, &ice_enable_irdma, 0,
+ "Enable iRDMA client interface");
+
+SYSCTL_U16(_hw_ice, OID_AUTO, rdma_max_msix, CTLFLAG_RDTUN, &ice_rdma_max_msix,
+ 0, "Maximum number of MSI-X vectors to reserve per RDMA interface");
+
SYSCTL_BOOL(_hw_ice_debug, OID_AUTO, enable_tx_fc_filter, CTLFLAG_RDTUN,
&ice_enable_tx_fc_filter, 0,
"Drop Ethertype 0x8808 control frames originating from non-HW sources");
diff --git a/sys/dev/ice/ice_iflib.h b/sys/dev/ice/ice_iflib.h
index 947881f1d076..07654afe4539 100644
--- a/sys/dev/ice/ice_iflib.h
+++ b/sys/dev/ice/ice_iflib.h
@@ -236,6 +236,11 @@ struct ice_softc {
struct mtx admin_mtx; /* mutex to protect the admin timer */
struct callout admin_timer; /* timer to trigger admin task */
+ /* iRDMA peer interface */
+ struct ice_rdma_entry rdma_entry;
+ int irdma_vectors;
+ u16 *rdma_imap;
+
struct ice_vsi **all_vsi; /* Array of VSI pointers */
u16 num_available_vsi; /* Size of VSI array */
diff --git a/sys/dev/ice/ice_lib.c b/sys/dev/ice/ice_lib.c
index 4212a0c76c88..f562b3b55b63 100644
--- a/sys/dev/ice/ice_lib.c
+++ b/sys/dev/ice/ice_lib.c
@@ -3984,6 +3984,11 @@ ice_config_pfc(struct ice_softc *sc, u8 new_mode)
local_dcbx_cfg->pfc.willing = 0;
local_dcbx_cfg->pfc.mbc = 0;
+ /* Warn if PFC is being disabled with RoCE v2 in use */
+ if (new_mode == 0 && sc->rdma_entry.attached)
+ device_printf(dev,
+ "WARNING: Priority Flow Control should be enabled when RoCEv2 is in use\n");
+
status = ice_set_dcb_cfg(pi);
if (status) {
device_printf(dev,
@@ -7800,6 +7805,8 @@ ice_do_dcb_reconfig(struct ice_softc *sc)
pi = sc->hw.port_info;
local_dcbx_cfg = &pi->qos_cfg.local_dcbx_cfg;
+ ice_rdma_notify_dcb_qos_change(sc);
+
/* Set state when there's more than one TC */
tc_map = ice_dcb_get_tc_map(local_dcbx_cfg);
if (ice_dcb_num_tc(tc_map) > 1) {
@@ -7826,6 +7833,9 @@ ice_do_dcb_reconfig(struct ice_softc *sc)
/* Change PF VSI configuration */
ice_dcb_recfg(sc);
+ /* Send new configuration to RDMA client driver */
+ ice_rdma_dcb_qos_update(sc, pi);
+
ice_request_stack_reinit(sc);
}
@@ -8663,6 +8673,7 @@ ice_init_saved_phy_cfg(struct ice_softc *sc)
static int
ice_module_init(void)
{
+ ice_rdma_init();
return (0);
}
@@ -8679,6 +8690,7 @@ ice_module_init(void)
static int
ice_module_exit(void)
{
+ ice_rdma_exit();
return (0);
}
@@ -9029,8 +9041,17 @@ ice_alloc_intr_tracking(struct ice_softc *sc)
err = ENOMEM;
goto free_imgr;
}
+ if (!(sc->rdma_imap =
+ (u16 *)malloc(sizeof(u16) * hw->func_caps.common_cap.num_msix_vectors,
+ M_ICE, M_NOWAIT))) {
+ device_printf(dev, "Unable to allocate RDMA imap memory\n");
+ err = ENOMEM;
+ free(sc->pf_imap, M_ICE);
+ goto free_imgr;
+ }
for (u32 i = 0; i < hw->func_caps.common_cap.num_msix_vectors; i++) {
sc->pf_imap[i] = ICE_INVALID_RES_IDX;
+ sc->rdma_imap[i] = ICE_INVALID_RES_IDX;
}
return (0);
@@ -9058,6 +9079,12 @@ ice_free_intr_tracking(struct ice_softc *sc)
free(sc->pf_imap, M_ICE);
sc->pf_imap = NULL;
}
+ if (sc->rdma_imap) {
+ ice_resmgr_release_map(&sc->imgr, sc->rdma_imap,
+ sc->lan_vectors);
+ free(sc->rdma_imap, M_ICE);
+ sc->rdma_imap = NULL;
+ }
ice_resmgr_destroy(&sc->imgr);
}
diff --git a/sys/dev/ice/ice_lib.h b/sys/dev/ice/ice_lib.h
index d0514a58a745..948f9858d43d 100644
--- a/sys/dev/ice/ice_lib.h
+++ b/sys/dev/ice/ice_lib.h
@@ -65,6 +65,8 @@
#include "ice_sched.h"
#include "ice_resmgr.h"
+#include "ice_rdma_internal.h"
+
#include "ice_rss.h"
/* Hide debug sysctls unless INVARIANTS is enabled */
diff --git a/sys/dev/ice/ice_rdma.c b/sys/dev/ice/ice_rdma.c
new file mode 100644
index 000000000000..5d89deed0f90
--- /dev/null
+++ b/sys/dev/ice/ice_rdma.c
@@ -0,0 +1,859 @@
+/* SPDX-License-Identifier: BSD-3-Clause */
+/* Copyright (c) 2022, Intel Corporation
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+/*$FreeBSD$*/
+
+/**
+ * @file ice_rdma.c
+ * @brief RDMA client driver interface
+ *
+ * Functions to interface with the RDMA client driver, for enabling RDMA
+ * functionality for the ice driver.
+ *
+ * The RDMA client interface is based on a simple kobject interface which is
+ * defined by the irdma_if.m and irdma_di_if.m interfaces.
+ *
+ * The ice device driver provides the irdma_di_if.m interface methods, while
+ * the client RDMA driver provides the irdma_if.m interface methods as an
+ * extension on top of the irdma_di_if kobject.
+ *
+ * The initial connection between drivers is done via the RDMA client driver
+ * calling ice_rdma_register.
+ */
+
+#include "ice_iflib.h"
+#include "ice_rdma_internal.h"
+
+#include "irdma_if.h"
+#include "irdma_di_if.h"
+
+/**
+ * @var ice_rdma
+ * @brief global RDMA driver state
+ *
+ * Contains global state the driver uses to connect to a client RDMA interface
+ * driver.
+ */
+static struct ice_rdma_state ice_rdma;
+
+/*
+ * Helper function prototypes
+ */
+static int ice_rdma_pf_attach_locked(struct ice_softc *sc);
+static void ice_rdma_pf_detach_locked(struct ice_softc *sc);
+static int ice_rdma_check_version(struct ice_rdma_info *info);
+static void ice_rdma_cp_qos_info(struct ice_hw *hw,
+ struct ice_dcbx_cfg *dcbx_cfg,
+ struct ice_qos_params *qos_info);
+
+/*
+ * RDMA Device Interface prototypes
+ */
+static int ice_rdma_pf_reset(struct ice_rdma_peer *peer);
+static int ice_rdma_pf_msix_init(struct ice_rdma_peer *peer,
+ struct ice_rdma_msix_mapping *msix_info);
+static int ice_rdma_qset_register_request(struct ice_rdma_peer *peer,
+ struct ice_rdma_qset_update *res);
+static int ice_rdma_update_vsi_filter(struct ice_rdma_peer *peer_dev,
+ bool enable);
+static void ice_rdma_request_handler(struct ice_rdma_peer *peer,
+ struct ice_rdma_request *req);
+
+
+/**
+ * @var ice_rdma_di_methods
+ * @brief RDMA driver interface methods
+ *
+ * Kobject methods implementing the driver-side interface for the RDMA peer
+ * clients. This method table contains the operations which the client can
+ * request from the driver.
+ *
+ * The client driver will then extend this kobject class with methods that the
+ * driver can request from the client.
+ */
+static kobj_method_t ice_rdma_di_methods[] = {
+ KOBJMETHOD(irdma_di_reset, ice_rdma_pf_reset),
+ KOBJMETHOD(irdma_di_msix_init, ice_rdma_pf_msix_init),
+ KOBJMETHOD(irdma_di_qset_register_request, ice_rdma_qset_register_request),
+ KOBJMETHOD(irdma_di_vsi_filter_update, ice_rdma_update_vsi_filter),
+ KOBJMETHOD(irdma_di_req_handler, ice_rdma_request_handler),
+ KOBJMETHOD_END
+};
+
+/* Define ice_rdma_di class which will be extended by the iRDMA driver */
+DEFINE_CLASS_0(ice_rdma_di, ice_rdma_di_class, ice_rdma_di_methods, sizeof(struct ice_rdma_peer));
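[Editorial aside: for context, the client driver's side of this contract could look like the sketch below. The irdma_probe/irdma_open/irdma_close/irdma_remove method names are assumptions, inferred from the IRDMA_PROBE()/IRDMA_OPEN() call sites in this file and presumed to be generated from irdma_if.m in the usual kobj fashion; the client function names are invented for illustration.]

	/* Hypothetical client-side sketch: extend ice_rdma_di_class with the
	 * irdma_if.m methods that the ice driver invokes on the peer.
	 */
	static int irdma_client_probe(struct ice_rdma_peer *peer);
	static int irdma_client_open(struct ice_rdma_peer *peer);
	static int irdma_client_close(struct ice_rdma_peer *peer);
	static int irdma_client_remove(struct ice_rdma_peer *peer);

	static kobj_method_t irdma_client_methods[] = {
		KOBJMETHOD(irdma_probe, irdma_client_probe),
		KOBJMETHOD(irdma_open, irdma_client_open),
		KOBJMETHOD(irdma_close, irdma_client_close),
		KOBJMETHOD(irdma_remove, irdma_client_remove),
		KOBJMETHOD_END
	};

	/* Inherit the driver-provided irdma_di_* methods from ice_rdma_di_class */
	DEFINE_CLASS_1(irdma_client, irdma_client_class, irdma_client_methods,
	    sizeof(struct ice_rdma_peer), ice_rdma_di_class);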
+
+/**
+ * ice_rdma_pf_reset - RDMA client interface requested a reset
+ * @peer: the RDMA peer client structure
+ *
+ * Implements IRDMA_DI_RESET, called by the RDMA client driver to request
+ * a reset of an ice driver device.
+ */
+static int
+ice_rdma_pf_reset(struct ice_rdma_peer *peer)
+{
+ struct ice_softc *sc = ice_rdma_peer_to_sc(peer);
+
+ /*
+ * Request that the driver re-initialize by bringing the interface
+ * down and up.
+ */
+ ice_request_stack_reinit(sc);
+
+ return (0);
+}
+
+/**
+ * ice_rdma_pf_msix_init - RDMA client interface request MSI-X initialization
+ * @peer: the RDMA peer client structure
+ * @msix_info: requested MSI-X mapping
+ *
+ * Implements IRDMA_DI_MSIX_INIT, called by the RDMA client driver to
+ * initialize the MSI-X resources required for RDMA functionality.
+ */
+static int
+ice_rdma_pf_msix_init(struct ice_rdma_peer *peer,
+ struct ice_rdma_msix_mapping __unused *msix_info)
+{
+ struct ice_softc *sc = ice_rdma_peer_to_sc(peer);
+
+ MPASS(msix_info != NULL);
+
+ device_printf(sc->dev, "%s: iRDMA MSI-X initialization request is not yet implemented\n", __func__);
+
+ /* TODO: implement MSI-X initialization for RDMA */
+ return (ENOSYS);
+}
+
+/**
+ * ice_rdma_qset_register_request - RDMA client interface request qset
+ * registration or unregistration
+ * @peer: the RDMA peer client structure
+ * @res: resources to be registered or unregistered
+ */
+static int
+ice_rdma_qset_register_request(struct ice_rdma_peer *peer, struct ice_rdma_qset_update *res)
+{
+ struct ice_softc *sc = ice_rdma_peer_to_sc(peer);
+ struct ice_vsi *vsi = NULL;
+ struct ice_dcbx_cfg *dcbx_cfg;
+ struct ice_hw *hw = &sc->hw;
+ enum ice_status status;
+ int count, i, ret = 0;
+ uint32_t *qset_teid;
+ uint16_t *qs_handle;
+ uint16_t max_rdmaqs[ICE_MAX_TRAFFIC_CLASS];
+ uint16_t vsi_id;
+ uint8_t ena_tc = 0;
+
+ if (!res)
+ return -EINVAL;
+
+ if (res->cnt_req > ICE_MAX_TXQ_PER_TXQG)
+ return -EINVAL;
+
+ switch(res->res_type) {
+ case ICE_RDMA_QSET_ALLOC:
+ count = res->cnt_req;
+ vsi_id = peer->pf_vsi_num;
+ break;
+ case ICE_RDMA_QSET_FREE:
+ count = res->res_allocated;
+ vsi_id = res->qsets.vsi_id;
+ break;
+ default:
+ return -EINVAL;
+ }
+ qset_teid = (uint32_t *)ice_calloc(hw, count, sizeof(*qset_teid));
+ if (!qset_teid)
+ return -ENOMEM;
+
+ qs_handle = (uint16_t *)ice_calloc(hw, count, sizeof(*qs_handle));
+ if (!qs_handle) {
+ ice_free(hw, qset_teid);
+ return -ENOMEM;
+ }
+
+ ice_for_each_traffic_class(i)
+ max_rdmaqs[i] = 0;
+ for (i = 0; i < sc->num_available_vsi; i++) {
+ if (sc->all_vsi[i] &&
+ ice_get_hw_vsi_num(hw, sc->all_vsi[i]->idx) == vsi_id) {
+ vsi = sc->all_vsi[i];
+ break;
+ }
+ }
+
+ if (!vsi) {
+ ice_debug(hw, ICE_DBG_RDMA, "RDMA QSet invalid VSI\n");
+ ret = -EINVAL;
+ goto out;
+ }
+ if (sc != vsi->sc) {
+ ice_debug(hw, ICE_DBG_RDMA, "VSI is tied to unexpected device\n");
+ ret = -EXDEV;
+ goto out;
+ }
+
+ for (i = 0; i < count; i++) {
+ struct ice_rdma_qset_params *qset;
+
+ qset = &res->qsets;
+ if (qset->vsi_id != peer->pf_vsi_num) {
+ ice_debug(hw, ICE_DBG_RDMA, "RDMA QSet invalid VSI requested %d %d\n",
+ qset->vsi_id, peer->pf_vsi_num);
+ ret = -EINVAL;
+ goto out;
+ }
+ max_rdmaqs[qset->tc]++;
+ qs_handle[i] = qset->qs_handle;
+ qset_teid[i] = qset->teid;
+ }
+
+ switch(res->res_type) {
+ case ICE_RDMA_QSET_ALLOC:
+ dcbx_cfg = &hw->port_info->qos_cfg.local_dcbx_cfg;
+ for (i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) {
+ ena_tc |= BIT(dcbx_cfg->etscfg.prio_table[i]);
+ }
+
+ ice_debug(hw, ICE_DBG_RDMA, "%s:%d ena_tc=%x\n", __func__, __LINE__, ena_tc);
+ status = ice_cfg_vsi_rdma(hw->port_info, vsi->idx, ena_tc,
+ max_rdmaqs);
+ if (status) {
+ ice_debug(hw, ICE_DBG_RDMA, "Failed VSI RDMA qset config\n");
+ ret = -EINVAL;
+ goto out;
+ }
+
+ for (i = 0; i < count; i++) {
+ struct ice_rdma_qset_params *qset;
+
+ qset = &res->qsets;
+ status = ice_ena_vsi_rdma_qset(hw->port_info, vsi->idx,
+ qset->tc, &qs_handle[i], 1,
+ &qset_teid[i]);
+ if (status) {
+ ice_debug(hw, ICE_DBG_RDMA, "Failed VSI RDMA qset enable\n");
+ ret = -EINVAL;
+ goto out;
+ }
+ qset->teid = qset_teid[i];
+ }
+ break;
+ case ICE_RDMA_QSET_FREE:
+ status = ice_dis_vsi_rdma_qset(hw->port_info, count, qset_teid, qs_handle);
+ if (status)
+ ret = -EINVAL;
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+
+out:
+ ice_free(hw, qs_handle);
+ ice_free(hw, qset_teid);
+
+ return ret;
+}
+
+/**
+ * ice_rdma_update_vsi_filter - configure VSI information
+ * when opening or closing the RDMA driver
+ * @peer: the RDMA peer client structure
+ * @enable: enable or disable the RDMA filter
+ */
+static int
+ice_rdma_update_vsi_filter(struct ice_rdma_peer *peer,
+ bool enable)
+{
+ struct ice_softc *sc = ice_rdma_peer_to_sc(peer);
+ struct ice_vsi *vsi;
+ int ret;
+
+ vsi = &sc->pf_vsi;
+ if (!vsi)
+ return -EINVAL;
+
+ ret = ice_cfg_iwarp_fltr(&sc->hw, vsi->idx, enable);
+ if (ret) {
+ device_printf(sc->dev, "Failed to %sable iWARP filtering\n",
+ enable ? "en" : "dis");
+ } else {
+ if (enable)
+ vsi->info.q_opt_flags |= ICE_AQ_VSI_Q_OPT_PE_FLTR_EN;
+ else
+ vsi->info.q_opt_flags &= ~ICE_AQ_VSI_Q_OPT_PE_FLTR_EN;
+ }
+
+ return ret;
+}
+
+/**
+ * ice_rdma_request_handler - handle incoming requests from the RDMA driver
+ * @peer: the RDMA peer client structure
+ * @req: structure containing request
+ */
+static void
+ice_rdma_request_handler(struct ice_rdma_peer *peer,
+ struct ice_rdma_request *req)
+{
+ if (!req || !peer) {
+ log(LOG_WARNING, "%s: peer or req is not valid\n", __func__);
+ return;
+ }
+
+ switch(req->type) {
+ case ICE_RDMA_EVENT_RESET:
+ break;
+ case ICE_RDMA_EVENT_QSET_REGISTER:
+ ice_rdma_qset_register_request(peer, &req->res);
+ break;
+ case ICE_RDMA_EVENT_VSI_FILTER_UPDATE:
+ ice_rdma_update_vsi_filter(peer, req->enable_filter);
+ break;
+ default:
+ log(LOG_WARNING, "%s: Event %d not supported\n", __func__, req->type);
+ break;
+ }
+}
+
+/**
+ * ice_rdma_cp_qos_info - gather the current QoS/DCB settings on the LAN
+ * side to pass to the RDMA driver
+ * @hw: ice hw structure
+ * @dcbx_cfg: current DCB settings in ice driver
+ * @qos_info: destination of the DCB settings
+ */
+static void
+ice_rdma_cp_qos_info(struct ice_hw *hw, struct ice_dcbx_cfg *dcbx_cfg,
+ struct ice_qos_params *qos_info)
+{
+ u32 up2tc;
+ u8 j;
+ u8 num_tc = 0;
+ u8 val_tc = 0; /* TC bitmap used for validation */
+ u8 cnt_tc = 0;
+
+ /* setup qos_info fields with defaults */
+ qos_info->num_apps = 0;
+ qos_info->num_tc = 1;
+
+ for (j = 0; j < ICE_TC_MAX_USER_PRIORITY; j++)
+ qos_info->up2tc[j] = 0;
+
+ qos_info->tc_info[0].rel_bw = 100;
+ for (j = 1; j < IEEE_8021QAZ_MAX_TCS; j++)
+ qos_info->tc_info[j].rel_bw = 0;
+
+ /* gather current values */
+ up2tc = rd32(hw, PRTDCB_TUP2TC);
+ qos_info->num_apps = dcbx_cfg->numapps;
+
+ /* build a bitmap of the TCs referenced by the UP-to-TC priority table */
+ for (j = 0; j < ICE_MAX_TRAFFIC_CLASS; j++) {
+ num_tc |= BIT(dcbx_cfg->etscfg.prio_table[j]);
+ }
+ /* count how many TCs are contiguous starting from TC 0 */
+ for (j = 0; j < ICE_MAX_TRAFFIC_CLASS; j++) {
+ if (num_tc & BIT(j)) {
+ cnt_tc++;
+ val_tc |= BIT(j);
+ } else {
+ break;
+ }
+ }
+ /* use the contiguous TC count only if every TC in use is contiguous */
+ qos_info->num_tc = (val_tc == num_tc && num_tc != 0) ? cnt_tc : 1;
+ /* each user priority occupies a 3-bit TC field in PRTDCB_TUP2TC */
+ for (j = 0; j < ICE_TC_MAX_USER_PRIORITY; j++)
+ qos_info->up2tc[j] = (up2tc >> (j * 3)) & 0x7;
+
+ for (j = 0; j < IEEE_8021QAZ_MAX_TCS; j++)
+ qos_info->tc_info[j].rel_bw = dcbx_cfg->etscfg.tcbwtable[j];
+ for (j = 0; j < qos_info->num_apps; j++) {
+ qos_info->apps[j].priority = dcbx_cfg->app[j].priority;
+ qos_info->apps[j].prot_id = dcbx_cfg->app[j].prot_id;
+ qos_info->apps[j].selector = dcbx_cfg->app[j].selector;
+ }
+}
+
+/**
+ * ice_rdma_check_version - Check that the provided RDMA version is compatible
+ * @info: the RDMA client information structure
+ *
+ * Verify that the client RDMA driver provided a version that is compatible
+ * with the driver interface.
+ */
+static int
+ice_rdma_check_version(struct ice_rdma_info *info)
+{
+ /* Make sure the MAJOR version matches */
+ if (info->major_version != ICE_RDMA_MAJOR_VERSION) {
+ log(LOG_WARNING, "%s: the iRDMA driver requested version %d.%d.%d, but this driver only supports major version %d.x.x\n",
+ __func__,
+ info->major_version, info->minor_version, info->patch_version,
+ ICE_RDMA_MAJOR_VERSION);
+ return (ENOTSUP);
+ }
+
+ /*
+ * Make sure that the MINOR version is compatible.
+ *
+ * This means that the RDMA client driver version MUST not be greater
+ * than the version provided by the driver, as it would indicate that
+ * the RDMA client expects features which are not supported by the
+ * main driver.
+ */
+ if (info->minor_version > ICE_RDMA_MINOR_VERSION) {
+ log(LOG_WARNING, "%s: the iRDMA driver requested version %d.%d.%d, but this driver only supports up to minor version %d.%d.x\n",
+ __func__,
+ info->major_version, info->minor_version, info->patch_version,
+ ICE_RDMA_MAJOR_VERSION, ICE_RDMA_MINOR_VERSION);
+ return (ENOTSUP);
+ }
+
+ /*
+ * Make sure that the PATCH version is compatible.
+ *
+ * This means that the RDMA client version MUST not be greater than
+ * the version provided by the driver, as it may indicate that the
+ * RDMA client expects certain backwards compatible bug fixes which
+ * are not implemented by this version of the main driver.
+ */
+ if ((info->minor_version == ICE_RDMA_MINOR_VERSION) &&
+ (info->patch_version > ICE_RDMA_PATCH_VERSION)) {
+ log(LOG_WARNING, "%s: the iRDMA driver requested version %d.%d.%d, but this driver only supports up to patch version %d.%d.%d\n",
+ __func__,
+ info->major_version, info->minor_version, info->patch_version,
+ ICE_RDMA_MAJOR_VERSION, ICE_RDMA_MINOR_VERSION, ICE_RDMA_PATCH_VERSION);
+ return (ENOTSUP);
+ }
+
+ /* Make sure that the kobject class is initialized */
+ if (info->rdma_class == NULL) {
+ log(LOG_WARNING, "%s: the iRDMA driver did not specify a kobject interface\n",
+ __func__);
+ return (EINVAL);
+ }
+
+ return (0);
+}
+
+/**
+ * ice_rdma_register - Register an RDMA client driver
+ * @info: the RDMA client information structure
+ *
+ * Called by the RDMA client driver on load. Used to initialize the RDMA
+ * client driver interface and enable interop between the ice driver and the
+ * RDMA client driver.
+ *
+ * The RDMA client driver must provide the version number it expects, along
+ * with a pointer to a kobject class that extends the irdma_di_if class, and
+ * implements the irdma_if class interface.
+ */
+int
+ice_rdma_register(struct ice_rdma_info *info)
+{
+ struct ice_rdma_entry *entry;
+ int err = 0;
+
+ sx_xlock(&ice_rdma.mtx);
+
+ if (!ice_enable_irdma) {
+ log(LOG_INFO, "%s: The iRDMA driver interface has been disabled\n", __func__);
+ err = (ECONNREFUSED);
+ goto return_unlock;
+ }
+
+ if (ice_rdma.registered) {
+ log(LOG_WARNING, "%s: iRDMA driver already registered\n", __func__);
+ err = (EBUSY);
+ goto return_unlock;
+ }
+
+ /* Make sure the iRDMA version is compatible */
+ err = ice_rdma_check_version(info);
+ if (err)
+ goto return_unlock;
+
+ log(LOG_INFO, "%s: iRDMA driver registered using version %d.%d.%d\n",
+ __func__, info->major_version, info->minor_version, info->patch_version);
+
+ ice_rdma.peer_class = info->rdma_class;
+
+ /*
+ * Initialize the kobject interface and notify the RDMA client of each
+ * existing PF interface.
+ */
+ LIST_FOREACH(entry, &ice_rdma.peers, node) {
+ kobj_init((kobj_t)&entry->peer, ice_rdma.peer_class);
+ IRDMA_PROBE(&entry->peer);
+ if (entry->initiated)
+ IRDMA_OPEN(&entry->peer);
+ }
+ ice_rdma.registered = true;
+
+return_unlock:
+ sx_xunlock(&ice_rdma.mtx);
+
+ return (err);
+}
+
+/**
+ * ice_rdma_unregister - Unregister an RDMA client driver
+ *
+ * Called by the RDMA client driver on unload. Used to de-initialize the RDMA
+ * client driver interface and shut down communication between the ice driver
+ * and the RDMA client driver.
+ */
+int
+ice_rdma_unregister(void)
+{
+ struct ice_rdma_entry *entry;
+
+ sx_xlock(&ice_rdma.mtx);
+
+ if (!ice_rdma.registered) {
+ log(LOG_WARNING, "%s: iRDMA driver was not previously registered\n",
+ __func__);
+ sx_xunlock(&ice_rdma.mtx);
+ return (ENOENT);
+ }
+
+ log(LOG_INFO, "%s: iRDMA driver unregistered\n", __func__);
+ ice_rdma.registered = false;
+ ice_rdma.peer_class = NULL;
+
+ /*
+ * Release the kobject interface for each of the existing PF
+ * interfaces. Note that we do not notify the client about removing
+ * each PF, as it is assumed that the client will have already cleaned
+ * up any associated resources when it is unregistered.
+ */
+ LIST_FOREACH(entry, &ice_rdma.peers, node)
+ kobj_delete((kobj_t)&entry->peer, NULL);
+
+ sx_xunlock(&ice_rdma.mtx);
+
+ return (0);
+}
+
+/**
+ * ice_rdma_init - RDMA driver init routine
+ *
+ * Called during ice driver module initialization to setup the RDMA client
+ * interface mutex and RDMA peer structure list.
+ */
+void
+ice_rdma_init(void)
+{
+ LIST_INIT(&ice_rdma.peers);
+ sx_init_flags(&ice_rdma.mtx, "ice rdma interface", SX_DUPOK);
+
+ ice_rdma.registered = false;
+ ice_rdma.peer_class = NULL;
+}
+
+/**
+ * ice_rdma_exit - RDMA driver exit routine
+ *
+ * Called during ice driver module exit to shutdown the RDMA client interface
+ * mutex.
+ */
+void
+ice_rdma_exit(void)
+{
+ MPASS(LIST_EMPTY(&ice_rdma.peers));
+ sx_destroy(&ice_rdma.mtx);
+}
+
+/**
+ * ice_rdma_pf_attach_locked - Prepare a PF for RDMA connections
+ * @sc: the ice driver softc
+ *
+ * Initialize a peer entry for this PF and add it to the RDMA interface list.
+ * Notify the client RDMA driver of a new PF device.
+ *
+ * @pre must be called while holding the ice_rdma mutex.
+ */
+static int
+ice_rdma_pf_attach_locked(struct ice_softc *sc)
+{
+ struct ice_rdma_entry *entry;
+
+ /* Do not attach the PF unless RDMA is supported */
+ if (!ice_is_bit_set(sc->feat_cap, ICE_FEATURE_RDMA))
+ return (0);
+
+ entry = &sc->rdma_entry;
+ if (entry->attached) {
+ device_printf(sc->dev, "iRDMA peer entry already exists\n");
+ return (EEXIST);
+ }
+
+ entry->attached = true;
+ entry->peer.dev = sc->dev;
+ entry->peer.ifp = sc->ifp;
+ entry->peer.pf_id = sc->hw.pf_id;
+ entry->peer.pci_mem = sc->bar0.res;
+ entry->peer.pf_vsi_num = ice_get_hw_vsi_num(&sc->hw, sc->pf_vsi.idx);
+ if (sc->rdma_imap && sc->rdma_imap[0] != ICE_INVALID_RES_IDX &&
+ sc->irdma_vectors > 0) {
+ entry->peer.msix.base = sc->rdma_imap[0];
+ entry->peer.msix.count = sc->irdma_vectors;
+ }
+
+ /* Gather DCB/QOS info into peer */
+ memset(&entry->peer.initial_qos_info, 0, sizeof(entry->peer.initial_qos_info));
+ ice_rdma_cp_qos_info(&sc->hw, &sc->hw.port_info->qos_cfg.local_dcbx_cfg,
+ &entry->peer.initial_qos_info);
+
+ /*
+ * If the RDMA client driver has already registered, initialize the
+ * kobject and notify the client of a new PF
+ */
+ if (ice_rdma.registered) {
+ kobj_init((kobj_t)&entry->peer, ice_rdma.peer_class);
+ IRDMA_PROBE(&entry->peer);
+ }
+
+ LIST_INSERT_HEAD(&ice_rdma.peers, entry, node);
+
+ ice_set_bit(ICE_FEATURE_RDMA, sc->feat_en);
+
+ return (0);
+}
+
+/**
+ * ice_rdma_pf_attach - Notify the RDMA client of a new PF
+ * @sc: the ice driver softc
+ *
+ * Called during PF attach to notify the RDMA client of a new PF.
+ */
+int
+ice_rdma_pf_attach(struct ice_softc *sc)
+{
+ int err;
+
+ sx_xlock(&ice_rdma.mtx);
+ err = ice_rdma_pf_attach_locked(sc);
+ sx_xunlock(&ice_rdma.mtx);
+
+ return (err);
+}
+
+/**
+ * ice_rdma_pf_detach_locked - Notify the RDMA client on PF detach
+ * @sc: the ice driver softc
+ *
+ * Notify the RDMA peer client driver of removal of a PF, and release any
+ * RDMA-specific resources associated with that PF. Remove the PF from the
+ * list of available RDMA entries.
+ *
+ * @pre must be called while holding the ice_rdma mutex.
+ */
+static void
+ice_rdma_pf_detach_locked(struct ice_softc *sc)
+{
+ struct ice_rdma_entry *entry;
+
+ /* No need to detach the PF if RDMA is not enabled */
+ if (!ice_is_bit_set(sc->feat_en, ICE_FEATURE_RDMA))
+ return;
+
+ entry = &sc->rdma_entry;
+ if (!entry->attached) {
+ device_printf(sc->dev, "iRDMA peer entry was not attached\n");
+ return;
+ }
+
+ /*
+ * If the RDMA client driver is registered, notify the client that
+ * a PF has been removed, and release the kobject reference.
+ */
+ if (ice_rdma.registered) {
+ IRDMA_REMOVE(&entry->peer);
+ kobj_delete((kobj_t)&entry->peer, NULL);
+ }
+
+ LIST_REMOVE(entry, node);
+ entry->attached = false;
+
+ ice_clear_bit(ICE_FEATURE_RDMA, sc->feat_en);
+}
+
+/**
+ * ice_rdma_pf_detach - Notify the RDMA client of a PF detaching
+ * @sc: the ice driver softc
+ *
+ * Take the ice_rdma mutex and then notify the RDMA client that a PF has been
+ * removed.
+ */
+void
+ice_rdma_pf_detach(struct ice_softc *sc)
+{
+ sx_xlock(&ice_rdma.mtx);
+ ice_rdma_pf_detach_locked(sc);
+ sx_xunlock(&ice_rdma.mtx);
+}
+
+/**
+ * ice_rdma_pf_init - Notify the RDMA client that a PF has initialized
+ * @sc: the ice driver softc
+ *
+ * Called by the ice driver when a PF has been initialized. Notifies the RDMA
+ * client that a PF is up and ready to operate.
+ */
+int
+ice_rdma_pf_init(struct ice_softc *sc)
+{
+ struct ice_rdma_peer *peer = &sc->rdma_entry.peer;
+
+ sx_xlock(&ice_rdma.mtx);
+
+ /* Update the MTU */
+ peer->mtu = sc->ifp->if_mtu;
+ sc->rdma_entry.initiated = true;
+
+ if (sc->rdma_entry.attached && ice_rdma.registered) {
+ sx_xunlock(&ice_rdma.mtx);
+ return IRDMA_OPEN(peer);
+ }
+
+ sx_xunlock(&ice_rdma.mtx);
+
+ return (0);
+}
+
+/**
+ * ice_rdma_pf_stop - Notify the RDMA client of a stopped PF device
+ * @sc: the ice driver softc
+ *
+ * Called by the ice driver when a PF is stopped. Notifies the RDMA client
+ * driver that the PF has stopped and is not ready to operate.
+ */
+int
+ice_rdma_pf_stop(struct ice_softc *sc)
+{
+ sx_xlock(&ice_rdma.mtx);
+
+ sc->rdma_entry.initiated = false;
+ if (sc->rdma_entry.attached && ice_rdma.registered) {
+ sx_xunlock(&ice_rdma.mtx);
+ return IRDMA_CLOSE(&sc->rdma_entry.peer);
+ }
+
+ sx_xunlock(&ice_rdma.mtx);
+
+ return (0);
+}
+
+/**
+ * ice_rdma_link_change - Notify RDMA client of a change in link status
+ * @sc: the ice driver softc
+ * @linkstate: the link status
+ * @baudrate: the link rate in bits per second
+ *
+ * Notify the RDMA client of a link status change, by sending it the new link
+ * state and baudrate.
+ *
+ * The link state is represented the same way as in the ifnet structure. It
+ * should be LINK_STATE_UNKNOWN, LINK_STATE_DOWN, or LINK_STATE_UP.
+ */
+void
+ice_rdma_link_change(struct ice_softc *sc, int linkstate, uint64_t baudrate)
+{
+ struct ice_rdma_peer *peer = &sc->rdma_entry.peer;
+ struct ice_rdma_event event;
+
+ memset(&event, 0, sizeof(struct ice_rdma_event));
+ event.type = ICE_RDMA_EVENT_LINK_CHANGE;
+ event.linkstate = linkstate;
+ event.baudrate = baudrate;
+
+ sx_xlock(&ice_rdma.mtx);
+
+ if (sc->rdma_entry.attached && ice_rdma.registered)
+ IRDMA_EVENT_HANDLER(peer, &event);
+
+ sx_xunlock(&ice_rdma.mtx);
+}
+
+/**
+ * ice_rdma_notify_dcb_qos_change - notify RDMA driver to pause traffic
+ * @sc: the ice driver softc
+ *
+ * Notify the RDMA driver that QoS/DCB settings are about to change.
+ * Once this function returns, all QPs should be suspended.
+ */
+void
+ice_rdma_notify_dcb_qos_change(struct ice_softc *sc)
+{
+ struct ice_rdma_peer *peer = &sc->rdma_entry.peer;
+ struct ice_rdma_event event;
+
+ memset(&event, 0, sizeof(struct ice_rdma_event));
+ event.type = ICE_RDMA_EVENT_TC_CHANGE;
+ /* pre-event */
+ event.prep = true;
+
+ sx_xlock(&ice_rdma.mtx);
+ if (sc->rdma_entry.attached && ice_rdma.registered)
+ IRDMA_EVENT_HANDLER(peer, &event);
+ sx_xunlock(&ice_rdma.mtx);
+}
+
+/**
+ * ice_rdma_dcb_qos_update - pass the changed dcb settings to RDMA driver
+ * @sc: the ice driver softc
+ * @pi: the port info structure
+ *
+ * Pass the changed DCB settings to the RDMA driver. This function should be
+ * called only after ice_rdma_notify_dcb_qos_change has been called and has
+ * returned. After this function returns, all RDMA traffic should be
+ * resumed.
+ */
+void
+ice_rdma_dcb_qos_update(struct ice_softc *sc, struct ice_port_info *pi)
+{
+ struct ice_rdma_peer *peer = &sc->rdma_entry.peer;
+ struct ice_rdma_event event;
+
+ memset(&event, 0, sizeof(struct ice_rdma_event));
+ event.type = ICE_RDMA_EVENT_TC_CHANGE;
+ /* post-event */
+ event.prep = false;
+
+ /* gather current configuration */
+ ice_rdma_cp_qos_info(&sc->hw, &pi->qos_cfg.local_dcbx_cfg, &event.port_qos);
+ sx_xlock(&ice_rdma.mtx);
+ if (sc->rdma_entry.attached && ice_rdma.registered)
+ IRDMA_EVENT_HANDLER(peer, &event);
+ sx_xunlock(&ice_rdma.mtx);
+}
diff --git a/sys/dev/ice/ice_rdma.h b/sys/dev/ice/ice_rdma.h
new file mode 100644
index 000000000000..5d3c33dd4e66
--- /dev/null
+++ b/sys/dev/ice/ice_rdma.h
@@ -0,0 +1,311 @@
+/* SPDX-License-Identifier: BSD-3-Clause */
+/* Copyright (c) 2021, Intel Corporation
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+/*$FreeBSD$*/
+
+/**
+ * @file ice_rdma.h
+ * @brief header file for RDMA client interface functions
+ *
+ * Contains definitions and function calls shared by the ice driver and the
+ * RDMA client interface driver.
+ *
+ * Since these definitions are shared between drivers it is important that any
+ * changes are considered carefully for backwards compatibility.
+ */
+#ifndef _ICE_RDMA_H_
+#define _ICE_RDMA_H_
+
+/*
+ * The RDMA client interface version is used to detect incompatibilities in
+ * the interface definition shared between the main driver and the client
+ * driver.
+ *
+ * It follows the semantic versioning guidelines; that is:
+ * Given the version number MAJOR.MINOR.PATCH, increment the:
+ *
+ * MAJOR version when you make incompatible changes,
+ * MINOR version when you add functionality in a backwards-compatible manner, and
+ * PATCH version when you make backwards-compatible bug fixes.
+ *
+ * Any change to this file, or one of the kobject interface files must come
+ * with an associated change in one of the MAJOR, MINOR, or PATCH versions,
+ * and care must be taken that backwards incompatible changes MUST increment
+ * the MAJOR version.
+ *
+ * Note: Until the MAJOR version is set to at least 1, the above semantic
+ * version guarantees may not hold, and this interface should not be
+ * considered stable.
+ */
+#define ICE_RDMA_MAJOR_VERSION 1
+#define ICE_RDMA_MINOR_VERSION 0
+#define ICE_RDMA_PATCH_VERSION 0
+
+/**
+ * @def ICE_RDMA_MAX_MSIX
+ * @brief Maximum number of MSI-X vectors that will be reserved
+ *
+ * Defines the maximum number of MSI-X vectors that an RDMA interface will
+ * have reserved in advance. Does not guarantee that many vectors have
+ * actually been enabled.
+ */
+#define ICE_RDMA_MAX_MSIX 64
+
+/**
+ * @struct ice_rdma_info
+ * @brief RDMA information from the client driver
+ *
+ * The RDMA client driver will fill in this structure and pass its contents
+ * back to the main driver using the ice_rdma_register function.
+ *
+ * It should fill the version in with the ICE_RDMA_* versions as defined in
+ * the ice_rdma.h header.
+ *
+ * Additionally it must provide a pointer to a kobject class which extends the
+ * ice_rdma_di_class with the operations defined in the irdma_if.m interface.
+ *
+ * If the specified version is not compatible, then the registration of the
+ * RDMA driver will fail.
+ */
+struct ice_rdma_info {
+ uint16_t major_version;
+ uint16_t minor_version;
+ uint16_t patch_version;
+
+ kobj_class_t rdma_class;
+};
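[Editorial aside, a hedged sketch rather than part of this commit: a client driver holding a kobject class such as the irdma_client_class sketched earlier would fill in this structure and hand it to ice_rdma_register(). The function name below is invented for illustration.]

	/* Sketch only: register a client with the ice driver. 'irdma_client_class'
	 * is an assumed client-provided class extending ice_rdma_di_class.
	 */
	static int
	irdma_client_register_example(void)
	{
		struct ice_rdma_info info;

		memset(&info, 0, sizeof(info));
		info.major_version = ICE_RDMA_MAJOR_VERSION;
		info.minor_version = ICE_RDMA_MINOR_VERSION;
		info.patch_version = ICE_RDMA_PATCH_VERSION;
		info.rdma_class = &irdma_client_class;

		return (ice_rdma_register(&info));
	}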
+
+#define ICE_RDMA_MAX_USER_PRIORITY 8
+
+/* Declare the ice_rdma_di kobject class */
+DECLARE_CLASS(ice_rdma_di_class);
+
+/**
+ * @struct ice_rdma_msix_mapping
+ * @brief MSI-X mapping requested by the peer RDMA driver
+ *
+ * Defines a mapping for MSI-X vectors being requested by the peer RDMA driver
+ * for a given PF.
+ */
+struct ice_rdma_msix_mapping {
+ uint8_t itr_indx;
+ int aeq_vector;
+ int ceq_cnt;
+ int *ceq_vector;
+};
+
+/**
+ * @struct ice_rdma_msix
+ * @brief RDMA MSI-X vectors reserved for the peer RDMA driver
+ *
+ * Defines the segment of the MSI-X vectors for use by the RDMA driver. These
+ * are reserved by the PF when it initializes.
+ */
+struct ice_rdma_msix {
+ int base;
+ int count;
+};
+
+/**
+ * @struct ice_qos_info
+ * @brief QoS information to be shared with RDMA driver
+ */
+struct ice_qos_info {
+ uint64_t tc_ctx;
+ uint8_t rel_bw;
+ uint8_t prio_type;
+ uint8_t egress_virt_up;
+ uint8_t ingress_virt_up;
+};
+
+/**
+ * @struct ice_qos_app_priority_table
+ * @brief Application priority data
+ */
+struct ice_qos_app_priority_table {
+ uint16_t prot_id;
+ uint8_t priority;
+ uint8_t selector;
+};
+
+#define IEEE_8021QAZ_MAX_TCS 8
+#define ICE_TC_MAX_USER_PRIORITY 8
+#define ICE_QOS_MAX_APPS 32
+#define ICE_QOS_DSCP_NUM_VAL 64
+
+/**
+ * @struct ice_qos_params
+ * @brief Holds all necessary data for RDMA to work with DCB
+ *
+ * Struct to hold QoS info
+ */
+struct ice_qos_params {
+ struct ice_qos_info tc_info[IEEE_8021QAZ_MAX_TCS];
+ uint8_t up2tc[ICE_TC_MAX_USER_PRIORITY];
+ uint8_t vsi_relative_bw;
+ uint8_t vsi_priority_type;
+ uint32_t num_apps;
+ uint8_t pfc_mode;
+ uint8_t dscp_map[ICE_QOS_DSCP_NUM_VAL];
+ struct ice_qos_app_priority_table apps[ICE_QOS_MAX_APPS];
+ uint8_t num_tc;
+};
+
+/**
+ * @struct ice_rdma_peer
+ * @brief RDMA driver information
+ *
+ * Shared structure used by the RDMA client driver when talking with the main
+ * device driver.
+ *
+ * Because the definition of this structure is shared between the two drivers,
+ * its ABI should be handled carefully.
+ */
+struct ice_rdma_peer {
+ /**
+ * The KOBJ_FIELDS macro must come first, in order for it to be used
+ * as a kobject.
+ */
+ KOBJ_FIELDS;
+
+ struct ifnet *ifp;
+ device_t dev;
+ struct resource *pci_mem;
+ struct ice_qos_params initial_qos_info;
+ struct ice_rdma_msix msix;
+ uint16_t mtu;
+ uint16_t pf_vsi_num;
+ uint8_t pf_id;
+};
+
+/**
+ * @enum ice_res_type
+ * @brief enum for type of resource registration
+ *
+ * enum for type of resource registration.
+ * created for plausible compatibility with IDC
+ */
+enum ice_res_type {
+ ICE_INVAL_RES = 0x0,
+ ICE_RDMA_QSET_ALLOC = 0x8,
+ ICE_RDMA_QSET_FREE = 0x18,
+};
+
+/**
+ * @struct ice_rdma_qset_params
+ * @brief struct to hold per RDMA Qset info
+ */
+struct ice_rdma_qset_params {
+ uint32_t teid; /* qset TEID */
+ uint16_t qs_handle; /* RDMA driver provides this */
+ uint16_t vsi_id; /* VSI index */
+ uint8_t tc; /* TC branch the QSet should belong to */
+ uint8_t reserved[3];
+};
+
+#define ICE_MAX_TXQ_PER_TXQG 128
+/**
+ * @struct ice_rdma_qset_update
+ * @brief struct used to register and unregister qsets for RDMA driver
+ */
+struct ice_rdma_qset_update {
+ enum ice_res_type res_type;
+ uint16_t cnt_req;
+ uint16_t res_allocated;
+ uint32_t res_handle;
+ struct ice_rdma_qset_params qsets;
+};
+
+/**
+ * @enum ice_rdma_event_type
+ * @brief enum for type of event from base driver
+ */
+enum ice_rdma_event_type {
+ ICE_RDMA_EVENT_NONE = 0,
+ ICE_RDMA_EVENT_LINK_CHANGE,
+ ICE_RDMA_EVENT_MTU_CHANGE,
+ ICE_RDMA_EVENT_TC_CHANGE,
+ ICE_RDMA_EVENT_API_CHANGE,
+ ICE_RDMA_EVENT_CRIT_ERR,
+ ICE_RDMA_EVENT_RESET,
+ ICE_RDMA_EVENT_QSET_REGISTER,
+ ICE_RDMA_EVENT_VSI_FILTER_UPDATE,
+ ICE_RDMA_EVENT_LAST
+};
+
+/**
+ * @struct ice_rdma_event
+ * @brief struct for event information to pass to RDMA driver
+ */
+struct ice_rdma_event {
+ enum ice_rdma_event_type type;
+ union {
+ /* link change event */
+ struct {
+ int linkstate;
+ uint64_t baudrate;
+ };
+ /* MTU change event */
+ struct {
+ int mtu;
+ };
+ /*
+ * TC/QoS/DCB change event
+ * RESET event use prep variable only
+ * prep: if true, this is a pre-event, post-event otherwise
+ */
+ struct {
+ struct ice_qos_params port_qos;
+ bool prep;
+ };
+ };
+};
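[Editorial aside, a hedged sketch of consuming these events on the client side; the handler name is invented, and how it is wired into irdma_if.m is assumed. The union members consulted follow the struct definition above.]

	/* Sketch only: a client-side event handler dispatching on event type. */
	static void
	irdma_client_event_example(struct ice_rdma_peer *peer,
				   struct ice_rdma_event *event)
	{
		switch (event->type) {
		case ICE_RDMA_EVENT_LINK_CHANGE:
			/* event->linkstate and event->baudrate are valid here */
			break;
		case ICE_RDMA_EVENT_TC_CHANGE:
			if (event->prep) {
				/* pre-event: suspend QPs before the QoS change */
			} else {
				/* post-event: apply event->port_qos and resume */
			}
			break;
		default:
			break;
		}
	}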
+
+/**
+ * @struct ice_rdma_request
+ * @brief struct with data for a request from the RDMA driver
+ */
+struct ice_rdma_request {
+ enum ice_rdma_event_type type;
+ union {
+ struct {
+ struct ice_rdma_qset_update res;
+ };
+ struct {
+ bool enable_filter;
+ };
+ };
+};
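[Editorial aside, a hedged sketch of the request path in the other direction. IRDMA_DI_REQ_HANDLER is assumed to be the kobj wrapper generated from irdma_di_if.m for the req_handler method that ice_rdma_request_handler() implements; the function name below is invented for illustration.]

	/* Sketch only: a client asking the ice driver to allocate one qset. */
	static void
	irdma_client_alloc_qset_example(struct ice_rdma_peer *peer)
	{
		struct ice_rdma_request req;

		memset(&req, 0, sizeof(req));
		req.type = ICE_RDMA_EVENT_QSET_REGISTER;
		req.res.res_type = ICE_RDMA_QSET_ALLOC;
		req.res.cnt_req = 1;			/* one qset requested */
		req.res.qsets.qs_handle = 0;		/* client-chosen handle */
		req.res.qsets.tc = 0;			/* place the qset on TC 0 */
		req.res.qsets.vsi_id = peer->pf_vsi_num;

		/* on success the driver writes the TEID back to req.res.qsets.teid */
		IRDMA_DI_REQ_HANDLER(peer, &req);
	}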
+
+int ice_rdma_register(struct ice_rdma_info *info);
+int ice_rdma_unregister(void);
+
+#endif
diff --git a/sys/dev/ice/ice_rdma_internal.h b/sys/dev/ice/ice_rdma_internal.h
new file mode 100644
index 000000000000..640e1ac0f6f9
--- /dev/null
+++ b/sys/dev/ice/ice_rdma_internal.h
@@ -0,0 +1,102 @@
+/* SPDX-License-Identifier: BSD-3-Clause */
+/* Copyright (c) 2022, Intel Corporation
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+/*$FreeBSD$*/
+
+/**
+ * @file ice_rdma_internal.h
+ * @brief internal header for the RDMA driver interface setup
+ *
+ * Contains the definitions and functions used by the ice driver to setup the
+ * RDMA driver interface. Functions and definitions in this file are not
+ * shared with the RDMA client driver.
+ */
+#ifndef _ICE_RDMA_INTERNAL_H_
+#define _ICE_RDMA_INTERNAL_H_
+
+#include "ice_rdma.h"
+
+/* Forward declare the softc structure */
+struct ice_softc;
+
+/* Global sysctl variable indicating if the RDMA client interface is enabled */
+extern bool ice_enable_irdma;
+
+/**
+ * @struct ice_rdma_entry
+ * @brief RDMA peer list node
+ *
+ * Structure used to store peer entries for each PF in a linked list.
+ */
+struct ice_rdma_entry {
+ LIST_ENTRY(ice_rdma_entry) node;
+ struct ice_rdma_peer peer;
+ bool attached;
+ bool initiated;
+};
+
+#define ice_rdma_peer_to_entry(p) __containerof(p, struct ice_rdma_entry, peer)
+#define ice_rdma_entry_to_sc(e) __containerof(e, struct ice_softc, rdma_entry)
+#define ice_rdma_peer_to_sc(p) ice_rdma_entry_to_sc(ice_rdma_peer_to_entry(p))
+
+/**
+ * @struct ice_rdma_peers
+ * @brief Head list structure for the RDMA entry list
+ *
+ * Type defining the head of the linked list of RDMA entries.
+ */
+LIST_HEAD(ice_rdma_peers, ice_rdma_entry);
+
+/**
+ * @struct ice_rdma_state
+ * @brief global driver state for RDMA
+ *
+ * Contains global state shared across all PFs by the device driver, such as
+ * the kobject class of the currently connected peer driver, and the linked
+ * list of peer entries for each PF.
+ */
+struct ice_rdma_state {
+ bool registered;
+ kobj_class_t peer_class;
+ struct sx mtx;
+ struct ice_rdma_peers peers;
+};
+
+void ice_rdma_init(void);
+void ice_rdma_exit(void);
+
+int ice_rdma_pf_attach(struct ice_softc *sc);
+void ice_rdma_pf_detach(struct ice_softc *sc);
+int ice_rdma_pf_init(struct ice_softc *sc);
+int ice_rdma_pf_stop(struct ice_softc *sc);
+void ice_rdma_link_change(struct ice_softc *sc, int linkstate, uint64_t baudrate);
+void ice_rdma_notify_dcb_qos_change(struct ice_softc *sc);
+void ice_rdma_dcb_qos_update(struct ice_softc *sc, struct ice_port_info *pi);
+#endif
diff --git a/sys/dev/ice/ice_sched.c b/sys/dev/ice/ice_sched.c
index 206f208886b6..5784db89c443 100644
--- a/sys/dev/ice/ice_sched.c
+++ b/sys/dev/ice/ice_sched.c
@@ -621,6 +621,48 @@ ice_alloc_lan_q_ctx(struct ice_hw *hw, u16 vsi_handle, u8 tc, u16 new_numqs)
}
/**
+ * ice_alloc_rdma_q_ctx - allocate RDMA queue contexts for the given VSI and TC
+ * @hw: pointer to the HW struct
+ * @vsi_handle: VSI handle
+ * @tc: TC number
+ * @new_numqs: number of queues
+ */
+static enum ice_status
+ice_alloc_rdma_q_ctx(struct ice_hw *hw, u16 vsi_handle, u8 tc, u16 new_numqs)
+{
+ struct ice_vsi_ctx *vsi_ctx;
+ struct ice_q_ctx *q_ctx;
+
+ vsi_ctx = ice_get_vsi_ctx(hw, vsi_handle);
+ if (!vsi_ctx)
+ return ICE_ERR_PARAM;
+ /* allocate RDMA queue contexts */
+ if (!vsi_ctx->rdma_q_ctx[tc]) {
+ vsi_ctx->rdma_q_ctx[tc] = (struct ice_q_ctx *)
+ ice_calloc(hw, new_numqs, sizeof(*q_ctx));
+ if (!vsi_ctx->rdma_q_ctx[tc])
+ return ICE_ERR_NO_MEMORY;
+ vsi_ctx->num_rdma_q_entries[tc] = new_numqs;
+ return ICE_SUCCESS;
+ }
+ /* the number of queues was increased; update the queue contexts */
+ if (new_numqs > vsi_ctx->num_rdma_q_entries[tc]) {
+ u16 prev_num = vsi_ctx->num_rdma_q_entries[tc];
+
+ q_ctx = (struct ice_q_ctx *)
+ ice_calloc(hw, new_numqs, sizeof(*q_ctx));
+ if (!q_ctx)
+ return ICE_ERR_NO_MEMORY;
+ ice_memcpy(q_ctx, vsi_ctx->rdma_q_ctx[tc],
+ prev_num * sizeof(*q_ctx), ICE_DMA_TO_NONDMA);
+ ice_free(hw, vsi_ctx->rdma_q_ctx[tc]);
+ vsi_ctx->rdma_q_ctx[tc] = q_ctx;
+ vsi_ctx->num_rdma_q_entries[tc] = new_numqs;
+ }
+ return ICE_SUCCESS;
+}
+
+/**
* ice_aq_rl_profile - performs a rate limiting task
* @hw: pointer to the HW struct
* @opcode: opcode for add, query, or remove profile(s)
@@ -1904,13 +1946,22 @@ ice_sched_update_vsi_child_nodes(struct ice_port_info *pi, u16 vsi_handle,
if (!vsi_ctx)
return ICE_ERR_PARAM;
- prev_numqs = vsi_ctx->sched.max_lanq[tc];
+ if (owner == ICE_SCHED_NODE_OWNER_LAN)
+ prev_numqs = vsi_ctx->sched.max_lanq[tc];
+ else
+ prev_numqs = vsi_ctx->sched.max_rdmaq[tc];
/* num queues are not changed or less than the previous number */
if (new_numqs <= prev_numqs)
return status;
- status = ice_alloc_lan_q_ctx(hw, vsi_handle, tc, new_numqs);
- if (status)
- return status;
+ if (owner == ICE_SCHED_NODE_OWNER_LAN) {
+ status = ice_alloc_lan_q_ctx(hw, vsi_handle, tc, new_numqs);
+ if (status)
+ return status;
+ } else {
+ status = ice_alloc_rdma_q_ctx(hw, vsi_handle, tc, new_numqs);
+ if (status)
+ return status;
+ }
if (new_numqs)
ice_sched_calc_vsi_child_nodes(hw, new_numqs, new_num_nodes);
@@ -1925,7 +1976,10 @@ ice_sched_update_vsi_child_nodes(struct ice_port_info *pi, u16 vsi_handle,
new_num_nodes, owner);
if (status)
return status;
- vsi_ctx->sched.max_lanq[tc] = new_numqs;
+ if (owner == ICE_SCHED_NODE_OWNER_LAN)
+ vsi_ctx->sched.max_lanq[tc] = new_numqs;
+ else
+ vsi_ctx->sched.max_rdmaq[tc] = new_numqs;
return ICE_SUCCESS;
}
@@ -1991,6 +2045,7 @@ ice_sched_cfg_vsi(struct ice_port_info *pi, u16 vsi_handle, u8 tc, u16 maxqs,
* recreate the child nodes all the time in these cases.
*/
vsi_ctx->sched.max_lanq[tc] = 0;
+ vsi_ctx->sched.max_rdmaq[tc] = 0;
}
/* update the VSI child nodes */
@@ -2121,6 +2176,8 @@ ice_sched_rm_vsi_cfg(struct ice_port_info *pi, u16 vsi_handle, u8 owner)
}
if (owner == ICE_SCHED_NODE_OWNER_LAN)
vsi_ctx->sched.max_lanq[i] = 0;
+ else
+ vsi_ctx->sched.max_rdmaq[i] = 0;
}
status = ICE_SUCCESS;
@@ -2143,6 +2200,19 @@ enum ice_status ice_rm_vsi_lan_cfg(struct ice_port_info *pi, u16 vsi_handle)
}
/**
+ * ice_rm_vsi_rdma_cfg - remove VSI and its RDMA children nodes
+ * @pi: port information structure
+ * @vsi_handle: software VSI handle
+ *
+ * This function clears the VSI and its RDMA children nodes from scheduler tree
+ * for all TCs.
+ */
+enum ice_status ice_rm_vsi_rdma_cfg(struct ice_port_info *pi, u16 vsi_handle)
+{
+ return ice_sched_rm_vsi_cfg(pi, vsi_handle, ICE_SCHED_NODE_OWNER_RDMA);
+}
+
+/**
* ice_sched_is_tree_balanced - Check tree nodes are identical or not
* @hw: pointer to the HW struct
* @node: pointer to the ice_sched_node struct
diff --git a/sys/dev/ice/ice_sched.h b/sys/dev/ice/ice_sched.h
index 0f3c546990cb..7b37ca828c88 100644
--- a/sys/dev/ice/ice_sched.h
+++ b/sys/dev/ice/ice_sched.h
@@ -139,6 +139,7 @@ enum ice_status
ice_sched_cfg_vsi(struct ice_port_info *pi, u16 vsi_handle, u8 tc, u16 maxqs,
u8 owner, bool enable);
enum ice_status ice_rm_vsi_lan_cfg(struct ice_port_info *pi, u16 vsi_handle);
+enum ice_status ice_rm_vsi_rdma_cfg(struct ice_port_info *pi, u16 vsi_handle);
struct ice_sched_node *
ice_sched_get_vsi_node(struct ice_port_info *pi, struct ice_sched_node *tc_node,
u16 vsi_handle);
diff --git a/sys/dev/ice/ice_switch.c b/sys/dev/ice/ice_switch.c
index c8331693c193..2c2f0e8de6a9 100644
--- a/sys/dev/ice/ice_switch.c
+++ b/sys/dev/ice/ice_switch.c
@@ -537,6 +537,10 @@ static void ice_clear_vsi_q_ctx(struct ice_hw *hw, u16 vsi_handle)
ice_free(hw, vsi->lan_q_ctx[i]);
vsi->lan_q_ctx[i] = NULL;
}
+ if (vsi->rdma_q_ctx[i]) {
+ ice_free(hw, vsi->rdma_q_ctx[i]);
+ vsi->rdma_q_ctx[i] = NULL;
+ }
}
}
@@ -659,6 +663,47 @@ ice_update_vsi(struct ice_hw *hw, u16 vsi_handle, struct ice_vsi_ctx *vsi_ctx,
}
/**
+ * ice_cfg_iwarp_fltr - enable/disable iWARP filtering on VSI
+ * @hw: pointer to HW struct
+ * @vsi_handle: VSI SW index
+ * @enable: boolean for enable/disable
+ */
+enum ice_status
+ice_cfg_iwarp_fltr(struct ice_hw *hw, u16 vsi_handle, bool enable)
+{
+ struct ice_vsi_ctx *ctx, *cached_ctx;
+ enum ice_status status;
+
+ cached_ctx = ice_get_vsi_ctx(hw, vsi_handle);
+ if (!cached_ctx)
+ return ICE_ERR_DOES_NOT_EXIST;
+
+ ctx = (struct ice_vsi_ctx *)ice_calloc(hw, 1, sizeof(*ctx));
+ if (!ctx)
+ return ICE_ERR_NO_MEMORY;
+
+ ctx->info.q_opt_rss = cached_ctx->info.q_opt_rss;
+ ctx->info.q_opt_tc = cached_ctx->info.q_opt_tc;
+ ctx->info.q_opt_flags = cached_ctx->info.q_opt_flags;
+
+ ctx->info.valid_sections = CPU_TO_LE16(ICE_AQ_VSI_PROP_Q_OPT_VALID);
+
+ if (enable)
+ ctx->info.q_opt_flags |= ICE_AQ_VSI_Q_OPT_PE_FLTR_EN;
+ else
+ ctx->info.q_opt_flags &= ~ICE_AQ_VSI_Q_OPT_PE_FLTR_EN;
+
+ status = ice_update_vsi(hw, vsi_handle, ctx, NULL);
+ if (!status) {
+ cached_ctx->info.q_opt_flags = ctx->info.q_opt_flags;
+ cached_ctx->info.valid_sections |= ctx->info.valid_sections;
+ }
+
+ ice_free(hw, ctx);
+ return status;
+}
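The function above follows the usual read-modify-write pattern for VSI updates: copy the cached queue-option fields into a scratch context, mark only the Q_OPT section valid, issue the update, and fold the result back into the cache on success. A hypothetical caller, in the shape of the vsi_filter_update method from irdma_di_if.m below (the pf_vsi.idx field name and the errno mapping are assumptions):

	static int
	example_vsi_filter_update(struct ice_rdma_peer *peer, bool enable)
	{
		struct ice_softc *sc = ice_rdma_peer_to_sc(peer);
		enum ice_status status;

		/* Toggle the iWARP PE filter on the main PF VSI */
		status = ice_cfg_iwarp_fltr(&sc->hw, sc->pf_vsi.idx, enable);

		return ((status == ICE_SUCCESS) ? 0 : ENODEV);
	}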
+
+/**
* ice_aq_get_vsi_params
* @hw: pointer to the HW struct
* @vsi_ctx: pointer to a VSI context struct
diff --git a/sys/dev/ice/ice_switch.h b/sys/dev/ice/ice_switch.h
index cd5994d73fd6..b1f1b312b136 100644
--- a/sys/dev/ice/ice_switch.h
+++ b/sys/dev/ice/ice_switch.h
@@ -77,6 +77,8 @@ struct ice_vsi_ctx {
u8 vf_num;
u16 num_lan_q_entries[ICE_MAX_TRAFFIC_CLASS];
struct ice_q_ctx *lan_q_ctx[ICE_MAX_TRAFFIC_CLASS];
+ u16 num_rdma_q_entries[ICE_MAX_TRAFFIC_CLASS];
+ struct ice_q_ctx *rdma_q_ctx[ICE_MAX_TRAFFIC_CLASS];
};
/* This is to be used by add/update mirror rule Admin Queue command */
@@ -452,6 +454,8 @@ enum ice_status
ice_add_eth_mac(struct ice_hw *hw, struct LIST_HEAD_TYPE *em_list);
enum ice_status
ice_remove_eth_mac(struct ice_hw *hw, struct LIST_HEAD_TYPE *em_list);
+enum ice_status
+ice_cfg_iwarp_fltr(struct ice_hw *hw, u16 vsi_handle, bool enable);
enum ice_status
ice_add_mac_with_sw_marker(struct ice_hw *hw, struct ice_fltr_info *f_info,
diff --git a/sys/dev/ice/ice_type.h b/sys/dev/ice/ice_type.h
index d8480543e39a..c7a25b026130 100644
--- a/sys/dev/ice/ice_type.h
+++ b/sys/dev/ice/ice_type.h
@@ -153,6 +153,7 @@ static inline u32 ice_round_to_num(u32 N, u32 R)
#define ICE_DBG_SW BIT_ULL(13)
#define ICE_DBG_SCHED BIT_ULL(14)
+#define ICE_DBG_RDMA BIT_ULL(15)
#define ICE_DBG_PKG BIT_ULL(16)
#define ICE_DBG_RES BIT_ULL(17)
#define ICE_DBG_AQ_MSG BIT_ULL(24)
@@ -404,6 +405,7 @@ struct ice_hw_common_caps {
u8 dcb;
u8 iscsi;
u8 mgmt_cem;
+ u8 iwarp;
/* WoL and APM support */
#define ICE_WOL_SUPPORT_M BIT(0)
@@ -774,6 +776,7 @@ struct ice_sched_vsi_info {
struct ice_sched_node *vsi_node[ICE_MAX_TRAFFIC_CLASS];
struct ice_sched_node *ag_node[ICE_MAX_TRAFFIC_CLASS];
u16 max_lanq[ICE_MAX_TRAFFIC_CLASS];
+ u16 max_rdmaq[ICE_MAX_TRAFFIC_CLASS];
/* bw_t_info saves VSI BW information */
struct ice_bw_type_info bw_t_info[ICE_MAX_TRAFFIC_CLASS];
};
diff --git a/sys/dev/ice/if_ice_iflib.c b/sys/dev/ice/if_ice_iflib.c
index 1b417a5aff82..5e5034664251 100644
--- a/sys/dev/ice/if_ice_iflib.c
+++ b/sys/dev/ice/if_ice_iflib.c
@@ -675,12 +675,14 @@ ice_update_link_status(struct ice_softc *sc, bool update_media)
ice_set_default_local_lldp_mib(sc);
iflib_link_state_change(sc->ctx, LINK_STATE_UP, baudrate);
+ ice_rdma_link_change(sc, LINK_STATE_UP, baudrate);
ice_link_up_msg(sc);
update_media = true;
} else { /* link is down */
iflib_link_state_change(sc->ctx, LINK_STATE_DOWN, 0);
+ ice_rdma_link_change(sc, LINK_STATE_DOWN, 0);
update_media = true;
}
@@ -788,6 +790,10 @@ ice_if_attach_post(if_ctx_t ctx)
/* Enable ITR 0 right away, so that we can handle admin interrupts */
ice_enable_intr(&sc->hw, sc->irqvs[0].me);
+ err = ice_rdma_pf_attach(sc);
+ if (err)
+ return (err);
+
/* Start the admin timer */
mtx_lock(&sc->admin_mtx);
callout_reset(&sc->admin_timer, hz/2, ice_admin_timer, sc);
@@ -884,6 +890,8 @@ ice_if_detach(if_ctx_t ctx)
mtx_unlock(&sc->admin_mtx);
mtx_destroy(&sc->admin_mtx);
+ ice_rdma_pf_detach(sc);
+
/* Free allocated media types */
ifmedia_removeall(sc->media);
@@ -1308,6 +1316,7 @@ ice_allocate_msix(struct ice_softc *sc)
cpuset_t cpus;
int bar, queues, vectors, requested;
int err = 0;
+ int rdma;
/* Allocate the MSI-X bar */
bar = scctx->isc_msix_bar;
@@ -1353,11 +1362,24 @@ ice_allocate_msix(struct ice_softc *sc)
queues = imin(queues, sc->ifc_sysctl_ntxqs ?: scctx->isc_ntxqsets);
queues = imin(queues, sc->ifc_sysctl_nrxqs ?: scctx->isc_nrxqsets);
+ if (ice_is_bit_set(sc->feat_cap, ICE_FEATURE_RDMA)) {
+ /*
+ * Choose a number of RDMA vectors based on the number of CPUs
+ * up to a maximum
+ */
+ rdma = min(CPU_COUNT(&cpus), ICE_RDMA_MAX_MSIX);
+
+ /* Further limit by the user configurable tunable */
+ rdma = min(rdma, ice_rdma_max_msix);
+ } else {
+ rdma = 0;
+ }
+
/*
* Determine the number of vectors to request. Note that we also need
* to allocate one vector for administrative tasks.
*/
- requested = queues + 1;
+ requested = rdma + queues + 1;
vectors = requested;
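A worked example with illustrative numbers: on an 8-CPU system where both ICE_RDMA_MAX_MSIX and the ice_rdma_max_msix tunable are at least 8, and the queue count also works out to 8, the driver requests 8 + 8 + 1 = 17 vectors (the extra one is the admin vector). If the OS grants only 13, the shortfall of 4 is taken out of the RDMA reservation first (rdma drops to 4); only a larger shortfall disables RDMA entirely or shrinks the LAN queues, as the next hunk shows.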
@@ -1376,6 +1398,23 @@ ice_allocate_msix(struct ice_softc *sc)
requested, vectors);
/*
+ * The OS didn't grant us the requested number of vectors.
+ * Check to see if we can reduce demands by limiting the
+ * number of vectors allocated to certain features.
+ */
+
+ if (rdma >= diff) {
+ /* Reduce the number of RDMA vectors we reserve */
+ rdma -= diff;
+ diff = 0;
+ } else {
+ /* Disable RDMA and reduce the difference */
+ ice_clear_bit(ICE_FEATURE_RDMA, sc->feat_cap);
+ diff -= rdma;
+ rdma = 0;
+ }
+
+ /*
* If we still have a difference, we need to reduce the number
* of queue pairs.
*
@@ -1392,6 +1431,9 @@ ice_allocate_msix(struct ice_softc *sc)
}
device_printf(dev, "Using %d Tx and Rx queues\n", queues);
+ if (rdma)
+ device_printf(dev, "Reserving %d MSI-X interrupts for iRDMA\n",
+ rdma);
device_printf(dev, "Using MSI-X interrupts with %d vectors\n",
vectors);
@@ -1400,6 +1442,8 @@ ice_allocate_msix(struct ice_softc *sc)
scctx->isc_ntxqsets = queues;
scctx->isc_intr = IFLIB_INTR_MSIX;
+ sc->irdma_vectors = rdma;
+
/* Interrupt allocation tracking isn't required in recovery mode,
* since neither RDMA nor VFs are enabled.
*/
@@ -1407,13 +1451,21 @@ ice_allocate_msix(struct ice_softc *sc)
return (0);
/* Keep track of which interrupt indices are being used for what */
- sc->lan_vectors = vectors;
+ sc->lan_vectors = vectors - rdma;
err = ice_resmgr_assign_contiguous(&sc->imgr, sc->pf_imap, sc->lan_vectors);
if (err) {
device_printf(dev, "Unable to assign PF interrupt mapping: %s\n",
ice_err_str(err));
goto err_pci_release_msi;
}
+ err = ice_resmgr_assign_contiguous(&sc->imgr, sc->rdma_imap, rdma);
+ if (err) {
+ device_printf(dev, "Unable to assign PF RDMA interrupt mapping: %s\n",
+ ice_err_str(err));
+ ice_resmgr_release_map(&sc->imgr, sc->pf_imap,
+ sc->lan_vectors);
+ goto err_pci_release_msi;
+ }
return (0);
@@ -1916,6 +1968,8 @@ ice_if_init(if_ctx_t ctx)
/* Configure promiscuous mode */
ice_if_promisc_set(ctx, if_getflags(sc->ifp));
+ ice_rdma_pf_init(sc);
+
ice_set_state(&sc->state, ICE_STATE_DRIVER_INITIALIZED);
return;
@@ -2061,6 +2115,9 @@ ice_transition_recovery_mode(struct ice_softc *sc)
/* Request that the device be re-initialized */
ice_request_stack_reinit(sc);
+ ice_rdma_pf_detach(sc);
+ ice_clear_bit(ICE_FEATURE_RDMA, sc->feat_cap);
+
ice_clear_bit(ICE_FEATURE_SRIOV, sc->feat_en);
ice_clear_bit(ICE_FEATURE_SRIOV, sc->feat_cap);
@@ -2106,6 +2163,9 @@ ice_transition_safe_mode(struct ice_softc *sc)
ice_set_bit(ICE_FEATURE_SAFE_MODE, sc->feat_cap);
ice_set_bit(ICE_FEATURE_SAFE_MODE, sc->feat_en);
+ ice_rdma_pf_detach(sc);
+ ice_clear_bit(ICE_FEATURE_RDMA, sc->feat_cap);
+
ice_clear_bit(ICE_FEATURE_SRIOV, sc->feat_en);
ice_clear_bit(ICE_FEATURE_SRIOV, sc->feat_cap);
@@ -2222,6 +2282,9 @@ ice_prepare_for_reset(struct ice_softc *sc)
if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
return;
+ /* stop the RDMA client */
+ ice_rdma_pf_stop(sc);
+
/* Release the main PF VSI queue mappings */
ice_resmgr_release_map(&sc->tx_qmgr, sc->pf_vsi.tx_qmap,
sc->pf_vsi.num_tx_queues);
@@ -2480,6 +2543,8 @@ ice_rebuild(struct ice_softc *sc)
ice_get_link_status(sc->hw.port_info, &sc->link_up);
ice_update_link_status(sc, true);
+ /* RDMA interface will be restarted by the stack re-init */
+
/* Configure interrupt causes for the administrative interrupt */
ice_configure_misc_interrupts(sc);
@@ -2633,6 +2698,7 @@ ice_init_device_features(struct ice_softc *sc)
/* Set capabilities that all devices support */
ice_set_bit(ICE_FEATURE_SRIOV, sc->feat_cap);
ice_set_bit(ICE_FEATURE_RSS, sc->feat_cap);
+ ice_set_bit(ICE_FEATURE_RDMA, sc->feat_cap);
ice_set_bit(ICE_FEATURE_LENIENT_LINK_MODE, sc->feat_cap);
ice_set_bit(ICE_FEATURE_LINK_MGMT_VER_1, sc->feat_cap);
ice_set_bit(ICE_FEATURE_LINK_MGMT_VER_2, sc->feat_cap);
@@ -2643,6 +2709,8 @@ ice_init_device_features(struct ice_softc *sc)
/* Disable features due to hardware limitations... */
if (!sc->hw.func_caps.common_cap.rss_table_size)
ice_clear_bit(ICE_FEATURE_RSS, sc->feat_cap);
+ if (!sc->hw.func_caps.common_cap.iwarp || !ice_enable_irdma)
+ ice_clear_bit(ICE_FEATURE_RDMA, sc->feat_cap);
/* Disable features due to firmware limitations... */
if (!ice_is_fw_health_report_supported(&sc->hw))
ice_clear_bit(ICE_FEATURE_HEALTH_STATUS, sc->feat_cap);
@@ -2795,6 +2863,8 @@ ice_if_stop(if_ctx_t ctx)
return;
}
+ ice_rdma_pf_stop(sc);
+
/* Remove the MAC filters, stop Tx, and stop Rx. We don't check the
* return of these functions because there's nothing we can really do
* if they fail, and the functions already print error messages.
diff --git a/sys/dev/ice/irdma_di_if.m b/sys/dev/ice/irdma_di_if.m
new file mode 100644
index 000000000000..929e1db5343e
--- /dev/null
+++ b/sys/dev/ice/irdma_di_if.m
@@ -0,0 +1,97 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright (c) 2021, Intel Corporation
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# 1. Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+#
+# 3. Neither the name of the Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+# $FreeBSD$
+
+/**
+ * @file irdma_di_if.m
+ * @brief RDMA client kobject driver interface
+ *
+ * KObject methods implemented by the ice driver. These functions are called
+ * by the RDMA client driver to connect with the ice driver and request
+ * operations or notify the driver of RDMA events.
+ */
+#include "ice_rdma.h"
+
+INTERFACE irdma_di;
+
+/**
+ * reset - Request the ice driver to perform a reset
+ * @peer: the RDMA peer structure
+ *
+ * Called by the RDMA client driver to request a reset of the ice device.
+ */
+METHOD int reset {
+ struct ice_rdma_peer *peer;
+};
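Each METHOD in a .m file is compiled by makeobjops into a typed dispatch macro in the generated header (irdma_di_if.h here). A sketch of the resulting call from the client side, assuming the standard kobj naming convention:

	int err;

	err = IRDMA_DI_RESET(peer);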
+
+/**
+ * msix_init - Initialize MSI-X resources for the RDMA driver
+ * @peer: the RDMA peer structure
+ * @msix_info: the requested MSI-X mapping
+ *
+ * Called by the RDMA client driver to request initialization of the MSI-X
+ * resources used for RDMA functionality.
+ */
+METHOD int msix_init {
+ struct ice_rdma_peer *peer;
+ struct ice_rdma_msix_mapping *msix_info;
+};
+
+/**
+ * qset_register_request - Request qset registration or deregistration
+ * @peer: the RDMA peer client structure
+ * @res: resources to be registered or unregistered
+ */
+METHOD int qset_register_request {
+ struct ice_rdma_peer *peer;
+ struct ice_rdma_qset_update *res;
+};
+
+/**
+ * vsi_filter_update - Update VSI filtering when the RDMA driver is
+ * opened or closed
+ * @peer: the RDMA peer client structure
+ * @enable: enable or disable the RDMA filter
+ */
+METHOD int vsi_filter_update {
+ struct ice_rdma_peer *peer;
+ bool enable;
+};
+
+/**
+ * req_handler - Handle incoming requests from the RDMA client driver
+ * @peer: the RDMA peer client structure
+ * @req: structure containing the request
+ */
+METHOD void req_handler {
+ struct ice_rdma_peer *peer;
+ struct ice_rdma_request *req;
+};
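The ice driver is expected to back these methods with a standard kobj method table. A minimal sketch of that registration (the handler function names are hypothetical; KOBJMETHOD and DEFINE_CLASS_0 are the stock sys/kobj.h pattern, and the method name tokens follow from INTERFACE irdma_di plus each METHOD name):

	static kobj_method_t ice_rdma_di_methods[] = {
		KOBJMETHOD(irdma_di_reset, example_pf_reset),
		KOBJMETHOD(irdma_di_msix_init, example_pf_msix_init),
		KOBJMETHOD(irdma_di_qset_register_request, example_qset_register),
		KOBJMETHOD(irdma_di_vsi_filter_update, example_vsi_filter_update),
		KOBJMETHOD(irdma_di_req_handler, example_req_handler),
		KOBJMETHOD_END
	};

	/* Class used to instantiate peers carrying the ice-side methods */
	DEFINE_CLASS_0(ice_rdma_di, example_di_class, ice_rdma_di_methods,
		       sizeof(struct ice_rdma_peer));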
diff --git a/sys/dev/ice/irdma_if.m b/sys/dev/ice/irdma_if.m
new file mode 100644
index 000000000000..84651b7cecc0
--- /dev/null
+++ b/sys/dev/ice/irdma_if.m
@@ -0,0 +1,106 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright (c) 2021, Intel Corporation
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# 1. Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+#
+# 3. Neither the name of the Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+# $FreeBSD$
+
+/**
+ * @file irdma_if.m
+ * @brief RDMA client kobject interface
+ *
+ * KObject methods implemented by the RDMA client driver. These functions will
+ * be called from the ice driver to notify the RDMA client driver of device
+ * driver events.
+ */
+#include "ice_rdma.h"
+
+INTERFACE irdma;
+
+/**
+ * probe - Notify the RDMA client driver that a peer device has been created
+ * @peer: the RDMA peer structure
+ *
+ * Called by the ice driver during attach to notify the RDMA client driver
+ * that a new PF has been initialized.
+ */
+METHOD int probe {
+ struct ice_rdma_peer *peer;
+};
+
+/**
+ * open - Notify the RDMA client driver that a peer device has been opened
+ * @peer: the RDMA peer structure
+ *
+ * Called by the ice driver during the if_init routine to notify the RDMA
+ * client driver that a PF has been activated.
+ */
+METHOD int open {
+ struct ice_rdma_peer *peer;
+};
+
+/**
+ * close - Notify the RDMA client driver that a peer device has closed
+ * @peer: the RDMA peer structure
+ *
+ * Called by the ice driver during the if_stop routine to notify the RDMA
+ * client driver that a PF has been deactivated.
+ */
+METHOD int close {
+ struct ice_rdma_peer *peer;
+};
+
+/**
+ * remove - Notify the RDMA client driver that a peer device has been removed
+ * @peer: the RDMA peer structure
+ *
+ * Called by the ice driver during detach to notify the RDMA client driver
+ * that a PF has been removed.
+ */
+METHOD int remove {
+ struct ice_rdma_peer *peer;
+};
+
+/**
+ * link_change - Notify the RDMA client driver that link status has changed
+ * @peer: the RDMA peer structure
+ * @linkstate: link status
+ * @baudrate: link rate in bits per second
+ *
+ * Called by the ice driver when link status changes to notify the RDMA client
+ * driver of the new status.
+ */
+METHOD void link_change {
+ struct ice_rdma_peer *peer;
+ int linkstate;
+ uint64_t baudrate;
+};
+
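+/**
+ * event_handler - Notify the RDMA client driver of a device event
+ * @peer: the RDMA peer structure
+ * @event: the event information
+ *
+ * Called by the ice driver to notify the RDMA client driver of device
+ * driver events.
+ */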
+METHOD void event_handler {
+ struct ice_rdma_peer *peer;
+ struct ice_rdma_event *event;
+};
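A client driver binds to this interface by supplying its own kobj class, which the ice driver records (see the peer_class member of struct ice_rdma_state) and uses when constructing peers. A sketch of the client-side method table (handler names are hypothetical):

	static kobj_method_t example_irdma_methods[] = {
		KOBJMETHOD(irdma_probe, example_probe),
		KOBJMETHOD(irdma_open, example_open),
		KOBJMETHOD(irdma_close, example_close),
		KOBJMETHOD(irdma_remove, example_remove),
		KOBJMETHOD(irdma_link_change, example_link_change),
		KOBJMETHOD(irdma_event_handler, example_event_handler),
		KOBJMETHOD_END
	};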