diff options
Diffstat (limited to 'sys/dev/ice/ice_common_txrx.h')
| -rw-r--r-- | sys/dev/ice/ice_common_txrx.h | 423 |
1 files changed, 423 insertions, 0 deletions
diff --git a/sys/dev/ice/ice_common_txrx.h b/sys/dev/ice/ice_common_txrx.h new file mode 100644 index 000000000000..5bab344ecd83 --- /dev/null +++ b/sys/dev/ice/ice_common_txrx.h @@ -0,0 +1,423 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright (c) 2024, Intel Corporation + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. Neither the name of the Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * @file ice_common_txrx.h + * @brief common Tx/Rx utility functions + * + * Contains common utility functions for the Tx/Rx hot path. + * + * The functions do depend on the if_pkt_info_t structure. A suitable + * implementation of this structure must be provided if these functions are to + * be used without the iflib networking stack. + */ + +#ifndef _ICE_COMMON_TXRX_H_ +#define _ICE_COMMON_TXRX_H_ + +#include <netinet/udp.h> +#include <netinet/sctp.h> + +/** + * ice_tso_detect_sparse - detect TSO packets with too many segments + * @pi: packet information + * + * Hardware only transmits packets with a maximum of 8 descriptors. For TSO + * packets, hardware needs to be able to build the split packets using 8 or + * fewer descriptors. Additionally, the header must be contained within at + * most 3 descriptors. + * + * To verify this, we walk the headers to find out how many descriptors the + * headers require (usually 1). Then we ensure that, for each TSO segment, its + * data plus the headers are contained within 8 or fewer descriptors. + */ +static inline int +ice_tso_detect_sparse(if_pkt_info_t pi) +{ + int count, curseg, i, hlen, segsz, seglen, tsolen, hdrs, maxsegs; + bus_dma_segment_t *segs = pi->ipi_segs; + int nsegs = pi->ipi_nsegs; + + curseg = hdrs = 0; + + hlen = pi->ipi_ehdrlen + pi->ipi_ip_hlen + pi->ipi_tcp_hlen; + tsolen = pi->ipi_len - hlen; + + /* First, count the number of descriptors for the header. + * Additionally, make sure it does not span more than 3 segments. + */ + i = 0; + curseg = segs[0].ds_len; + while (hlen > 0) { + hdrs++; + if (hdrs > ICE_MAX_TSO_HDR_SEGS) + return (1); + if (curseg == 0) { + i++; + if (__predict_false(i == nsegs)) + return (1); + + curseg = segs[i].ds_len; + } + seglen = min(curseg, hlen); + curseg -= seglen; + hlen -= seglen; + } + + maxsegs = ICE_MAX_TX_SEGS - hdrs; + + /* We must count the headers, in order to verify that they take up + * 3 or fewer descriptors. However, we don't need to check the data + * if the total segments is small. + */ + if (nsegs <= maxsegs) + return (0); + + count = 0; + + /* Now check the data to make sure that each TSO segment is made up of + * no more than maxsegs descriptors. This ensures that hardware will + * be capable of performing TSO offload. + */ + while (tsolen > 0) { + segsz = pi->ipi_tso_segsz; + while (segsz > 0 && tsolen != 0) { + count++; + if (count > maxsegs) { + return (1); + } + if (curseg == 0) { + i++; + if (__predict_false(i == nsegs)) { + return (1); + } + curseg = segs[i].ds_len; + } + seglen = min(curseg, segsz); + segsz -= seglen; + curseg -= seglen; + tsolen -= seglen; + } + count = 0; + } + + return (0); +} + +/** + * ice_tso_setup - Setup a context descriptor to prepare for a TSO packet + * @txq: the Tx queue to use + * @pi: the packet info to prepare for + * + * Setup a context descriptor in preparation for sending a Tx packet that + * requires the TSO offload. Returns the index of the descriptor to use when + * encapsulating the Tx packet data into descriptors. + */ +static inline int +ice_tso_setup(struct ice_tx_queue *txq, if_pkt_info_t pi) +{ + struct ice_tx_ctx_desc *txd; + u32 cmd, mss, type, tsolen; + int idx; + u64 type_cmd_tso_mss; + + idx = pi->ipi_pidx; + txd = (struct ice_tx_ctx_desc *)&txq->tx_base[idx]; + tsolen = pi->ipi_len - (pi->ipi_ehdrlen + pi->ipi_ip_hlen + pi->ipi_tcp_hlen); + + type = ICE_TX_DESC_DTYPE_CTX; + cmd = ICE_TX_CTX_DESC_TSO; + /* TSO MSS must not be less than 64 */ + if (pi->ipi_tso_segsz < ICE_MIN_TSO_MSS) { + txq->stats.mss_too_small++; + pi->ipi_tso_segsz = ICE_MIN_TSO_MSS; + } + mss = pi->ipi_tso_segsz; + + type_cmd_tso_mss = ((u64)type << ICE_TXD_CTX_QW1_DTYPE_S) | + ((u64)cmd << ICE_TXD_CTX_QW1_CMD_S) | + ((u64)tsolen << ICE_TXD_CTX_QW1_TSO_LEN_S) | + ((u64)mss << ICE_TXD_CTX_QW1_MSS_S); + txd->qw1 = htole64(type_cmd_tso_mss); + + txd->tunneling_params = htole32(0); + txq->stats.tso++; + + return ((idx + 1) & (txq->desc_count-1)); +} + +/** + * ice_tx_setup_offload - Setup register values for performing a Tx offload + * @txq: The Tx queue, used to track checksum offload stats + * @pi: the packet info to program for + * @cmd: the cmd register value to update + * @off: the off register value to update + * + * Based on the packet info provided, update the cmd and off values for + * enabling Tx offloads. This depends on the packet type and which offloads + * have been requested. + * + * We also track the total number of times that we've requested hardware + * offload a particular type of checksum for debugging purposes. + */ +static inline void +ice_tx_setup_offload(struct ice_tx_queue *txq, if_pkt_info_t pi, u32 *cmd, u32 *off) +{ + u32 remaining_csum_flags = pi->ipi_csum_flags; + + switch (pi->ipi_etype) { +#ifdef INET + case ETHERTYPE_IP: + if (pi->ipi_csum_flags & ICE_CSUM_IP) { + *cmd |= ICE_TX_DESC_CMD_IIPT_IPV4_CSUM; + txq->stats.cso[ICE_CSO_STAT_TX_IP4]++; + remaining_csum_flags &= ~CSUM_IP; + } else + *cmd |= ICE_TX_DESC_CMD_IIPT_IPV4; + break; +#endif +#ifdef INET6 + case ETHERTYPE_IPV6: + *cmd |= ICE_TX_DESC_CMD_IIPT_IPV6; + /* + * This indicates that the IIPT flag was set to the IPV6 value; + * there's no checksum for IPv6 packets. + */ + txq->stats.cso[ICE_CSO_STAT_TX_IP6]++; + break; +#endif + default: + txq->stats.cso[ICE_CSO_STAT_TX_L3_ERR]++; + break; + } + + *off |= (pi->ipi_ehdrlen >> 1) << ICE_TX_DESC_LEN_MACLEN_S; + *off |= (pi->ipi_ip_hlen >> 2) << ICE_TX_DESC_LEN_IPLEN_S; + + if (!(remaining_csum_flags & ~ICE_RX_CSUM_FLAGS)) + return; + + switch (pi->ipi_ipproto) { + case IPPROTO_TCP: + if (pi->ipi_csum_flags & ICE_CSUM_TCP) { + *cmd |= ICE_TX_DESC_CMD_L4T_EOFT_TCP; + *off |= (pi->ipi_tcp_hlen >> 2) << + ICE_TX_DESC_LEN_L4_LEN_S; + txq->stats.cso[ICE_CSO_STAT_TX_TCP]++; + } + break; + case IPPROTO_UDP: + if (pi->ipi_csum_flags & ICE_CSUM_UDP) { + *cmd |= ICE_TX_DESC_CMD_L4T_EOFT_UDP; + *off |= (sizeof(struct udphdr) >> 2) << + ICE_TX_DESC_LEN_L4_LEN_S; + txq->stats.cso[ICE_CSO_STAT_TX_UDP]++; + } + break; + case IPPROTO_SCTP: + if (pi->ipi_csum_flags & ICE_CSUM_SCTP) { + *cmd |= ICE_TX_DESC_CMD_L4T_EOFT_SCTP; + *off |= (sizeof(struct sctphdr) >> 2) << + ICE_TX_DESC_LEN_L4_LEN_S; + txq->stats.cso[ICE_CSO_STAT_TX_SCTP]++; + } + break; + default: + txq->stats.cso[ICE_CSO_STAT_TX_L4_ERR]++; + break; + } +} + +/** + * ice_rx_checksum - verify hardware checksum is valid or not + * @rxq: the Rx queue structure + * @flags: checksum flags to update + * @data: checksum data to update + * @status0: descriptor status data + * @ptype: packet type + * + * Determine whether the hardware indicated that the Rx checksum is valid. If + * so, update the checksum flags and data, informing the stack of the status + * of the checksum so that it does not spend time verifying it manually. + */ +static void +ice_rx_checksum(struct ice_rx_queue *rxq, uint32_t *flags, uint32_t *data, + u16 status0, u16 ptype) +{ + const u16 l3_error = (BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_IPE_S) | + BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_EIPE_S)); + const u16 l4_error = (BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_L4E_S) | + BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_EUDPE_S)); + const u16 xsum_errors = (l3_error | l4_error | + BIT(ICE_RX_FLEX_DESC_STATUS0_IPV6EXADD_S)); + struct ice_rx_ptype_decoded decoded; + bool is_ipv4, is_ipv6; + + /* No L3 or L4 checksum was calculated */ + if (!(status0 & BIT(ICE_RX_FLEX_DESC_STATUS0_L3L4P_S))) { + return; + } + + decoded = ice_decode_rx_desc_ptype(ptype); + *flags = 0; + + if (!(decoded.known && decoded.outer_ip)) + return; + + is_ipv4 = (decoded.outer_ip == ICE_RX_PTYPE_OUTER_IP) && + (decoded.outer_ip_ver == ICE_RX_PTYPE_OUTER_IPV4); + is_ipv6 = (decoded.outer_ip == ICE_RX_PTYPE_OUTER_IP) && + (decoded.outer_ip_ver == ICE_RX_PTYPE_OUTER_IPV6); + + /* No checksum errors were reported */ + if (!(status0 & xsum_errors)) { + if (is_ipv4) + *flags |= CSUM_L3_CALC | CSUM_L3_VALID; + + switch (decoded.inner_prot) { + case ICE_RX_PTYPE_INNER_PROT_TCP: + case ICE_RX_PTYPE_INNER_PROT_UDP: + case ICE_RX_PTYPE_INNER_PROT_SCTP: + *flags |= CSUM_L4_CALC | CSUM_L4_VALID; + *data |= htons(0xffff); + break; + default: + break; + } + + return; + } + + /* + * Certain IPv6 extension headers impact the validity of L4 checksums. + * If one of these headers exist, hardware will set the IPV6EXADD bit + * in the descriptor. If the bit is set then pretend like hardware + * didn't checksum this packet. + */ + if (is_ipv6 && (status0 & BIT(ICE_RX_FLEX_DESC_STATUS0_IPV6EXADD_S))) { + rxq->stats.cso[ICE_CSO_STAT_RX_IP6_ERR]++; + return; + } + + /* + * At this point, status0 must have at least one of the l3_error or + * l4_error bits set. + */ + + if (status0 & l3_error) { + if (is_ipv4) { + rxq->stats.cso[ICE_CSO_STAT_RX_IP4_ERR]++; + *flags |= CSUM_L3_CALC; + } else { + /* Hardware indicated L3 error but this isn't IPv4? */ + rxq->stats.cso[ICE_CSO_STAT_RX_L3_ERR]++; + } + /* don't bother reporting L4 errors if we got an L3 error */ + return; + } else if (is_ipv4) { + *flags |= CSUM_L3_CALC | CSUM_L3_VALID; + } + + if (status0 & l4_error) { + switch (decoded.inner_prot) { + case ICE_RX_PTYPE_INNER_PROT_TCP: + rxq->stats.cso[ICE_CSO_STAT_RX_TCP_ERR]++; + *flags |= CSUM_L4_CALC; + break; + case ICE_RX_PTYPE_INNER_PROT_UDP: + rxq->stats.cso[ICE_CSO_STAT_RX_UDP_ERR]++; + *flags |= CSUM_L4_CALC; + break; + case ICE_RX_PTYPE_INNER_PROT_SCTP: + rxq->stats.cso[ICE_CSO_STAT_RX_SCTP_ERR]++; + *flags |= CSUM_L4_CALC; + break; + default: + /* + * Hardware indicated L4 error, but this isn't one of + * the expected protocols. + */ + rxq->stats.cso[ICE_CSO_STAT_RX_L4_ERR]++; + } + } +} + +/** + * ice_ptype_to_hash - Convert packet type to a hash value + * @ptype: the packet type to convert + * + * Given the packet type, convert to a suitable hashtype to report to the + * upper stack via the iri_rsstype value of the if_rxd_info_t structure. + * + * If the hash type is unknown we'll report M_HASHTYPE_OPAQUE. + */ +static inline int +ice_ptype_to_hash(u16 ptype) +{ + struct ice_rx_ptype_decoded decoded; + + if (ptype >= ARRAY_SIZE(ice_ptype_lkup)) + return M_HASHTYPE_OPAQUE; + + decoded = ice_decode_rx_desc_ptype(ptype); + + if (!decoded.known) + return M_HASHTYPE_OPAQUE; + + if (decoded.outer_ip == ICE_RX_PTYPE_OUTER_L2) + return M_HASHTYPE_OPAQUE; + + /* Note: anything that gets to this point is IP */ + if (decoded.outer_ip_ver == ICE_RX_PTYPE_OUTER_IPV6) { + switch (decoded.inner_prot) { + case ICE_RX_PTYPE_INNER_PROT_TCP: + return M_HASHTYPE_RSS_TCP_IPV6; + case ICE_RX_PTYPE_INNER_PROT_UDP: + return M_HASHTYPE_RSS_UDP_IPV6; + default: + return M_HASHTYPE_RSS_IPV6; + } + } + if (decoded.outer_ip_ver == ICE_RX_PTYPE_OUTER_IPV4) { + switch (decoded.inner_prot) { + case ICE_RX_PTYPE_INNER_PROT_TCP: + return M_HASHTYPE_RSS_TCP_IPV4; + case ICE_RX_PTYPE_INNER_PROT_UDP: + return M_HASHTYPE_RSS_UDP_IPV4; + default: + return M_HASHTYPE_RSS_IPV4; + } + } + + /* We should never get here!! */ + return M_HASHTYPE_OPAQUE; +} +#endif |
