diff options
author | Zbigniew Bodek <zbb@FreeBSD.org> | 2015-10-18 21:39:15 +0000 |
---|---|---|
committer | Zbigniew Bodek <zbb@FreeBSD.org> | 2015-10-18 21:39:15 +0000 |
commit | 3c0086b81391c0b6daf6ba3f58a6d3aebb0cdaca (patch) | |
tree | 0c4298b3d546fae25262565adc2c58dc9d278537 | |
parent | 325151a32e114f02699a301c1e74080e7c1f1a26 (diff) | |
download | src-test2-3c0086b81391c0b6daf6ba3f58a6d3aebb0cdaca.tar.gz src-test2-3c0086b81391c0b6daf6ba3f58a6d3aebb0cdaca.zip |
Notes
-rw-r--r-- | sys/dev/vnic/nic.h | 539 | ||||
-rw-r--r-- | sys/dev/vnic/nic_main.c | 1192 | ||||
-rw-r--r-- | sys/dev/vnic/nic_reg.h | 234 | ||||
-rw-r--r-- | sys/dev/vnic/nicvf_main.c | 1667 | ||||
-rw-r--r-- | sys/dev/vnic/nicvf_queues.c | 1467 | ||||
-rw-r--r-- | sys/dev/vnic/nicvf_queues.h | 366 | ||||
-rw-r--r-- | sys/dev/vnic/q_struct.h | 719 | ||||
-rw-r--r-- | sys/dev/vnic/thunder_bgx.c | 1212 | ||||
-rw-r--r-- | sys/dev/vnic/thunder_bgx.h | 248 |
9 files changed, 7644 insertions, 0 deletions
diff --git a/sys/dev/vnic/nic.h b/sys/dev/vnic/nic.h new file mode 100644 index 000000000000..3023deabd05c --- /dev/null +++ b/sys/dev/vnic/nic.h @@ -0,0 +1,539 @@ +/* + * Copyright (C) 2015 Cavium Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + * + */ + +#ifndef NIC_H +#define NIC_H + +#include <linux/netdevice.h> +#include <linux/interrupt.h> +#include <linux/pci.h> +#include "thunder_bgx.h" + +/* PCI device IDs */ +#define PCI_DEVICE_ID_THUNDER_NIC_PF 0xA01E +#define PCI_DEVICE_ID_THUNDER_PASS1_NIC_VF 0x0011 +#define PCI_DEVICE_ID_THUNDER_NIC_VF 0xA034 +#define PCI_DEVICE_ID_THUNDER_BGX 0xA026 + +/* PCI BAR nos */ +#define PCI_CFG_REG_BAR_NUM 0 +#define PCI_MSIX_REG_BAR_NUM 4 + +/* NIC SRIOV VF count */ +#define MAX_NUM_VFS_SUPPORTED 128 +#define DEFAULT_NUM_VF_ENABLED 8 + +#define NIC_TNS_BYPASS_MODE 0 +#define NIC_TNS_MODE 1 + +/* NIC priv flags */ +#define NIC_SRIOV_ENABLED BIT(0) +#define NIC_TNS_ENABLED BIT(1) + +/* VNIC HW optimiation features */ +#define VNIC_RSS_SUPPORT +#define VNIC_MULTI_QSET_SUPPORT + +/* Min/Max packet size */ +#define NIC_HW_MIN_FRS 64 +#define NIC_HW_MAX_FRS 9200 /* 9216 max packet including FCS */ + +/* Max pkinds */ +#define NIC_MAX_PKIND 16 + +/* Rx Channels */ +/* Receive channel configuration in TNS bypass mode + * Below is configuration in TNS bypass mode + * BGX0-LMAC0-CHAN0 - VNIC CHAN0 + * BGX0-LMAC1-CHAN0 - VNIC CHAN16 + * ... + * BGX1-LMAC0-CHAN0 - VNIC CHAN128 + * ... + * BGX1-LMAC3-CHAN0 - VNIC CHAN174 + */ +#define NIC_INTF_COUNT 2 /* Interfaces btw VNIC and TNS/BGX */ +#define NIC_CHANS_PER_INF 128 +#define NIC_MAX_CHANS (NIC_INTF_COUNT * NIC_CHANS_PER_INF) +#define NIC_CPI_COUNT 2048 /* No of channel parse indices */ + +/* TNS bypass mode: 1-1 mapping between VNIC and BGX:LMAC */ +#define NIC_MAX_BGX MAX_BGX_PER_CN88XX +#define NIC_CPI_PER_BGX (NIC_CPI_COUNT / NIC_MAX_BGX) +#define NIC_MAX_CPI_PER_LMAC 64 /* Max when CPI_ALG is IP diffserv */ +#define NIC_RSSI_PER_BGX (NIC_RSSI_COUNT / NIC_MAX_BGX) + +/* Tx scheduling */ +#define NIC_MAX_TL4 1024 +#define NIC_MAX_TL4_SHAPERS 256 /* 1 shaper for 4 TL4s */ +#define NIC_MAX_TL3 256 +#define NIC_MAX_TL3_SHAPERS 64 /* 1 shaper for 4 TL3s */ +#define NIC_MAX_TL2 64 +#define NIC_MAX_TL2_SHAPERS 2 /* 1 shaper for 32 TL2s */ +#define NIC_MAX_TL1 2 + +/* TNS bypass mode */ +#define NIC_TL2_PER_BGX 32 +#define NIC_TL4_PER_BGX (NIC_MAX_TL4 / NIC_MAX_BGX) +#define NIC_TL4_PER_LMAC (NIC_MAX_TL4 / NIC_CHANS_PER_INF) + +/* NIC VF Interrupts */ +#define NICVF_INTR_CQ 0 +#define NICVF_INTR_SQ 1 +#define NICVF_INTR_RBDR 2 +#define NICVF_INTR_PKT_DROP 3 +#define NICVF_INTR_TCP_TIMER 4 +#define NICVF_INTR_MBOX 5 +#define NICVF_INTR_QS_ERR 6 + +#define NICVF_INTR_CQ_SHIFT 0 +#define NICVF_INTR_SQ_SHIFT 8 +#define NICVF_INTR_RBDR_SHIFT 16 +#define NICVF_INTR_PKT_DROP_SHIFT 20 +#define NICVF_INTR_TCP_TIMER_SHIFT 21 +#define NICVF_INTR_MBOX_SHIFT 22 +#define NICVF_INTR_QS_ERR_SHIFT 23 + +#define NICVF_INTR_CQ_MASK (0xFF << NICVF_INTR_CQ_SHIFT) +#define NICVF_INTR_SQ_MASK (0xFF << NICVF_INTR_SQ_SHIFT) +#define NICVF_INTR_RBDR_MASK (0x03 << NICVF_INTR_RBDR_SHIFT) +#define NICVF_INTR_PKT_DROP_MASK BIT(NICVF_INTR_PKT_DROP_SHIFT) +#define NICVF_INTR_TCP_TIMER_MASK BIT(NICVF_INTR_TCP_TIMER_SHIFT) +#define NICVF_INTR_MBOX_MASK BIT(NICVF_INTR_MBOX_SHIFT) +#define NICVF_INTR_QS_ERR_MASK BIT(NICVF_INTR_QS_ERR_SHIFT) + +/* MSI-X interrupts */ +#define NIC_PF_MSIX_VECTORS 10 +#define NIC_VF_MSIX_VECTORS 20 + +#define NIC_PF_INTR_ID_ECC0_SBE 0 +#define NIC_PF_INTR_ID_ECC0_DBE 1 +#define NIC_PF_INTR_ID_ECC1_SBE 2 +#define NIC_PF_INTR_ID_ECC1_DBE 3 +#define NIC_PF_INTR_ID_ECC2_SBE 4 +#define NIC_PF_INTR_ID_ECC2_DBE 5 +#define NIC_PF_INTR_ID_ECC3_SBE 6 +#define NIC_PF_INTR_ID_ECC3_DBE 7 +#define NIC_PF_INTR_ID_MBOX0 8 +#define NIC_PF_INTR_ID_MBOX1 9 + +/* Global timer for CQ timer thresh interrupts + * Calculated for SCLK of 700Mhz + * value written should be a 1/16th of what is expected + * + * 1 tick per 0.05usec = value of 2.2 + * This 10% would be covered in CQ timer thresh value + */ +#define NICPF_CLK_PER_INT_TICK 2 + +/* Time to wait before we decide that a SQ is stuck. + * + * Since both pkt rx and tx notifications are done with same CQ, + * when packets are being received at very high rate (eg: L2 forwarding) + * then freeing transmitted skbs will be delayed and watchdog + * will kick in, resetting interface. Hence keeping this value high. + */ +#define NICVF_TX_TIMEOUT (50 * HZ) + +struct nicvf_cq_poll { + struct nicvf *nicvf; + u8 cq_idx; /* Completion queue index */ + struct napi_struct napi; +}; + +#define NIC_RSSI_COUNT 4096 /* Total no of RSS indices */ +#define NIC_MAX_RSS_HASH_BITS 8 +#define NIC_MAX_RSS_IDR_TBL_SIZE (1 << NIC_MAX_RSS_HASH_BITS) +#define RSS_HASH_KEY_SIZE 5 /* 320 bit key */ + +#ifdef VNIC_RSS_SUPPORT +struct nicvf_rss_info { + bool enable; +#define RSS_L2_EXTENDED_HASH_ENA BIT(0) +#define RSS_IP_HASH_ENA BIT(1) +#define RSS_TCP_HASH_ENA BIT(2) +#define RSS_TCP_SYN_DIS BIT(3) +#define RSS_UDP_HASH_ENA BIT(4) +#define RSS_L4_EXTENDED_HASH_ENA BIT(5) +#define RSS_ROCE_ENA BIT(6) +#define RSS_L3_BI_DIRECTION_ENA BIT(7) +#define RSS_L4_BI_DIRECTION_ENA BIT(8) + u64 cfg; + u8 hash_bits; + u16 rss_size; + u8 ind_tbl[NIC_MAX_RSS_IDR_TBL_SIZE]; + u64 key[RSS_HASH_KEY_SIZE]; +} ____cacheline_aligned_in_smp; +#endif + +enum rx_stats_reg_offset { + RX_OCTS = 0x0, + RX_UCAST = 0x1, + RX_BCAST = 0x2, + RX_MCAST = 0x3, + RX_RED = 0x4, + RX_RED_OCTS = 0x5, + RX_ORUN = 0x6, + RX_ORUN_OCTS = 0x7, + RX_FCS = 0x8, + RX_L2ERR = 0x9, + RX_DRP_BCAST = 0xa, + RX_DRP_MCAST = 0xb, + RX_DRP_L3BCAST = 0xc, + RX_DRP_L3MCAST = 0xd, + RX_STATS_ENUM_LAST, +}; + +enum tx_stats_reg_offset { + TX_OCTS = 0x0, + TX_UCAST = 0x1, + TX_BCAST = 0x2, + TX_MCAST = 0x3, + TX_DROP = 0x4, + TX_STATS_ENUM_LAST, +}; + +struct nicvf_hw_stats { + u64 rx_bytes; + u64 rx_ucast_frames; + u64 rx_bcast_frames; + u64 rx_mcast_frames; + u64 rx_fcs_errors; + u64 rx_l2_errors; + u64 rx_drop_red; + u64 rx_drop_red_bytes; + u64 rx_drop_overrun; + u64 rx_drop_overrun_bytes; + u64 rx_drop_bcast; + u64 rx_drop_mcast; + u64 rx_drop_l3_bcast; + u64 rx_drop_l3_mcast; + u64 rx_bgx_truncated_pkts; + u64 rx_jabber_errs; + u64 rx_fcs_errs; + u64 rx_bgx_errs; + u64 rx_prel2_errs; + u64 rx_l2_hdr_malformed; + u64 rx_oversize; + u64 rx_undersize; + u64 rx_l2_len_mismatch; + u64 rx_l2_pclp; + u64 rx_ip_ver_errs; + u64 rx_ip_csum_errs; + u64 rx_ip_hdr_malformed; + u64 rx_ip_payload_malformed; + u64 rx_ip_ttl_errs; + u64 rx_l3_pclp; + u64 rx_l4_malformed; + u64 rx_l4_csum_errs; + u64 rx_udp_len_errs; + u64 rx_l4_port_errs; + u64 rx_tcp_flag_errs; + u64 rx_tcp_offset_errs; + u64 rx_l4_pclp; + u64 rx_truncated_pkts; + + u64 tx_bytes_ok; + u64 tx_ucast_frames_ok; + u64 tx_bcast_frames_ok; + u64 tx_mcast_frames_ok; + u64 tx_drops; +}; + +struct nicvf_drv_stats { + /* Rx */ + u64 rx_frames_ok; + u64 rx_frames_64; + u64 rx_frames_127; + u64 rx_frames_255; + u64 rx_frames_511; + u64 rx_frames_1023; + u64 rx_frames_1518; + u64 rx_frames_jumbo; + u64 rx_drops; + + /* Tx */ + u64 tx_frames_ok; + u64 tx_drops; + u64 tx_tso; + u64 txq_stop; + u64 txq_wake; +}; + +struct nicvf { + struct nicvf *pnicvf; + struct net_device *netdev; + struct pci_dev *pdev; + u8 vf_id; + u8 node; + bool tns_mode:1; + bool sqs_mode:1; + bool loopback_supported:1; + u16 mtu; + struct queue_set *qs; +#ifdef VNIC_MULTI_QSET_SUPPORT +#define MAX_SQS_PER_VF_SINGLE_NODE 5 +#define MAX_SQS_PER_VF 11 + u8 sqs_id; + u8 sqs_count; /* Secondary Qset count */ + struct nicvf *snicvf[MAX_SQS_PER_VF]; +#endif + u8 rx_queues; + u8 tx_queues; + u8 max_queues; + void __iomem *reg_base; + bool link_up; + u8 duplex; + u32 speed; + struct page *rb_page; + u32 rb_page_offset; + bool rb_alloc_fail; + bool rb_work_scheduled; + struct delayed_work rbdr_work; + struct tasklet_struct rbdr_task; + struct tasklet_struct qs_err_task; + struct tasklet_struct cq_task; + struct nicvf_cq_poll *napi[8]; +#ifdef VNIC_RSS_SUPPORT + struct nicvf_rss_info rss_info; +#endif + u8 cpi_alg; + /* Interrupt coalescing settings */ + u32 cq_coalesce_usecs; + + u32 msg_enable; + struct nicvf_hw_stats hw_stats; + struct nicvf_drv_stats drv_stats; + struct bgx_stats bgx_stats; + struct work_struct reset_task; + + /* MSI-X */ + bool msix_enabled; + u8 num_vec; + struct msix_entry msix_entries[NIC_VF_MSIX_VECTORS]; + char irq_name[NIC_VF_MSIX_VECTORS][20]; + bool irq_allocated[NIC_VF_MSIX_VECTORS]; + + /* VF <-> PF mailbox communication */ + bool pf_acked; + bool pf_nacked; +} ____cacheline_aligned_in_smp; + +/* PF <--> VF Mailbox communication + * Eight 64bit registers are shared between PF and VF. + * Separate set for each VF. + * Writing '1' into last register mbx7 means end of message. + */ + +/* PF <--> VF mailbox communication */ +#define NIC_PF_VF_MAILBOX_SIZE 2 +#define NIC_MBOX_MSG_TIMEOUT 2000 /* ms */ + +/* Mailbox message types */ +#define NIC_MBOX_MSG_READY 0x01 /* Is PF ready to rcv msgs */ +#define NIC_MBOX_MSG_ACK 0x02 /* ACK the message received */ +#define NIC_MBOX_MSG_NACK 0x03 /* NACK the message received */ +#define NIC_MBOX_MSG_QS_CFG 0x04 /* Configure Qset */ +#define NIC_MBOX_MSG_RQ_CFG 0x05 /* Configure receive queue */ +#define NIC_MBOX_MSG_SQ_CFG 0x06 /* Configure Send queue */ +#define NIC_MBOX_MSG_RQ_DROP_CFG 0x07 /* Configure receive queue */ +#define NIC_MBOX_MSG_SET_MAC 0x08 /* Add MAC ID to DMAC filter */ +#define NIC_MBOX_MSG_SET_MAX_FRS 0x09 /* Set max frame size */ +#define NIC_MBOX_MSG_CPI_CFG 0x0A /* Config CPI, RSSI */ +#define NIC_MBOX_MSG_RSS_SIZE 0x0B /* Get RSS indir_tbl size */ +#define NIC_MBOX_MSG_RSS_CFG 0x0C /* Config RSS table */ +#define NIC_MBOX_MSG_RSS_CFG_CONT 0x0D /* RSS config continuation */ +#define NIC_MBOX_MSG_RQ_BP_CFG 0x0E /* RQ backpressure config */ +#define NIC_MBOX_MSG_RQ_SW_SYNC 0x0F /* Flush inflight pkts to RQ */ +#define NIC_MBOX_MSG_BGX_STATS 0x10 /* Get stats from BGX */ +#define NIC_MBOX_MSG_BGX_LINK_CHANGE 0x11 /* BGX:LMAC link status */ +#define NIC_MBOX_MSG_ALLOC_SQS 0x12 /* Allocate secondary Qset */ +#define NIC_MBOX_MSG_NICVF_PTR 0x13 /* Send nicvf ptr to PF */ +#define NIC_MBOX_MSG_PNICVF_PTR 0x14 /* Get primary qset nicvf ptr */ +#define NIC_MBOX_MSG_SNICVF_PTR 0x15 /* Send sqet nicvf ptr to PVF */ +#define NIC_MBOX_MSG_LOOPBACK 0x16 /* Set interface in loopback */ +#define NIC_MBOX_MSG_CFG_DONE 0xF0 /* VF configuration done */ +#define NIC_MBOX_MSG_SHUTDOWN 0xF1 /* VF is being shutdown */ + +struct nic_cfg_msg { + u8 msg; + u8 vf_id; + u8 node_id; + bool tns_mode:1; + bool sqs_mode:1; + bool loopback_supported:1; + u8 mac_addr[ETH_ALEN]; +}; + +/* Qset configuration */ +struct qs_cfg_msg { + u8 msg; + u8 num; + u8 sqs_count; + u64 cfg; +}; + +/* Receive queue configuration */ +struct rq_cfg_msg { + u8 msg; + u8 qs_num; + u8 rq_num; + u64 cfg; +}; + +/* Send queue configuration */ +struct sq_cfg_msg { + u8 msg; + u8 qs_num; + u8 sq_num; + bool sqs_mode; + u64 cfg; +}; + +/* Set VF's MAC address */ +struct set_mac_msg { + u8 msg; + u8 vf_id; + u8 mac_addr[ETH_ALEN]; +}; + +/* Set Maximum frame size */ +struct set_frs_msg { + u8 msg; + u8 vf_id; + u16 max_frs; +}; + +/* Set CPI algorithm type */ +struct cpi_cfg_msg { + u8 msg; + u8 vf_id; + u8 rq_cnt; + u8 cpi_alg; +}; + +/* Get RSS table size */ +struct rss_sz_msg { + u8 msg; + u8 vf_id; + u16 ind_tbl_size; +}; + +/* Set RSS configuration */ +struct rss_cfg_msg { + u8 msg; + u8 vf_id; + u8 hash_bits; + u8 tbl_len; + u8 tbl_offset; +#define RSS_IND_TBL_LEN_PER_MBX_MSG 8 + u8 ind_tbl[RSS_IND_TBL_LEN_PER_MBX_MSG]; +}; + +struct bgx_stats_msg { + u8 msg; + u8 vf_id; + u8 rx; + u8 idx; + u64 stats; +}; + +/* Physical interface link status */ +struct bgx_link_status { + u8 msg; + u8 link_up; + u8 duplex; + u32 speed; +}; + +#ifdef VNIC_MULTI_QSET_SUPPORT +/* Get Extra Qset IDs */ +struct sqs_alloc { + u8 msg; + u8 vf_id; + u8 qs_count; +}; + +struct nicvf_ptr { + u8 msg; + u8 vf_id; + bool sqs_mode; + u8 sqs_id; + u64 nicvf; +}; +#endif + +/* Set interface in loopback mode */ +struct set_loopback { + u8 msg; + u8 vf_id; + bool enable; +}; + +/* 128 bit shared memory between PF and each VF */ +union nic_mbx { + struct { u8 msg; } msg; + struct nic_cfg_msg nic_cfg; + struct qs_cfg_msg qs; + struct rq_cfg_msg rq; + struct sq_cfg_msg sq; + struct set_mac_msg mac; + struct set_frs_msg frs; + struct cpi_cfg_msg cpi_cfg; + struct rss_sz_msg rss_size; + struct rss_cfg_msg rss_cfg; + struct bgx_stats_msg bgx_stats; + struct bgx_link_status link_status; +#ifdef VNIC_MULTI_QSET_SUPPORT + struct sqs_alloc sqs_alloc; + struct nicvf_ptr nicvf; +#endif + struct set_loopback lbk; +}; + +#define NIC_NODE_ID_MASK 0x03 +#define NIC_NODE_ID_SHIFT 44 + +static inline int nic_get_node_id(struct pci_dev *pdev) +{ + u64 addr = pci_resource_start(pdev, PCI_CFG_REG_BAR_NUM); + return ((addr >> NIC_NODE_ID_SHIFT) & NIC_NODE_ID_MASK); +} + +int nicvf_set_real_num_queues(struct net_device *netdev, + int tx_queues, int rx_queues); +int nicvf_open(struct net_device *netdev); +int nicvf_stop(struct net_device *netdev); +int nicvf_send_msg_to_pf(struct nicvf *vf, union nic_mbx *mbx); +void nicvf_config_rss(struct nicvf *nic); +void nicvf_set_rss_key(struct nicvf *nic); +void nicvf_set_ethtool_ops(struct net_device *netdev); +void nicvf_update_stats(struct nicvf *nic); +void nicvf_update_lmac_stats(struct nicvf *nic); + +#endif /* NIC_H */ diff --git a/sys/dev/vnic/nic_main.c b/sys/dev/vnic/nic_main.c new file mode 100644 index 000000000000..de2f22ce0daa --- /dev/null +++ b/sys/dev/vnic/nic_main.c @@ -0,0 +1,1192 @@ +/* + * Copyright (C) 2015 Cavium Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + * + */ + +#include <linux/module.h> +#include <linux/interrupt.h> +#include <linux/pci.h> +#include <linux/etherdevice.h> +#include <linux/of.h> + +#include "nic_reg.h" +#include "nic.h" +#include "q_struct.h" +#include "thunder_bgx.h" + +#define DRV_NAME "thunder-nic" +#define DRV_VERSION "1.0" + +struct nicpf { + struct pci_dev *pdev; + u8 rev_id; + u8 node; + unsigned int flags; + u8 num_vf_en; /* No of VF enabled */ + bool vf_enabled[MAX_NUM_VFS_SUPPORTED]; + void __iomem *reg_base; /* Register start address */ +#ifdef VNIC_MULTI_QSET_SUPPORT + u8 num_sqs_en; /* Secondary qsets enabled */ + u64 nicvf[MAX_NUM_VFS_SUPPORTED]; + u8 vf_sqs[MAX_NUM_VFS_SUPPORTED][MAX_SQS_PER_VF]; + u8 pqs_vf[MAX_NUM_VFS_SUPPORTED]; + bool sqs_used[MAX_NUM_VFS_SUPPORTED]; +#endif + struct pkind_cfg pkind; +#define NIC_SET_VF_LMAC_MAP(bgx, lmac) (((bgx & 0xF) << 4) | (lmac & 0xF)) +#define NIC_GET_BGX_FROM_VF_LMAC_MAP(map) ((map >> 4) & 0xF) +#define NIC_GET_LMAC_FROM_VF_LMAC_MAP(map) (map & 0xF) + u8 vf_lmac_map[MAX_LMAC]; + struct delayed_work dwork; + struct workqueue_struct *check_link; + u8 link[MAX_LMAC]; + u8 duplex[MAX_LMAC]; + u32 speed[MAX_LMAC]; + u16 cpi_base[MAX_NUM_VFS_SUPPORTED]; + u16 rss_ind_tbl_size; + bool mbx_lock[MAX_NUM_VFS_SUPPORTED]; + + /* MSI-X */ + bool msix_enabled; + u8 num_vec; + struct msix_entry msix_entries[NIC_PF_MSIX_VECTORS]; + bool irq_allocated[NIC_PF_MSIX_VECTORS]; +}; + +/* Supported devices */ +static const struct pci_device_id nic_id_table[] = { + { PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVICE_ID_THUNDER_NIC_PF) }, + { 0, } /* end of table */ +}; + +MODULE_AUTHOR("Sunil Goutham"); +MODULE_DESCRIPTION("Cavium Thunder NIC Physical Function Driver"); +MODULE_VERSION(DRV_VERSION); +MODULE_DEVICE_TABLE(pci, nic_id_table); + +/* The Cavium ThunderX network controller can *only* be found in SoCs + * containing the ThunderX ARM64 CPU implementation. All accesses to the device + * registers on this platform are implicitly strongly ordered with respect + * to memory accesses. So writeq_relaxed() and readq_relaxed() are safe to use + * with no memory barriers in this driver. The readq()/writeq() functions add + * explicit ordering operation which in this case are redundant, and only + * add overhead. + */ + +/* Register read/write APIs */ +static void nic_reg_write(struct nicpf *nic, u64 offset, u64 val) +{ + writeq_relaxed(val, nic->reg_base + offset); +} + +static u64 nic_reg_read(struct nicpf *nic, u64 offset) +{ + return readq_relaxed(nic->reg_base + offset); +} + +/* PF -> VF mailbox communication APIs */ +static void nic_enable_mbx_intr(struct nicpf *nic) +{ + /* Enable mailbox interrupt for all 128 VFs */ + nic_reg_write(nic, NIC_PF_MAILBOX_ENA_W1S, ~0ull); + nic_reg_write(nic, NIC_PF_MAILBOX_ENA_W1S + sizeof(u64), ~0ull); +} + +static void nic_clear_mbx_intr(struct nicpf *nic, int vf, int mbx_reg) +{ + nic_reg_write(nic, NIC_PF_MAILBOX_INT + (mbx_reg << 3), BIT_ULL(vf)); +} + +static u64 nic_get_mbx_addr(int vf) +{ + return NIC_PF_VF_0_127_MAILBOX_0_1 + (vf << NIC_VF_NUM_SHIFT); +} + +/* Send a mailbox message to VF + * @vf: vf to which this message to be sent + * @mbx: Message to be sent + */ +static void nic_send_msg_to_vf(struct nicpf *nic, int vf, union nic_mbx *mbx) +{ + void __iomem *mbx_addr = nic->reg_base + nic_get_mbx_addr(vf); + u64 *msg = (u64 *)mbx; + + /* In first revision HW, mbox interrupt is triggerred + * when PF writes to MBOX(1), in next revisions when + * PF writes to MBOX(0) + */ + if (nic->rev_id == 0) { + /* see the comment for nic_reg_write()/nic_reg_read() + * functions above + */ + writeq_relaxed(msg[0], mbx_addr); + writeq_relaxed(msg[1], mbx_addr + 8); + } else { + writeq_relaxed(msg[1], mbx_addr + 8); + writeq_relaxed(msg[0], mbx_addr); + } +} + +/* Responds to VF's READY message with VF's + * ID, node, MAC address e.t.c + * @vf: VF which sent READY message + */ +static void nic_mbx_send_ready(struct nicpf *nic, int vf) +{ + union nic_mbx mbx = {}; + int bgx_idx, lmac; + const char *mac; + + mbx.nic_cfg.msg = NIC_MBOX_MSG_READY; + mbx.nic_cfg.vf_id = vf; + + if (nic->flags & NIC_TNS_ENABLED) + mbx.nic_cfg.tns_mode = NIC_TNS_MODE; + else + mbx.nic_cfg.tns_mode = NIC_TNS_BYPASS_MODE; + + if (vf < MAX_LMAC) { + bgx_idx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]); + lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]); + + mac = bgx_get_lmac_mac(nic->node, bgx_idx, lmac); + if (mac) + ether_addr_copy((u8 *)&mbx.nic_cfg.mac_addr, mac); + } +#ifdef VNIC_MULTI_QSET_SUPPORT + mbx.nic_cfg.sqs_mode = (vf >= nic->num_vf_en) ? true : false; +#endif + mbx.nic_cfg.node_id = nic->node; + + mbx.nic_cfg.loopback_supported = vf < MAX_LMAC; + + nic_send_msg_to_vf(nic, vf, &mbx); +} + +/* ACKs VF's mailbox message + * @vf: VF to which ACK to be sent + */ +static void nic_mbx_send_ack(struct nicpf *nic, int vf) +{ + union nic_mbx mbx = {}; + + mbx.msg.msg = NIC_MBOX_MSG_ACK; + nic_send_msg_to_vf(nic, vf, &mbx); +} + +/* NACKs VF's mailbox message that PF is not able to + * complete the action + * @vf: VF to which ACK to be sent + */ +static void nic_mbx_send_nack(struct nicpf *nic, int vf) +{ + union nic_mbx mbx = {}; + + mbx.msg.msg = NIC_MBOX_MSG_NACK; + nic_send_msg_to_vf(nic, vf, &mbx); +} + +/* Flush all in flight receive packets to memory and + * bring down an active RQ + */ +static int nic_rcv_queue_sw_sync(struct nicpf *nic) +{ + u16 timeout = ~0x00; + + nic_reg_write(nic, NIC_PF_SW_SYNC_RX, 0x01); + /* Wait till sync cycle is finished */ + while (timeout) { + if (nic_reg_read(nic, NIC_PF_SW_SYNC_RX_DONE) & 0x1) + break; + timeout--; + } + nic_reg_write(nic, NIC_PF_SW_SYNC_RX, 0x00); + if (!timeout) { + dev_err(&nic->pdev->dev, "Receive queue software sync failed"); + return 1; + } + return 0; +} + +/* Get BGX Rx/Tx stats and respond to VF's request */ +static void nic_get_bgx_stats(struct nicpf *nic, struct bgx_stats_msg *bgx) +{ + int bgx_idx, lmac; + union nic_mbx mbx = {}; + + bgx_idx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[bgx->vf_id]); + lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[bgx->vf_id]); + + mbx.bgx_stats.msg = NIC_MBOX_MSG_BGX_STATS; + mbx.bgx_stats.vf_id = bgx->vf_id; + mbx.bgx_stats.rx = bgx->rx; + mbx.bgx_stats.idx = bgx->idx; + if (bgx->rx) + mbx.bgx_stats.stats = bgx_get_rx_stats(nic->node, bgx_idx, + lmac, bgx->idx); + else + mbx.bgx_stats.stats = bgx_get_tx_stats(nic->node, bgx_idx, + lmac, bgx->idx); + nic_send_msg_to_vf(nic, bgx->vf_id, &mbx); +} + +/* Update hardware min/max frame size */ +static int nic_update_hw_frs(struct nicpf *nic, int new_frs, int vf) +{ + if ((new_frs > NIC_HW_MAX_FRS) || (new_frs < NIC_HW_MIN_FRS)) { + dev_err(&nic->pdev->dev, + "Invalid MTU setting from VF%d rejected, should be between %d and %d\n", + vf, NIC_HW_MIN_FRS, NIC_HW_MAX_FRS); + return 1; + } + new_frs += ETH_HLEN; + if (new_frs <= nic->pkind.maxlen) + return 0; + + nic->pkind.maxlen = new_frs; + nic_reg_write(nic, NIC_PF_PKIND_0_15_CFG, *(u64 *)&nic->pkind); + return 0; +} + +/* Set minimum transmit packet size */ +static void nic_set_tx_pkt_pad(struct nicpf *nic, int size) +{ + int lmac; + u64 lmac_cfg; + + /* Max value that can be set is 60 */ + if (size > 60) + size = 60; + + for (lmac = 0; lmac < (MAX_BGX_PER_CN88XX * MAX_LMAC_PER_BGX); lmac++) { + lmac_cfg = nic_reg_read(nic, NIC_PF_LMAC_0_7_CFG | (lmac << 3)); + lmac_cfg &= ~(0xF << 2); + lmac_cfg |= ((size / 4) << 2); + nic_reg_write(nic, NIC_PF_LMAC_0_7_CFG | (lmac << 3), lmac_cfg); + } +} + +/* Function to check number of LMACs present and set VF::LMAC mapping. + * Mapping will be used while initializing channels. + */ +static void nic_set_lmac_vf_mapping(struct nicpf *nic) +{ + unsigned bgx_map = bgx_get_map(nic->node); + int bgx, next_bgx_lmac = 0; + int lmac, lmac_cnt = 0; + u64 lmac_credit; + + nic->num_vf_en = 0; + if (nic->flags & NIC_TNS_ENABLED) { + nic->num_vf_en = DEFAULT_NUM_VF_ENABLED; + return; + } + + for (bgx = 0; bgx < NIC_MAX_BGX; bgx++) { + if (!(bgx_map & (1 << bgx))) + continue; + lmac_cnt = bgx_get_lmac_count(nic->node, bgx); + for (lmac = 0; lmac < lmac_cnt; lmac++) + nic->vf_lmac_map[next_bgx_lmac++] = + NIC_SET_VF_LMAC_MAP(bgx, lmac); + nic->num_vf_en += lmac_cnt; + + /* Program LMAC credits */ + lmac_credit = (1ull << 1); /* channel credit enable */ + lmac_credit |= (0x1ff << 2); /* Max outstanding pkt count */ + /* 48KB BGX Tx buffer size, each unit is of size 16bytes */ + lmac_credit |= (((((48 * 1024) / lmac_cnt) - + NIC_HW_MAX_FRS) / 16) << 12); + lmac = bgx * MAX_LMAC_PER_BGX; + for (; lmac < lmac_cnt + (bgx * MAX_LMAC_PER_BGX); lmac++) + nic_reg_write(nic, + NIC_PF_LMAC_0_7_CREDIT + (lmac * 8), + lmac_credit); + } +} + +#define TNS_PORT0_BLOCK 6 +#define TNS_PORT1_BLOCK 7 +#define BGX0_BLOCK 8 +#define BGX1_BLOCK 9 + +static void nic_init_hw(struct nicpf *nic) +{ + int i; + + /* Reset NIC, in case the driver is repeatedly inserted and removed */ + nic_reg_write(nic, NIC_PF_SOFT_RESET, 1); + + /* Enable NIC HW block */ + nic_reg_write(nic, NIC_PF_CFG, 0x3); + + /* Enable backpressure */ + nic_reg_write(nic, NIC_PF_BP_CFG, (1ULL << 6) | 0x03); + + if (nic->flags & NIC_TNS_ENABLED) { + nic_reg_write(nic, NIC_PF_INTF_0_1_SEND_CFG, + (NIC_TNS_MODE << 7) | TNS_PORT0_BLOCK); + nic_reg_write(nic, NIC_PF_INTF_0_1_SEND_CFG | (1 << 8), + (NIC_TNS_MODE << 7) | TNS_PORT1_BLOCK); + nic_reg_write(nic, NIC_PF_INTF_0_1_BP_CFG, + (1ULL << 63) | TNS_PORT0_BLOCK); + nic_reg_write(nic, NIC_PF_INTF_0_1_BP_CFG + (1 << 8), + (1ULL << 63) | TNS_PORT1_BLOCK); + + } else { + /* Disable TNS mode on both interfaces */ + nic_reg_write(nic, NIC_PF_INTF_0_1_SEND_CFG, + (NIC_TNS_BYPASS_MODE << 7) | BGX0_BLOCK); + nic_reg_write(nic, NIC_PF_INTF_0_1_SEND_CFG | (1 << 8), + (NIC_TNS_BYPASS_MODE << 7) | BGX1_BLOCK); + nic_reg_write(nic, NIC_PF_INTF_0_1_BP_CFG, + (1ULL << 63) | BGX0_BLOCK); + nic_reg_write(nic, NIC_PF_INTF_0_1_BP_CFG + (1 << 8), + (1ULL << 63) | BGX1_BLOCK); + } + + /* PKIND configuration */ + nic->pkind.minlen = 0; + nic->pkind.maxlen = NIC_HW_MAX_FRS + ETH_HLEN; + nic->pkind.lenerr_en = 1; + nic->pkind.rx_hdr = 0; + nic->pkind.hdr_sl = 0; + + for (i = 0; i < NIC_MAX_PKIND; i++) + nic_reg_write(nic, NIC_PF_PKIND_0_15_CFG | (i << 3), + *(u64 *)&nic->pkind); + + nic_set_tx_pkt_pad(nic, NIC_HW_MIN_FRS); + + /* Timer config */ + nic_reg_write(nic, NIC_PF_INTR_TIMER_CFG, NICPF_CLK_PER_INT_TICK); + + /* Enable VLAN ethertype matching and stripping */ + nic_reg_write(nic, NIC_PF_RX_ETYPE_0_7, + (2 << 19) | (ETYPE_ALG_VLAN_STRIP << 16) | ETH_P_8021Q); +} + +/* Channel parse index configuration */ +static void nic_config_cpi(struct nicpf *nic, struct cpi_cfg_msg *cfg) +{ + u32 vnic, bgx, lmac, chan; + u32 padd, cpi_count = 0; + u64 cpi_base, cpi, rssi_base, rssi; + u8 qset, rq_idx = 0; + + vnic = cfg->vf_id; + bgx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vnic]); + lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vnic]); + + chan = (lmac * MAX_BGX_CHANS_PER_LMAC) + (bgx * NIC_CHANS_PER_INF); + cpi_base = (lmac * NIC_MAX_CPI_PER_LMAC) + (bgx * NIC_CPI_PER_BGX); + rssi_base = (lmac * nic->rss_ind_tbl_size) + (bgx * NIC_RSSI_PER_BGX); + + /* Rx channel configuration */ + nic_reg_write(nic, NIC_PF_CHAN_0_255_RX_BP_CFG | (chan << 3), + (1ull << 63) | (vnic << 0)); + nic_reg_write(nic, NIC_PF_CHAN_0_255_RX_CFG | (chan << 3), + ((u64)cfg->cpi_alg << 62) | (cpi_base << 48)); + + if (cfg->cpi_alg == CPI_ALG_NONE) + cpi_count = 1; + else if (cfg->cpi_alg == CPI_ALG_VLAN) /* 3 bits of PCP */ + cpi_count = 8; + else if (cfg->cpi_alg == CPI_ALG_VLAN16) /* 3 bits PCP + DEI */ + cpi_count = 16; + else if (cfg->cpi_alg == CPI_ALG_DIFF) /* 6bits DSCP */ + cpi_count = NIC_MAX_CPI_PER_LMAC; + + /* RSS Qset, Qidx mapping */ + qset = cfg->vf_id; + rssi = rssi_base; + for (; rssi < (rssi_base + cfg->rq_cnt); rssi++) { + nic_reg_write(nic, NIC_PF_RSSI_0_4097_RQ | (rssi << 3), + (qset << 3) | rq_idx); + rq_idx++; + } + + rssi = 0; + cpi = cpi_base; + for (; cpi < (cpi_base + cpi_count); cpi++) { + /* Determine port to channel adder */ + if (cfg->cpi_alg != CPI_ALG_DIFF) + padd = cpi % cpi_count; + else + padd = cpi % 8; /* 3 bits CS out of 6bits DSCP */ + + /* Leave RSS_SIZE as '0' to disable RSS */ + nic_reg_write(nic, NIC_PF_CPI_0_2047_CFG | (cpi << 3), + (vnic << 24) | (padd << 16) | (rssi_base + rssi)); + + if ((rssi + 1) >= cfg->rq_cnt) + continue; + + if (cfg->cpi_alg == CPI_ALG_VLAN) + rssi++; + else if (cfg->cpi_alg == CPI_ALG_VLAN16) + rssi = ((cpi - cpi_base) & 0xe) >> 1; + else if (cfg->cpi_alg == CPI_ALG_DIFF) + rssi = ((cpi - cpi_base) & 0x38) >> 3; + } + nic->cpi_base[cfg->vf_id] = cpi_base; +} + +#ifdef VNIC_RSS_SUPPORT +/* Responsds to VF with its RSS indirection table size */ +static void nic_send_rss_size(struct nicpf *nic, int vf) +{ + union nic_mbx mbx = {}; + u64 *msg; + + msg = (u64 *)&mbx; + + mbx.rss_size.msg = NIC_MBOX_MSG_RSS_SIZE; + mbx.rss_size.ind_tbl_size = nic->rss_ind_tbl_size; + nic_send_msg_to_vf(nic, vf, &mbx); +} + +/* Receive side scaling configuration + * configure: + * - RSS index + * - indir table i.e hash::RQ mapping + * - no of hash bits to consider + */ +static void nic_config_rss(struct nicpf *nic, struct rss_cfg_msg *cfg) +{ + u8 qset, idx = 0; + u64 cpi_cfg, cpi_base, rssi_base, rssi; + + cpi_base = nic->cpi_base[cfg->vf_id]; + cpi_cfg = nic_reg_read(nic, NIC_PF_CPI_0_2047_CFG | (cpi_base << 3)); + rssi_base = (cpi_cfg & 0x0FFF) + cfg->tbl_offset; + + rssi = rssi_base; + qset = cfg->vf_id; + + for (; rssi < (rssi_base + cfg->tbl_len); rssi++) { +#ifdef VNIC_MULTI_QSET_SUPPORT + u8 svf = cfg->ind_tbl[idx] >> 3; + + if (svf) + qset = nic->vf_sqs[cfg->vf_id][svf - 1]; + else + qset = cfg->vf_id; +#endif + nic_reg_write(nic, NIC_PF_RSSI_0_4097_RQ | (rssi << 3), + (qset << 3) | (cfg->ind_tbl[idx] & 0x7)); + idx++; + } + + cpi_cfg &= ~(0xFULL << 20); + cpi_cfg |= (cfg->hash_bits << 20); + nic_reg_write(nic, NIC_PF_CPI_0_2047_CFG | (cpi_base << 3), cpi_cfg); +} +#endif + +/* 4 level transmit side scheduler configutation + * for TNS bypass mode + * + * Sample configuration for SQ0 + * VNIC0-SQ0 -> TL4(0) -> TL3[0] -> TL2[0] -> TL1[0] -> BGX0 + * VNIC1-SQ0 -> TL4(8) -> TL3[2] -> TL2[0] -> TL1[0] -> BGX0 + * VNIC2-SQ0 -> TL4(16) -> TL3[4] -> TL2[1] -> TL1[0] -> BGX0 + * VNIC3-SQ0 -> TL4(24) -> TL3[6] -> TL2[1] -> TL1[0] -> BGX0 + * VNIC4-SQ0 -> TL4(512) -> TL3[128] -> TL2[32] -> TL1[1] -> BGX1 + * VNIC5-SQ0 -> TL4(520) -> TL3[130] -> TL2[32] -> TL1[1] -> BGX1 + * VNIC6-SQ0 -> TL4(528) -> TL3[132] -> TL2[33] -> TL1[1] -> BGX1 + * VNIC7-SQ0 -> TL4(536) -> TL3[134] -> TL2[33] -> TL1[1] -> BGX1 + */ +static void nic_tx_channel_cfg(struct nicpf *nic, u8 vnic, + struct sq_cfg_msg *sq) +{ + u32 bgx, lmac, chan; + u32 tl2, tl3, tl4; + u32 rr_quantum; + u8 sq_idx = sq->sq_num; + u8 pqs_vnic; + +#ifdef VNIC_MULTI_QSET_SUPPORT + if (sq->sqs_mode) + pqs_vnic = nic->pqs_vf[vnic]; + else +#endif + pqs_vnic = vnic; + + bgx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[pqs_vnic]); + lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[pqs_vnic]); + + /* 24 bytes for FCS, IPG and preamble */ + rr_quantum = ((NIC_HW_MAX_FRS + 24) / 4); + + tl4 = (lmac * NIC_TL4_PER_LMAC) + (bgx * NIC_TL4_PER_BGX); + tl4 += sq_idx; +#ifdef VNIC_MULTI_QSET_SUPPORT + if (sq->sqs_mode) + tl4 += vnic * 8; +#endif + + tl3 = tl4 / (NIC_MAX_TL4 / NIC_MAX_TL3); + nic_reg_write(nic, NIC_PF_QSET_0_127_SQ_0_7_CFG2 | + ((u64)vnic << NIC_QS_ID_SHIFT) | + ((u32)sq_idx << NIC_Q_NUM_SHIFT), tl4); + nic_reg_write(nic, NIC_PF_TL4_0_1023_CFG | (tl4 << 3), + ((u64)vnic << 27) | ((u32)sq_idx << 24) | rr_quantum); + + nic_reg_write(nic, NIC_PF_TL3_0_255_CFG | (tl3 << 3), rr_quantum); + chan = (lmac * MAX_BGX_CHANS_PER_LMAC) + (bgx * NIC_CHANS_PER_INF); + nic_reg_write(nic, NIC_PF_TL3_0_255_CHAN | (tl3 << 3), chan); + /* Enable backpressure on the channel */ + nic_reg_write(nic, NIC_PF_CHAN_0_255_TX_CFG | (chan << 3), 1); + + tl2 = tl3 >> 2; + nic_reg_write(nic, NIC_PF_TL3A_0_63_CFG | (tl2 << 3), tl2); + nic_reg_write(nic, NIC_PF_TL2_0_63_CFG | (tl2 << 3), rr_quantum); + /* No priorities as of now */ + nic_reg_write(nic, NIC_PF_TL2_0_63_PRI | (tl2 << 3), 0x00); +} + +#ifdef VNIC_MULTI_QSET_SUPPORT +/* Send primary nicvf pointer to secondary QS's VF */ +static void nic_send_pnicvf(struct nicpf *nic, int sqs) +{ + union nic_mbx mbx = {}; + + mbx.nicvf.msg = NIC_MBOX_MSG_PNICVF_PTR; + mbx.nicvf.nicvf = nic->nicvf[nic->pqs_vf[sqs]]; + nic_send_msg_to_vf(nic, sqs, &mbx); +} + +/* Send SQS's nicvf pointer to primary QS's VF */ +static void nic_send_snicvf(struct nicpf *nic, struct nicvf_ptr *nicvf) +{ + union nic_mbx mbx = {}; + int sqs_id = nic->vf_sqs[nicvf->vf_id][nicvf->sqs_id]; + + mbx.nicvf.msg = NIC_MBOX_MSG_SNICVF_PTR; + mbx.nicvf.sqs_id = nicvf->sqs_id; + mbx.nicvf.nicvf = nic->nicvf[sqs_id]; + nic_send_msg_to_vf(nic, nicvf->vf_id, &mbx); +} + +/* Find next available Qset that can be assigned as a + * secondary Qset to a VF. + */ +static int nic_nxt_avail_sqs(struct nicpf *nic) +{ + int sqs; + + for (sqs = 0; sqs < nic->num_sqs_en; sqs++) { + if (!nic->sqs_used[sqs]) + nic->sqs_used[sqs] = true; + else + continue; + return sqs + nic->num_vf_en; + } + return -1; +} + +/* Allocate additional Qsets for requested VF */ +static void nic_alloc_sqs(struct nicpf *nic, struct sqs_alloc *sqs) +{ + union nic_mbx mbx = {}; + int idx, alloc_qs = 0; + int sqs_id; + + if (!nic->num_sqs_en) + goto send_mbox; + + for (idx = 0; idx < sqs->qs_count; idx++) { + sqs_id = nic_nxt_avail_sqs(nic); + if (sqs_id < 0) + break; + nic->vf_sqs[sqs->vf_id][idx] = sqs_id; + nic->pqs_vf[sqs_id] = sqs->vf_id; + alloc_qs++; + } + +send_mbox: + mbx.sqs_alloc.msg = NIC_MBOX_MSG_ALLOC_SQS; + mbx.sqs_alloc.vf_id = sqs->vf_id; + mbx.sqs_alloc.qs_count = alloc_qs; + nic_send_msg_to_vf(nic, sqs->vf_id, &mbx); +} +#endif + +static int nic_config_loopback(struct nicpf *nic, struct set_loopback *lbk) +{ + int bgx_idx, lmac_idx; + + if (lbk->vf_id > MAX_LMAC) + return -1; + + bgx_idx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[lbk->vf_id]); + lmac_idx = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[lbk->vf_id]); + + bgx_lmac_internal_loopback(nic->node, bgx_idx, lmac_idx, lbk->enable); + + return 0; +} + +/* Interrupt handler to handle mailbox messages from VFs */ +static void nic_handle_mbx_intr(struct nicpf *nic, int vf) +{ + union nic_mbx mbx = {}; + u64 *mbx_data; + u64 mbx_addr; + u64 reg_addr; + u64 cfg; + int bgx, lmac; + int i; + int ret = 0; + + nic->mbx_lock[vf] = true; + + mbx_addr = nic_get_mbx_addr(vf); + mbx_data = (u64 *)&mbx; + + for (i = 0; i < NIC_PF_VF_MAILBOX_SIZE; i++) { + *mbx_data = nic_reg_read(nic, mbx_addr); + mbx_data++; + mbx_addr += sizeof(u64); + } + + dev_dbg(&nic->pdev->dev, "%s: Mailbox msg %d from VF%d\n", + __func__, mbx.msg.msg, vf); + switch (mbx.msg.msg) { + case NIC_MBOX_MSG_READY: + nic_mbx_send_ready(nic, vf); + if (vf < MAX_LMAC) { + nic->link[vf] = 0; + nic->duplex[vf] = 0; + nic->speed[vf] = 0; + } + ret = 1; + break; + case NIC_MBOX_MSG_QS_CFG: + reg_addr = NIC_PF_QSET_0_127_CFG | + (mbx.qs.num << NIC_QS_ID_SHIFT); + cfg = mbx.qs.cfg; +#ifdef VNIC_MULTI_QSET_SUPPORT + /* Check if its a secondary Qset */ + if (vf >= nic->num_vf_en) { + cfg = cfg & (~0x7FULL); + /* Assign this Qset to primary Qset's VF */ + cfg |= nic->pqs_vf[vf]; + } +#endif + nic_reg_write(nic, reg_addr, cfg); + break; + case NIC_MBOX_MSG_RQ_CFG: + reg_addr = NIC_PF_QSET_0_127_RQ_0_7_CFG | + (mbx.rq.qs_num << NIC_QS_ID_SHIFT) | + (mbx.rq.rq_num << NIC_Q_NUM_SHIFT); + nic_reg_write(nic, reg_addr, mbx.rq.cfg); + break; + case NIC_MBOX_MSG_RQ_BP_CFG: + reg_addr = NIC_PF_QSET_0_127_RQ_0_7_BP_CFG | + (mbx.rq.qs_num << NIC_QS_ID_SHIFT) | + (mbx.rq.rq_num << NIC_Q_NUM_SHIFT); + nic_reg_write(nic, reg_addr, mbx.rq.cfg); + break; + case NIC_MBOX_MSG_RQ_SW_SYNC: + ret = nic_rcv_queue_sw_sync(nic); + break; + case NIC_MBOX_MSG_RQ_DROP_CFG: + reg_addr = NIC_PF_QSET_0_127_RQ_0_7_DROP_CFG | + (mbx.rq.qs_num << NIC_QS_ID_SHIFT) | + (mbx.rq.rq_num << NIC_Q_NUM_SHIFT); + nic_reg_write(nic, reg_addr, mbx.rq.cfg); + break; + case NIC_MBOX_MSG_SQ_CFG: + reg_addr = NIC_PF_QSET_0_127_SQ_0_7_CFG | + (mbx.sq.qs_num << NIC_QS_ID_SHIFT) | + (mbx.sq.sq_num << NIC_Q_NUM_SHIFT); + nic_reg_write(nic, reg_addr, mbx.sq.cfg); + nic_tx_channel_cfg(nic, mbx.qs.num, &mbx.sq); + break; + case NIC_MBOX_MSG_SET_MAC: +#ifdef VNIC_MULTI_QSET_SUPPORT + if (vf >= nic->num_vf_en) + break; +#endif + lmac = mbx.mac.vf_id; + bgx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[lmac]); + lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[lmac]); + bgx_set_lmac_mac(nic->node, bgx, lmac, mbx.mac.mac_addr); + break; + case NIC_MBOX_MSG_SET_MAX_FRS: + ret = nic_update_hw_frs(nic, mbx.frs.max_frs, + mbx.frs.vf_id); + break; + case NIC_MBOX_MSG_CPI_CFG: + nic_config_cpi(nic, &mbx.cpi_cfg); + break; +#ifdef VNIC_RSS_SUPPORT + case NIC_MBOX_MSG_RSS_SIZE: + nic_send_rss_size(nic, vf); + goto unlock; + case NIC_MBOX_MSG_RSS_CFG: + case NIC_MBOX_MSG_RSS_CFG_CONT: + nic_config_rss(nic, &mbx.rss_cfg); + break; +#endif + case NIC_MBOX_MSG_CFG_DONE: + /* Last message of VF config msg sequence */ + nic->vf_enabled[vf] = true; + goto unlock; + case NIC_MBOX_MSG_SHUTDOWN: + /* First msg in VF teardown sequence */ + nic->vf_enabled[vf] = false; +#ifdef VNIC_MULTI_QSET_SUPPORT + if (vf >= nic->num_vf_en) + nic->sqs_used[vf - nic->num_vf_en] = false; + nic->pqs_vf[vf] = 0; +#endif + break; +#ifdef VNIC_MULTI_QSET_SUPPORT + case NIC_MBOX_MSG_ALLOC_SQS: + nic_alloc_sqs(nic, &mbx.sqs_alloc); + goto unlock; + case NIC_MBOX_MSG_NICVF_PTR: + nic->nicvf[vf] = mbx.nicvf.nicvf; + break; + case NIC_MBOX_MSG_PNICVF_PTR: + nic_send_pnicvf(nic, vf); + goto unlock; + case NIC_MBOX_MSG_SNICVF_PTR: + nic_send_snicvf(nic, &mbx.nicvf); + goto unlock; +#endif + case NIC_MBOX_MSG_BGX_STATS: + nic_get_bgx_stats(nic, &mbx.bgx_stats); + goto unlock; + case NIC_MBOX_MSG_LOOPBACK: + ret = nic_config_loopback(nic, &mbx.lbk); + break; + default: + dev_err(&nic->pdev->dev, + "Invalid msg from VF%d, msg 0x%x\n", vf, mbx.msg.msg); + break; + } + + if (!ret) + nic_mbx_send_ack(nic, vf); + else if (mbx.msg.msg != NIC_MBOX_MSG_READY) + nic_mbx_send_nack(nic, vf); +unlock: + nic->mbx_lock[vf] = false; +} + +static void nic_mbx_intr_handler (struct nicpf *nic, int mbx) +{ + u64 intr; + u8 vf, vf_per_mbx_reg = 64; + + intr = nic_reg_read(nic, NIC_PF_MAILBOX_INT + (mbx << 3)); + dev_dbg(&nic->pdev->dev, "PF interrupt Mbox%d 0x%llx\n", mbx, intr); + for (vf = 0; vf < vf_per_mbx_reg; vf++) { + if (intr & (1ULL << vf)) { + dev_dbg(&nic->pdev->dev, "Intr from VF %d\n", + vf + (mbx * vf_per_mbx_reg)); + + nic_handle_mbx_intr(nic, vf + (mbx * vf_per_mbx_reg)); + nic_clear_mbx_intr(nic, vf, mbx); + } + } +} + +static irqreturn_t nic_mbx0_intr_handler (int irq, void *nic_irq) +{ + struct nicpf *nic = (struct nicpf *)nic_irq; + + nic_mbx_intr_handler(nic, 0); + + return IRQ_HANDLED; +} + +static irqreturn_t nic_mbx1_intr_handler (int irq, void *nic_irq) +{ + struct nicpf *nic = (struct nicpf *)nic_irq; + + nic_mbx_intr_handler(nic, 1); + + return IRQ_HANDLED; +} + +static int nic_enable_msix(struct nicpf *nic) +{ + int i, ret; + + nic->num_vec = NIC_PF_MSIX_VECTORS; + + for (i = 0; i < nic->num_vec; i++) + nic->msix_entries[i].entry = i; + + ret = pci_enable_msix(nic->pdev, nic->msix_entries, nic->num_vec); + if (ret) { + dev_err(&nic->pdev->dev, + "Request for #%d msix vectors failed\n", + nic->num_vec); + return ret; + } + + nic->msix_enabled = 1; + return 0; +} + +static void nic_disable_msix(struct nicpf *nic) +{ + if (nic->msix_enabled) { + pci_disable_msix(nic->pdev); + nic->msix_enabled = 0; + nic->num_vec = 0; + } +} + +static void nic_free_all_interrupts(struct nicpf *nic) +{ + int irq; + + for (irq = 0; irq < nic->num_vec; irq++) { + if (nic->irq_allocated[irq]) + free_irq(nic->msix_entries[irq].vector, nic); + nic->irq_allocated[irq] = false; + } +} + +static int nic_register_interrupts(struct nicpf *nic) +{ + int ret; + + /* Enable MSI-X */ + ret = nic_enable_msix(nic); + if (ret) + return ret; + + /* Register mailbox interrupt handlers */ + ret = request_irq(nic->msix_entries[NIC_PF_INTR_ID_MBOX0].vector, + nic_mbx0_intr_handler, 0, "NIC Mbox0", nic); + if (ret) + goto fail; + + nic->irq_allocated[NIC_PF_INTR_ID_MBOX0] = true; + + ret = request_irq(nic->msix_entries[NIC_PF_INTR_ID_MBOX1].vector, + nic_mbx1_intr_handler, 0, "NIC Mbox1", nic); + if (ret) + goto fail; + + nic->irq_allocated[NIC_PF_INTR_ID_MBOX1] = true; + + /* Enable mailbox interrupt */ + nic_enable_mbx_intr(nic); + return 0; + +fail: + dev_err(&nic->pdev->dev, "Request irq failed\n"); + nic_free_all_interrupts(nic); + return ret; +} + +static void nic_unregister_interrupts(struct nicpf *nic) +{ + nic_free_all_interrupts(nic); + nic_disable_msix(nic); +} + +#ifdef VNIC_MULTI_QSET_SUPPORT +int nic_num_sqs_en(struct nicpf *nic, int vf_en) +{ + int pos = 0, sqs_per_vf = MAX_SQS_PER_VF_SINGLE_NODE; + u16 total_vf; + + /* Check if its a multi-node environment */ + if (nr_node_ids > 1) + sqs_per_vf = MAX_SQS_PER_VF; + + pos = pci_find_ext_capability(nic->pdev, PCI_EXT_CAP_ID_SRIOV); + pci_read_config_word(nic->pdev, (pos + PCI_SRIOV_TOTAL_VF), &total_vf); + return min(total_vf - vf_en, vf_en * sqs_per_vf); +} +#endif + +int nic_sriov_configure(struct pci_dev *pdev, int num_vfs_requested) +{ + struct nicpf *nic = pci_get_drvdata(pdev); + int vf_en; + int err; + + if (nic->num_vf_en == num_vfs_requested) + return num_vfs_requested; + + if (nic->flags & NIC_SRIOV_ENABLED) { + nic->flags &= ~NIC_SRIOV_ENABLED; + } + + nic->num_vf_en = 0; + if (num_vfs_requested > MAX_NUM_VFS_SUPPORTED || + num_vfs_requested < 0) + return -EINVAL; + + if (num_vfs_requested) { + vf_en = num_vfs_requested; +#ifdef VNIC_MULTI_QSET_SUPPORT + nic->num_sqs_en = nic_num_sqs_en(nic, num_vfs_requested); + vf_en += nic->num_sqs_en; +#endif + nic->num_vf_en = num_vfs_requested; + nic->flags |= NIC_SRIOV_ENABLED; + } + + return num_vfs_requested; +} + +static int nic_sriov_init(struct pci_dev *pdev, struct nicpf *nic) +{ + int pos = 0; + int vf_en; + int err; + u16 total_vf_cnt; + + pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_SRIOV); + if (!pos) { + dev_err(&pdev->dev, "SRIOV capability is not found in PCIe config space\n"); + return -ENODEV; + } + + pci_read_config_word(pdev, (pos + PCI_SRIOV_TOTAL_VF), &total_vf_cnt); + if (total_vf_cnt < nic->num_vf_en) + nic->num_vf_en = total_vf_cnt; + + if (!total_vf_cnt) + return 0; + + vf_en = nic->num_vf_en; +#ifdef VNIC_MULTI_QSET_SUPPORT + nic->num_sqs_en = nic_num_sqs_en(nic, nic->num_vf_en); + vf_en += nic->num_sqs_en; +#endif + + if (err) { + dev_err(&pdev->dev, "SRIOV enable failed, num VF is %d\n", + vf_en); + nic->num_vf_en = 0; + return err; + } + + dev_info(&pdev->dev, "SRIOV enabled, number of VF available %d\n", + vf_en); + + nic->flags |= NIC_SRIOV_ENABLED; + return 0; +} + +/* Poll for BGX LMAC link status and update corresponding VF + * if there is a change, valid only if internal L2 switch + * is not present otherwise VF link is always treated as up + */ +static void nic_poll_for_link(struct work_struct *work) +{ + union nic_mbx mbx = {}; + struct nicpf *nic; + struct bgx_link_status link; + u8 vf, bgx, lmac; + + nic = container_of(work, struct nicpf, dwork.work); + + mbx.link_status.msg = NIC_MBOX_MSG_BGX_LINK_CHANGE; + + for (vf = 0; vf < nic->num_vf_en; vf++) { + /* Poll only if VF is UP */ + if (!nic->vf_enabled[vf]) + continue; + + /* Get BGX, LMAC indices for the VF */ + bgx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]); + lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]); + /* Get interface link status */ + bgx_get_lmac_link_state(nic->node, bgx, lmac, &link); + + /* Inform VF only if link status changed */ + if (nic->link[vf] == link.link_up) + continue; + + if (!nic->mbx_lock[vf]) { + nic->link[vf] = link.link_up; + nic->duplex[vf] = link.duplex; + nic->speed[vf] = link.speed; + + /* Send a mbox message to VF with current link status */ + mbx.link_status.link_up = link.link_up; + mbx.link_status.duplex = link.duplex; + mbx.link_status.speed = link.speed; + nic_send_msg_to_vf(nic, vf, &mbx); + } + } + queue_delayed_work(nic->check_link, &nic->dwork, HZ * 2); +} + +static int nic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) +{ + struct device *dev = &pdev->dev; + struct nicpf *nic; + int err; + + BUILD_BUG_ON(sizeof(union nic_mbx) > 16); + + nic = devm_kzalloc(dev, sizeof(*nic), GFP_KERNEL); + if (!nic) + return -ENOMEM; + + pci_set_drvdata(pdev, nic); + + nic->pdev = pdev; + + err = pci_enable_device(pdev); + if (err) { + dev_err(dev, "Failed to enable PCI device\n"); + pci_set_drvdata(pdev, NULL); + return err; + } + + err = pci_request_regions(pdev, DRV_NAME); + if (err) { + dev_err(dev, "PCI request regions failed 0x%x\n", err); + goto err_disable_device; + } + + err = pci_set_dma_mask(pdev, DMA_BIT_MASK(48)); + if (err) { + dev_err(dev, "Unable to get usable DMA configuration\n"); + goto err_release_regions; + } + + err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(48)); + if (err) { + dev_err(dev, "Unable to get 48-bit DMA for consistent allocations\n"); + goto err_release_regions; + } + + /* MAP PF's configuration registers */ + nic->reg_base = pcim_iomap(pdev, PCI_CFG_REG_BAR_NUM, 0); + if (!nic->reg_base) { + dev_err(dev, "Cannot map config register space, aborting\n"); + err = -ENOMEM; + goto err_release_regions; + } + + pci_read_config_byte(pdev, PCI_REVISION_ID, &nic->rev_id); + + nic->node = nic_get_node_id(pdev); + + /* By default set NIC in TNS bypass mode */ + nic->flags &= ~NIC_TNS_ENABLED; + nic_set_lmac_vf_mapping(nic); + + /* Initialize hardware */ + nic_init_hw(nic); + + /* Set RSS TBL size for each VF */ + nic->rss_ind_tbl_size = NIC_MAX_RSS_IDR_TBL_SIZE; + + /* Register interrupts */ + err = nic_register_interrupts(nic); + if (err) + goto err_release_regions; + + /* Configure SRIOV */ + err = nic_sriov_init(pdev, nic); + if (err) + goto err_unregister_interrupts; + + if (nic->flags & NIC_TNS_ENABLED) + return 0; + + /* Register a physical link status poll fn() */ + nic->check_link = alloc_workqueue("check_link_status", + WQ_UNBOUND | WQ_MEM_RECLAIM, 1); + if (!nic->check_link) { + err = -ENOMEM; + goto err_disable_sriov; + } + + INIT_DELAYED_WORK(&nic->dwork, nic_poll_for_link); + queue_delayed_work(nic->check_link, &nic->dwork, 0); + + return 0; + +err_disable_sriov: +err_unregister_interrupts: + nic_unregister_interrupts(nic); +err_release_regions: + pci_release_regions(pdev); +err_disable_device: + pci_disable_device(pdev); + pci_set_drvdata(pdev, NULL); + return err; +} + +static void nic_remove(struct pci_dev *pdev) +{ + struct nicpf *nic = pci_get_drvdata(pdev); + + if (nic->check_link) { + /* Destroy work Queue */ + cancel_delayed_work(&nic->dwork); + flush_workqueue(nic->check_link); + destroy_workqueue(nic->check_link); + } + + nic_unregister_interrupts(nic); + pci_release_regions(pdev); + pci_disable_device(pdev); + pci_set_drvdata(pdev, NULL); +} + +static struct pci_driver nic_driver = { + .name = DRV_NAME, + .id_table = nic_id_table, + .probe = nic_probe, + .remove = nic_remove, + .sriov_configure = nic_sriov_configure, +}; + +static int __init nic_init_module(void) +{ + pr_info("%s, ver %s\n", DRV_NAME, DRV_VERSION); + + return pci_register_driver(&nic_driver); +} + +static void __exit nic_cleanup_module(void) +{ + pci_unregister_driver(&nic_driver); +} + +module_init(nic_init_module); +module_exit(nic_cleanup_module); diff --git a/sys/dev/vnic/nic_reg.h b/sys/dev/vnic/nic_reg.h new file mode 100644 index 000000000000..b66f7e38633f --- /dev/null +++ b/sys/dev/vnic/nic_reg.h @@ -0,0 +1,234 @@ +/* + * Copyright (C) 2015 Cavium Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + * + */ + +#ifndef NIC_REG_H +#define NIC_REG_H + +#define NIC_PF_REG_COUNT 29573 +#define NIC_VF_REG_COUNT 249 + +/* Physical function register offsets */ +#define NIC_PF_CFG (0x0000) +#define NIC_PF_STATUS (0x0010) +#define NIC_PF_INTR_TIMER_CFG (0x0030) +#define NIC_PF_BIST_STATUS (0x0040) +#define NIC_PF_SOFT_RESET (0x0050) +#define NIC_PF_TCP_TIMER (0x0060) +#define NIC_PF_BP_CFG (0x0080) +#define NIC_PF_RRM_CFG (0x0088) +#define NIC_PF_CQM_CF (0x00A0) +#define NIC_PF_CNM_CF (0x00A8) +#define NIC_PF_CNM_STATUS (0x00B0) +#define NIC_PF_CQ_AVG_CFG (0x00C0) +#define NIC_PF_RRM_AVG_CFG (0x00C8) +#define NIC_PF_INTF_0_1_SEND_CFG (0x0200) +#define NIC_PF_INTF_0_1_BP_CFG (0x0208) +#define NIC_PF_INTF_0_1_BP_DIS_0_1 (0x0210) +#define NIC_PF_INTF_0_1_BP_SW_0_1 (0x0220) +#define NIC_PF_RBDR_BP_STATE_0_3 (0x0240) +#define NIC_PF_MAILBOX_INT (0x0410) +#define NIC_PF_MAILBOX_INT_W1S (0x0430) +#define NIC_PF_MAILBOX_ENA_W1C (0x0450) +#define NIC_PF_MAILBOX_ENA_W1S (0x0470) +#define NIC_PF_RX_ETYPE_0_7 (0x0500) +#define NIC_PF_PKIND_0_15_CFG (0x0600) +#define NIC_PF_ECC0_FLIP0 (0x1000) +#define NIC_PF_ECC1_FLIP0 (0x1008) +#define NIC_PF_ECC2_FLIP0 (0x1010) +#define NIC_PF_ECC3_FLIP0 (0x1018) +#define NIC_PF_ECC0_FLIP1 (0x1080) +#define NIC_PF_ECC1_FLIP1 (0x1088) +#define NIC_PF_ECC2_FLIP1 (0x1090) +#define NIC_PF_ECC3_FLIP1 (0x1098) +#define NIC_PF_ECC0_CDIS (0x1100) +#define NIC_PF_ECC1_CDIS (0x1108) +#define NIC_PF_ECC2_CDIS (0x1110) +#define NIC_PF_ECC3_CDIS (0x1118) +#define NIC_PF_BIST0_STATUS (0x1280) +#define NIC_PF_BIST1_STATUS (0x1288) +#define NIC_PF_BIST2_STATUS (0x1290) +#define NIC_PF_BIST3_STATUS (0x1298) +#define NIC_PF_ECC0_SBE_INT (0x2000) +#define NIC_PF_ECC0_SBE_INT_W1S (0x2008) +#define NIC_PF_ECC0_SBE_ENA_W1C (0x2010) +#define NIC_PF_ECC0_SBE_ENA_W1S (0x2018) +#define NIC_PF_ECC0_DBE_INT (0x2100) +#define NIC_PF_ECC0_DBE_INT_W1S (0x2108) +#define NIC_PF_ECC0_DBE_ENA_W1C (0x2110) +#define NIC_PF_ECC0_DBE_ENA_W1S (0x2118) +#define NIC_PF_ECC1_SBE_INT (0x2200) +#define NIC_PF_ECC1_SBE_INT_W1S (0x2208) +#define NIC_PF_ECC1_SBE_ENA_W1C (0x2210) +#define NIC_PF_ECC1_SBE_ENA_W1S (0x2218) +#define NIC_PF_ECC1_DBE_INT (0x2300) +#define NIC_PF_ECC1_DBE_INT_W1S (0x2308) +#define NIC_PF_ECC1_DBE_ENA_W1C (0x2310) +#define NIC_PF_ECC1_DBE_ENA_W1S (0x2318) +#define NIC_PF_ECC2_SBE_INT (0x2400) +#define NIC_PF_ECC2_SBE_INT_W1S (0x2408) +#define NIC_PF_ECC2_SBE_ENA_W1C (0x2410) +#define NIC_PF_ECC2_SBE_ENA_W1S (0x2418) +#define NIC_PF_ECC2_DBE_INT (0x2500) +#define NIC_PF_ECC2_DBE_INT_W1S (0x2508) +#define NIC_PF_ECC2_DBE_ENA_W1C (0x2510) +#define NIC_PF_ECC2_DBE_ENA_W1S (0x2518) +#define NIC_PF_ECC3_SBE_INT (0x2600) +#define NIC_PF_ECC3_SBE_INT_W1S (0x2608) +#define NIC_PF_ECC3_SBE_ENA_W1C (0x2610) +#define NIC_PF_ECC3_SBE_ENA_W1S (0x2618) +#define NIC_PF_ECC3_DBE_INT (0x2700) +#define NIC_PF_ECC3_DBE_INT_W1S (0x2708) +#define NIC_PF_ECC3_DBE_ENA_W1C (0x2710) +#define NIC_PF_ECC3_DBE_ENA_W1S (0x2718) +#define NIC_PF_CPI_0_2047_CFG (0x200000) +#define NIC_PF_RSSI_0_4097_RQ (0x220000) +#define NIC_PF_LMAC_0_7_CFG (0x240000) +#define NIC_PF_LMAC_0_7_SW_XOFF (0x242000) +#define NIC_PF_LMAC_0_7_CREDIT (0x244000) +#define NIC_PF_CHAN_0_255_TX_CFG (0x400000) +#define NIC_PF_CHAN_0_255_RX_CFG (0x420000) +#define NIC_PF_CHAN_0_255_SW_XOFF (0x440000) +#define NIC_PF_CHAN_0_255_CREDIT (0x460000) +#define NIC_PF_CHAN_0_255_RX_BP_CFG (0x480000) +#define NIC_PF_SW_SYNC_RX (0x490000) +#define NIC_PF_SW_SYNC_RX_DONE (0x490008) +#define NIC_PF_TL2_0_63_CFG (0x500000) +#define NIC_PF_TL2_0_63_PRI (0x520000) +#define NIC_PF_TL2_0_63_SH_STATUS (0x580000) +#define NIC_PF_TL3A_0_63_CFG (0x5F0000) +#define NIC_PF_TL3_0_255_CFG (0x600000) +#define NIC_PF_TL3_0_255_CHAN (0x620000) +#define NIC_PF_TL3_0_255_PIR (0x640000) +#define NIC_PF_TL3_0_255_SW_XOFF (0x660000) +#define NIC_PF_TL3_0_255_CNM_RATE (0x680000) +#define NIC_PF_TL3_0_255_SH_STATUS (0x6A0000) +#define NIC_PF_TL4A_0_255_CFG (0x6F0000) +#define NIC_PF_TL4_0_1023_CFG (0x800000) +#define NIC_PF_TL4_0_1023_SW_XOFF (0x820000) +#define NIC_PF_TL4_0_1023_SH_STATUS (0x840000) +#define NIC_PF_TL4A_0_1023_CNM_RATE (0x880000) +#define NIC_PF_TL4A_0_1023_CNM_STATUS (0x8A0000) +#define NIC_PF_VF_0_127_MAILBOX_0_1 (0x20002030) +#define NIC_PF_VNIC_0_127_TX_STAT_0_4 (0x20004000) +#define NIC_PF_VNIC_0_127_RX_STAT_0_13 (0x20004100) +#define NIC_PF_QSET_0_127_LOCK_0_15 (0x20006000) +#define NIC_PF_QSET_0_127_CFG (0x20010000) +#define NIC_PF_QSET_0_127_RQ_0_7_CFG (0x20010400) +#define NIC_PF_QSET_0_127_RQ_0_7_DROP_CFG (0x20010420) +#define NIC_PF_QSET_0_127_RQ_0_7_BP_CFG (0x20010500) +#define NIC_PF_QSET_0_127_RQ_0_7_STAT_0_1 (0x20010600) +#define NIC_PF_QSET_0_127_SQ_0_7_CFG (0x20010C00) +#define NIC_PF_QSET_0_127_SQ_0_7_CFG2 (0x20010C08) +#define NIC_PF_QSET_0_127_SQ_0_7_STAT_0_1 (0x20010D00) + +#define NIC_PF_MSIX_VEC_0_18_ADDR (0x000000) +#define NIC_PF_MSIX_VEC_0_CTL (0x000008) +#define NIC_PF_MSIX_PBA_0 (0x0F0000) + +/* Virtual function register offsets */ +#define NIC_VNIC_CFG (0x000020) +#define NIC_VF_PF_MAILBOX_0_1 (0x000130) +#define NIC_VF_INT (0x000200) +#define NIC_VF_INT_W1S (0x000220) +#define NIC_VF_ENA_W1C (0x000240) +#define NIC_VF_ENA_W1S (0x000260) + +#define NIC_VNIC_RSS_CFG (0x0020E0) +#define NIC_VNIC_RSS_KEY_0_4 (0x002200) +#define NIC_VNIC_TX_STAT_0_4 (0x004000) +#define NIC_VNIC_RX_STAT_0_13 (0x004100) +#define NIC_QSET_RQ_GEN_CFG (0x010010) + +#define NIC_QSET_CQ_0_7_CFG (0x010400) +#define NIC_QSET_CQ_0_7_CFG2 (0x010408) +#define NIC_QSET_CQ_0_7_THRESH (0x010410) +#define NIC_QSET_CQ_0_7_BASE (0x010420) +#define NIC_QSET_CQ_0_7_HEAD (0x010428) +#define NIC_QSET_CQ_0_7_TAIL (0x010430) +#define NIC_QSET_CQ_0_7_DOOR (0x010438) +#define NIC_QSET_CQ_0_7_STATUS (0x010440) +#define NIC_QSET_CQ_0_7_STATUS2 (0x010448) +#define NIC_QSET_CQ_0_7_DEBUG (0x010450) + +#define NIC_QSET_RQ_0_7_CFG (0x010600) +#define NIC_QSET_RQ_0_7_STAT_0_1 (0x010700) + +#define NIC_QSET_SQ_0_7_CFG (0x010800) +#define NIC_QSET_SQ_0_7_THRESH (0x010810) +#define NIC_QSET_SQ_0_7_BASE (0x010820) +#define NIC_QSET_SQ_0_7_HEAD (0x010828) +#define NIC_QSET_SQ_0_7_TAIL (0x010830) +#define NIC_QSET_SQ_0_7_DOOR (0x010838) +#define NIC_QSET_SQ_0_7_STATUS (0x010840) +#define NIC_QSET_SQ_0_7_DEBUG (0x010848) +#define NIC_QSET_SQ_0_7_CNM_CHG (0x010860) +#define NIC_QSET_SQ_0_7_STAT_0_1 (0x010900) + +#define NIC_QSET_RBDR_0_1_CFG (0x010C00) +#define NIC_QSET_RBDR_0_1_THRESH (0x010C10) +#define NIC_QSET_RBDR_0_1_BASE (0x010C20) +#define NIC_QSET_RBDR_0_1_HEAD (0x010C28) +#define NIC_QSET_RBDR_0_1_TAIL (0x010C30) +#define NIC_QSET_RBDR_0_1_DOOR (0x010C38) +#define NIC_QSET_RBDR_0_1_STATUS0 (0x010C40) +#define NIC_QSET_RBDR_0_1_STATUS1 (0x010C48) +#define NIC_QSET_RBDR_0_1_PREFETCH_STATUS (0x010C50) + +#define NIC_VF_MSIX_VECTOR_0_19_ADDR (0x000000) +#define NIC_VF_MSIX_VECTOR_0_19_CTL (0x000008) +#define NIC_VF_MSIX_PBA (0x0F0000) + +/* Offsets within registers */ +#define NIC_MSIX_VEC_SHIFT 4 +#define NIC_Q_NUM_SHIFT 18 +#define NIC_QS_ID_SHIFT 21 +#define NIC_VF_NUM_SHIFT 21 + +/* Port kind configuration register */ +struct pkind_cfg { +#if defined(__BIG_ENDIAN_BITFIELD) + u64 reserved_42_63:22; + u64 hdr_sl:5; /* Header skip length */ + u64 rx_hdr:3; /* TNS Receive header present */ + u64 lenerr_en:1;/* L2 length error check enable */ + u64 reserved_32_32:1; + u64 maxlen:16; /* Max frame size */ + u64 minlen:16; /* Min frame size */ +#elif defined(__LITTLE_ENDIAN_BITFIELD) + u64 minlen:16; + u64 maxlen:16; + u64 reserved_32_32:1; + u64 lenerr_en:1; + u64 rx_hdr:3; + u64 hdr_sl:5; + u64 reserved_42_63:22; +#endif +}; + +#endif /* NIC_REG_H */ diff --git a/sys/dev/vnic/nicvf_main.c b/sys/dev/vnic/nicvf_main.c new file mode 100644 index 000000000000..9e9e0d53a8f0 --- /dev/null +++ b/sys/dev/vnic/nicvf_main.c @@ -0,0 +1,1667 @@ +/* + * Copyright (C) 2015 Cavium Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + * + */ + +#include <linux/module.h> +#include <linux/interrupt.h> +#include <linux/pci.h> +#include <linux/netdevice.h> +#include <linux/if_vlan.h> +#include <linux/etherdevice.h> +#include <linux/ethtool.h> +#include <linux/log2.h> +#include <linux/prefetch.h> +#include <linux/irq.h> + +#include "nic_reg.h" +#include "nic.h" +#include "nicvf_queues.h" +#include "thunder_bgx.h" + +#define DRV_NAME "thunder-nicvf" +#define DRV_VERSION "1.0" + +/* Supported devices */ +static const struct pci_device_id nicvf_id_table[] = { + { PCI_DEVICE_SUB(PCI_VENDOR_ID_CAVIUM, + PCI_DEVICE_ID_THUNDER_NIC_VF, + PCI_VENDOR_ID_CAVIUM, 0xA11E) }, + { PCI_DEVICE_SUB(PCI_VENDOR_ID_CAVIUM, + PCI_DEVICE_ID_THUNDER_PASS1_NIC_VF, + PCI_VENDOR_ID_CAVIUM, 0xA11E) }, + { 0, } /* end of table */ +}; + +MODULE_AUTHOR("Sunil Goutham"); +MODULE_DESCRIPTION("Cavium Thunder NIC Virtual Function Driver"); +MODULE_VERSION(DRV_VERSION); +MODULE_DEVICE_TABLE(pci, nicvf_id_table); + +static int debug = 0x00; +module_param(debug, int, 0644); +MODULE_PARM_DESC(debug, "Debug message level bitmap"); + +static int cpi_alg = CPI_ALG_NONE; +module_param(cpi_alg, int, S_IRUGO); +MODULE_PARM_DESC(cpi_alg, + "PFC algorithm (0=none, 1=VLAN, 2=VLAN16, 3=IP Diffserv)"); + +static inline u8 nicvf_netdev_qidx(struct nicvf *nic, u8 qidx) +{ +#ifdef VNIC_MULTI_QSET_SUPPORT + if (nic->sqs_mode) + return qidx + ((nic->sqs_id + 1) * MAX_CMP_QUEUES_PER_QS); + else +#endif + return qidx; +} + +static inline void nicvf_set_rx_frame_cnt(struct nicvf *nic, + struct sk_buff *skb) +{ + if (skb->len <= 64) + nic->drv_stats.rx_frames_64++; + else if (skb->len <= 127) + nic->drv_stats.rx_frames_127++; + else if (skb->len <= 255) + nic->drv_stats.rx_frames_255++; + else if (skb->len <= 511) + nic->drv_stats.rx_frames_511++; + else if (skb->len <= 1023) + nic->drv_stats.rx_frames_1023++; + else if (skb->len <= 1518) + nic->drv_stats.rx_frames_1518++; + else + nic->drv_stats.rx_frames_jumbo++; +} + +/* The Cavium ThunderX network controller can *only* be found in SoCs + * containing the ThunderX ARM64 CPU implementation. All accesses to the device + * registers on this platform are implicitly strongly ordered with respect + * to memory accesses. So writeq_relaxed() and readq_relaxed() are safe to use + * with no memory barriers in this driver. The readq()/writeq() functions add + * explicit ordering operation which in this case are redundant, and only + * add overhead. + */ + +/* Register read/write APIs */ +void nicvf_reg_write(struct nicvf *nic, u64 offset, u64 val) +{ + writeq_relaxed(val, nic->reg_base + offset); +} + +u64 nicvf_reg_read(struct nicvf *nic, u64 offset) +{ + return readq_relaxed(nic->reg_base + offset); +} + +void nicvf_queue_reg_write(struct nicvf *nic, u64 offset, + u64 qidx, u64 val) +{ + void __iomem *addr = nic->reg_base + offset; + + writeq_relaxed(val, addr + (qidx << NIC_Q_NUM_SHIFT)); +} + +u64 nicvf_queue_reg_read(struct nicvf *nic, u64 offset, u64 qidx) +{ + void __iomem *addr = nic->reg_base + offset; + + return readq_relaxed(addr + (qidx << NIC_Q_NUM_SHIFT)); +} + +/* VF -> PF mailbox communication */ +static void nicvf_write_to_mbx(struct nicvf *nic, union nic_mbx *mbx) +{ + u64 *msg = (u64 *)mbx; + + nicvf_reg_write(nic, NIC_VF_PF_MAILBOX_0_1 + 0, msg[0]); + nicvf_reg_write(nic, NIC_VF_PF_MAILBOX_0_1 + 8, msg[1]); +} + +int nicvf_send_msg_to_pf(struct nicvf *nic, union nic_mbx *mbx) +{ + int timeout = NIC_MBOX_MSG_TIMEOUT; + int sleep = 10; + + nic->pf_acked = false; + nic->pf_nacked = false; + + nicvf_write_to_mbx(nic, mbx); + + /* Wait for previous message to be acked, timeout 2sec */ + while (!nic->pf_acked) { + if (nic->pf_nacked) + return -EINVAL; + msleep(sleep); + if (nic->pf_acked) + break; + timeout -= sleep; + if (!timeout) { + netdev_err(nic->netdev, + "PF didn't ack to mbox msg %d from VF%d\n", + (mbx->msg.msg & 0xFF), nic->vf_id); + return -EBUSY; + } + } + return 0; +} + +/* Checks if VF is able to comminicate with PF +* and also gets the VNIC number this VF is associated to. +*/ +static int nicvf_check_pf_ready(struct nicvf *nic) +{ + union nic_mbx mbx = {}; + + mbx.msg.msg = NIC_MBOX_MSG_READY; + if (nicvf_send_msg_to_pf(nic, &mbx)) { + netdev_err(nic->netdev, + "PF didn't respond to READY msg\n"); + return 0; + } + + return 1; +} + +static void nicvf_read_bgx_stats(struct nicvf *nic, struct bgx_stats_msg *bgx) +{ + if (bgx->rx) + nic->bgx_stats.rx_stats[bgx->idx] = bgx->stats; + else + nic->bgx_stats.tx_stats[bgx->idx] = bgx->stats; +} + +static void nicvf_handle_mbx_intr(struct nicvf *nic) +{ + union nic_mbx mbx = {}; + u64 *mbx_data; + u64 mbx_addr; + int i; + + mbx_addr = NIC_VF_PF_MAILBOX_0_1; + mbx_data = (u64 *)&mbx; + + for (i = 0; i < NIC_PF_VF_MAILBOX_SIZE; i++) { + *mbx_data = nicvf_reg_read(nic, mbx_addr); + mbx_data++; + mbx_addr += sizeof(u64); + } + + netdev_dbg(nic->netdev, "Mbox message: msg: 0x%x\n", mbx.msg.msg); + switch (mbx.msg.msg) { + case NIC_MBOX_MSG_READY: + nic->pf_acked = true; + nic->vf_id = mbx.nic_cfg.vf_id & 0x7F; + nic->tns_mode = mbx.nic_cfg.tns_mode & 0x7F; + nic->node = mbx.nic_cfg.node_id; + ether_addr_copy(nic->netdev->dev_addr, mbx.nic_cfg.mac_addr); +#ifdef VNIC_MULTI_QSET_SUPPORT + nic->sqs_mode = mbx.nic_cfg.sqs_mode; +#endif + nic->loopback_supported = mbx.nic_cfg.loopback_supported; + nic->link_up = false; + nic->duplex = 0; + nic->speed = 0; + break; + case NIC_MBOX_MSG_ACK: + nic->pf_acked = true; + break; + case NIC_MBOX_MSG_NACK: + nic->pf_nacked = true; + break; +#ifdef VNIC_RSS_SUPPORT + case NIC_MBOX_MSG_RSS_SIZE: + nic->rss_info.rss_size = mbx.rss_size.ind_tbl_size; + nic->pf_acked = true; + break; +#endif + case NIC_MBOX_MSG_BGX_STATS: + nicvf_read_bgx_stats(nic, &mbx.bgx_stats); + nic->pf_acked = true; + break; + case NIC_MBOX_MSG_BGX_LINK_CHANGE: + nic->pf_acked = true; + nic->link_up = mbx.link_status.link_up; + nic->duplex = mbx.link_status.duplex; + nic->speed = mbx.link_status.speed; + if (nic->link_up) { + netdev_info(nic->netdev, "%s: Link is Up %d Mbps %s\n", + nic->netdev->name, nic->speed, + nic->duplex == DUPLEX_FULL ? + "Full duplex" : "Half duplex"); + netif_carrier_on(nic->netdev); + netif_tx_start_all_queues(nic->netdev); + } else { + netdev_info(nic->netdev, "%s: Link is Down\n", + nic->netdev->name); + netif_carrier_off(nic->netdev); + netif_tx_stop_all_queues(nic->netdev); + } + break; +#ifdef VNIC_MULTI_QSET_SUPPORT + case NIC_MBOX_MSG_ALLOC_SQS: + nic->sqs_count = mbx.sqs_alloc.qs_count; + nic->pf_acked = true; + break; + case NIC_MBOX_MSG_SNICVF_PTR: + /* Primary VF: make note of secondary VF's pointer + * to be used while packet transmission. + */ + nic->snicvf[mbx.nicvf.sqs_id] = + (struct nicvf *)mbx.nicvf.nicvf; + nic->pf_acked = true; + break; + case NIC_MBOX_MSG_PNICVF_PTR: + /* Secondary VF/Qset: make note of primary VF's pointer + * to be used while packet reception, to handover packet + * to primary VF's netdev. + */ + nic->pnicvf = (struct nicvf *)mbx.nicvf.nicvf; + nic->pf_acked = true; + break; +#endif + default: + netdev_err(nic->netdev, + "Invalid message from PF, msg 0x%x\n", mbx.msg.msg); + break; + } + nicvf_clear_intr(nic, NICVF_INTR_MBOX, 0); +} + +static int nicvf_hw_set_mac_addr(struct nicvf *nic, struct net_device *netdev) +{ + union nic_mbx mbx = {}; + + mbx.mac.msg = NIC_MBOX_MSG_SET_MAC; + mbx.mac.vf_id = nic->vf_id; + ether_addr_copy(mbx.mac.mac_addr, netdev->dev_addr); + + return nicvf_send_msg_to_pf(nic, &mbx); +} + +static void nicvf_config_cpi(struct nicvf *nic) +{ + union nic_mbx mbx = {}; + + mbx.cpi_cfg.msg = NIC_MBOX_MSG_CPI_CFG; + mbx.cpi_cfg.vf_id = nic->vf_id; + mbx.cpi_cfg.cpi_alg = nic->cpi_alg; + mbx.cpi_cfg.rq_cnt = nic->qs->rq_cnt; + + nicvf_send_msg_to_pf(nic, &mbx); +} + +#ifdef VNIC_RSS_SUPPORT +static void nicvf_get_rss_size(struct nicvf *nic) +{ + union nic_mbx mbx = {}; + + mbx.rss_size.msg = NIC_MBOX_MSG_RSS_SIZE; + mbx.rss_size.vf_id = nic->vf_id; + nicvf_send_msg_to_pf(nic, &mbx); +} + +void nicvf_config_rss(struct nicvf *nic) +{ + union nic_mbx mbx = {}; + struct nicvf_rss_info *rss = &nic->rss_info; + int ind_tbl_len = rss->rss_size; + int i, nextq = 0; + + mbx.rss_cfg.vf_id = nic->vf_id; + mbx.rss_cfg.hash_bits = rss->hash_bits; + while (ind_tbl_len) { + mbx.rss_cfg.tbl_offset = nextq; + mbx.rss_cfg.tbl_len = min(ind_tbl_len, + RSS_IND_TBL_LEN_PER_MBX_MSG); + mbx.rss_cfg.msg = mbx.rss_cfg.tbl_offset ? + NIC_MBOX_MSG_RSS_CFG_CONT : NIC_MBOX_MSG_RSS_CFG; + + for (i = 0; i < mbx.rss_cfg.tbl_len; i++) + mbx.rss_cfg.ind_tbl[i] = rss->ind_tbl[nextq++]; + + nicvf_send_msg_to_pf(nic, &mbx); + + ind_tbl_len -= mbx.rss_cfg.tbl_len; + } +} + +void nicvf_set_rss_key(struct nicvf *nic) +{ + struct nicvf_rss_info *rss = &nic->rss_info; + u64 key_addr = NIC_VNIC_RSS_KEY_0_4; + int idx; + + for (idx = 0; idx < RSS_HASH_KEY_SIZE; idx++) { + nicvf_reg_write(nic, key_addr, rss->key[idx]); + key_addr += sizeof(u64); + } +} + +static int nicvf_rss_init(struct nicvf *nic) +{ + struct nicvf_rss_info *rss = &nic->rss_info; + int idx; + + nicvf_get_rss_size(nic); + + if (cpi_alg != CPI_ALG_NONE) { + rss->enable = false; + rss->hash_bits = 0; + return 0; + } + + rss->enable = true; + + /* Using the HW reset value for now */ + rss->key[0] = 0xFEED0BADFEED0BADULL; + rss->key[1] = 0xFEED0BADFEED0BADULL; + rss->key[2] = 0xFEED0BADFEED0BADULL; + rss->key[3] = 0xFEED0BADFEED0BADULL; + rss->key[4] = 0xFEED0BADFEED0BADULL; + + nicvf_set_rss_key(nic); + + rss->cfg = RSS_IP_HASH_ENA | RSS_TCP_HASH_ENA | RSS_UDP_HASH_ENA; + nicvf_reg_write(nic, NIC_VNIC_RSS_CFG, rss->cfg); + + rss->hash_bits = ilog2(rounddown_pow_of_two(rss->rss_size)); + + for (idx = 0; idx < rss->rss_size; idx++) + rss->ind_tbl[idx] = ethtool_rxfh_indir_default(idx, + nic->rx_queues); + nicvf_config_rss(nic); + return 1; +} +#endif + +#ifdef VNIC_MULTI_QSET_SUPPORT +/* Request PF to allocate additional Qsets */ +static void nicvf_request_sqs(struct nicvf *nic) +{ + union nic_mbx mbx = {}; + int sqs; + int sqs_count = nic->sqs_count; + int rx_queues = 0, tx_queues = 0; + + /* Only primary VF should request */ + if (nic->sqs_mode || !nic->sqs_count) + return; + + mbx.sqs_alloc.msg = NIC_MBOX_MSG_ALLOC_SQS; + mbx.sqs_alloc.vf_id = nic->vf_id; + mbx.sqs_alloc.qs_count = nic->sqs_count; + if (nicvf_send_msg_to_pf(nic, &mbx)) { + /* No response from PF */ + nic->sqs_count = 0; + return; + } + + /* Return if no Secondary Qsets available */ + if (!nic->sqs_count) + return; + + if (nic->rx_queues > MAX_RCV_QUEUES_PER_QS) + rx_queues = nic->rx_queues - MAX_RCV_QUEUES_PER_QS; + if (nic->tx_queues > MAX_SND_QUEUES_PER_QS) + tx_queues = nic->tx_queues - MAX_SND_QUEUES_PER_QS; + + /* Set no of Rx/Tx queues in each of the SQsets */ + for (sqs = 0; sqs < nic->sqs_count; sqs++) { + mbx.nicvf.msg = NIC_MBOX_MSG_SNICVF_PTR; + mbx.nicvf.vf_id = nic->vf_id; + mbx.nicvf.sqs_id = sqs; + nicvf_send_msg_to_pf(nic, &mbx); + + nic->snicvf[sqs]->sqs_id = sqs; + if (rx_queues > MAX_RCV_QUEUES_PER_QS) { + nic->snicvf[sqs]->qs->rq_cnt = MAX_RCV_QUEUES_PER_QS; + rx_queues -= MAX_RCV_QUEUES_PER_QS; + } else { + nic->snicvf[sqs]->qs->rq_cnt = rx_queues; + rx_queues = 0; + } + + if (tx_queues > MAX_SND_QUEUES_PER_QS) { + nic->snicvf[sqs]->qs->sq_cnt = MAX_SND_QUEUES_PER_QS; + tx_queues -= MAX_SND_QUEUES_PER_QS; + } else { + nic->snicvf[sqs]->qs->sq_cnt = tx_queues; + tx_queues = 0; + } + + nic->snicvf[sqs]->qs->cq_cnt = + max(nic->snicvf[sqs]->qs->rq_cnt, nic->snicvf[sqs]->qs->sq_cnt); + + /* Initialize secondary Qset's queues and its interrupts */ + nicvf_open(nic->snicvf[sqs]->netdev); + } + + /* Update stack with actual Rx/Tx queue count allocated */ + if (sqs_count != nic->sqs_count) + nicvf_set_real_num_queues(nic->netdev, + nic->tx_queues, nic->rx_queues); +} + +/* Send this Qset's nicvf pointer to PF. + * PF inturn sends primary VF's nicvf struct to secondary Qsets/VFs + * so that packets received by these Qsets can use primary VF's netdev + */ +static void nicvf_send_vf_struct(struct nicvf *nic) +{ + union nic_mbx mbx = {}; + + mbx.nicvf.msg = NIC_MBOX_MSG_NICVF_PTR; + mbx.nicvf.sqs_mode = nic->sqs_mode; + mbx.nicvf.nicvf = (u64)nic; + nicvf_send_msg_to_pf(nic, &mbx); +} + +static void nicvf_get_primary_vf_struct(struct nicvf *nic) +{ + union nic_mbx mbx = {}; + + mbx.nicvf.msg = NIC_MBOX_MSG_PNICVF_PTR; + nicvf_send_msg_to_pf(nic, &mbx); +} +#endif + +int nicvf_set_real_num_queues(struct net_device *netdev, + int tx_queues, int rx_queues) +{ + int err = 0; + + err = netif_set_real_num_tx_queues(netdev, tx_queues); + if (err) { + netdev_err(netdev, + "Failed to set no of Tx queues: %d\n", tx_queues); + return err; + } + + err = netif_set_real_num_rx_queues(netdev, rx_queues); + if (err) + netdev_err(netdev, + "Failed to set no of Rx queues: %d\n", rx_queues); + return err; +} + +static int nicvf_init_resources(struct nicvf *nic) +{ + int err; + union nic_mbx mbx = {}; + + mbx.msg.msg = NIC_MBOX_MSG_CFG_DONE; + + /* Enable Qset */ + nicvf_qset_config(nic, true); + + /* Initialize queues and HW for data transfer */ + err = nicvf_config_data_transfer(nic, true); + if (err) { + netdev_err(nic->netdev, + "Failed to alloc/config VF's QSet resources\n"); + return err; + } + + /* Send VF config done msg to PF */ + nicvf_write_to_mbx(nic, &mbx); + + return 0; +} + +static void nicvf_snd_pkt_handler(struct net_device *netdev, + struct cmp_queue *cq, + struct cqe_send_t *cqe_tx, int cqe_type) +{ + struct sk_buff *skb = NULL; + struct nicvf *nic = netdev_priv(netdev); + struct snd_queue *sq; + struct sq_hdr_subdesc *hdr; + + sq = &nic->qs->sq[cqe_tx->sq_idx]; + + hdr = (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, cqe_tx->sqe_ptr); + if (hdr->subdesc_type != SQ_DESC_TYPE_HEADER) + return; + + netdev_dbg(nic->netdev, + "%s Qset #%d SQ #%d SQ ptr #%d subdesc count %d\n", + __func__, cqe_tx->sq_qs, cqe_tx->sq_idx, + cqe_tx->sqe_ptr, hdr->subdesc_cnt); + + nicvf_put_sq_desc(sq, hdr->subdesc_cnt + 1); + nicvf_check_cqe_tx_errs(nic, cq, cqe_tx); + skb = (struct sk_buff *)sq->skbuff[cqe_tx->sqe_ptr]; + /* For TSO offloaded packets only one head SKB needs to be freed */ + if (skb) { + prefetch(skb); + dev_consume_skb_any(skb); + sq->skbuff[cqe_tx->sqe_ptr] = (u64)NULL; + } +} + +static inline void nicvf_set_rxhash(struct net_device *netdev, + struct cqe_rx_t *cqe_rx, + struct sk_buff *skb) +{ + u8 hash_type; + u32 hash; + + if (!(netdev->features & NETIF_F_RXHASH)) + return; + + switch (cqe_rx->rss_alg) { + case RSS_ALG_TCP_IP: + case RSS_ALG_UDP_IP: + hash_type = PKT_HASH_TYPE_L4; + hash = cqe_rx->rss_tag; + break; + case RSS_ALG_IP: + hash_type = PKT_HASH_TYPE_L3; + hash = cqe_rx->rss_tag; + break; + default: + hash_type = PKT_HASH_TYPE_NONE; + hash = 0; + } + + skb_set_hash(skb, hash, hash_type); +} + +static void nicvf_rcv_pkt_handler(struct net_device *netdev, + struct napi_struct *napi, + struct cmp_queue *cq, + struct cqe_rx_t *cqe_rx, int cqe_type) +{ + struct sk_buff *skb; + struct nicvf *nic = netdev_priv(netdev); + int err = 0; + int rq_idx; + + rq_idx = nicvf_netdev_qidx(nic, cqe_rx->rq_idx); + +#ifdef VNIC_MULTI_QSET_SUPPORT + if (nic->sqs_mode) { + /* Use primary VF's 'nicvf' struct */ + nic = nic->pnicvf; + netdev = nic->netdev; + } +#endif + /* Check for errors */ + err = nicvf_check_cqe_rx_errs(nic, cq, cqe_rx); + if (err && !cqe_rx->rb_cnt) + return; + + skb = nicvf_get_rcv_skb(nic, cqe_rx); + if (!skb) { + netdev_dbg(nic->netdev, "Packet not received\n"); + return; + } + + if (netif_msg_pktdata(nic)) { + netdev_info(nic->netdev, "%s: skb 0x%p, len=%d\n", netdev->name, + skb, skb->len); + print_hex_dump(KERN_INFO, "", DUMP_PREFIX_OFFSET, 16, 1, + skb->data, skb->len, true); + } + + /* If error packet, drop it here */ + if (err) { + dev_kfree_skb_any(skb); + return; + } + + nicvf_set_rx_frame_cnt(nic, skb); + + nicvf_set_rxhash(netdev, cqe_rx, skb); + + skb_record_rx_queue(skb, rq_idx); + if (netdev->hw_features & NETIF_F_RXCSUM) { + /* HW by default verifies TCP/UDP/SCTP checksums */ + skb->ip_summed = CHECKSUM_UNNECESSARY; + } else { + skb_checksum_none_assert(skb); + } + + skb->protocol = eth_type_trans(skb, netdev); + + /* Check for stripped VLAN */ + if (cqe_rx->vlan_found && cqe_rx->vlan_stripped) + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), + ntohs(cqe_rx->vlan_tci)); + + if (napi && (netdev->features & NETIF_F_GRO)) + napi_gro_receive(napi, skb); + else + netif_receive_skb(skb); +} + +static int nicvf_cq_intr_handler(struct net_device *netdev, u8 cq_idx, + struct napi_struct *napi, int budget) +{ + int processed_cqe, work_done = 0, tx_done = 0; + int cqe_count, cqe_head; + struct nicvf *nic = netdev_priv(netdev); + struct queue_set *qs = nic->qs; + struct cmp_queue *cq = &qs->cq[cq_idx]; + struct cqe_rx_t *cq_desc; + struct netdev_queue *txq; + + spin_lock_bh(&cq->lock); +loop: + processed_cqe = 0; + /* Get no of valid CQ entries to process */ + cqe_count = nicvf_queue_reg_read(nic, NIC_QSET_CQ_0_7_STATUS, cq_idx); + cqe_count &= CQ_CQE_COUNT; + if (!cqe_count) + goto done; + + /* Get head of the valid CQ entries */ + cqe_head = nicvf_queue_reg_read(nic, NIC_QSET_CQ_0_7_HEAD, cq_idx) >> 9; + cqe_head &= 0xFFFF; + + netdev_dbg(nic->netdev, "%s CQ%d cqe_count %d cqe_head %d\n", + __func__, cq_idx, cqe_count, cqe_head); + while (processed_cqe < cqe_count) { + /* Get the CQ descriptor */ + cq_desc = (struct cqe_rx_t *)GET_CQ_DESC(cq, cqe_head); + cqe_head++; + cqe_head &= (cq->dmem.q_len - 1); + /* Initiate prefetch for next descriptor */ + prefetch((struct cqe_rx_t *)GET_CQ_DESC(cq, cqe_head)); + + if ((work_done >= budget) && napi && + (cq_desc->cqe_type != CQE_TYPE_SEND)) { + break; + } + + netdev_dbg(nic->netdev, "CQ%d cq_desc->cqe_type %d\n", + cq_idx, cq_desc->cqe_type); + switch (cq_desc->cqe_type) { + case CQE_TYPE_RX: + nicvf_rcv_pkt_handler(netdev, napi, cq, + cq_desc, CQE_TYPE_RX); + work_done++; + break; + case CQE_TYPE_SEND: + nicvf_snd_pkt_handler(netdev, cq, + (void *)cq_desc, CQE_TYPE_SEND); + tx_done++; + break; + case CQE_TYPE_INVALID: + case CQE_TYPE_RX_SPLIT: + case CQE_TYPE_RX_TCP: + case CQE_TYPE_SEND_PTP: + /* Ignore for now */ + break; + } + processed_cqe++; + } + netdev_dbg(nic->netdev, + "%s CQ%d processed_cqe %d work_done %d budget %d\n", + __func__, cq_idx, processed_cqe, work_done, budget); + + /* Ring doorbell to inform H/W to reuse processed CQEs */ + nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_DOOR, + cq_idx, processed_cqe); + + if ((work_done < budget) && napi) + goto loop; + +done: + /* Wakeup TXQ if its stopped earlier due to SQ full */ + if (tx_done) { +#ifdef VNIC_MULTI_QSET_SUPPORT + netdev = nic->pnicvf->netdev; +#endif + txq = netdev_get_tx_queue(netdev, + nicvf_netdev_qidx(nic, cq_idx)); +#ifdef VNIC_MULTI_QSET_SUPPORT + nic = nic->pnicvf; +#endif + if (netif_tx_queue_stopped(txq) && netif_carrier_ok(netdev)) { + netif_tx_start_queue(txq); + nic->drv_stats.txq_wake++; + if (netif_msg_tx_err(nic)) + netdev_warn(netdev, + "%s: Transmit queue wakeup SQ%d\n", + netdev->name, cq_idx); + } + } + + spin_unlock_bh(&cq->lock); + return work_done; +} + +static int nicvf_poll(struct napi_struct *napi, int budget) +{ + u64 cq_head; + int work_done = 0; + struct net_device *netdev = napi->dev; + struct nicvf *nic = netdev_priv(netdev); + struct nicvf_cq_poll *cq; + + cq = container_of(napi, struct nicvf_cq_poll, napi); + work_done = nicvf_cq_intr_handler(netdev, cq->cq_idx, napi, budget); + + if (work_done < budget) { + /* Slow packet rate, exit polling */ + napi_complete(napi); + /* Re-enable interrupts */ + cq_head = nicvf_queue_reg_read(nic, NIC_QSET_CQ_0_7_HEAD, + cq->cq_idx); + nicvf_clear_intr(nic, NICVF_INTR_CQ, cq->cq_idx); + nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_HEAD, + cq->cq_idx, cq_head); + nicvf_enable_intr(nic, NICVF_INTR_CQ, cq->cq_idx); + } + return work_done; +} + +/* Qset error interrupt handler + * + * As of now only CQ errors are handled + */ +static void nicvf_handle_qs_err(unsigned long data) +{ + struct nicvf *nic = (struct nicvf *)data; + struct queue_set *qs = nic->qs; + int qidx; + u64 status; + + netif_tx_disable(nic->netdev); + + /* Check if it is CQ err */ + for (qidx = 0; qidx < qs->cq_cnt; qidx++) { + status = nicvf_queue_reg_read(nic, NIC_QSET_CQ_0_7_STATUS, + qidx); + if (!(status & CQ_ERR_MASK)) + continue; + /* Process already queued CQEs and reconfig CQ */ + nicvf_disable_intr(nic, NICVF_INTR_CQ, qidx); + nicvf_sq_disable(nic, qidx); + nicvf_cq_intr_handler(nic->netdev, qidx, NULL, 0); + nicvf_cmp_queue_config(nic, qs, qidx, true); + nicvf_sq_free_used_descs(nic->netdev, &qs->sq[qidx], qidx); + nicvf_sq_enable(nic, &qs->sq[qidx], qidx); + + nicvf_enable_intr(nic, NICVF_INTR_CQ, qidx); + } + + netif_tx_start_all_queues(nic->netdev); + /* Re-enable Qset error interrupt */ + nicvf_enable_intr(nic, NICVF_INTR_QS_ERR, 0); +} + +static inline void nicvf_dump_intr_status(struct nicvf *nic) +{ + if (netif_msg_intr(nic)) + netdev_info(nic->netdev, "%s: interrupt status 0x%llx\n", + nic->netdev->name, nicvf_reg_read(nic, NIC_VF_INT)); +} + +static irqreturn_t nicvf_misc_intr_handler(int irq, void *nicvf_irq) +{ + struct nicvf *nic = (struct nicvf *)nicvf_irq; + u64 intr; + + nicvf_dump_intr_status(nic); + + intr = nicvf_reg_read(nic, NIC_VF_INT); + /* Check for spurious interrupt */ + if (!(intr & NICVF_INTR_MBOX_MASK)) + return IRQ_HANDLED; + + nicvf_handle_mbx_intr(nic); + + return IRQ_HANDLED; +} + +static irqreturn_t nicvf_intr_handler(int irq, void *cq_irq) +{ + struct nicvf_cq_poll *cq_poll = (struct nicvf_cq_poll *)cq_irq; + struct nicvf *nic = cq_poll->nicvf; + int qidx = cq_poll->cq_idx; + + nicvf_dump_intr_status(nic); + + /* Disable interrupts */ + nicvf_disable_intr(nic, NICVF_INTR_CQ, qidx); + + /* Schedule NAPI */ + napi_schedule(&cq_poll->napi); + + /* Clear interrupt */ + nicvf_clear_intr(nic, NICVF_INTR_CQ, qidx); + + return IRQ_HANDLED; +} + +static irqreturn_t nicvf_rbdr_intr_handler(int irq, void *nicvf_irq) +{ + struct nicvf *nic = (struct nicvf *)nicvf_irq; + u8 qidx; + + + nicvf_dump_intr_status(nic); + + /* Disable RBDR interrupt and schedule softirq */ + for (qidx = 0; qidx < nic->qs->rbdr_cnt; qidx++) { + if (!nicvf_is_intr_enabled(nic, NICVF_INTR_RBDR, qidx)) + continue; + nicvf_disable_intr(nic, NICVF_INTR_RBDR, qidx); + tasklet_hi_schedule(&nic->rbdr_task); + /* Clear interrupt */ + nicvf_clear_intr(nic, NICVF_INTR_RBDR, qidx); + } + + return IRQ_HANDLED; +} + +static irqreturn_t nicvf_qs_err_intr_handler(int irq, void *nicvf_irq) +{ + struct nicvf *nic = (struct nicvf *)nicvf_irq; + + nicvf_dump_intr_status(nic); + + /* Disable Qset err interrupt and schedule softirq */ + nicvf_disable_intr(nic, NICVF_INTR_QS_ERR, 0); + tasklet_hi_schedule(&nic->qs_err_task); + nicvf_clear_intr(nic, NICVF_INTR_QS_ERR, 0); + + return IRQ_HANDLED; +} + +static int nicvf_enable_msix(struct nicvf *nic) +{ + int ret, vec; + + nic->num_vec = NIC_VF_MSIX_VECTORS; + + for (vec = 0; vec < nic->num_vec; vec++) + nic->msix_entries[vec].entry = vec; + + ret = pci_enable_msix(nic->pdev, nic->msix_entries, nic->num_vec); + if (ret) { + netdev_err(nic->netdev, + "Req for #%d msix vectors failed\n", nic->num_vec); + return 0; + } + nic->msix_enabled = 1; + return 1; +} + +static void nicvf_disable_msix(struct nicvf *nic) +{ + if (nic->msix_enabled) { + pci_disable_msix(nic->pdev); + nic->msix_enabled = 0; + nic->num_vec = 0; + } +} + +static int nicvf_register_interrupts(struct nicvf *nic) +{ + int irq, ret = 0; + int vector; + + for_each_cq_irq(irq) + sprintf(nic->irq_name[irq], "NICVF%d CQ%d", + nic->vf_id, irq); + + for_each_sq_irq(irq) + sprintf(nic->irq_name[irq], "NICVF%d SQ%d", + nic->vf_id, irq - NICVF_INTR_ID_SQ); + + for_each_rbdr_irq(irq) + sprintf(nic->irq_name[irq], "NICVF%d RBDR%d", + nic->vf_id, irq - NICVF_INTR_ID_RBDR); + + /* Register CQ interrupts */ + for (irq = 0; irq < nic->qs->cq_cnt; irq++) { + vector = nic->msix_entries[irq].vector; + ret = request_irq(vector, nicvf_intr_handler, + 0, nic->irq_name[irq], nic->napi[irq]); + if (ret) + goto err; + nic->irq_allocated[irq] = true; + } + + /* Register RBDR interrupt */ + for (irq = NICVF_INTR_ID_RBDR; + irq < (NICVF_INTR_ID_RBDR + nic->qs->rbdr_cnt); irq++) { + vector = nic->msix_entries[irq].vector; + ret = request_irq(vector, nicvf_rbdr_intr_handler, + 0, nic->irq_name[irq], nic); + if (ret) + goto err; + nic->irq_allocated[irq] = true; + } + + /* Register QS error interrupt */ + sprintf(nic->irq_name[NICVF_INTR_ID_QS_ERR], + "NICVF%d Qset error", nic->vf_id); + irq = NICVF_INTR_ID_QS_ERR; + ret = request_irq(nic->msix_entries[irq].vector, + nicvf_qs_err_intr_handler, + 0, nic->irq_name[irq], nic); + if (!ret) + nic->irq_allocated[irq] = true; + +err: + if (ret) + netdev_err(nic->netdev, "request_irq failed, vector %d\n", irq); + + return ret; +} + +static void nicvf_unregister_interrupts(struct nicvf *nic) +{ + int irq; + + /* Free registered interrupts */ + for (irq = 0; irq < nic->num_vec; irq++) { + if (!nic->irq_allocated[irq]) + continue; + + if (irq < NICVF_INTR_ID_SQ) + free_irq(nic->msix_entries[irq].vector, nic->napi[irq]); + else + free_irq(nic->msix_entries[irq].vector, nic); + + nic->irq_allocated[irq] = false; + } + + /* Disable MSI-X */ + nicvf_disable_msix(nic); +} + +/* Initialize MSIX vectors and register MISC interrupt. + * Send READY message to PF to check if its alive + */ +static int nicvf_register_misc_interrupt(struct nicvf *nic) +{ + int ret = 0; + int irq = NICVF_INTR_ID_MISC; + + /* Return if mailbox interrupt is already registered */ + if (nic->msix_enabled) + return 0; + + /* Enable MSI-X */ + if (!nicvf_enable_msix(nic)) + return 1; + + sprintf(nic->irq_name[irq], "%s Mbox", "NICVF"); + /* Register Misc interrupt */ + ret = request_irq(nic->msix_entries[irq].vector, + nicvf_misc_intr_handler, 0, nic->irq_name[irq], nic); + + if (ret) + return ret; + nic->irq_allocated[irq] = true; + + /* Enable mailbox interrupt */ + nicvf_enable_intr(nic, NICVF_INTR_MBOX, 0); + + /* Check if VF is able to communicate with PF */ + if (!nicvf_check_pf_ready(nic)) { + nicvf_disable_intr(nic, NICVF_INTR_MBOX, 0); + nicvf_unregister_interrupts(nic); + return 1; + } + + return 0; +} + +static netdev_tx_t nicvf_xmit(struct sk_buff *skb, struct net_device *netdev) +{ + struct nicvf *nic = netdev_priv(netdev); + int qid = skb_get_queue_mapping(skb); + struct netdev_queue *txq = netdev_get_tx_queue(netdev, qid); + + /* Check for minimum packet length */ + if (skb->len <= ETH_HLEN) { + dev_kfree_skb(skb); + return NETDEV_TX_OK; + } + + if (!netif_tx_queue_stopped(txq) && !nicvf_sq_append_skb(nic, skb)) { + netif_tx_stop_queue(txq); + nic->drv_stats.txq_stop++; + if (netif_msg_tx_err(nic)) + netdev_warn(netdev, + "%s: Transmit ring full, stopping SQ%d\n", + netdev->name, qid); + return NETDEV_TX_BUSY; + } + + return NETDEV_TX_OK; +} + +static inline void nicvf_free_cq_poll(struct nicvf *nic) +{ + struct nicvf_cq_poll *cq_poll = NULL; + int qidx; + + for (qidx = 0; qidx < nic->qs->cq_cnt; qidx++) { + cq_poll = nic->napi[qidx]; + if (!cq_poll) + continue; + nic->napi[qidx] = NULL; + kfree(cq_poll); + } +} + +int nicvf_stop(struct net_device *netdev) +{ + int irq, qidx; + struct nicvf *nic = netdev_priv(netdev); + struct queue_set *qs = nic->qs; + struct nicvf_cq_poll *cq_poll = NULL; + union nic_mbx mbx = {}; + + mbx.msg.msg = NIC_MBOX_MSG_SHUTDOWN; + nicvf_send_msg_to_pf(nic, &mbx); + + netif_carrier_off(netdev); + netif_tx_stop_all_queues(nic->netdev); + +#ifdef VNIC_MULTI_QSET_SUPPORT + /* Teardown secondary qsets first */ + if (!nic->sqs_mode) { + for (qidx = 0; qidx < nic->sqs_count; qidx++) { + if (!nic->snicvf[qidx]) + continue; + nicvf_stop(nic->snicvf[qidx]->netdev); + nic->snicvf[qidx] = NULL; + } + } +#endif + + /* Disable RBDR & QS error interrupts */ + for (qidx = 0; qidx < qs->rbdr_cnt; qidx++) { + nicvf_disable_intr(nic, NICVF_INTR_RBDR, qidx); + nicvf_clear_intr(nic, NICVF_INTR_RBDR, qidx); + } + nicvf_disable_intr(nic, NICVF_INTR_QS_ERR, 0); + nicvf_clear_intr(nic, NICVF_INTR_QS_ERR, 0); + + /* Wait for pending IRQ handlers to finish */ + for (irq = 0; irq < nic->num_vec; irq++) + synchronize_irq(nic->msix_entries[irq].vector); + + tasklet_kill(&nic->rbdr_task); + tasklet_kill(&nic->qs_err_task); + if (nic->rb_work_scheduled) + cancel_delayed_work_sync(&nic->rbdr_work); + + for (qidx = 0; qidx < nic->qs->cq_cnt; qidx++) { + cq_poll = nic->napi[qidx]; + if (!cq_poll) + continue; + napi_synchronize(&cq_poll->napi); + /* CQ intr is enabled while napi_complete, + * so disable it now + */ + nicvf_disable_intr(nic, NICVF_INTR_CQ, qidx); + nicvf_clear_intr(nic, NICVF_INTR_CQ, qidx); + napi_disable(&cq_poll->napi); + netif_napi_del(&cq_poll->napi); + } + + netif_tx_disable(netdev); + + /* Free resources */ + nicvf_config_data_transfer(nic, false); + + /* Disable HW Qset */ + nicvf_qset_config(nic, false); + + /* disable mailbox interrupt */ + nicvf_disable_intr(nic, NICVF_INTR_MBOX, 0); + + nicvf_unregister_interrupts(nic); + + nicvf_free_cq_poll(nic); + +#ifdef VNIC_MULTI_QSET_SUPPORT + /* Clear multiqset info */ + nic->pnicvf = nic; + nic->sqs_count = 0; +#endif + + return 0; +} + +int nicvf_open(struct net_device *netdev) +{ + int err, qidx; + struct nicvf *nic = netdev_priv(netdev); + struct queue_set *qs = nic->qs; + struct nicvf_cq_poll *cq_poll = NULL; + + nic->mtu = netdev->mtu; + + netif_carrier_off(netdev); + + err = nicvf_register_misc_interrupt(nic); + if (err) + return err; + + /* Register NAPI handler for processing CQEs */ + for (qidx = 0; qidx < qs->cq_cnt; qidx++) { + cq_poll = kzalloc(sizeof(*cq_poll), GFP_KERNEL); + if (!cq_poll) { + err = -ENOMEM; + goto napi_del; + } + cq_poll->cq_idx = qidx; + cq_poll->nicvf = nic; + netif_napi_add(netdev, &cq_poll->napi, nicvf_poll, + NAPI_POLL_WEIGHT); + napi_enable(&cq_poll->napi); + nic->napi[qidx] = cq_poll; + } + + /* Check if we got MAC address from PF or else generate a radom MAC */ + if (is_zero_ether_addr(netdev->dev_addr)) { + eth_hw_addr_random(netdev); + nicvf_hw_set_mac_addr(nic, netdev); + } + + /* Init tasklet for handling Qset err interrupt */ + tasklet_init(&nic->qs_err_task, nicvf_handle_qs_err, + (unsigned long)nic); + + /* Init RBDR tasklet which will refill RBDR */ + tasklet_init(&nic->rbdr_task, nicvf_rbdr_task, + (unsigned long)nic); + INIT_DELAYED_WORK(&nic->rbdr_work, nicvf_rbdr_work); + + /* Configure CPI alorithm */ + nic->cpi_alg = cpi_alg; + if (!nic->sqs_mode) + nicvf_config_cpi(nic); + +#ifdef VNIC_MULTI_QSET_SUPPORT + nicvf_request_sqs(nic); + if (nic->sqs_mode) + nicvf_get_primary_vf_struct(nic); +#endif + +#ifdef VNIC_RSS_SUPPORT + /* Configure receive side scaling */ + if (!nic->sqs_mode) + nicvf_rss_init(nic); +#endif + + err = nicvf_register_interrupts(nic); + if (err) + goto cleanup; + + /* Initialize the queues */ + err = nicvf_init_resources(nic); + if (err) + goto cleanup; + + /* Make sure queue initialization is written */ + wmb(); + + nicvf_reg_write(nic, NIC_VF_INT, -1); + /* Enable Qset err interrupt */ + nicvf_enable_intr(nic, NICVF_INTR_QS_ERR, 0); + + /* Enable completion queue interrupt */ + for (qidx = 0; qidx < qs->cq_cnt; qidx++) + nicvf_enable_intr(nic, NICVF_INTR_CQ, qidx); + + /* Enable RBDR threshold interrupt */ + for (qidx = 0; qidx < qs->rbdr_cnt; qidx++) + nicvf_enable_intr(nic, NICVF_INTR_RBDR, qidx); + + nic->drv_stats.txq_stop = 0; + nic->drv_stats.txq_wake = 0; + + netif_carrier_on(netdev); + netif_tx_start_all_queues(netdev); + + return 0; +cleanup: + nicvf_disable_intr(nic, NICVF_INTR_MBOX, 0); + nicvf_unregister_interrupts(nic); + tasklet_kill(&nic->qs_err_task); + tasklet_kill(&nic->rbdr_task); +napi_del: + for (qidx = 0; qidx < qs->cq_cnt; qidx++) { + cq_poll = nic->napi[qidx]; + if (!cq_poll) + continue; + napi_disable(&cq_poll->napi); + netif_napi_del(&cq_poll->napi); + } + nicvf_free_cq_poll(nic); + return err; +} + +static int nicvf_update_hw_max_frs(struct nicvf *nic, int mtu) +{ + union nic_mbx mbx = {}; + + mbx.frs.msg = NIC_MBOX_MSG_SET_MAX_FRS; + mbx.frs.max_frs = mtu; + mbx.frs.vf_id = nic->vf_id; + + return nicvf_send_msg_to_pf(nic, &mbx); +} + +static int nicvf_change_mtu(struct net_device *netdev, int new_mtu) +{ + struct nicvf *nic = netdev_priv(netdev); + + if (new_mtu > NIC_HW_MAX_FRS) + return -EINVAL; + + if (new_mtu < NIC_HW_MIN_FRS) + return -EINVAL; + + if (nicvf_update_hw_max_frs(nic, new_mtu)) + return -EINVAL; + netdev->mtu = new_mtu; + nic->mtu = new_mtu; + + return 0; +} + +static int nicvf_set_mac_address(struct net_device *netdev, void *p) +{ + struct sockaddr *addr = p; + struct nicvf *nic = netdev_priv(netdev); + + if (!is_valid_ether_addr(addr->sa_data)) + return -EADDRNOTAVAIL; + + memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len); + + if (nic->msix_enabled) + if (nicvf_hw_set_mac_addr(nic, netdev)) + return -EBUSY; + + return 0; +} + +void nicvf_update_lmac_stats(struct nicvf *nic) +{ + int stat = 0; + union nic_mbx mbx = {}; + + if (!netif_running(nic->netdev)) + return; + + mbx.bgx_stats.msg = NIC_MBOX_MSG_BGX_STATS; + mbx.bgx_stats.vf_id = nic->vf_id; + /* Rx stats */ + mbx.bgx_stats.rx = 1; + while (stat < BGX_RX_STATS_COUNT) { + mbx.bgx_stats.idx = stat; + if (nicvf_send_msg_to_pf(nic, &mbx)) + return; + stat++; + } + + stat = 0; + + /* Tx stats */ + mbx.bgx_stats.rx = 0; + while (stat < BGX_TX_STATS_COUNT) { + mbx.bgx_stats.idx = stat; + if (nicvf_send_msg_to_pf(nic, &mbx)) + return; + stat++; + } +} + +void nicvf_update_stats(struct nicvf *nic) +{ + int qidx; + struct nicvf_hw_stats *stats = &nic->hw_stats; + struct nicvf_drv_stats *drv_stats = &nic->drv_stats; + struct queue_set *qs = nic->qs; + +#define GET_RX_STATS(reg) \ + nicvf_reg_read(nic, NIC_VNIC_RX_STAT_0_13 | (reg << 3)) +#define GET_TX_STATS(reg) \ + nicvf_reg_read(nic, NIC_VNIC_TX_STAT_0_4 | (reg << 3)) + + stats->rx_bytes = GET_RX_STATS(RX_OCTS); + stats->rx_ucast_frames = GET_RX_STATS(RX_UCAST); + stats->rx_bcast_frames = GET_RX_STATS(RX_BCAST); + stats->rx_mcast_frames = GET_RX_STATS(RX_MCAST); + stats->rx_fcs_errors = GET_RX_STATS(RX_FCS); + stats->rx_l2_errors = GET_RX_STATS(RX_L2ERR); + stats->rx_drop_red = GET_RX_STATS(RX_RED); + stats->rx_drop_red_bytes = GET_RX_STATS(RX_RED_OCTS); + stats->rx_drop_overrun = GET_RX_STATS(RX_ORUN); + stats->rx_drop_overrun_bytes = GET_RX_STATS(RX_ORUN_OCTS); + stats->rx_drop_bcast = GET_RX_STATS(RX_DRP_BCAST); + stats->rx_drop_mcast = GET_RX_STATS(RX_DRP_MCAST); + stats->rx_drop_l3_bcast = GET_RX_STATS(RX_DRP_L3BCAST); + stats->rx_drop_l3_mcast = GET_RX_STATS(RX_DRP_L3MCAST); + + stats->tx_bytes_ok = GET_TX_STATS(TX_OCTS); + stats->tx_ucast_frames_ok = GET_TX_STATS(TX_UCAST); + stats->tx_bcast_frames_ok = GET_TX_STATS(TX_BCAST); + stats->tx_mcast_frames_ok = GET_TX_STATS(TX_MCAST); + stats->tx_drops = GET_TX_STATS(TX_DROP); + + drv_stats->tx_frames_ok = stats->tx_ucast_frames_ok + + stats->tx_bcast_frames_ok + + stats->tx_mcast_frames_ok; + drv_stats->rx_drops = stats->rx_drop_red + + stats->rx_drop_overrun; + drv_stats->tx_drops = stats->tx_drops; + + /* Update RQ and SQ stats */ + for (qidx = 0; qidx < qs->rq_cnt; qidx++) + nicvf_update_rq_stats(nic, qidx); + for (qidx = 0; qidx < qs->sq_cnt; qidx++) + nicvf_update_sq_stats(nic, qidx); +} + +static struct rtnl_link_stats64 *nicvf_get_stats64(struct net_device *netdev, + struct rtnl_link_stats64 *stats) +{ + struct nicvf *nic = netdev_priv(netdev); + struct nicvf_hw_stats *hw_stats = &nic->hw_stats; + struct nicvf_drv_stats *drv_stats = &nic->drv_stats; + + nicvf_update_stats(nic); + + stats->rx_bytes = hw_stats->rx_bytes; + stats->rx_packets = drv_stats->rx_frames_ok; + stats->rx_dropped = drv_stats->rx_drops; + stats->multicast = hw_stats->rx_mcast_frames; + + stats->tx_bytes = hw_stats->tx_bytes_ok; + stats->tx_packets = drv_stats->tx_frames_ok; + stats->tx_dropped = drv_stats->tx_drops; + + return stats; +} + +static void nicvf_tx_timeout(struct net_device *dev) +{ + struct nicvf *nic = netdev_priv(dev); + + if (netif_msg_tx_err(nic)) + netdev_warn(dev, "%s: Transmit timed out, resetting\n", + dev->name); + + schedule_work(&nic->reset_task); +} + +static void nicvf_reset_task(struct work_struct *work) +{ + struct nicvf *nic; + + nic = container_of(work, struct nicvf, reset_task); + + if (!netif_running(nic->netdev)) + return; + + nicvf_stop(nic->netdev); + nicvf_open(nic->netdev); + nic->netdev->trans_start = jiffies; +} + +static int nicvf_config_loopback(struct nicvf *nic, + netdev_features_t features) +{ + union nic_mbx mbx = {}; + + mbx.lbk.msg = NIC_MBOX_MSG_LOOPBACK; + mbx.lbk.vf_id = nic->vf_id; + mbx.lbk.enable = (features & NETIF_F_LOOPBACK) != 0; + + return nicvf_send_msg_to_pf(nic, &mbx); +} + +static netdev_features_t nicvf_fix_features(struct net_device *netdev, + netdev_features_t features) +{ + struct nicvf *nic = netdev_priv(netdev); + + if ((features & NETIF_F_LOOPBACK) && + netif_running(netdev) && !nic->loopback_supported) + features &= ~NETIF_F_LOOPBACK; + + return features; +} + +static int nicvf_set_features(struct net_device *netdev, + netdev_features_t features) +{ + struct nicvf *nic = netdev_priv(netdev); + netdev_features_t changed = features ^ netdev->features; + + if (changed & NETIF_F_HW_VLAN_CTAG_RX) + nicvf_config_vlan_stripping(nic, features); + + if ((changed & NETIF_F_LOOPBACK) && netif_running(netdev)) + return nicvf_config_loopback(nic, features); + + return 0; +} + +static const struct net_device_ops nicvf_netdev_ops = { + .ndo_open = nicvf_open, + .ndo_stop = nicvf_stop, + .ndo_start_xmit = nicvf_xmit, + .ndo_change_mtu = nicvf_change_mtu, + .ndo_set_mac_address = nicvf_set_mac_address, + .ndo_get_stats64 = nicvf_get_stats64, + .ndo_tx_timeout = nicvf_tx_timeout, + .ndo_fix_features = nicvf_fix_features, + .ndo_set_features = nicvf_set_features, +}; + +static int nicvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) +{ + struct device *dev = &pdev->dev; + struct net_device *netdev; + struct nicvf *nic; + int err, qcount; + + err = pci_enable_device(pdev); + if (err) { + dev_err(dev, "Failed to enable PCI device\n"); + return err; + } + + err = pci_request_regions(pdev, DRV_NAME); + if (err) { + dev_err(dev, "PCI request regions failed 0x%x\n", err); + goto err_disable_device; + } + + err = pci_set_dma_mask(pdev, DMA_BIT_MASK(48)); + if (err) { + dev_err(dev, "Unable to get usable DMA configuration\n"); + goto err_release_regions; + } + + err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(48)); + if (err) { + dev_err(dev, "unable to get 48-bit DMA for consistent allocations\n"); + goto err_release_regions; + } + + qcount = MAX_CMP_QUEUES_PER_QS; + +#ifdef VNIC_MULTI_QSET_SUPPORT + /* Restrict multiqset support only for host bound VFs */ + if (pdev->is_virtfn) { + /* Set max number of queues per VF */ + qcount = roundup(num_online_cpus(), MAX_CMP_QUEUES_PER_QS); + qcount = min(qcount, + (MAX_SQS_PER_VF + 1) * MAX_CMP_QUEUES_PER_QS); + } +#endif + + netdev = alloc_etherdev_mqs(sizeof(struct nicvf), qcount, qcount); + if (!netdev) { + err = -ENOMEM; + goto err_release_regions; + } + + pci_set_drvdata(pdev, netdev); + + SET_NETDEV_DEV(netdev, &pdev->dev); + + nic = netdev_priv(netdev); + nic->netdev = netdev; + nic->pdev = pdev; + nic->pnicvf = nic; + nic->max_queues = qcount; + + /* MAP VF's configuration registers */ + nic->reg_base = pcim_iomap(pdev, PCI_CFG_REG_BAR_NUM, 0); + if (!nic->reg_base) { + dev_err(dev, "Cannot map config register space, aborting\n"); + err = -ENOMEM; + goto err_free_netdev; + } + + err = nicvf_set_qset_resources(nic); + if (err) + goto err_free_netdev; + + /* Check if PF is alive and get MAC address for this VF */ + err = nicvf_register_misc_interrupt(nic); + if (err) + goto err_free_netdev; + +#ifdef VNIC_MULTI_QSET_SUPPORT + nicvf_send_vf_struct(nic); + + /* Check if this VF is in QS only mode */ + if (nic->sqs_mode) + return 0; +#endif + + err = nicvf_set_real_num_queues(netdev, nic->tx_queues, nic->rx_queues); + if (err) + goto err_unregister_interrupts; + + netdev->hw_features = (NETIF_F_RXCSUM | NETIF_F_IP_CSUM | NETIF_F_SG | + NETIF_F_GRO | + NETIF_F_HW_VLAN_CTAG_RX); +#ifdef VNIC_RSS_SUPPORT + netdev->hw_features |= NETIF_F_RXHASH; +#endif + + netdev->features |= netdev->hw_features; + netdev->hw_features |= NETIF_F_LOOPBACK; + + netdev->vlan_features = NETIF_F_SG | NETIF_F_IP_CSUM; + + netdev->netdev_ops = &nicvf_netdev_ops; + netdev->watchdog_timeo = NICVF_TX_TIMEOUT; + + INIT_WORK(&nic->reset_task, nicvf_reset_task); + + err = register_netdev(netdev); + if (err) { + dev_err(dev, "Failed to register netdevice\n"); + goto err_unregister_interrupts; + } + + nic->msg_enable = debug; + + nicvf_set_ethtool_ops(netdev); + + return 0; + +err_unregister_interrupts: + nicvf_unregister_interrupts(nic); +err_free_netdev: + pci_set_drvdata(pdev, NULL); + free_netdev(netdev); +err_release_regions: + pci_release_regions(pdev); +err_disable_device: + pci_disable_device(pdev); + return err; +} + +static void nicvf_remove(struct pci_dev *pdev) +{ + struct net_device *netdev = pci_get_drvdata(pdev); + struct nicvf *nic = netdev_priv(netdev); + struct net_device *pnetdev = nic->pnicvf->netdev; + + /* Check if this Qset is assigned to different VF. + * If yes, clean primary and all secondary Qsets. + */ + if (pnetdev && (pnetdev->reg_state == NETREG_REGISTERED)) + unregister_netdev(pnetdev); + nicvf_unregister_interrupts(nic); + pci_set_drvdata(pdev, NULL); + free_netdev(netdev); + pci_release_regions(pdev); + pci_disable_device(pdev); +} + +static void nicvf_shutdown(struct pci_dev *pdev) +{ + nicvf_remove(pdev); +} + +static struct pci_driver nicvf_driver = { + .name = DRV_NAME, + .id_table = nicvf_id_table, + .probe = nicvf_probe, + .remove = nicvf_remove, + .shutdown = nicvf_shutdown, +}; + +static int __init nicvf_init_module(void) +{ + pr_info("%s, ver %s\n", DRV_NAME, DRV_VERSION); + + return pci_register_driver(&nicvf_driver); +} + +static void __exit nicvf_cleanup_module(void) +{ + pci_unregister_driver(&nicvf_driver); +} + +module_init(nicvf_init_module); +module_exit(nicvf_cleanup_module); diff --git a/sys/dev/vnic/nicvf_queues.c b/sys/dev/vnic/nicvf_queues.c new file mode 100644 index 000000000000..9fa902403fc5 --- /dev/null +++ b/sys/dev/vnic/nicvf_queues.c @@ -0,0 +1,1467 @@ +/* + * Copyright (C) 2015 Cavium Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + * + */ + +#include <linux/pci.h> +#include <linux/netdevice.h> +#include <linux/ip.h> +#include <linux/etherdevice.h> +#include <net/ip.h> +#include <net/tso.h> + +#include "nic_reg.h" +#include "nic.h" +#include "q_struct.h" +#include "nicvf_queues.h" + +struct rbuf_info { + struct page *page; + void *data; + u64 offset; +}; + +#define GET_RBUF_INFO(x) ((struct rbuf_info *)(x - NICVF_RCV_BUF_ALIGN_BYTES)) + +/* Poll a register for a specific value */ +static int nicvf_poll_reg(struct nicvf *nic, int qidx, + u64 reg, int bit_pos, int bits, int val) +{ + u64 bit_mask; + u64 reg_val; + int timeout = 10; + + bit_mask = (1ULL << bits) - 1; + bit_mask = (bit_mask << bit_pos); + + while (timeout) { + reg_val = nicvf_queue_reg_read(nic, reg, qidx); + if (((reg_val & bit_mask) >> bit_pos) == val) + return 0; + usleep_range(1000, 2000); + timeout--; + } + netdev_err(nic->netdev, "Poll on reg 0x%llx failed\n", reg); + return 1; +} + +/* Allocate memory for a queue's descriptors */ +static int nicvf_alloc_q_desc_mem(struct nicvf *nic, struct q_desc_mem *dmem, + int q_len, int desc_size, int align_bytes) +{ + dmem->q_len = q_len; + dmem->size = (desc_size * q_len) + align_bytes; + /* Save address, need it while freeing */ + dmem->unalign_base = dma_zalloc_coherent(&nic->pdev->dev, dmem->size, + &dmem->dma, GFP_KERNEL); + if (!dmem->unalign_base) + return -ENOMEM; + + /* Align memory address for 'align_bytes' */ + dmem->phys_base = NICVF_ALIGNED_ADDR((u64)dmem->dma, align_bytes); + dmem->base = dmem->unalign_base + (dmem->phys_base - dmem->dma); + return 0; +} + +/* Free queue's descriptor memory */ +static void nicvf_free_q_desc_mem(struct nicvf *nic, struct q_desc_mem *dmem) +{ + if (!dmem) + return; + + dma_free_coherent(&nic->pdev->dev, dmem->size, + dmem->unalign_base, dmem->dma); + dmem->unalign_base = NULL; + dmem->base = NULL; +} + +/* Allocate buffer for packet reception + * HW returns memory address where packet is DMA'ed but not a pointer + * into RBDR ring, so save buffer address at the start of fragment and + * align the start address to a cache aligned address + */ +static inline int nicvf_alloc_rcv_buffer(struct nicvf *nic, gfp_t gfp, + u32 buf_len, u64 **rbuf) +{ + u64 data; + struct rbuf_info *rinfo; + int order = get_order(buf_len); + + /* Check if request can be accomodated in previous allocated page */ + if (nic->rb_page) { + if ((nic->rb_page_offset + buf_len + buf_len) > + (PAGE_SIZE << order)) { + nic->rb_page = NULL; + } else { + nic->rb_page_offset += buf_len; + get_page(nic->rb_page); + } + } + + /* Allocate a new page */ + if (!nic->rb_page) { + nic->rb_page = alloc_pages(gfp | __GFP_COMP | __GFP_NOWARN, + order); + if (!nic->rb_page) { + netdev_err(nic->netdev, + "Failed to allocate new rcv buffer\n"); + return -ENOMEM; + } + nic->rb_page_offset = 0; + } + + data = (u64)page_address(nic->rb_page) + nic->rb_page_offset; + + /* Align buffer addr to cache line i.e 128 bytes */ + rinfo = (struct rbuf_info *)(data + NICVF_RCV_BUF_ALIGN_LEN(data)); + /* Save page address for reference updation */ + rinfo->page = nic->rb_page; + /* Store start address for later retrieval */ + rinfo->data = (void *)data; + /* Store alignment offset */ + rinfo->offset = NICVF_RCV_BUF_ALIGN_LEN(data); + + data += rinfo->offset; + + /* Give next aligned address to hw for DMA */ + *rbuf = (u64 *)(data + NICVF_RCV_BUF_ALIGN_BYTES); + return 0; +} + +/* Retrieve actual buffer start address and build skb for received packet */ +static struct sk_buff *nicvf_rb_ptr_to_skb(struct nicvf *nic, + u64 rb_ptr, int len) +{ + struct sk_buff *skb; + struct rbuf_info *rinfo; + + rb_ptr = (u64)phys_to_virt(rb_ptr); + /* Get buffer start address and alignment offset */ + rinfo = GET_RBUF_INFO(rb_ptr); + + /* Now build an skb to give to stack */ + skb = build_skb(rinfo->data, RCV_FRAG_LEN); + if (!skb) { + put_page(rinfo->page); + return NULL; + } + + /* Set correct skb->data */ + skb_reserve(skb, rinfo->offset + NICVF_RCV_BUF_ALIGN_BYTES); + + prefetch((void *)rb_ptr); + return skb; +} + +/* Allocate RBDR ring and populate receive buffers */ +static int nicvf_init_rbdr(struct nicvf *nic, struct rbdr *rbdr, + int ring_len, int buf_size) +{ + int idx; + u64 *rbuf; + struct rbdr_entry_t *desc; + int err; + + err = nicvf_alloc_q_desc_mem(nic, &rbdr->dmem, ring_len, + sizeof(struct rbdr_entry_t), + NICVF_RCV_BUF_ALIGN_BYTES); + if (err) + return err; + + rbdr->desc = rbdr->dmem.base; + /* Buffer size has to be in multiples of 128 bytes */ + rbdr->dma_size = buf_size; + rbdr->enable = true; + rbdr->thresh = RBDR_THRESH; + + nic->rb_page = NULL; + for (idx = 0; idx < ring_len; idx++) { + err = nicvf_alloc_rcv_buffer(nic, GFP_KERNEL, RCV_FRAG_LEN, + &rbuf); + if (err) + return err; + + desc = GET_RBDR_DESC(rbdr, idx); + desc->buf_addr = virt_to_phys(rbuf) >> NICVF_RCV_BUF_ALIGN; + } + return 0; +} + +/* Free RBDR ring and its receive buffers */ +static void nicvf_free_rbdr(struct nicvf *nic, struct rbdr *rbdr) +{ + int head, tail; + u64 buf_addr; + struct rbdr_entry_t *desc; + struct rbuf_info *rinfo; + + if (!rbdr) + return; + + rbdr->enable = false; + if (!rbdr->dmem.base) + return; + + head = rbdr->head; + tail = rbdr->tail; + + /* Free SKBs */ + while (head != tail) { + desc = GET_RBDR_DESC(rbdr, head); + buf_addr = desc->buf_addr << NICVF_RCV_BUF_ALIGN; + rinfo = GET_RBUF_INFO((u64)phys_to_virt(buf_addr)); + put_page(rinfo->page); + head++; + head &= (rbdr->dmem.q_len - 1); + } + /* Free SKB of tail desc */ + desc = GET_RBDR_DESC(rbdr, tail); + buf_addr = desc->buf_addr << NICVF_RCV_BUF_ALIGN; + rinfo = GET_RBUF_INFO((u64)phys_to_virt(buf_addr)); + put_page(rinfo->page); + + /* Free RBDR ring */ + nicvf_free_q_desc_mem(nic, &rbdr->dmem); +} + +/* Refill receive buffer descriptors with new buffers. + */ +static void nicvf_refill_rbdr(struct nicvf *nic, gfp_t gfp) +{ + struct queue_set *qs = nic->qs; + int rbdr_idx = qs->rbdr_cnt; + int tail, qcount; + int refill_rb_cnt; + struct rbdr *rbdr; + struct rbdr_entry_t *desc; + u64 *rbuf; + int new_rb = 0; + +refill: + if (!rbdr_idx) + return; + rbdr_idx--; + rbdr = &qs->rbdr[rbdr_idx]; + /* Check if it's enabled */ + if (!rbdr->enable) + goto next_rbdr; + + /* Get no of desc's to be refilled */ + qcount = nicvf_queue_reg_read(nic, NIC_QSET_RBDR_0_1_STATUS0, rbdr_idx); + qcount &= 0x7FFFF; + /* Doorbell can be ringed with a max of ring size minus 1 */ + if (qcount >= (qs->rbdr_len - 1)) + goto next_rbdr; + else + refill_rb_cnt = qs->rbdr_len - qcount - 1; + + /* Start filling descs from tail */ + tail = nicvf_queue_reg_read(nic, NIC_QSET_RBDR_0_1_TAIL, rbdr_idx) >> 3; + while (refill_rb_cnt) { + tail++; + tail &= (rbdr->dmem.q_len - 1); + + if (nicvf_alloc_rcv_buffer(nic, gfp, RCV_FRAG_LEN, &rbuf)) + break; + + desc = GET_RBDR_DESC(rbdr, tail); + desc->buf_addr = virt_to_phys(rbuf) >> NICVF_RCV_BUF_ALIGN; + refill_rb_cnt--; + new_rb++; + } + + /* make sure all memory stores are done before ringing doorbell */ + smp_wmb(); + + /* Check if buffer allocation failed */ + if (refill_rb_cnt) + nic->rb_alloc_fail = true; + else + nic->rb_alloc_fail = false; + + /* Notify HW */ + nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_DOOR, + rbdr_idx, new_rb); +next_rbdr: + /* Re-enable RBDR interrupts only if buffer allocation is success */ + if (!nic->rb_alloc_fail && rbdr->enable) + nicvf_enable_intr(nic, NICVF_INTR_RBDR, rbdr_idx); + + if (rbdr_idx) + goto refill; +} + +/* Alloc rcv buffers in non-atomic mode for better success */ +void nicvf_rbdr_work(struct work_struct *work) +{ + struct nicvf *nic = container_of(work, struct nicvf, rbdr_work.work); + + nicvf_refill_rbdr(nic, GFP_KERNEL); + if (nic->rb_alloc_fail) + schedule_delayed_work(&nic->rbdr_work, msecs_to_jiffies(10)); + else + nic->rb_work_scheduled = false; +} + +/* In Softirq context, alloc rcv buffers in atomic mode */ +void nicvf_rbdr_task(unsigned long data) +{ + struct nicvf *nic = (struct nicvf *)data; + + nicvf_refill_rbdr(nic, GFP_ATOMIC); + if (nic->rb_alloc_fail) { + nic->rb_work_scheduled = true; + schedule_delayed_work(&nic->rbdr_work, msecs_to_jiffies(10)); + } +} + +/* Initialize completion queue */ +static int nicvf_init_cmp_queue(struct nicvf *nic, + struct cmp_queue *cq, int q_len) +{ + int err; + + err = nicvf_alloc_q_desc_mem(nic, &cq->dmem, q_len, CMP_QUEUE_DESC_SIZE, + NICVF_CQ_BASE_ALIGN_BYTES); + if (err) + return err; + + cq->desc = cq->dmem.base; + cq->thresh = CMP_QUEUE_CQE_THRESH; + nic->cq_coalesce_usecs = (CMP_QUEUE_TIMER_THRESH * 0.05) - 1; + + return 0; +} + +static void nicvf_free_cmp_queue(struct nicvf *nic, struct cmp_queue *cq) +{ + if (!cq) + return; + if (!cq->dmem.base) + return; + + nicvf_free_q_desc_mem(nic, &cq->dmem); +} + +/* Initialize transmit queue */ +static int nicvf_init_snd_queue(struct nicvf *nic, + struct snd_queue *sq, int q_len) +{ + int err; + + err = nicvf_alloc_q_desc_mem(nic, &sq->dmem, q_len, SND_QUEUE_DESC_SIZE, + NICVF_SQ_BASE_ALIGN_BYTES); + if (err) + return err; + + sq->desc = sq->dmem.base; + sq->skbuff = kcalloc(q_len, sizeof(u64), GFP_KERNEL); + if (!sq->skbuff) + return -ENOMEM; + sq->head = 0; + sq->tail = 0; + atomic_set(&sq->free_cnt, q_len - 1); + sq->thresh = SND_QUEUE_THRESH; + + /* Preallocate memory for TSO segment's header */ + sq->tso_hdrs = dma_alloc_coherent(&nic->pdev->dev, + q_len * TSO_HEADER_SIZE, + &sq->tso_hdrs_phys, GFP_KERNEL); + if (!sq->tso_hdrs) + return -ENOMEM; + + return 0; +} + +static void nicvf_free_snd_queue(struct nicvf *nic, struct snd_queue *sq) +{ + if (!sq) + return; + if (!sq->dmem.base) + return; + + if (sq->tso_hdrs) + dma_free_coherent(&nic->pdev->dev, + sq->dmem.q_len * TSO_HEADER_SIZE, + sq->tso_hdrs, sq->tso_hdrs_phys); + + kfree(sq->skbuff); + nicvf_free_q_desc_mem(nic, &sq->dmem); +} + +static void nicvf_reclaim_snd_queue(struct nicvf *nic, + struct queue_set *qs, int qidx) +{ + /* Disable send queue */ + nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_CFG, qidx, 0); + /* Check if SQ is stopped */ + if (nicvf_poll_reg(nic, qidx, NIC_QSET_SQ_0_7_STATUS, 21, 1, 0x01)) + return; + /* Reset send queue */ + nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_CFG, qidx, NICVF_SQ_RESET); +} + +static void nicvf_reclaim_rcv_queue(struct nicvf *nic, + struct queue_set *qs, int qidx) +{ + union nic_mbx mbx = {}; + + /* Make sure all packets in the pipeline are written back into mem */ + mbx.msg.msg = NIC_MBOX_MSG_RQ_SW_SYNC; + nicvf_send_msg_to_pf(nic, &mbx); +} + +static void nicvf_reclaim_cmp_queue(struct nicvf *nic, + struct queue_set *qs, int qidx) +{ + /* Disable timer threshold (doesn't get reset upon CQ reset */ + nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_CFG2, qidx, 0); + /* Disable completion queue */ + nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_CFG, qidx, 0); + /* Reset completion queue */ + nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_CFG, qidx, NICVF_CQ_RESET); +} + +static void nicvf_reclaim_rbdr(struct nicvf *nic, + struct rbdr *rbdr, int qidx) +{ + u64 tmp, fifo_state; + int timeout = 10; + + /* Save head and tail pointers for feeing up buffers */ + rbdr->head = nicvf_queue_reg_read(nic, + NIC_QSET_RBDR_0_1_HEAD, + qidx) >> 3; + rbdr->tail = nicvf_queue_reg_read(nic, + NIC_QSET_RBDR_0_1_TAIL, + qidx) >> 3; + + /* If RBDR FIFO is in 'FAIL' state then do a reset first + * before relaiming. + */ + fifo_state = nicvf_queue_reg_read(nic, NIC_QSET_RBDR_0_1_STATUS0, qidx); + if (((fifo_state >> 62) & 0x03) == 0x3) + nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_CFG, + qidx, NICVF_RBDR_RESET); + + /* Disable RBDR */ + nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_CFG, qidx, 0); + if (nicvf_poll_reg(nic, qidx, NIC_QSET_RBDR_0_1_STATUS0, 62, 2, 0x00)) + return; + while (1) { + tmp = nicvf_queue_reg_read(nic, + NIC_QSET_RBDR_0_1_PREFETCH_STATUS, + qidx); + if ((tmp & 0xFFFFFFFF) == ((tmp >> 32) & 0xFFFFFFFF)) + break; + usleep_range(1000, 2000); + timeout--; + if (!timeout) { + netdev_err(nic->netdev, + "Failed polling on prefetch status\n"); + return; + } + } + nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_CFG, + qidx, NICVF_RBDR_RESET); + + if (nicvf_poll_reg(nic, qidx, NIC_QSET_RBDR_0_1_STATUS0, 62, 2, 0x02)) + return; + nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_CFG, qidx, 0x00); + if (nicvf_poll_reg(nic, qidx, NIC_QSET_RBDR_0_1_STATUS0, 62, 2, 0x00)) + return; +} + +void nicvf_config_vlan_stripping(struct nicvf *nic, netdev_features_t features) +{ + u64 rq_cfg; +#ifdef VNIC_MULTI_QSET_SUPPORT + int sqs = 0; +#endif + + rq_cfg = nicvf_queue_reg_read(nic, NIC_QSET_RQ_GEN_CFG, 0); + + /* Enable first VLAN stripping */ + if (features & NETIF_F_HW_VLAN_CTAG_RX) + rq_cfg |= (1ULL << 25); + else + rq_cfg &= ~(1ULL << 25); + nicvf_queue_reg_write(nic, NIC_QSET_RQ_GEN_CFG, 0, rq_cfg); + +#ifdef VNIC_MULTI_QSET_SUPPORT + /* Configure Secondary Qsets, if any */ + for (sqs = 0; sqs < nic->sqs_count; sqs++) + if (nic->snicvf[sqs]) + nicvf_queue_reg_write(nic->snicvf[sqs], + NIC_QSET_RQ_GEN_CFG, 0, rq_cfg); +#endif +} + +/* Configures receive queue */ +static void nicvf_rcv_queue_config(struct nicvf *nic, struct queue_set *qs, + int qidx, bool enable) +{ + union nic_mbx mbx = {}; + struct rcv_queue *rq; + struct cmp_queue *cq; + struct rq_cfg rq_cfg; + + rq = &qs->rq[qidx]; + rq->enable = enable; + + /* Disable receive queue */ + nicvf_queue_reg_write(nic, NIC_QSET_RQ_0_7_CFG, qidx, 0); + + if (!rq->enable) { + nicvf_reclaim_rcv_queue(nic, qs, qidx); + return; + } + + rq->cq_qs = qs->vnic_id; + rq->cq_idx = qidx; + rq->start_rbdr_qs = qs->vnic_id; + rq->start_qs_rbdr_idx = qs->rbdr_cnt - 1; + rq->cont_rbdr_qs = qs->vnic_id; + rq->cont_qs_rbdr_idx = qs->rbdr_cnt - 1; + /* all writes of RBDR data to be loaded into L2 Cache as well*/ + rq->caching = 1; + + /* Send a mailbox msg to PF to config RQ */ + mbx.rq.msg = NIC_MBOX_MSG_RQ_CFG; + mbx.rq.qs_num = qs->vnic_id; + mbx.rq.rq_num = qidx; + mbx.rq.cfg = (rq->caching << 26) | (rq->cq_qs << 19) | + (rq->cq_idx << 16) | (rq->cont_rbdr_qs << 9) | + (rq->cont_qs_rbdr_idx << 8) | + (rq->start_rbdr_qs << 1) | (rq->start_qs_rbdr_idx); + nicvf_send_msg_to_pf(nic, &mbx); + + mbx.rq.msg = NIC_MBOX_MSG_RQ_BP_CFG; + mbx.rq.cfg = (1ULL << 63) | (1ULL << 62) | (qs->vnic_id << 0); + nicvf_send_msg_to_pf(nic, &mbx); + + /* RQ drop config + * Enable CQ drop to reserve sufficient CQEs for all tx packets + */ + mbx.rq.msg = NIC_MBOX_MSG_RQ_DROP_CFG; + mbx.rq.cfg = (1ULL << 62) | (RQ_CQ_DROP << 8); + nicvf_send_msg_to_pf(nic, &mbx); + + nicvf_queue_reg_write(nic, NIC_QSET_RQ_GEN_CFG, 0, 0x00); + if (!nic->sqs_mode) + nicvf_config_vlan_stripping(nic, nic->netdev->features); + + /* Enable Receive queue */ + rq_cfg.ena = 1; + rq_cfg.tcp_ena = 0; + nicvf_queue_reg_write(nic, NIC_QSET_RQ_0_7_CFG, qidx, *(u64 *)&rq_cfg); +} + +/* Configures completion queue */ +void nicvf_cmp_queue_config(struct nicvf *nic, struct queue_set *qs, + int qidx, bool enable) +{ + struct cmp_queue *cq; + struct cq_cfg cq_cfg; + + cq = &qs->cq[qidx]; + cq->enable = enable; + + if (!cq->enable) { + nicvf_reclaim_cmp_queue(nic, qs, qidx); + return; + } + + /* Reset completion queue */ + nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_CFG, qidx, NICVF_CQ_RESET); + + if (!cq->enable) + return; + + spin_lock_init(&cq->lock); + /* Set completion queue base address */ + nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_BASE, + qidx, (u64)(cq->dmem.phys_base)); + + /* Enable Completion queue */ + cq_cfg.ena = 1; + cq_cfg.reset = 0; + cq_cfg.caching = 0; + cq_cfg.qsize = CMP_QSIZE; + cq_cfg.avg_con = 0; + nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_CFG, qidx, *(u64 *)&cq_cfg); + + /* Set threshold value for interrupt generation */ + nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_THRESH, qidx, cq->thresh); + nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_CFG2, + qidx, nic->cq_coalesce_usecs); +} + +/* Configures transmit queue */ +static void nicvf_snd_queue_config(struct nicvf *nic, struct queue_set *qs, + int qidx, bool enable) +{ + union nic_mbx mbx = {}; + struct snd_queue *sq; + struct sq_cfg sq_cfg; + + sq = &qs->sq[qidx]; + sq->enable = enable; + + if (!sq->enable) { + nicvf_reclaim_snd_queue(nic, qs, qidx); + return; + } + + /* Reset send queue */ + nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_CFG, qidx, NICVF_SQ_RESET); + + sq->cq_qs = qs->vnic_id; + sq->cq_idx = qidx; + + /* Send a mailbox msg to PF to config SQ */ + mbx.sq.msg = NIC_MBOX_MSG_SQ_CFG; + mbx.sq.qs_num = qs->vnic_id; + mbx.sq.sq_num = qidx; + mbx.sq.sqs_mode = nic->sqs_mode; + mbx.sq.cfg = (sq->cq_qs << 3) | sq->cq_idx; + nicvf_send_msg_to_pf(nic, &mbx); + + /* Set queue base address */ + nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_BASE, + qidx, (u64)(sq->dmem.phys_base)); + + /* Enable send queue & set queue size */ + sq_cfg.ena = 1; + sq_cfg.reset = 0; + sq_cfg.ldwb = 0; + sq_cfg.qsize = SND_QSIZE; + sq_cfg.tstmp_bgx_intf = 0; + nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_CFG, qidx, *(u64 *)&sq_cfg); + + /* Set threshold value for interrupt generation */ + nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_THRESH, qidx, sq->thresh); +} + +/* Configures receive buffer descriptor ring */ +static void nicvf_rbdr_config(struct nicvf *nic, struct queue_set *qs, + int qidx, bool enable) +{ + struct rbdr *rbdr; + struct rbdr_cfg rbdr_cfg; + + rbdr = &qs->rbdr[qidx]; + nicvf_reclaim_rbdr(nic, rbdr, qidx); + if (!enable) + return; + + /* Set descriptor base address */ + nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_BASE, + qidx, (u64)(rbdr->dmem.phys_base)); + + /* Enable RBDR & set queue size */ + /* Buffer size should be in multiples of 128 bytes */ + rbdr_cfg.ena = 1; + rbdr_cfg.reset = 0; + rbdr_cfg.ldwb = 0; + rbdr_cfg.qsize = RBDR_SIZE; + rbdr_cfg.avg_con = 0; + rbdr_cfg.lines = rbdr->dma_size / 128; + nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_CFG, + qidx, *(u64 *)&rbdr_cfg); + + /* Notify HW */ + nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_DOOR, + qidx, qs->rbdr_len - 1); + + /* Set threshold value for interrupt generation */ + nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_THRESH, + qidx, rbdr->thresh - 1); +} + +/* Requests PF to assign and enable Qset */ +void nicvf_qset_config(struct nicvf *nic, bool enable) +{ + union nic_mbx mbx = {}; + struct queue_set *qs = nic->qs; + struct qs_cfg *qs_cfg; + + if (!qs) { + netdev_warn(nic->netdev, + "Qset is still not allocated, don't init queues\n"); + return; + } + + qs->enable = enable; + qs->vnic_id = nic->vf_id; + + /* Send a mailbox msg to PF to config Qset */ + mbx.qs.msg = NIC_MBOX_MSG_QS_CFG; + mbx.qs.num = qs->vnic_id; +#ifdef VNIC_MULTI_QSET_SUPPORT + mbx.qs.sqs_count = nic->sqs_count; +#endif + + mbx.qs.cfg = 0; + qs_cfg = (struct qs_cfg *)&mbx.qs.cfg; + if (qs->enable) { + qs_cfg->ena = 1; +#ifdef __BIG_ENDIAN + qs_cfg->be = 1; +#endif + qs_cfg->vnic = qs->vnic_id; + } + nicvf_send_msg_to_pf(nic, &mbx); +} + +static void nicvf_free_resources(struct nicvf *nic) +{ + int qidx; + struct queue_set *qs = nic->qs; + + /* Free receive buffer descriptor ring */ + for (qidx = 0; qidx < qs->rbdr_cnt; qidx++) + nicvf_free_rbdr(nic, &qs->rbdr[qidx]); + + /* Free completion queue */ + for (qidx = 0; qidx < qs->cq_cnt; qidx++) + nicvf_free_cmp_queue(nic, &qs->cq[qidx]); + + /* Free send queue */ + for (qidx = 0; qidx < qs->sq_cnt; qidx++) + nicvf_free_snd_queue(nic, &qs->sq[qidx]); +} + +static int nicvf_alloc_resources(struct nicvf *nic) +{ + int qidx; + struct queue_set *qs = nic->qs; + + /* Alloc receive buffer descriptor ring */ + for (qidx = 0; qidx < qs->rbdr_cnt; qidx++) { + if (nicvf_init_rbdr(nic, &qs->rbdr[qidx], qs->rbdr_len, + DMA_BUFFER_LEN)) + goto alloc_fail; + } + + /* Alloc send queue */ + for (qidx = 0; qidx < qs->sq_cnt; qidx++) { + if (nicvf_init_snd_queue(nic, &qs->sq[qidx], qs->sq_len)) + goto alloc_fail; + } + + /* Alloc completion queue */ + for (qidx = 0; qidx < qs->cq_cnt; qidx++) { + if (nicvf_init_cmp_queue(nic, &qs->cq[qidx], qs->cq_len)) + goto alloc_fail; + } + + return 0; +alloc_fail: + nicvf_free_resources(nic); + return -ENOMEM; +} + +int nicvf_set_qset_resources(struct nicvf *nic) +{ + struct queue_set *qs; + + qs = devm_kzalloc(&nic->pdev->dev, sizeof(*qs), GFP_KERNEL); + if (!qs) + return -ENOMEM; + nic->qs = qs; + + /* Set count of each queue */ + qs->rbdr_cnt = RBDR_CNT; +#ifdef VNIC_RSS_SUPPORT + qs->rq_cnt = RCV_QUEUE_CNT; +#else + qs->rq_cnt = 1; +#endif + qs->sq_cnt = SND_QUEUE_CNT; + qs->cq_cnt = CMP_QUEUE_CNT; + + /* Set queue lengths */ + qs->rbdr_len = RCV_BUF_COUNT; + qs->sq_len = SND_QUEUE_LEN; + qs->cq_len = CMP_QUEUE_LEN; + + nic->rx_queues = qs->rq_cnt; + nic->tx_queues = qs->sq_cnt; + + return 0; +} + +int nicvf_config_data_transfer(struct nicvf *nic, bool enable) +{ + bool disable = false; + struct queue_set *qs = nic->qs; + int qidx; + + if (!qs) + return 0; + + if (enable) { + if (nicvf_alloc_resources(nic)) + return -ENOMEM; + + for (qidx = 0; qidx < qs->sq_cnt; qidx++) + nicvf_snd_queue_config(nic, qs, qidx, enable); + for (qidx = 0; qidx < qs->cq_cnt; qidx++) + nicvf_cmp_queue_config(nic, qs, qidx, enable); + for (qidx = 0; qidx < qs->rbdr_cnt; qidx++) + nicvf_rbdr_config(nic, qs, qidx, enable); + for (qidx = 0; qidx < qs->rq_cnt; qidx++) + nicvf_rcv_queue_config(nic, qs, qidx, enable); + } else { + for (qidx = 0; qidx < qs->rq_cnt; qidx++) + nicvf_rcv_queue_config(nic, qs, qidx, disable); + for (qidx = 0; qidx < qs->rbdr_cnt; qidx++) + nicvf_rbdr_config(nic, qs, qidx, disable); + for (qidx = 0; qidx < qs->sq_cnt; qidx++) + nicvf_snd_queue_config(nic, qs, qidx, disable); + for (qidx = 0; qidx < qs->cq_cnt; qidx++) + nicvf_cmp_queue_config(nic, qs, qidx, disable); + + nicvf_free_resources(nic); + } + + return 0; +} + +/* Get a free desc from SQ + * returns descriptor ponter & descriptor number + */ +static inline int nicvf_get_sq_desc(struct snd_queue *sq, int desc_cnt) +{ + int qentry; + + qentry = sq->tail; + atomic_sub(desc_cnt, &sq->free_cnt); + sq->tail += desc_cnt; + sq->tail &= (sq->dmem.q_len - 1); + + return qentry; +} + +/* Free descriptor back to SQ for future use */ +void nicvf_put_sq_desc(struct snd_queue *sq, int desc_cnt) +{ + atomic_add(desc_cnt, &sq->free_cnt); + sq->head += desc_cnt; + sq->head &= (sq->dmem.q_len - 1); +} + +static inline int nicvf_get_nxt_sqentry(struct snd_queue *sq, int qentry) +{ + qentry++; + qentry &= (sq->dmem.q_len - 1); + return qentry; +} + +void nicvf_sq_enable(struct nicvf *nic, struct snd_queue *sq, int qidx) +{ + u64 sq_cfg; + + sq_cfg = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_CFG, qidx); + sq_cfg |= NICVF_SQ_EN; + nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_CFG, qidx, sq_cfg); + /* Ring doorbell so that H/W restarts processing SQEs */ + nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_DOOR, qidx, 0); +} + +void nicvf_sq_disable(struct nicvf *nic, int qidx) +{ + u64 sq_cfg; + + sq_cfg = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_CFG, qidx); + sq_cfg &= ~NICVF_SQ_EN; + nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_CFG, qidx, sq_cfg); +} + +void nicvf_sq_free_used_descs(struct net_device *netdev, struct snd_queue *sq, + int qidx) +{ + u64 head, tail; + struct sk_buff *skb; + struct nicvf *nic = netdev_priv(netdev); + struct sq_hdr_subdesc *hdr; + + head = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_HEAD, qidx) >> 4; + tail = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_TAIL, qidx) >> 4; + while (sq->head != head) { + hdr = (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, sq->head); + if (hdr->subdesc_type != SQ_DESC_TYPE_HEADER) { + nicvf_put_sq_desc(sq, 1); + continue; + } + skb = (struct sk_buff *)sq->skbuff[sq->head]; + if (skb) + dev_kfree_skb_any(skb); + atomic64_add(1, (atomic64_t *)&netdev->stats.tx_packets); + atomic64_add(hdr->tot_len, + (atomic64_t *)&netdev->stats.tx_bytes); + nicvf_put_sq_desc(sq, hdr->subdesc_cnt + 1); + } +} + +/* Get the number of SQ descriptors needed to xmit this skb */ +static int nicvf_sq_subdesc_required(struct nicvf *nic, struct sk_buff *skb) +{ + int subdesc_cnt = MIN_SQ_DESC_PER_PKT_XMIT; + + if (skb_shinfo(skb)->gso_size) { + subdesc_cnt = nicvf_tso_count_subdescs(skb); + return subdesc_cnt; + } + + if (skb_shinfo(skb)->nr_frags) + subdesc_cnt += skb_shinfo(skb)->nr_frags; + + return subdesc_cnt; +} + +/* Add SQ HEADER subdescriptor. + * First subdescriptor for every send descriptor. + */ +static inline void +nicvf_sq_add_hdr_subdesc(struct snd_queue *sq, int qentry, + int subdesc_cnt, struct sk_buff *skb, int len) +{ + int proto; + struct sq_hdr_subdesc *hdr; + + hdr = (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, qentry); + sq->skbuff[qentry] = (u64)skb; + + memset(hdr, 0, SND_QUEUE_DESC_SIZE); + hdr->subdesc_type = SQ_DESC_TYPE_HEADER; + /* Enable notification via CQE after processing SQE */ + hdr->post_cqe = 1; + /* No of subdescriptors following this */ + hdr->subdesc_cnt = subdesc_cnt; + hdr->tot_len = len; + + /* Offload checksum calculation to HW */ + if (skb->ip_summed == CHECKSUM_PARTIAL) { + hdr->csum_l3 = 1; /* Enable IP csum calculation */ + hdr->l3_offset = skb_network_offset(skb); + hdr->l4_offset = skb_transport_offset(skb); + + proto = ip_hdr(skb)->protocol; + switch (proto) { + case IPPROTO_TCP: + hdr->csum_l4 = SEND_L4_CSUM_TCP; + break; + case IPPROTO_UDP: + hdr->csum_l4 = SEND_L4_CSUM_UDP; + break; + case IPPROTO_SCTP: + hdr->csum_l4 = SEND_L4_CSUM_SCTP; + break; + } + } +} + +/* SQ GATHER subdescriptor + * Must follow HDR descriptor + */ +static inline void nicvf_sq_add_gather_subdesc(struct snd_queue *sq, int qentry, + int size, u64 data) +{ + struct sq_gather_subdesc *gather; + + qentry &= (sq->dmem.q_len - 1); + gather = (struct sq_gather_subdesc *)GET_SQ_DESC(sq, qentry); + + memset(gather, 0, SND_QUEUE_DESC_SIZE); + gather->subdesc_type = SQ_DESC_TYPE_GATHER; + gather->ld_type = NIC_SEND_LD_TYPE_E_LDD; + gather->size = size; + gather->addr = data; +} + +/* Append an skb to a SQ for packet transfer. */ +int nicvf_sq_append_skb(struct nicvf *nic, struct sk_buff *skb) +{ + int i, size; + int subdesc_cnt; + int sq_num, qentry; + struct queue_set *qs; + struct snd_queue *sq; + + sq_num = skb_get_queue_mapping(skb); +#ifdef VNIC_MULTI_QSET_SUPPORT + if (sq_num >= MAX_SND_QUEUES_PER_QS) { + /* Get secondary Qset's SQ structure */ + i = sq_num / MAX_SND_QUEUES_PER_QS; + if (!nic->snicvf[i - 1]) { + netdev_warn(nic->netdev, + "Secondary Qset#%d's ptr not initialized\n", + i - 1); + return 1; + } + nic = (struct nicvf *)nic->snicvf[i - 1]; + sq_num = sq_num % MAX_SND_QUEUES_PER_QS; + } +#endif + + qs = nic->qs; + sq = &qs->sq[sq_num]; + + subdesc_cnt = nicvf_sq_subdesc_required(nic, skb); + if (subdesc_cnt > atomic_read(&sq->free_cnt)) + goto append_fail; + + qentry = nicvf_get_sq_desc(sq, subdesc_cnt); + + /* Add SQ header subdesc */ + nicvf_sq_add_hdr_subdesc(sq, qentry, subdesc_cnt - 1, skb, skb->len); + + /* Add SQ gather subdescs */ + qentry = nicvf_get_nxt_sqentry(sq, qentry); + size = skb_is_nonlinear(skb) ? skb_headlen(skb) : skb->len; + nicvf_sq_add_gather_subdesc(sq, qentry, size, virt_to_phys(skb->data)); + + /* Check for scattered buffer */ + if (!skb_is_nonlinear(skb)) + goto doorbell; + + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { + const struct skb_frag_struct *frag; + + frag = &skb_shinfo(skb)->frags[i]; + + qentry = nicvf_get_nxt_sqentry(sq, qentry); + size = skb_frag_size(frag); + nicvf_sq_add_gather_subdesc(sq, qentry, size, + virt_to_phys( + skb_frag_address(frag))); + } + +doorbell: + /* make sure all memory stores are done before ringing doorbell */ + smp_wmb(); + + /* Inform HW to xmit new packet */ + nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_DOOR, + sq_num, subdesc_cnt); + return 1; + +append_fail: + /* Use original PCI dev for debug log */ + nic = nic->pnicvf; + netdev_dbg(nic->netdev, "Not enough SQ descriptors to xmit pkt\n"); + return 0; +} + +static inline unsigned frag_num(unsigned i) +{ +#ifdef __BIG_ENDIAN + return (i & ~3) + 3 - (i & 3); +#else + return i; +#endif +} + +/* Returns SKB for a received packet */ +struct sk_buff *nicvf_get_rcv_skb(struct nicvf *nic, struct cqe_rx_t *cqe_rx) +{ + int frag; + int payload_len = 0; + struct sk_buff *skb = NULL; + struct sk_buff *skb_frag = NULL; + struct sk_buff *prev_frag = NULL; + u16 *rb_lens = NULL; + u64 *rb_ptrs = NULL; + + rb_lens = (void *)cqe_rx + (3 * sizeof(u64)); + rb_ptrs = (void *)cqe_rx + (6 * sizeof(u64)); + + netdev_dbg(nic->netdev, "%s rb_cnt %d rb0_ptr %llx rb0_sz %d\n", + __func__, cqe_rx->rb_cnt, cqe_rx->rb0_ptr, cqe_rx->rb0_sz); + + for (frag = 0; frag < cqe_rx->rb_cnt; frag++) { + payload_len = rb_lens[frag_num(frag)]; + if (!frag) { + /* First fragment */ + skb = nicvf_rb_ptr_to_skb(nic, + *rb_ptrs - cqe_rx->align_pad, + payload_len); + if (!skb) + return NULL; + skb_reserve(skb, cqe_rx->align_pad); + skb_put(skb, payload_len); + } else { + /* Add fragments */ + skb_frag = nicvf_rb_ptr_to_skb(nic, *rb_ptrs, + payload_len); + if (!skb_frag) { + dev_kfree_skb(skb); + return NULL; + } + + if (!skb_shinfo(skb)->frag_list) + skb_shinfo(skb)->frag_list = skb_frag; + else + prev_frag->next = skb_frag; + + prev_frag = skb_frag; + skb->len += payload_len; + skb->data_len += payload_len; + skb_frag->len = payload_len; + } + /* Next buffer pointer */ + rb_ptrs++; + } + return skb; +} + +/* Enable interrupt */ +void nicvf_enable_intr(struct nicvf *nic, int int_type, int q_idx) +{ + u64 reg_val; + + reg_val = nicvf_reg_read(nic, NIC_VF_ENA_W1S); + + switch (int_type) { + case NICVF_INTR_CQ: + reg_val |= ((1ULL << q_idx) << NICVF_INTR_CQ_SHIFT); + break; + case NICVF_INTR_SQ: + reg_val |= ((1ULL << q_idx) << NICVF_INTR_SQ_SHIFT); + break; + case NICVF_INTR_RBDR: + reg_val |= ((1ULL << q_idx) << NICVF_INTR_RBDR_SHIFT); + break; + case NICVF_INTR_PKT_DROP: + reg_val |= (1ULL << NICVF_INTR_PKT_DROP_SHIFT); + break; + case NICVF_INTR_TCP_TIMER: + reg_val |= (1ULL << NICVF_INTR_TCP_TIMER_SHIFT); + break; + case NICVF_INTR_MBOX: + reg_val |= (1ULL << NICVF_INTR_MBOX_SHIFT); + break; + case NICVF_INTR_QS_ERR: + reg_val |= (1ULL << NICVF_INTR_QS_ERR_SHIFT); + break; + default: + netdev_err(nic->netdev, + "Failed to enable interrupt: unknown type\n"); + break; + } + + nicvf_reg_write(nic, NIC_VF_ENA_W1S, reg_val); +} + +/* Disable interrupt */ +void nicvf_disable_intr(struct nicvf *nic, int int_type, int q_idx) +{ + u64 reg_val = 0; + + switch (int_type) { + case NICVF_INTR_CQ: + reg_val |= ((1ULL << q_idx) << NICVF_INTR_CQ_SHIFT); + break; + case NICVF_INTR_SQ: + reg_val |= ((1ULL << q_idx) << NICVF_INTR_SQ_SHIFT); + break; + case NICVF_INTR_RBDR: + reg_val |= ((1ULL << q_idx) << NICVF_INTR_RBDR_SHIFT); + break; + case NICVF_INTR_PKT_DROP: + reg_val |= (1ULL << NICVF_INTR_PKT_DROP_SHIFT); + break; + case NICVF_INTR_TCP_TIMER: + reg_val |= (1ULL << NICVF_INTR_TCP_TIMER_SHIFT); + break; + case NICVF_INTR_MBOX: + reg_val |= (1ULL << NICVF_INTR_MBOX_SHIFT); + break; + case NICVF_INTR_QS_ERR: + reg_val |= (1ULL << NICVF_INTR_QS_ERR_SHIFT); + break; + default: + netdev_err(nic->netdev, + "Failed to disable interrupt: unknown type\n"); + break; + } + + nicvf_reg_write(nic, NIC_VF_ENA_W1C, reg_val); +} + +/* Clear interrupt */ +void nicvf_clear_intr(struct nicvf *nic, int int_type, int q_idx) +{ + u64 reg_val = 0; + + switch (int_type) { + case NICVF_INTR_CQ: + reg_val = ((1ULL << q_idx) << NICVF_INTR_CQ_SHIFT); + break; + case NICVF_INTR_SQ: + reg_val = ((1ULL << q_idx) << NICVF_INTR_SQ_SHIFT); + break; + case NICVF_INTR_RBDR: + reg_val = ((1ULL << q_idx) << NICVF_INTR_RBDR_SHIFT); + break; + case NICVF_INTR_PKT_DROP: + reg_val = (1ULL << NICVF_INTR_PKT_DROP_SHIFT); + break; + case NICVF_INTR_TCP_TIMER: + reg_val = (1ULL << NICVF_INTR_TCP_TIMER_SHIFT); + break; + case NICVF_INTR_MBOX: + reg_val = (1ULL << NICVF_INTR_MBOX_SHIFT); + break; + case NICVF_INTR_QS_ERR: + reg_val |= (1ULL << NICVF_INTR_QS_ERR_SHIFT); + break; + default: + netdev_err(nic->netdev, + "Failed to clear interrupt: unknown type\n"); + break; + } + + nicvf_reg_write(nic, NIC_VF_INT, reg_val); +} + +/* Check if interrupt is enabled */ +int nicvf_is_intr_enabled(struct nicvf *nic, int int_type, int q_idx) +{ + u64 reg_val; + u64 mask = 0xff; + + reg_val = nicvf_reg_read(nic, NIC_VF_ENA_W1S); + + switch (int_type) { + case NICVF_INTR_CQ: + mask = ((1ULL << q_idx) << NICVF_INTR_CQ_SHIFT); + break; + case NICVF_INTR_SQ: + mask = ((1ULL << q_idx) << NICVF_INTR_SQ_SHIFT); + break; + case NICVF_INTR_RBDR: + mask = ((1ULL << q_idx) << NICVF_INTR_RBDR_SHIFT); + break; + case NICVF_INTR_PKT_DROP: + mask = NICVF_INTR_PKT_DROP_MASK; + break; + case NICVF_INTR_TCP_TIMER: + mask = NICVF_INTR_TCP_TIMER_MASK; + break; + case NICVF_INTR_MBOX: + mask = NICVF_INTR_MBOX_MASK; + break; + case NICVF_INTR_QS_ERR: + mask = NICVF_INTR_QS_ERR_MASK; + break; + default: + netdev_err(nic->netdev, + "Failed to check interrupt enable: unknown type\n"); + break; + } + + return (reg_val & mask); +} + +void nicvf_update_rq_stats(struct nicvf *nic, int rq_idx) +{ + struct rcv_queue *rq; + +#define GET_RQ_STATS(reg) \ + nicvf_reg_read(nic, NIC_QSET_RQ_0_7_STAT_0_1 |\ + (rq_idx << NIC_Q_NUM_SHIFT) | (reg << 3)) + + rq = &nic->qs->rq[rq_idx]; + rq->stats.bytes = GET_RQ_STATS(RQ_SQ_STATS_OCTS); + rq->stats.pkts = GET_RQ_STATS(RQ_SQ_STATS_PKTS); +} + +void nicvf_update_sq_stats(struct nicvf *nic, int sq_idx) +{ + struct snd_queue *sq; + +#define GET_SQ_STATS(reg) \ + nicvf_reg_read(nic, NIC_QSET_SQ_0_7_STAT_0_1 |\ + (sq_idx << NIC_Q_NUM_SHIFT) | (reg << 3)) + + sq = &nic->qs->sq[sq_idx]; + sq->stats.bytes = GET_SQ_STATS(RQ_SQ_STATS_OCTS); + sq->stats.pkts = GET_SQ_STATS(RQ_SQ_STATS_PKTS); +} + +/* Check for errors in the receive cmp.queue entry */ +int nicvf_check_cqe_rx_errs(struct nicvf *nic, + struct cmp_queue *cq, struct cqe_rx_t *cqe_rx) +{ + struct nicvf_hw_stats *stats = &nic->hw_stats; + struct nicvf_drv_stats *drv_stats = &nic->drv_stats; + + if (!cqe_rx->err_level && !cqe_rx->err_opcode) { + drv_stats->rx_frames_ok++; + return 0; + } + + if (netif_msg_rx_err(nic)) + netdev_err(nic->netdev, + "%s: RX error CQE err_level 0x%x err_opcode 0x%x\n", + nic->netdev->name, + cqe_rx->err_level, cqe_rx->err_opcode); + + switch (cqe_rx->err_opcode) { + case CQ_RX_ERROP_RE_PARTIAL: + stats->rx_bgx_truncated_pkts++; + break; + case CQ_RX_ERROP_RE_JABBER: + stats->rx_jabber_errs++; + break; + case CQ_RX_ERROP_RE_FCS: + stats->rx_fcs_errs++; + break; + case CQ_RX_ERROP_RE_RX_CTL: + stats->rx_bgx_errs++; + break; + case CQ_RX_ERROP_PREL2_ERR: + stats->rx_prel2_errs++; + break; + case CQ_RX_ERROP_L2_MAL: + stats->rx_l2_hdr_malformed++; + break; + case CQ_RX_ERROP_L2_OVERSIZE: + stats->rx_oversize++; + break; + case CQ_RX_ERROP_L2_UNDERSIZE: + stats->rx_undersize++; + break; + case CQ_RX_ERROP_L2_LENMISM: + stats->rx_l2_len_mismatch++; + break; + case CQ_RX_ERROP_L2_PCLP: + stats->rx_l2_pclp++; + break; + case CQ_RX_ERROP_IP_NOT: + stats->rx_ip_ver_errs++; + break; + case CQ_RX_ERROP_IP_CSUM_ERR: + stats->rx_ip_csum_errs++; + break; + case CQ_RX_ERROP_IP_MAL: + stats->rx_ip_hdr_malformed++; + break; + case CQ_RX_ERROP_IP_MALD: + stats->rx_ip_payload_malformed++; + break; + case CQ_RX_ERROP_IP_HOP: + stats->rx_ip_ttl_errs++; + break; + case CQ_RX_ERROP_L3_PCLP: + stats->rx_l3_pclp++; + break; + case CQ_RX_ERROP_L4_MAL: + stats->rx_l4_malformed++; + break; + case CQ_RX_ERROP_L4_CHK: + stats->rx_l4_csum_errs++; + break; + case CQ_RX_ERROP_UDP_LEN: + stats->rx_udp_len_errs++; + break; + case CQ_RX_ERROP_L4_PORT: + stats->rx_l4_port_errs++; + break; + case CQ_RX_ERROP_TCP_FLAG: + stats->rx_tcp_flag_errs++; + break; + case CQ_RX_ERROP_TCP_OFFSET: + stats->rx_tcp_offset_errs++; + break; + case CQ_RX_ERROP_L4_PCLP: + stats->rx_l4_pclp++; + break; + case CQ_RX_ERROP_RBDR_TRUNC: + stats->rx_truncated_pkts++; + break; + } + + return 1; +} + +/* Check for errors in the send cmp.queue entry */ +int nicvf_check_cqe_tx_errs(struct nicvf *nic, + struct cmp_queue *cq, struct cqe_send_t *cqe_tx) +{ + struct cmp_queue_stats *stats = &cq->stats; + + switch (cqe_tx->send_status) { + case CQ_TX_ERROP_GOOD: + stats->tx.good++; + return 0; + case CQ_TX_ERROP_DESC_FAULT: + stats->tx.desc_fault++; + break; + case CQ_TX_ERROP_HDR_CONS_ERR: + stats->tx.hdr_cons_err++; + break; + case CQ_TX_ERROP_SUBDC_ERR: + stats->tx.subdesc_err++; + break; + case CQ_TX_ERROP_IMM_SIZE_OFLOW: + stats->tx.imm_size_oflow++; + break; + case CQ_TX_ERROP_DATA_SEQUENCE_ERR: + stats->tx.data_seq_err++; + break; + case CQ_TX_ERROP_MEM_SEQUENCE_ERR: + stats->tx.mem_seq_err++; + break; + case CQ_TX_ERROP_LOCK_VIOL: + stats->tx.lock_viol++; + break; + case CQ_TX_ERROP_DATA_FAULT: + stats->tx.data_fault++; + break; + case CQ_TX_ERROP_TSTMP_CONFLICT: + stats->tx.tstmp_conflict++; + break; + case CQ_TX_ERROP_TSTMP_TIMEOUT: + stats->tx.tstmp_timeout++; + break; + case CQ_TX_ERROP_MEM_FAULT: + stats->tx.mem_fault++; + break; + case CQ_TX_ERROP_CK_OVERLAP: + stats->tx.csum_overlap++; + break; + case CQ_TX_ERROP_CK_OFLOW: + stats->tx.csum_overflow++; + break; + } + + return 1; +} diff --git a/sys/dev/vnic/nicvf_queues.h b/sys/dev/vnic/nicvf_queues.h new file mode 100644 index 000000000000..d5842e928453 --- /dev/null +++ b/sys/dev/vnic/nicvf_queues.h @@ -0,0 +1,366 @@ +/* + * Copyright (C) 2015 Cavium Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + * + */ + +#ifndef NICVF_QUEUES_H +#define NICVF_QUEUES_H + +#include <linux/netdevice.h> +#include "q_struct.h" + +#define MAX_QUEUE_SET 128 +#define MAX_RCV_QUEUES_PER_QS 8 +#define MAX_RCV_BUF_DESC_RINGS_PER_QS 2 +#define MAX_SND_QUEUES_PER_QS 8 +#define MAX_CMP_QUEUES_PER_QS 8 + +/* VF's queue interrupt ranges */ +#define NICVF_INTR_ID_CQ 0 +#define NICVF_INTR_ID_SQ 8 +#define NICVF_INTR_ID_RBDR 16 +#define NICVF_INTR_ID_MISC 18 +#define NICVF_INTR_ID_QS_ERR 19 + +#define for_each_cq_irq(irq) \ + for (irq = NICVF_INTR_ID_CQ; irq < NICVF_INTR_ID_SQ; irq++) +#define for_each_sq_irq(irq) \ + for (irq = NICVF_INTR_ID_SQ; irq < NICVF_INTR_ID_RBDR; irq++) +#define for_each_rbdr_irq(irq) \ + for (irq = NICVF_INTR_ID_RBDR; irq < NICVF_INTR_ID_MISC; irq++) + +#define RBDR_SIZE0 0ULL /* 8K entries */ +#define RBDR_SIZE1 1ULL /* 16K entries */ +#define RBDR_SIZE2 2ULL /* 32K entries */ +#define RBDR_SIZE3 3ULL /* 64K entries */ +#define RBDR_SIZE4 4ULL /* 126K entries */ +#define RBDR_SIZE5 5ULL /* 256K entries */ +#define RBDR_SIZE6 6ULL /* 512K entries */ + +#define SND_QUEUE_SIZE0 0ULL /* 1K entries */ +#define SND_QUEUE_SIZE1 1ULL /* 2K entries */ +#define SND_QUEUE_SIZE2 2ULL /* 4K entries */ +#define SND_QUEUE_SIZE3 3ULL /* 8K entries */ +#define SND_QUEUE_SIZE4 4ULL /* 16K entries */ +#define SND_QUEUE_SIZE5 5ULL /* 32K entries */ +#define SND_QUEUE_SIZE6 6ULL /* 64K entries */ + +#define CMP_QUEUE_SIZE0 0ULL /* 1K entries */ +#define CMP_QUEUE_SIZE1 1ULL /* 2K entries */ +#define CMP_QUEUE_SIZE2 2ULL /* 4K entries */ +#define CMP_QUEUE_SIZE3 3ULL /* 8K entries */ +#define CMP_QUEUE_SIZE4 4ULL /* 16K entries */ +#define CMP_QUEUE_SIZE5 5ULL /* 32K entries */ +#define CMP_QUEUE_SIZE6 6ULL /* 64K entries */ + +/* Default queue count per QS, its lengths and threshold values */ +#define RBDR_CNT 1 +#define RCV_QUEUE_CNT 8 +#define SND_QUEUE_CNT 8 +#define CMP_QUEUE_CNT 8 /* Max of RCV and SND qcount */ + +#define SND_QSIZE SND_QUEUE_SIZE2 +#define SND_QUEUE_LEN (1ULL << (SND_QSIZE + 10)) +#define MAX_SND_QUEUE_LEN (1ULL << (SND_QUEUE_SIZE6 + 10)) +#define SND_QUEUE_THRESH 2ULL +#define MIN_SQ_DESC_PER_PKT_XMIT 2 +/* Since timestamp not enabled, otherwise 2 */ +#define MAX_CQE_PER_PKT_XMIT 1 + +/* Keep CQ and SQ sizes same, if timestamping + * is enabled this equation will change. + */ +#define CMP_QSIZE CMP_QUEUE_SIZE2 +#define CMP_QUEUE_LEN (1ULL << (CMP_QSIZE + 10)) +#define CMP_QUEUE_CQE_THRESH 0 +#define CMP_QUEUE_TIMER_THRESH 220 /* 10usec */ + +#define RBDR_SIZE RBDR_SIZE0 +#define RCV_BUF_COUNT (1ULL << (RBDR_SIZE + 13)) +#define MAX_RCV_BUF_COUNT (1ULL << (RBDR_SIZE6 + 13)) +#define RBDR_THRESH (RCV_BUF_COUNT / 2) +#define DMA_BUFFER_LEN 2048 /* In multiples of 128bytes */ +#define RCV_FRAG_LEN (SKB_DATA_ALIGN(DMA_BUFFER_LEN + NET_SKB_PAD) + \ + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) + \ + (NICVF_RCV_BUF_ALIGN_BYTES * 2)) +#define RCV_DATA_OFFSET NICVF_RCV_BUF_ALIGN_BYTES + +#define MAX_CQES_FOR_TX ((SND_QUEUE_LEN / MIN_SQ_DESC_PER_PKT_XMIT) * \ + MAX_CQE_PER_PKT_XMIT) +/* Calculate number of CQEs to reserve for all SQEs. + * Its 1/256th level of CQ size. + * '+ 1' to account for pipelining + */ +#define RQ_CQ_DROP ((256 / (CMP_QUEUE_LEN / \ + (CMP_QUEUE_LEN - MAX_CQES_FOR_TX))) + 1) + +/* Descriptor size in bytes */ +#define SND_QUEUE_DESC_SIZE 16 +#define CMP_QUEUE_DESC_SIZE 512 + +/* Buffer / descriptor alignments */ +#define NICVF_RCV_BUF_ALIGN 7 +#define NICVF_RCV_BUF_ALIGN_BYTES (1ULL << NICVF_RCV_BUF_ALIGN) +#define NICVF_CQ_BASE_ALIGN_BYTES 512 /* 9 bits */ +#define NICVF_SQ_BASE_ALIGN_BYTES 128 /* 7 bits */ + +#define NICVF_ALIGNED_ADDR(ADDR, ALIGN_BYTES) ALIGN(ADDR, ALIGN_BYTES) +#define NICVF_ADDR_ALIGN_LEN(ADDR, BYTES)\ + (NICVF_ALIGNED_ADDR(ADDR, BYTES) - BYTES) +#define NICVF_RCV_BUF_ALIGN_LEN(X)\ + (NICVF_ALIGNED_ADDR(X, NICVF_RCV_BUF_ALIGN_BYTES) - X) + +/* Queue enable/disable */ +#define NICVF_SQ_EN BIT_ULL(19) + +/* Queue reset */ +#define NICVF_CQ_RESET BIT_ULL(41) +#define NICVF_SQ_RESET BIT_ULL(17) +#define NICVF_RBDR_RESET BIT_ULL(43) + +enum CQ_RX_ERRLVL_E { + CQ_ERRLVL_MAC, + CQ_ERRLVL_L2, + CQ_ERRLVL_L3, + CQ_ERRLVL_L4, +}; + +enum CQ_RX_ERROP_E { + CQ_RX_ERROP_RE_NONE = 0x0, + CQ_RX_ERROP_RE_PARTIAL = 0x1, + CQ_RX_ERROP_RE_JABBER = 0x2, + CQ_RX_ERROP_RE_FCS = 0x7, + CQ_RX_ERROP_RE_TERMINATE = 0x9, + CQ_RX_ERROP_RE_RX_CTL = 0xb, + CQ_RX_ERROP_PREL2_ERR = 0x1f, + CQ_RX_ERROP_L2_FRAGMENT = 0x20, + CQ_RX_ERROP_L2_OVERRUN = 0x21, + CQ_RX_ERROP_L2_PFCS = 0x22, + CQ_RX_ERROP_L2_PUNY = 0x23, + CQ_RX_ERROP_L2_MAL = 0x24, + CQ_RX_ERROP_L2_OVERSIZE = 0x25, + CQ_RX_ERROP_L2_UNDERSIZE = 0x26, + CQ_RX_ERROP_L2_LENMISM = 0x27, + CQ_RX_ERROP_L2_PCLP = 0x28, + CQ_RX_ERROP_IP_NOT = 0x41, + CQ_RX_ERROP_IP_CSUM_ERR = 0x42, + CQ_RX_ERROP_IP_MAL = 0x43, + CQ_RX_ERROP_IP_MALD = 0x44, + CQ_RX_ERROP_IP_HOP = 0x45, + CQ_RX_ERROP_L3_ICRC = 0x46, + CQ_RX_ERROP_L3_PCLP = 0x47, + CQ_RX_ERROP_L4_MAL = 0x61, + CQ_RX_ERROP_L4_CHK = 0x62, + CQ_RX_ERROP_UDP_LEN = 0x63, + CQ_RX_ERROP_L4_PORT = 0x64, + CQ_RX_ERROP_TCP_FLAG = 0x65, + CQ_RX_ERROP_TCP_OFFSET = 0x66, + CQ_RX_ERROP_L4_PCLP = 0x67, + CQ_RX_ERROP_RBDR_TRUNC = 0x70, +}; + +enum CQ_TX_ERROP_E { + CQ_TX_ERROP_GOOD = 0x0, + CQ_TX_ERROP_DESC_FAULT = 0x10, + CQ_TX_ERROP_HDR_CONS_ERR = 0x11, + CQ_TX_ERROP_SUBDC_ERR = 0x12, + CQ_TX_ERROP_IMM_SIZE_OFLOW = 0x80, + CQ_TX_ERROP_DATA_SEQUENCE_ERR = 0x81, + CQ_TX_ERROP_MEM_SEQUENCE_ERR = 0x82, + CQ_TX_ERROP_LOCK_VIOL = 0x83, + CQ_TX_ERROP_DATA_FAULT = 0x84, + CQ_TX_ERROP_TSTMP_CONFLICT = 0x85, + CQ_TX_ERROP_TSTMP_TIMEOUT = 0x86, + CQ_TX_ERROP_MEM_FAULT = 0x87, + CQ_TX_ERROP_CK_OVERLAP = 0x88, + CQ_TX_ERROP_CK_OFLOW = 0x89, + CQ_TX_ERROP_ENUM_LAST = 0x8a, +}; + +struct cmp_queue_stats { + struct tx_stats { + u64 good; + u64 desc_fault; + u64 hdr_cons_err; + u64 subdesc_err; + u64 imm_size_oflow; + u64 data_seq_err; + u64 mem_seq_err; + u64 lock_viol; + u64 data_fault; + u64 tstmp_conflict; + u64 tstmp_timeout; + u64 mem_fault; + u64 csum_overlap; + u64 csum_overflow; + } tx; +} ____cacheline_aligned_in_smp; + +enum RQ_SQ_STATS { + RQ_SQ_STATS_OCTS, + RQ_SQ_STATS_PKTS, +}; + +struct rx_tx_queue_stats { + u64 bytes; + u64 pkts; +} ____cacheline_aligned_in_smp; + +struct q_desc_mem { + dma_addr_t dma; + u64 size; + u16 q_len; + dma_addr_t phys_base; + void *base; + void *unalign_base; +}; + +struct rbdr { + bool enable; + u32 dma_size; + u32 frag_len; + u32 thresh; /* Threshold level for interrupt */ + void *desc; + u32 head; + u32 tail; + struct q_desc_mem dmem; +} ____cacheline_aligned_in_smp; + +struct rcv_queue { + bool enable; + struct rbdr *rbdr_start; + struct rbdr *rbdr_cont; + bool en_tcp_reassembly; + u8 cq_qs; /* CQ's QS to which this RQ is assigned */ + u8 cq_idx; /* CQ index (0 to 7) in the QS */ + u8 cont_rbdr_qs; /* Continue buffer ptrs - QS num */ + u8 cont_qs_rbdr_idx; /* RBDR idx in the cont QS */ + u8 start_rbdr_qs; /* First buffer ptrs - QS num */ + u8 start_qs_rbdr_idx; /* RBDR idx in the above QS */ + u8 caching; + struct rx_tx_queue_stats stats; +} ____cacheline_aligned_in_smp; + +struct cmp_queue { + bool enable; + u16 thresh; + spinlock_t lock; /* lock to serialize processing CQEs */ + void *desc; + struct q_desc_mem dmem; + struct cmp_queue_stats stats; + int irq; +} ____cacheline_aligned_in_smp; + +struct snd_queue { + bool enable; + u8 cq_qs; /* CQ's QS to which this SQ is pointing */ + u8 cq_idx; /* CQ index (0 to 7) in the above QS */ + u16 thresh; + atomic_t free_cnt; + u32 head; + u32 tail; + u64 *skbuff; + void *desc; + + struct q_desc_mem dmem; + struct rx_tx_queue_stats stats; +} ____cacheline_aligned_in_smp; + +struct queue_set { + bool enable; + bool be_en; + u8 vnic_id; + u8 rq_cnt; + u8 cq_cnt; + u64 cq_len; + u8 sq_cnt; + u64 sq_len; + u8 rbdr_cnt; + u64 rbdr_len; + struct rcv_queue rq[MAX_RCV_QUEUES_PER_QS]; + struct cmp_queue cq[MAX_CMP_QUEUES_PER_QS]; + struct snd_queue sq[MAX_SND_QUEUES_PER_QS]; + struct rbdr rbdr[MAX_RCV_BUF_DESC_RINGS_PER_QS]; +} ____cacheline_aligned_in_smp; + +#define GET_RBDR_DESC(RING, idx)\ + (&(((struct rbdr_entry_t *)((RING)->desc))[idx])) +#define GET_SQ_DESC(RING, idx)\ + (&(((struct sq_hdr_subdesc *)((RING)->desc))[idx])) +#define GET_CQ_DESC(RING, idx)\ + (&(((union cq_desc_t *)((RING)->desc))[idx])) + +/* CQ status bits */ +#define CQ_WR_FULL BIT(26) +#define CQ_WR_DISABLE BIT(25) +#define CQ_WR_FAULT BIT(24) +#define CQ_CQE_COUNT (0xFFFF << 0) + +#define CQ_ERR_MASK (CQ_WR_FULL | CQ_WR_DISABLE | CQ_WR_FAULT) + +void nicvf_config_vlan_stripping(struct nicvf *nic, + netdev_features_t features); +int nicvf_set_qset_resources(struct nicvf *nic); +int nicvf_config_data_transfer(struct nicvf *nic, bool enable); +void nicvf_qset_config(struct nicvf *nic, bool enable); +void nicvf_cmp_queue_config(struct nicvf *nic, struct queue_set *qs, + int qidx, bool enable); + +void nicvf_sq_enable(struct nicvf *nic, struct snd_queue *sq, int qidx); +void nicvf_sq_disable(struct nicvf *nic, int qidx); +void nicvf_put_sq_desc(struct snd_queue *sq, int desc_cnt); +void nicvf_sq_free_used_descs(struct net_device *netdev, + struct snd_queue *sq, int qidx); +int nicvf_sq_append_skb(struct nicvf *nic, struct sk_buff *skb); + +struct sk_buff *nicvf_get_rcv_skb(struct nicvf *nic, struct cqe_rx_t *cqe_rx); +void nicvf_rbdr_task(unsigned long data); +void nicvf_rbdr_work(struct work_struct *work); + +void nicvf_enable_intr(struct nicvf *nic, int int_type, int q_idx); +void nicvf_disable_intr(struct nicvf *nic, int int_type, int q_idx); +void nicvf_clear_intr(struct nicvf *nic, int int_type, int q_idx); +int nicvf_is_intr_enabled(struct nicvf *nic, int int_type, int q_idx); + +/* Register access APIs */ +void nicvf_reg_write(struct nicvf *nic, u64 offset, u64 val); +u64 nicvf_reg_read(struct nicvf *nic, u64 offset); +void nicvf_qset_reg_write(struct nicvf *nic, u64 offset, u64 val); +u64 nicvf_qset_reg_read(struct nicvf *nic, u64 offset); +void nicvf_queue_reg_write(struct nicvf *nic, u64 offset, + u64 qidx, u64 val); +u64 nicvf_queue_reg_read(struct nicvf *nic, + u64 offset, u64 qidx); + +/* Stats */ +void nicvf_update_rq_stats(struct nicvf *nic, int rq_idx); +void nicvf_update_sq_stats(struct nicvf *nic, int sq_idx); +int nicvf_check_cqe_rx_errs(struct nicvf *nic, + struct cmp_queue *cq, struct cqe_rx_t *cqe_rx); +int nicvf_check_cqe_tx_errs(struct nicvf *nic, + struct cmp_queue *cq, struct cqe_send_t *cqe_tx); +#endif /* NICVF_QUEUES_H */ diff --git a/sys/dev/vnic/q_struct.h b/sys/dev/vnic/q_struct.h new file mode 100644 index 000000000000..6d33c7b097e2 --- /dev/null +++ b/sys/dev/vnic/q_struct.h @@ -0,0 +1,719 @@ +/* + * Copyright (C) 2015 Cavium Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + * + */ + +#ifndef Q_STRUCT_H +#define Q_STRUCT_H + +/* Load transaction types for reading segment bytes specified by + * NIC_SEND_GATHER_S[LD_TYPE]. + */ +enum nic_send_ld_type_e { + NIC_SEND_LD_TYPE_E_LDD = 0x0, + NIC_SEND_LD_TYPE_E_LDT = 0x1, + NIC_SEND_LD_TYPE_E_LDWB = 0x2, + NIC_SEND_LD_TYPE_E_ENUM_LAST = 0x3, +}; + +enum ether_type_algorithm { + ETYPE_ALG_NONE = 0x0, + ETYPE_ALG_SKIP = 0x1, + ETYPE_ALG_ENDPARSE = 0x2, + ETYPE_ALG_VLAN = 0x3, + ETYPE_ALG_VLAN_STRIP = 0x4, +}; + +enum layer3_type { + L3TYPE_NONE = 0x00, + L3TYPE_GRH = 0x01, + L3TYPE_IPV4 = 0x04, + L3TYPE_IPV4_OPTIONS = 0x05, + L3TYPE_IPV6 = 0x06, + L3TYPE_IPV6_OPTIONS = 0x07, + L3TYPE_ET_STOP = 0x0D, + L3TYPE_OTHER = 0x0E, +}; + +enum layer4_type { + L4TYPE_NONE = 0x00, + L4TYPE_IPSEC_ESP = 0x01, + L4TYPE_IPFRAG = 0x02, + L4TYPE_IPCOMP = 0x03, + L4TYPE_TCP = 0x04, + L4TYPE_UDP = 0x05, + L4TYPE_SCTP = 0x06, + L4TYPE_GRE = 0x07, + L4TYPE_ROCE_BTH = 0x08, + L4TYPE_OTHER = 0x0E, +}; + +/* CPI and RSSI configuration */ +enum cpi_algorithm_type { + CPI_ALG_NONE = 0x0, + CPI_ALG_VLAN = 0x1, + CPI_ALG_VLAN16 = 0x2, + CPI_ALG_DIFF = 0x3, +}; + +enum rss_algorithm_type { + RSS_ALG_NONE = 0x00, + RSS_ALG_PORT = 0x01, + RSS_ALG_IP = 0x02, + RSS_ALG_TCP_IP = 0x03, + RSS_ALG_UDP_IP = 0x04, + RSS_ALG_SCTP_IP = 0x05, + RSS_ALG_GRE_IP = 0x06, + RSS_ALG_ROCE = 0x07, +}; + +enum rss_hash_cfg { + RSS_HASH_L2ETC = 0x00, + RSS_HASH_IP = 0x01, + RSS_HASH_TCP = 0x02, + RSS_HASH_TCP_SYN_DIS = 0x03, + RSS_HASH_UDP = 0x04, + RSS_HASH_L4ETC = 0x05, + RSS_HASH_ROCE = 0x06, + RSS_L3_BIDI = 0x07, + RSS_L4_BIDI = 0x08, +}; + +/* Completion queue entry types */ +enum cqe_type { + CQE_TYPE_INVALID = 0x0, + CQE_TYPE_RX = 0x2, + CQE_TYPE_RX_SPLIT = 0x3, + CQE_TYPE_RX_TCP = 0x4, + CQE_TYPE_SEND = 0x8, + CQE_TYPE_SEND_PTP = 0x9, +}; + +enum cqe_rx_tcp_status { + CQE_RX_STATUS_VALID_TCP_CNXT = 0x00, + CQE_RX_STATUS_INVALID_TCP_CNXT = 0x0F, +}; + +enum cqe_send_status { + CQE_SEND_STATUS_GOOD = 0x00, + CQE_SEND_STATUS_DESC_FAULT = 0x01, + CQE_SEND_STATUS_HDR_CONS_ERR = 0x11, + CQE_SEND_STATUS_SUBDESC_ERR = 0x12, + CQE_SEND_STATUS_IMM_SIZE_OFLOW = 0x80, + CQE_SEND_STATUS_CRC_SEQ_ERR = 0x81, + CQE_SEND_STATUS_DATA_SEQ_ERR = 0x82, + CQE_SEND_STATUS_MEM_SEQ_ERR = 0x83, + CQE_SEND_STATUS_LOCK_VIOL = 0x84, + CQE_SEND_STATUS_LOCK_UFLOW = 0x85, + CQE_SEND_STATUS_DATA_FAULT = 0x86, + CQE_SEND_STATUS_TSTMP_CONFLICT = 0x87, + CQE_SEND_STATUS_TSTMP_TIMEOUT = 0x88, + CQE_SEND_STATUS_MEM_FAULT = 0x89, + CQE_SEND_STATUS_CSUM_OVERLAP = 0x8A, + CQE_SEND_STATUS_CSUM_OVERFLOW = 0x8B, +}; + +enum cqe_rx_tcp_end_reason { + CQE_RX_TCP_END_FIN_FLAG_DET = 0, + CQE_RX_TCP_END_INVALID_FLAG = 1, + CQE_RX_TCP_END_TIMEOUT = 2, + CQE_RX_TCP_END_OUT_OF_SEQ = 3, + CQE_RX_TCP_END_PKT_ERR = 4, + CQE_RX_TCP_END_QS_DISABLED = 0x0F, +}; + +/* Packet protocol level error enumeration */ +enum cqe_rx_err_level { + CQE_RX_ERRLVL_RE = 0x0, + CQE_RX_ERRLVL_L2 = 0x1, + CQE_RX_ERRLVL_L3 = 0x2, + CQE_RX_ERRLVL_L4 = 0x3, +}; + +/* Packet protocol level error type enumeration */ +enum cqe_rx_err_opcode { + CQE_RX_ERR_RE_NONE = 0x0, + CQE_RX_ERR_RE_PARTIAL = 0x1, + CQE_RX_ERR_RE_JABBER = 0x2, + CQE_RX_ERR_RE_FCS = 0x7, + CQE_RX_ERR_RE_TERMINATE = 0x9, + CQE_RX_ERR_RE_RX_CTL = 0xb, + CQE_RX_ERR_PREL2_ERR = 0x1f, + CQE_RX_ERR_L2_FRAGMENT = 0x20, + CQE_RX_ERR_L2_OVERRUN = 0x21, + CQE_RX_ERR_L2_PFCS = 0x22, + CQE_RX_ERR_L2_PUNY = 0x23, + CQE_RX_ERR_L2_MAL = 0x24, + CQE_RX_ERR_L2_OVERSIZE = 0x25, + CQE_RX_ERR_L2_UNDERSIZE = 0x26, + CQE_RX_ERR_L2_LENMISM = 0x27, + CQE_RX_ERR_L2_PCLP = 0x28, + CQE_RX_ERR_IP_NOT = 0x41, + CQE_RX_ERR_IP_CHK = 0x42, + CQE_RX_ERR_IP_MAL = 0x43, + CQE_RX_ERR_IP_MALD = 0x44, + CQE_RX_ERR_IP_HOP = 0x45, + CQE_RX_ERR_L3_ICRC = 0x46, + CQE_RX_ERR_L3_PCLP = 0x47, + CQE_RX_ERR_L4_MAL = 0x61, + CQE_RX_ERR_L4_CHK = 0x62, + CQE_RX_ERR_UDP_LEN = 0x63, + CQE_RX_ERR_L4_PORT = 0x64, + CQE_RX_ERR_TCP_FLAG = 0x65, + CQE_RX_ERR_TCP_OFFSET = 0x66, + CQE_RX_ERR_L4_PCLP = 0x67, + CQE_RX_ERR_RBDR_TRUNC = 0x70, +}; + +struct cqe_rx_t { +#if defined(__BIG_ENDIAN_BITFIELD) + u64 cqe_type:4; /* W0 */ + u64 stdn_fault:1; + u64 rsvd0:1; + u64 rq_qs:7; + u64 rq_idx:3; + u64 rsvd1:12; + u64 rss_alg:4; + u64 rsvd2:4; + u64 rb_cnt:4; + u64 vlan_found:1; + u64 vlan_stripped:1; + u64 vlan2_found:1; + u64 vlan2_stripped:1; + u64 l4_type:4; + u64 l3_type:4; + u64 l2_present:1; + u64 err_level:3; + u64 err_opcode:8; + + u64 pkt_len:16; /* W1 */ + u64 l2_ptr:8; + u64 l3_ptr:8; + u64 l4_ptr:8; + u64 cq_pkt_len:8; + u64 align_pad:3; + u64 rsvd3:1; + u64 chan:12; + + u64 rss_tag:32; /* W2 */ + u64 vlan_tci:16; + u64 vlan_ptr:8; + u64 vlan2_ptr:8; + + u64 rb3_sz:16; /* W3 */ + u64 rb2_sz:16; + u64 rb1_sz:16; + u64 rb0_sz:16; + + u64 rb7_sz:16; /* W4 */ + u64 rb6_sz:16; + u64 rb5_sz:16; + u64 rb4_sz:16; + + u64 rb11_sz:16; /* W5 */ + u64 rb10_sz:16; + u64 rb9_sz:16; + u64 rb8_sz:16; +#elif defined(__LITTLE_ENDIAN_BITFIELD) + u64 err_opcode:8; + u64 err_level:3; + u64 l2_present:1; + u64 l3_type:4; + u64 l4_type:4; + u64 vlan2_stripped:1; + u64 vlan2_found:1; + u64 vlan_stripped:1; + u64 vlan_found:1; + u64 rb_cnt:4; + u64 rsvd2:4; + u64 rss_alg:4; + u64 rsvd1:12; + u64 rq_idx:3; + u64 rq_qs:7; + u64 rsvd0:1; + u64 stdn_fault:1; + u64 cqe_type:4; /* W0 */ + u64 chan:12; + u64 rsvd3:1; + u64 align_pad:3; + u64 cq_pkt_len:8; + u64 l4_ptr:8; + u64 l3_ptr:8; + u64 l2_ptr:8; + u64 pkt_len:16; /* W1 */ + u64 vlan2_ptr:8; + u64 vlan_ptr:8; + u64 vlan_tci:16; + u64 rss_tag:32; /* W2 */ + u64 rb0_sz:16; + u64 rb1_sz:16; + u64 rb2_sz:16; + u64 rb3_sz:16; /* W3 */ + u64 rb4_sz:16; + u64 rb5_sz:16; + u64 rb6_sz:16; + u64 rb7_sz:16; /* W4 */ + u64 rb8_sz:16; + u64 rb9_sz:16; + u64 rb10_sz:16; + u64 rb11_sz:16; /* W5 */ +#endif + u64 rb0_ptr:64; + u64 rb1_ptr:64; + u64 rb2_ptr:64; + u64 rb3_ptr:64; + u64 rb4_ptr:64; + u64 rb5_ptr:64; + u64 rb6_ptr:64; + u64 rb7_ptr:64; + u64 rb8_ptr:64; + u64 rb9_ptr:64; + u64 rb10_ptr:64; + u64 rb11_ptr:64; +}; + +struct cqe_rx_tcp_err_t { +#if defined(__BIG_ENDIAN_BITFIELD) + u64 cqe_type:4; /* W0 */ + u64 rsvd0:60; + + u64 rsvd1:4; /* W1 */ + u64 partial_first:1; + u64 rsvd2:27; + u64 rbdr_bytes:8; + u64 rsvd3:24; +#elif defined(__LITTLE_ENDIAN_BITFIELD) + u64 rsvd0:60; + u64 cqe_type:4; + + u64 rsvd3:24; + u64 rbdr_bytes:8; + u64 rsvd2:27; + u64 partial_first:1; + u64 rsvd1:4; +#endif +}; + +struct cqe_rx_tcp_t { +#if defined(__BIG_ENDIAN_BITFIELD) + u64 cqe_type:4; /* W0 */ + u64 rsvd0:52; + u64 cq_tcp_status:8; + + u64 rsvd1:32; /* W1 */ + u64 tcp_cntx_bytes:8; + u64 rsvd2:8; + u64 tcp_err_bytes:16; +#elif defined(__LITTLE_ENDIAN_BITFIELD) + u64 cq_tcp_status:8; + u64 rsvd0:52; + u64 cqe_type:4; /* W0 */ + + u64 tcp_err_bytes:16; + u64 rsvd2:8; + u64 tcp_cntx_bytes:8; + u64 rsvd1:32; /* W1 */ +#endif +}; + +struct cqe_send_t { +#if defined(__BIG_ENDIAN_BITFIELD) + u64 cqe_type:4; /* W0 */ + u64 rsvd0:4; + u64 sqe_ptr:16; + u64 rsvd1:4; + u64 rsvd2:10; + u64 sq_qs:7; + u64 sq_idx:3; + u64 rsvd3:8; + u64 send_status:8; + + u64 ptp_timestamp:64; /* W1 */ +#elif defined(__LITTLE_ENDIAN_BITFIELD) + u64 send_status:8; + u64 rsvd3:8; + u64 sq_idx:3; + u64 sq_qs:7; + u64 rsvd2:10; + u64 rsvd1:4; + u64 sqe_ptr:16; + u64 rsvd0:4; + u64 cqe_type:4; /* W0 */ + + u64 ptp_timestamp:64; /* W1 */ +#endif +}; + +union cq_desc_t { + u64 u[64]; + struct cqe_send_t snd_hdr; + struct cqe_rx_t rx_hdr; + struct cqe_rx_tcp_t rx_tcp_hdr; + struct cqe_rx_tcp_err_t rx_tcp_err_hdr; +}; + +struct rbdr_entry_t { +#if defined(__BIG_ENDIAN_BITFIELD) + u64 rsvd0:15; + u64 buf_addr:42; + u64 cache_align:7; +#elif defined(__LITTLE_ENDIAN_BITFIELD) + u64 cache_align:7; + u64 buf_addr:42; + u64 rsvd0:15; +#endif +}; + +/* TCP reassembly context */ +struct rbe_tcp_cnxt_t { +#if defined(__BIG_ENDIAN_BITFIELD) + u64 tcp_pkt_cnt:12; + u64 rsvd1:4; + u64 align_hdr_bytes:4; + u64 align_ptr_bytes:4; + u64 ptr_bytes:16; + u64 rsvd2:24; + u64 cqe_type:4; + u64 rsvd0:54; + u64 tcp_end_reason:2; + u64 tcp_status:4; +#elif defined(__LITTLE_ENDIAN_BITFIELD) + u64 tcp_status:4; + u64 tcp_end_reason:2; + u64 rsvd0:54; + u64 cqe_type:4; + u64 rsvd2:24; + u64 ptr_bytes:16; + u64 align_ptr_bytes:4; + u64 align_hdr_bytes:4; + u64 rsvd1:4; + u64 tcp_pkt_cnt:12; +#endif +}; + +/* Always Big endian */ +struct rx_hdr_t { + u64 opaque:32; + u64 rss_flow:8; + u64 skip_length:6; + u64 disable_rss:1; + u64 disable_tcp_reassembly:1; + u64 nodrop:1; + u64 dest_alg:2; + u64 rsvd0:2; + u64 dest_rq:11; +}; + +enum send_l4_csum_type { + SEND_L4_CSUM_DISABLE = 0x00, + SEND_L4_CSUM_UDP = 0x01, + SEND_L4_CSUM_TCP = 0x02, + SEND_L4_CSUM_SCTP = 0x03, +}; + +enum send_crc_alg { + SEND_CRCALG_CRC32 = 0x00, + SEND_CRCALG_CRC32C = 0x01, + SEND_CRCALG_ICRC = 0x02, +}; + +enum send_load_type { + SEND_LD_TYPE_LDD = 0x00, + SEND_LD_TYPE_LDT = 0x01, + SEND_LD_TYPE_LDWB = 0x02, +}; + +enum send_mem_alg_type { + SEND_MEMALG_SET = 0x00, + SEND_MEMALG_ADD = 0x08, + SEND_MEMALG_SUB = 0x09, + SEND_MEMALG_ADDLEN = 0x0A, + SEND_MEMALG_SUBLEN = 0x0B, +}; + +enum send_mem_dsz_type { + SEND_MEMDSZ_B64 = 0x00, + SEND_MEMDSZ_B32 = 0x01, + SEND_MEMDSZ_B8 = 0x03, +}; + +enum sq_subdesc_type { + SQ_DESC_TYPE_INVALID = 0x00, + SQ_DESC_TYPE_HEADER = 0x01, + SQ_DESC_TYPE_CRC = 0x02, + SQ_DESC_TYPE_IMMEDIATE = 0x03, + SQ_DESC_TYPE_GATHER = 0x04, + SQ_DESC_TYPE_MEMORY = 0x05, +}; + +struct sq_crc_subdesc { +#if defined(__BIG_ENDIAN_BITFIELD) + u64 rsvd1:32; + u64 crc_ival:32; + u64 subdesc_type:4; + u64 crc_alg:2; + u64 rsvd0:10; + u64 crc_insert_pos:16; + u64 hdr_start:16; + u64 crc_len:16; +#elif defined(__LITTLE_ENDIAN_BITFIELD) + u64 crc_len:16; + u64 hdr_start:16; + u64 crc_insert_pos:16; + u64 rsvd0:10; + u64 crc_alg:2; + u64 subdesc_type:4; + u64 crc_ival:32; + u64 rsvd1:32; +#endif +}; + +struct sq_gather_subdesc { +#if defined(__BIG_ENDIAN_BITFIELD) + u64 subdesc_type:4; /* W0 */ + u64 ld_type:2; + u64 rsvd0:42; + u64 size:16; + + u64 rsvd1:15; /* W1 */ + u64 addr:49; +#elif defined(__LITTLE_ENDIAN_BITFIELD) + u64 size:16; + u64 rsvd0:42; + u64 ld_type:2; + u64 subdesc_type:4; /* W0 */ + + u64 addr:49; + u64 rsvd1:15; /* W1 */ +#endif +}; + +/* SQ immediate subdescriptor */ +struct sq_imm_subdesc { +#if defined(__BIG_ENDIAN_BITFIELD) + u64 subdesc_type:4; /* W0 */ + u64 rsvd0:46; + u64 len:14; + + u64 data:64; /* W1 */ +#elif defined(__LITTLE_ENDIAN_BITFIELD) + u64 len:14; + u64 rsvd0:46; + u64 subdesc_type:4; /* W0 */ + + u64 data:64; /* W1 */ +#endif +}; + +struct sq_mem_subdesc { +#if defined(__BIG_ENDIAN_BITFIELD) + u64 subdesc_type:4; /* W0 */ + u64 mem_alg:4; + u64 mem_dsz:2; + u64 wmem:1; + u64 rsvd0:21; + u64 offset:32; + + u64 rsvd1:15; /* W1 */ + u64 addr:49; +#elif defined(__LITTLE_ENDIAN_BITFIELD) + u64 offset:32; + u64 rsvd0:21; + u64 wmem:1; + u64 mem_dsz:2; + u64 mem_alg:4; + u64 subdesc_type:4; /* W0 */ + + u64 addr:49; + u64 rsvd1:15; /* W1 */ +#endif +}; + +struct sq_hdr_subdesc { +#if defined(__BIG_ENDIAN_BITFIELD) + u64 subdesc_type:4; + u64 tso:1; + u64 post_cqe:1; /* Post CQE on no error also */ + u64 dont_send:1; + u64 tstmp:1; + u64 subdesc_cnt:8; + u64 csum_l4:2; + u64 csum_l3:1; + u64 rsvd0:5; + u64 l4_offset:8; + u64 l3_offset:8; + u64 rsvd1:4; + u64 tot_len:20; /* W0 */ + + u64 tso_sdc_cont:8; + u64 tso_sdc_first:8; + u64 tso_l4_offset:8; + u64 tso_flags_last:12; + u64 tso_flags_first:12; + u64 rsvd2:2; + u64 tso_max_paysize:14; /* W1 */ +#elif defined(__LITTLE_ENDIAN_BITFIELD) + u64 tot_len:20; + u64 rsvd1:4; + u64 l3_offset:8; + u64 l4_offset:8; + u64 rsvd0:5; + u64 csum_l3:1; + u64 csum_l4:2; + u64 subdesc_cnt:8; + u64 tstmp:1; + u64 dont_send:1; + u64 post_cqe:1; /* Post CQE on no error also */ + u64 tso:1; + u64 subdesc_type:4; /* W0 */ + + u64 tso_max_paysize:14; + u64 rsvd2:2; + u64 tso_flags_first:12; + u64 tso_flags_last:12; + u64 tso_l4_offset:8; + u64 tso_sdc_first:8; + u64 tso_sdc_cont:8; /* W1 */ +#endif +}; + +/* Queue config register formats */ +struct rq_cfg { +#if defined(__BIG_ENDIAN_BITFIELD) + u64 reserved_2_63:62; + u64 ena:1; + u64 tcp_ena:1; +#elif defined(__LITTLE_ENDIAN_BITFIELD) + u64 tcp_ena:1; + u64 ena:1; + u64 reserved_2_63:62; +#endif +}; + +struct cq_cfg { +#if defined(__BIG_ENDIAN_BITFIELD) + u64 reserved_43_63:21; + u64 ena:1; + u64 reset:1; + u64 caching:1; + u64 reserved_35_39:5; + u64 qsize:3; + u64 reserved_25_31:7; + u64 avg_con:9; + u64 reserved_0_15:16; +#elif defined(__LITTLE_ENDIAN_BITFIELD) + u64 reserved_0_15:16; + u64 avg_con:9; + u64 reserved_25_31:7; + u64 qsize:3; + u64 reserved_35_39:5; + u64 caching:1; + u64 reset:1; + u64 ena:1; + u64 reserved_43_63:21; +#endif +}; + +struct sq_cfg { +#if defined(__BIG_ENDIAN_BITFIELD) + u64 reserved_20_63:44; + u64 ena:1; + u64 reserved_18_18:1; + u64 reset:1; + u64 ldwb:1; + u64 reserved_11_15:5; + u64 qsize:3; + u64 reserved_3_7:5; + u64 tstmp_bgx_intf:3; +#elif defined(__LITTLE_ENDIAN_BITFIELD) + u64 tstmp_bgx_intf:3; + u64 reserved_3_7:5; + u64 qsize:3; + u64 reserved_11_15:5; + u64 ldwb:1; + u64 reset:1; + u64 reserved_18_18:1; + u64 ena:1; + u64 reserved_20_63:44; +#endif +}; + +struct rbdr_cfg { +#if defined(__BIG_ENDIAN_BITFIELD) + u64 reserved_45_63:19; + u64 ena:1; + u64 reset:1; + u64 ldwb:1; + u64 reserved_36_41:6; + u64 qsize:4; + u64 reserved_25_31:7; + u64 avg_con:9; + u64 reserved_12_15:4; + u64 lines:12; +#elif defined(__LITTLE_ENDIAN_BITFIELD) + u64 lines:12; + u64 reserved_12_15:4; + u64 avg_con:9; + u64 reserved_25_31:7; + u64 qsize:4; + u64 reserved_36_41:6; + u64 ldwb:1; + u64 reset:1; + u64 ena: 1; + u64 reserved_45_63:19; +#endif +}; + +struct qs_cfg { +#if defined(__BIG_ENDIAN_BITFIELD) + u64 reserved_32_63:32; + u64 ena:1; + u64 reserved_27_30:4; + u64 sq_ins_ena:1; + u64 sq_ins_pos:6; + u64 lock_ena:1; + u64 lock_viol_cqe_ena:1; + u64 send_tstmp_ena:1; + u64 be:1; + u64 reserved_7_15:9; + u64 vnic:7; +#elif defined(__LITTLE_ENDIAN_BITFIELD) + u64 vnic:7; + u64 reserved_7_15:9; + u64 be:1; + u64 send_tstmp_ena:1; + u64 lock_viol_cqe_ena:1; + u64 lock_ena:1; + u64 sq_ins_pos:6; + u64 sq_ins_ena:1; + u64 reserved_27_30:4; + u64 ena:1; + u64 reserved_32_63:32; +#endif +}; + +#endif /* Q_STRUCT_H */ diff --git a/sys/dev/vnic/thunder_bgx.c b/sys/dev/vnic/thunder_bgx.c new file mode 100644 index 000000000000..3dabd59668c8 --- /dev/null +++ b/sys/dev/vnic/thunder_bgx.c @@ -0,0 +1,1212 @@ +/* + * Copyright (C) 2015 Cavium Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + * + */ + +#include <linux/acpi.h> +#include <linux/module.h> +#include <linux/interrupt.h> +#include <linux/pci.h> +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/phy.h> +#include <linux/of.h> +#include <linux/of_mdio.h> +#include <linux/of_net.h> + +#include "nic_reg.h" +#include "nic.h" +#include "thunder_bgx.h" + +#define DRV_NAME "thunder-BGX" +#define DRV_VERSION "1.0" + +struct lmac { + struct bgx *bgx; + int dmac; + u8 mac[ETH_ALEN]; + bool link_up; + int lmacid; /* ID within BGX */ + int lmacid_bd; /* ID on board */ + struct net_device netdev; + struct phy_device *phydev; + unsigned int last_duplex; + unsigned int last_link; + unsigned int last_speed; + bool is_sgmii; + struct delayed_work dwork; + struct workqueue_struct *check_link; +}; + +struct bgx { + u8 bgx_id; + u8 qlm_mode; + struct lmac lmac[MAX_LMAC_PER_BGX]; + int lmac_count; + int lmac_type; + int lane_to_sds; + int use_training; + void __iomem *reg_base; + struct pci_dev *pdev; +}; + +static struct bgx *bgx_vnic[MAX_BGX_THUNDER]; +static int lmac_count; /* Total no of LMACs in system */ + +static int bgx_xaui_check_link(struct lmac *lmac); + +/* Supported devices */ +static const struct pci_device_id bgx_id_table[] = { + { PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVICE_ID_THUNDER_BGX) }, + { 0, } /* end of table */ +}; + +MODULE_AUTHOR("Cavium Inc"); +MODULE_DESCRIPTION("Cavium Thunder BGX/MAC Driver"); +MODULE_VERSION(DRV_VERSION); +MODULE_DEVICE_TABLE(pci, bgx_id_table); + +/* The Cavium ThunderX network controller can *only* be found in SoCs + * containing the ThunderX ARM64 CPU implementation. All accesses to the device + * registers on this platform are implicitly strongly ordered with respect + * to memory accesses. So writeq_relaxed() and readq_relaxed() are safe to use + * with no memory barriers in this driver. The readq()/writeq() functions add + * explicit ordering operation which in this case are redundant, and only + * add overhead. + */ + +/* Register read/write APIs */ +static u64 bgx_reg_read(struct bgx *bgx, u8 lmac, u64 offset) +{ + void __iomem *addr = bgx->reg_base + ((u32)lmac << 20) + offset; + + return readq_relaxed(addr); +} + +static void bgx_reg_write(struct bgx *bgx, u8 lmac, u64 offset, u64 val) +{ + void __iomem *addr = bgx->reg_base + ((u32)lmac << 20) + offset; + + writeq_relaxed(val, addr); +} + +static void bgx_reg_modify(struct bgx *bgx, u8 lmac, u64 offset, u64 val) +{ + void __iomem *addr = bgx->reg_base + ((u32)lmac << 20) + offset; + + writeq_relaxed(val | readq_relaxed(addr), addr); +} + +static int bgx_poll_reg(struct bgx *bgx, u8 lmac, u64 reg, u64 mask, bool zero) +{ + int timeout = 100; + u64 reg_val; + + while (timeout) { + reg_val = bgx_reg_read(bgx, lmac, reg); + if (zero && !(reg_val & mask)) + return 0; + if (!zero && (reg_val & mask)) + return 0; + usleep_range(1000, 2000); + timeout--; + } + return 1; +} + +/* Return number of BGX present in HW */ +unsigned bgx_get_map(int node) +{ + int i; + unsigned map = 0; + + for (i = 0; i < MAX_BGX_PER_CN88XX; i++) { + if (bgx_vnic[(node * MAX_BGX_PER_CN88XX) + i]) + map |= (1 << i); + } + + return map; +} +EXPORT_SYMBOL(bgx_get_map); + +/* Return number of LMAC configured for this BGX */ +int bgx_get_lmac_count(int node, int bgx_idx) +{ + struct bgx *bgx; + + bgx = bgx_vnic[(node * MAX_BGX_PER_CN88XX) + bgx_idx]; + if (bgx) + return bgx->lmac_count; + + return 0; +} +EXPORT_SYMBOL(bgx_get_lmac_count); + +/* Returns the current link status of LMAC */ +void bgx_get_lmac_link_state(int node, int bgx_idx, int lmacid, void *status) +{ + struct bgx_link_status *link = (struct bgx_link_status *)status; + struct bgx *bgx; + struct lmac *lmac; + + bgx = bgx_vnic[(node * MAX_BGX_PER_CN88XX) + bgx_idx]; + if (!bgx) + return; + + lmac = &bgx->lmac[lmacid]; + link->link_up = lmac->link_up; + link->duplex = lmac->last_duplex; + link->speed = lmac->last_speed; +} +EXPORT_SYMBOL(bgx_get_lmac_link_state); + +const u8 *bgx_get_lmac_mac(int node, int bgx_idx, int lmacid) +{ + struct bgx *bgx = bgx_vnic[(node * MAX_BGX_PER_CN88XX) + bgx_idx]; + + if (bgx) + return bgx->lmac[lmacid].mac; + + return NULL; +} +EXPORT_SYMBOL(bgx_get_lmac_mac); + +void bgx_set_lmac_mac(int node, int bgx_idx, int lmacid, const u8 *mac) +{ + struct bgx *bgx = bgx_vnic[(node * MAX_BGX_PER_CN88XX) + bgx_idx]; + + if (!bgx) + return; + + ether_addr_copy(bgx->lmac[lmacid].mac, mac); +} +EXPORT_SYMBOL(bgx_set_lmac_mac); + +static void bgx_sgmii_change_link_state(struct lmac *lmac) +{ + struct bgx *bgx = lmac->bgx; + u64 cmr_cfg; + u64 port_cfg = 0; + u64 misc_ctl = 0; + + cmr_cfg = bgx_reg_read(bgx, lmac->lmacid, BGX_CMRX_CFG); + cmr_cfg &= ~CMR_EN; + bgx_reg_write(bgx, lmac->lmacid, BGX_CMRX_CFG, cmr_cfg); + + port_cfg = bgx_reg_read(bgx, lmac->lmacid, BGX_GMP_GMI_PRTX_CFG); + misc_ctl = bgx_reg_read(bgx, lmac->lmacid, BGX_GMP_PCS_MISCX_CTL); + + if (lmac->link_up) { + misc_ctl &= ~PCS_MISC_CTL_GMX_ENO; + port_cfg &= ~GMI_PORT_CFG_DUPLEX; + port_cfg |= (lmac->last_duplex << 2); + } else { + misc_ctl |= PCS_MISC_CTL_GMX_ENO; + } + + switch (lmac->last_speed) { + case 10: + port_cfg &= ~GMI_PORT_CFG_SPEED; /* speed 0 */ + port_cfg |= GMI_PORT_CFG_SPEED_MSB; /* speed_msb 1 */ + port_cfg &= ~GMI_PORT_CFG_SLOT_TIME; /* slottime 0 */ + misc_ctl &= ~PCS_MISC_CTL_SAMP_PT_MASK; + misc_ctl |= 50; /* samp_pt */ + bgx_reg_write(bgx, lmac->lmacid, BGX_GMP_GMI_TXX_SLOT, 64); + bgx_reg_write(bgx, lmac->lmacid, BGX_GMP_GMI_TXX_BURST, 0); + break; + case 100: + port_cfg &= ~GMI_PORT_CFG_SPEED; /* speed 0 */ + port_cfg &= ~GMI_PORT_CFG_SPEED_MSB; /* speed_msb 0 */ + port_cfg &= ~GMI_PORT_CFG_SLOT_TIME; /* slottime 0 */ + misc_ctl &= ~PCS_MISC_CTL_SAMP_PT_MASK; + misc_ctl |= 5; /* samp_pt */ + bgx_reg_write(bgx, lmac->lmacid, BGX_GMP_GMI_TXX_SLOT, 64); + bgx_reg_write(bgx, lmac->lmacid, BGX_GMP_GMI_TXX_BURST, 0); + break; + case 1000: + port_cfg |= GMI_PORT_CFG_SPEED; /* speed 1 */ + port_cfg &= ~GMI_PORT_CFG_SPEED_MSB; /* speed_msb 0 */ + port_cfg |= GMI_PORT_CFG_SLOT_TIME; /* slottime 1 */ + misc_ctl &= ~PCS_MISC_CTL_SAMP_PT_MASK; + misc_ctl |= 1; /* samp_pt */ + bgx_reg_write(bgx, lmac->lmacid, BGX_GMP_GMI_TXX_SLOT, 512); + if (lmac->last_duplex) + bgx_reg_write(bgx, lmac->lmacid, + BGX_GMP_GMI_TXX_BURST, 0); + else + bgx_reg_write(bgx, lmac->lmacid, + BGX_GMP_GMI_TXX_BURST, 8192); + break; + default: + break; + } + bgx_reg_write(bgx, lmac->lmacid, BGX_GMP_PCS_MISCX_CTL, misc_ctl); + bgx_reg_write(bgx, lmac->lmacid, BGX_GMP_GMI_PRTX_CFG, port_cfg); + + port_cfg = bgx_reg_read(bgx, lmac->lmacid, BGX_GMP_GMI_PRTX_CFG); + + /* renable lmac */ + cmr_cfg |= CMR_EN; + bgx_reg_write(bgx, lmac->lmacid, BGX_CMRX_CFG, cmr_cfg); +} + +static void bgx_lmac_handler(struct net_device *netdev) +{ + struct lmac *lmac = container_of(netdev, struct lmac, netdev); + struct phy_device *phydev = lmac->phydev; + int link_changed = 0; + + if (!lmac) + return; + + if (!phydev->link && lmac->last_link) + link_changed = -1; + + if (phydev->link && + (lmac->last_duplex != phydev->duplex || + lmac->last_link != phydev->link || + lmac->last_speed != phydev->speed)) { + link_changed = 1; + } + + lmac->last_link = phydev->link; + lmac->last_speed = phydev->speed; + lmac->last_duplex = phydev->duplex; + + if (!link_changed) + return; + + if (link_changed > 0) + lmac->link_up = true; + else + lmac->link_up = false; + + if (lmac->is_sgmii) + bgx_sgmii_change_link_state(lmac); + else + bgx_xaui_check_link(lmac); +} + +u64 bgx_get_rx_stats(int node, int bgx_idx, int lmac, int idx) +{ + struct bgx *bgx; + + bgx = bgx_vnic[(node * MAX_BGX_PER_CN88XX) + bgx_idx]; + if (!bgx) + return 0; + + if (idx > 8) + lmac = 0; + return bgx_reg_read(bgx, lmac, BGX_CMRX_RX_STAT0 + (idx * 8)); +} +EXPORT_SYMBOL(bgx_get_rx_stats); + +u64 bgx_get_tx_stats(int node, int bgx_idx, int lmac, int idx) +{ + struct bgx *bgx; + + bgx = bgx_vnic[(node * MAX_BGX_PER_CN88XX) + bgx_idx]; + if (!bgx) + return 0; + + return bgx_reg_read(bgx, lmac, BGX_CMRX_TX_STAT0 + (idx * 8)); +} +EXPORT_SYMBOL(bgx_get_tx_stats); + +static void bgx_flush_dmac_addrs(struct bgx *bgx, int lmac) +{ + u64 offset; + + while (bgx->lmac[lmac].dmac > 0) { + offset = ((bgx->lmac[lmac].dmac - 1) * sizeof(u64)) + + (lmac * MAX_DMAC_PER_LMAC * sizeof(u64)); + bgx_reg_write(bgx, 0, BGX_CMR_RX_DMACX_CAM + offset, 0); + bgx->lmac[lmac].dmac--; + } +} + +void bgx_add_dmac_addr(u64 dmac, int node, int bgx_idx, int lmac) +{ + u64 offset; + struct bgx *bgx; + +#ifdef BGX_IN_PROMISCUOUS_MODE + return; +#endif + + bgx_idx += node * MAX_BGX_PER_CN88XX; + bgx = bgx_vnic[bgx_idx]; + + if (!bgx) { + dev_err(&bgx->pdev->dev, + "BGX%d not yet initialized, ignoring DMAC addition\n", + bgx_idx); + return; + } + + dmac = dmac | (1ULL << 48) | ((u64)lmac << 49); /* Enable DMAC */ + if (bgx->lmac[lmac].dmac == MAX_DMAC_PER_LMAC) { + dev_err(&bgx->pdev->dev, + "Max DMAC filters for LMAC%d reached, ignoring\n", + lmac); + return; + } + + if (bgx->lmac[lmac].dmac == MAX_DMAC_PER_LMAC_TNS_BYPASS_MODE) + bgx->lmac[lmac].dmac = 1; + + offset = (bgx->lmac[lmac].dmac * sizeof(u64)) + + (lmac * MAX_DMAC_PER_LMAC * sizeof(u64)); + bgx_reg_write(bgx, 0, BGX_CMR_RX_DMACX_CAM + offset, dmac); + bgx->lmac[lmac].dmac++; + + bgx_reg_write(bgx, lmac, BGX_CMRX_RX_DMAC_CTL, + (CAM_ACCEPT << 3) | (MCAST_MODE_CAM_FILTER << 1) + | (BCAST_ACCEPT << 0)); +} +EXPORT_SYMBOL(bgx_add_dmac_addr); + +/* Configure BGX LMAC in internal loopback mode */ +void bgx_lmac_internal_loopback(int node, int bgx_idx, + int lmac_idx, bool enable) +{ + struct bgx *bgx; + struct lmac *lmac; + u64 cfg; + + bgx = bgx_vnic[(node * MAX_BGX_PER_CN88XX) + bgx_idx]; + if (!bgx) + return; + + lmac = &bgx->lmac[lmac_idx]; + if (lmac->is_sgmii) { + cfg = bgx_reg_read(bgx, lmac_idx, BGX_GMP_PCS_MRX_CTL); + if (enable) + cfg |= PCS_MRX_CTL_LOOPBACK1; + else + cfg &= ~PCS_MRX_CTL_LOOPBACK1; + bgx_reg_write(bgx, lmac_idx, BGX_GMP_PCS_MRX_CTL, cfg); + } else { + cfg = bgx_reg_read(bgx, lmac_idx, BGX_SPUX_CONTROL1); + if (enable) + cfg |= SPU_CTL_LOOPBACK; + else + cfg &= ~SPU_CTL_LOOPBACK; + bgx_reg_write(bgx, lmac_idx, BGX_SPUX_CONTROL1, cfg); + } +} +EXPORT_SYMBOL(bgx_lmac_internal_loopback); + +static int bgx_lmac_sgmii_init(struct bgx *bgx, int lmacid) +{ + u64 cfg; + + bgx_reg_modify(bgx, lmacid, BGX_GMP_GMI_TXX_THRESH, 0x30); + /* max packet size */ + bgx_reg_modify(bgx, lmacid, BGX_GMP_GMI_RXX_JABBER, MAX_FRAME_SIZE); + + /* Disable frame alignment if using preamble */ + cfg = bgx_reg_read(bgx, lmacid, BGX_GMP_GMI_TXX_APPEND); + if (cfg & 1) + bgx_reg_write(bgx, lmacid, BGX_GMP_GMI_TXX_SGMII_CTL, 0); + + /* Enable lmac */ + bgx_reg_modify(bgx, lmacid, BGX_CMRX_CFG, CMR_EN); + + /* PCS reset */ + bgx_reg_modify(bgx, lmacid, BGX_GMP_PCS_MRX_CTL, PCS_MRX_CTL_RESET); + if (bgx_poll_reg(bgx, lmacid, BGX_GMP_PCS_MRX_CTL, + PCS_MRX_CTL_RESET, true)) { + dev_err(&bgx->pdev->dev, "BGX PCS reset not completed\n"); + return -1; + } + + /* power down, reset autoneg, autoneg enable */ + cfg = bgx_reg_read(bgx, lmacid, BGX_GMP_PCS_MRX_CTL); + cfg &= ~PCS_MRX_CTL_PWR_DN; + cfg |= (PCS_MRX_CTL_RST_AN | PCS_MRX_CTL_AN_EN); + bgx_reg_write(bgx, lmacid, BGX_GMP_PCS_MRX_CTL, cfg); + + if (bgx_poll_reg(bgx, lmacid, BGX_GMP_PCS_MRX_STATUS, + PCS_MRX_STATUS_AN_CPT, false)) { + dev_err(&bgx->pdev->dev, "BGX AN_CPT not completed\n"); + return -1; + } + + return 0; +} + +static int bgx_lmac_xaui_init(struct bgx *bgx, int lmacid, int lmac_type) +{ + u64 cfg; + + /* Reset SPU */ + bgx_reg_modify(bgx, lmacid, BGX_SPUX_CONTROL1, SPU_CTL_RESET); + if (bgx_poll_reg(bgx, lmacid, BGX_SPUX_CONTROL1, SPU_CTL_RESET, true)) { + dev_err(&bgx->pdev->dev, "BGX SPU reset not completed\n"); + return -1; + } + + /* Disable LMAC */ + cfg = bgx_reg_read(bgx, lmacid, BGX_CMRX_CFG); + cfg &= ~CMR_EN; + bgx_reg_write(bgx, lmacid, BGX_CMRX_CFG, cfg); + + bgx_reg_modify(bgx, lmacid, BGX_SPUX_CONTROL1, SPU_CTL_LOW_POWER); + /* Set interleaved running disparity for RXAUI */ + if (bgx->lmac_type != BGX_MODE_RXAUI) + bgx_reg_modify(bgx, lmacid, + BGX_SPUX_MISC_CONTROL, SPU_MISC_CTL_RX_DIS); + else + bgx_reg_modify(bgx, lmacid, BGX_SPUX_MISC_CONTROL, + SPU_MISC_CTL_RX_DIS | SPU_MISC_CTL_INTLV_RDISP); + + /* clear all interrupts */ + cfg = bgx_reg_read(bgx, lmacid, BGX_SMUX_RX_INT); + bgx_reg_write(bgx, lmacid, BGX_SMUX_RX_INT, cfg); + cfg = bgx_reg_read(bgx, lmacid, BGX_SMUX_TX_INT); + bgx_reg_write(bgx, lmacid, BGX_SMUX_TX_INT, cfg); + cfg = bgx_reg_read(bgx, lmacid, BGX_SPUX_INT); + bgx_reg_write(bgx, lmacid, BGX_SPUX_INT, cfg); + + if (bgx->use_training) { + bgx_reg_write(bgx, lmacid, BGX_SPUX_BR_PMD_LP_CUP, 0x00); + bgx_reg_write(bgx, lmacid, BGX_SPUX_BR_PMD_LD_CUP, 0x00); + bgx_reg_write(bgx, lmacid, BGX_SPUX_BR_PMD_LD_REP, 0x00); + /* training enable */ + bgx_reg_modify(bgx, lmacid, + BGX_SPUX_BR_PMD_CRTL, SPU_PMD_CRTL_TRAIN_EN); + } + + /* Append FCS to each packet */ + bgx_reg_modify(bgx, lmacid, BGX_SMUX_TX_APPEND, SMU_TX_APPEND_FCS_D); + + /* Disable forward error correction */ + cfg = bgx_reg_read(bgx, lmacid, BGX_SPUX_FEC_CONTROL); + cfg &= ~SPU_FEC_CTL_FEC_EN; + bgx_reg_write(bgx, lmacid, BGX_SPUX_FEC_CONTROL, cfg); + + /* Disable autoneg */ + cfg = bgx_reg_read(bgx, lmacid, BGX_SPUX_AN_CONTROL); + cfg = cfg & ~(SPU_AN_CTL_AN_EN | SPU_AN_CTL_XNP_EN); + bgx_reg_write(bgx, lmacid, BGX_SPUX_AN_CONTROL, cfg); + + cfg = bgx_reg_read(bgx, lmacid, BGX_SPUX_AN_ADV); + if (bgx->lmac_type == BGX_MODE_10G_KR) + cfg |= (1 << 23); + else if (bgx->lmac_type == BGX_MODE_40G_KR) + cfg |= (1 << 24); + else + cfg &= ~((1 << 23) | (1 << 24)); + cfg = cfg & (~((1ULL << 25) | (1ULL << 22) | (1ULL << 12))); + bgx_reg_write(bgx, lmacid, BGX_SPUX_AN_ADV, cfg); + + cfg = bgx_reg_read(bgx, 0, BGX_SPU_DBG_CONTROL); + cfg &= ~SPU_DBG_CTL_AN_ARB_LINK_CHK_EN; + bgx_reg_write(bgx, 0, BGX_SPU_DBG_CONTROL, cfg); + + /* Enable lmac */ + bgx_reg_modify(bgx, lmacid, BGX_CMRX_CFG, CMR_EN); + + cfg = bgx_reg_read(bgx, lmacid, BGX_SPUX_CONTROL1); + cfg &= ~SPU_CTL_LOW_POWER; + bgx_reg_write(bgx, lmacid, BGX_SPUX_CONTROL1, cfg); + + cfg = bgx_reg_read(bgx, lmacid, BGX_SMUX_TX_CTL); + cfg &= ~SMU_TX_CTL_UNI_EN; + cfg |= SMU_TX_CTL_DIC_EN; + bgx_reg_write(bgx, lmacid, BGX_SMUX_TX_CTL, cfg); + + /* take lmac_count into account */ + bgx_reg_modify(bgx, lmacid, BGX_SMUX_TX_THRESH, (0x100 - 1)); + /* max packet size */ + bgx_reg_modify(bgx, lmacid, BGX_SMUX_RX_JABBER, MAX_FRAME_SIZE); + + return 0; +} + +static int bgx_xaui_check_link(struct lmac *lmac) +{ + struct bgx *bgx = lmac->bgx; + int lmacid = lmac->lmacid; + int lmac_type = bgx->lmac_type; + u64 cfg; + + bgx_reg_modify(bgx, lmacid, BGX_SPUX_MISC_CONTROL, SPU_MISC_CTL_RX_DIS); + if (bgx->use_training) { + cfg = bgx_reg_read(bgx, lmacid, BGX_SPUX_INT); + if (!(cfg & (1ull << 13))) { + cfg = (1ull << 13) | (1ull << 14); + bgx_reg_write(bgx, lmacid, BGX_SPUX_INT, cfg); + cfg = bgx_reg_read(bgx, lmacid, BGX_SPUX_BR_PMD_CRTL); + cfg |= (1ull << 0); + bgx_reg_write(bgx, lmacid, BGX_SPUX_BR_PMD_CRTL, cfg); + return -1; + } + } + + /* wait for PCS to come out of reset */ + if (bgx_poll_reg(bgx, lmacid, BGX_SPUX_CONTROL1, SPU_CTL_RESET, true)) { + dev_err(&bgx->pdev->dev, "BGX SPU reset not completed\n"); + return -1; + } + + if ((lmac_type == BGX_MODE_10G_KR) || (lmac_type == BGX_MODE_XFI) || + (lmac_type == BGX_MODE_40G_KR) || (lmac_type == BGX_MODE_XLAUI)) { + if (bgx_poll_reg(bgx, lmacid, BGX_SPUX_BR_STATUS1, + SPU_BR_STATUS_BLK_LOCK, false)) { + dev_err(&bgx->pdev->dev, + "SPU_BR_STATUS_BLK_LOCK not completed\n"); + return -1; + } + } else { + if (bgx_poll_reg(bgx, lmacid, BGX_SPUX_BX_STATUS, + SPU_BX_STATUS_RX_ALIGN, false)) { + dev_err(&bgx->pdev->dev, + "SPU_BX_STATUS_RX_ALIGN not completed\n"); + return -1; + } + } + + /* Clear rcvflt bit (latching high) and read it back */ + bgx_reg_modify(bgx, lmacid, BGX_SPUX_STATUS2, SPU_STATUS2_RCVFLT); + if (bgx_reg_read(bgx, lmacid, BGX_SPUX_STATUS2) & SPU_STATUS2_RCVFLT) { + dev_err(&bgx->pdev->dev, "Receive fault, retry training\n"); + if (bgx->use_training) { + cfg = bgx_reg_read(bgx, lmacid, BGX_SPUX_INT); + if (!(cfg & (1ull << 13))) { + cfg = (1ull << 13) | (1ull << 14); + bgx_reg_write(bgx, lmacid, BGX_SPUX_INT, cfg); + cfg = bgx_reg_read(bgx, lmacid, + BGX_SPUX_BR_PMD_CRTL); + cfg |= (1ull << 0); + bgx_reg_write(bgx, lmacid, + BGX_SPUX_BR_PMD_CRTL, cfg); + return -1; + } + } + return -1; + } + + /* Wait for MAC RX to be ready */ + if (bgx_poll_reg(bgx, lmacid, BGX_SMUX_RX_CTL, + SMU_RX_CTL_STATUS, true)) { + dev_err(&bgx->pdev->dev, "SMU RX link not okay\n"); + return -1; + } + + /* Wait for BGX RX to be idle */ + if (bgx_poll_reg(bgx, lmacid, BGX_SMUX_CTL, SMU_CTL_RX_IDLE, false)) { + dev_err(&bgx->pdev->dev, "SMU RX not idle\n"); + return -1; + } + + /* Wait for BGX TX to be idle */ + if (bgx_poll_reg(bgx, lmacid, BGX_SMUX_CTL, SMU_CTL_TX_IDLE, false)) { + dev_err(&bgx->pdev->dev, "SMU TX not idle\n"); + return -1; + } + + if (bgx_reg_read(bgx, lmacid, BGX_SPUX_STATUS2) & SPU_STATUS2_RCVFLT) { + dev_err(&bgx->pdev->dev, "Receive fault\n"); + return -1; + } + + /* Receive link is latching low. Force it high and verify it */ + bgx_reg_modify(bgx, lmacid, BGX_SPUX_STATUS1, SPU_STATUS1_RCV_LNK); + if (bgx_poll_reg(bgx, lmacid, BGX_SPUX_STATUS1, + SPU_STATUS1_RCV_LNK, false)) { + dev_err(&bgx->pdev->dev, "SPU receive link down\n"); + return -1; + } + + cfg = bgx_reg_read(bgx, lmacid, BGX_SPUX_MISC_CONTROL); + cfg &= ~SPU_MISC_CTL_RX_DIS; + bgx_reg_write(bgx, lmacid, BGX_SPUX_MISC_CONTROL, cfg); + return 0; +} + +static void bgx_poll_for_link(struct work_struct *work) +{ + struct lmac *lmac; + u64 link; + + lmac = container_of(work, struct lmac, dwork.work); + + /* Receive link is latching low. Force it high and verify it */ + bgx_reg_modify(lmac->bgx, lmac->lmacid, + BGX_SPUX_STATUS1, SPU_STATUS1_RCV_LNK); + bgx_poll_reg(lmac->bgx, lmac->lmacid, BGX_SPUX_STATUS1, + SPU_STATUS1_RCV_LNK, false); + + link = bgx_reg_read(lmac->bgx, lmac->lmacid, BGX_SPUX_STATUS1); + if (link & SPU_STATUS1_RCV_LNK) { + lmac->link_up = 1; + if (lmac->bgx->lmac_type == BGX_MODE_XLAUI) + lmac->last_speed = 40000; + else + lmac->last_speed = 10000; + lmac->last_duplex = 1; + } else { + lmac->link_up = 0; + } + + if (lmac->last_link != lmac->link_up) { + lmac->last_link = lmac->link_up; + if (lmac->link_up) + bgx_xaui_check_link(lmac); + } + + queue_delayed_work(lmac->check_link, &lmac->dwork, HZ * 2); +} + +static int bgx_lmac_enable(struct bgx *bgx, u8 lmacid) +{ + u64 dmac_bcast = (1ULL << 48) - 1; + struct lmac *lmac; + u64 cfg; + + lmac = &bgx->lmac[lmacid]; + lmac->bgx = bgx; + + if (bgx->lmac_type == BGX_MODE_SGMII) { + lmac->is_sgmii = 1; + if (bgx_lmac_sgmii_init(bgx, lmacid)) + return -1; + } else { + lmac->is_sgmii = 0; + if (bgx_lmac_xaui_init(bgx, lmacid, bgx->lmac_type)) + return -1; + } + + if (lmac->is_sgmii) { + cfg = bgx_reg_read(bgx, lmacid, BGX_GMP_GMI_TXX_APPEND); + cfg |= ((1ull << 2) | (1ull << 1)); /* FCS and PAD */ + bgx_reg_modify(bgx, lmacid, BGX_GMP_GMI_TXX_APPEND, cfg); + bgx_reg_write(bgx, lmacid, BGX_GMP_GMI_TXX_MIN_PKT, 60 - 1); + } else { + cfg = bgx_reg_read(bgx, lmacid, BGX_SMUX_TX_APPEND); + cfg |= ((1ull << 2) | (1ull << 1)); /* FCS and PAD */ + bgx_reg_modify(bgx, lmacid, BGX_SMUX_TX_APPEND, cfg); + bgx_reg_write(bgx, lmacid, BGX_SMUX_TX_MIN_PKT, 60 + 4); + } + + /* Enable lmac */ + bgx_reg_modify(bgx, lmacid, BGX_CMRX_CFG, + CMR_EN | CMR_PKT_RX_EN | CMR_PKT_TX_EN); + + /* Restore default cfg, incase low level firmware changed it */ + bgx_reg_write(bgx, lmacid, BGX_CMRX_RX_DMAC_CTL, 0x03); + + /* Add broadcast MAC into all LMAC's DMAC filters */ + bgx_add_dmac_addr(dmac_bcast, 0, bgx->bgx_id, lmacid); + + if ((bgx->lmac_type != BGX_MODE_XFI) && + (bgx->lmac_type != BGX_MODE_XLAUI) && + (bgx->lmac_type != BGX_MODE_40G_KR) && + (bgx->lmac_type != BGX_MODE_10G_KR)) { + if (!lmac->phydev) + return -ENODEV; + + lmac->phydev->dev_flags = 0; + + if (phy_connect_direct(&lmac->netdev, lmac->phydev, + bgx_lmac_handler, + PHY_INTERFACE_MODE_SGMII)) + return -ENODEV; + + phy_start_aneg(lmac->phydev); + } else { + lmac->check_link = alloc_workqueue("check_link", WQ_UNBOUND | + WQ_MEM_RECLAIM, 1); + if (!lmac->check_link) + return -ENOMEM; + INIT_DELAYED_WORK(&lmac->dwork, bgx_poll_for_link); + queue_delayed_work(lmac->check_link, &lmac->dwork, 0); + } + + return 0; +} + +static void bgx_lmac_disable(struct bgx *bgx, u8 lmacid) +{ + struct lmac *lmac; + u64 cmrx_cfg; + + lmac = &bgx->lmac[lmacid]; + if (lmac->check_link) { + /* Destroy work queue */ + cancel_delayed_work(&lmac->dwork); + flush_workqueue(lmac->check_link); + destroy_workqueue(lmac->check_link); + } + + cmrx_cfg = bgx_reg_read(bgx, lmacid, BGX_CMRX_CFG); + cmrx_cfg &= ~(1 << 15); + bgx_reg_write(bgx, lmacid, BGX_CMRX_CFG, cmrx_cfg); + bgx_flush_dmac_addrs(bgx, lmacid); + + if ((bgx->lmac_type != BGX_MODE_XFI) && + (bgx->lmac_type != BGX_MODE_XLAUI) && + (bgx->lmac_type != BGX_MODE_40G_KR) && + (bgx->lmac_type != BGX_MODE_10G_KR)) { + if (lmac->phydev) + phy_disconnect(lmac->phydev); + } + lmac->phydev = NULL; +} + +static void bgx_set_num_ports(struct bgx *bgx) +{ + u64 lmac_count; + + switch (bgx->qlm_mode) { + case QLM_MODE_SGMII: + bgx->lmac_count = 4; + bgx->lmac_type = BGX_MODE_SGMII; + bgx->lane_to_sds = 0; + break; + case QLM_MODE_XAUI_1X4: + bgx->lmac_count = 1; + bgx->lmac_type = BGX_MODE_XAUI; + bgx->lane_to_sds = 0xE4; + break; + case QLM_MODE_RXAUI_2X2: + bgx->lmac_count = 2; + bgx->lmac_type = BGX_MODE_RXAUI; + bgx->lane_to_sds = 0xE4; + break; + case QLM_MODE_XFI_4X1: + bgx->lmac_count = 4; + bgx->lmac_type = BGX_MODE_XFI; + bgx->lane_to_sds = 0; + break; + case QLM_MODE_XLAUI_1X4: + bgx->lmac_count = 1; + bgx->lmac_type = BGX_MODE_XLAUI; + bgx->lane_to_sds = 0xE4; + break; + case QLM_MODE_10G_KR_4X1: + bgx->lmac_count = 4; + bgx->lmac_type = BGX_MODE_10G_KR; + bgx->lane_to_sds = 0; + bgx->use_training = 1; + break; + case QLM_MODE_40G_KR4_1X4: + bgx->lmac_count = 1; + bgx->lmac_type = BGX_MODE_40G_KR; + bgx->lane_to_sds = 0xE4; + bgx->use_training = 1; + break; + default: + bgx->lmac_count = 0; + break; + } + + /* Check if low level firmware has programmed LMAC count + * based on board type, if yes consider that otherwise + * the default static values + */ + lmac_count = bgx_reg_read(bgx, 0, BGX_CMR_RX_LMACS) & 0x7; + if (lmac_count != 4) + bgx->lmac_count = lmac_count; +} + +static void bgx_init_hw(struct bgx *bgx) +{ + int i; + + bgx_set_num_ports(bgx); + + bgx_reg_modify(bgx, 0, BGX_CMR_GLOBAL_CFG, CMR_GLOBAL_CFG_FCS_STRIP); + if (bgx_reg_read(bgx, 0, BGX_CMR_BIST_STATUS)) + dev_err(&bgx->pdev->dev, "BGX%d BIST failed\n", bgx->bgx_id); + + /* Set lmac type and lane2serdes mapping */ + for (i = 0; i < bgx->lmac_count; i++) { + if (bgx->lmac_type == BGX_MODE_RXAUI) { + if (i) + bgx->lane_to_sds = 0x0e; + else + bgx->lane_to_sds = 0x04; + bgx_reg_write(bgx, i, BGX_CMRX_CFG, + (bgx->lmac_type << 8) | bgx->lane_to_sds); + continue; + } + bgx_reg_write(bgx, i, BGX_CMRX_CFG, + (bgx->lmac_type << 8) | (bgx->lane_to_sds + i)); + bgx->lmac[i].lmacid_bd = lmac_count; + lmac_count++; + } + + bgx_reg_write(bgx, 0, BGX_CMR_TX_LMACS, bgx->lmac_count); + bgx_reg_write(bgx, 0, BGX_CMR_RX_LMACS, bgx->lmac_count); + + /* Set the backpressure AND mask */ + for (i = 0; i < bgx->lmac_count; i++) + bgx_reg_modify(bgx, 0, BGX_CMR_CHAN_MSK_AND, + ((1ULL << MAX_BGX_CHANS_PER_LMAC) - 1) << + (i * MAX_BGX_CHANS_PER_LMAC)); + + /* Disable all MAC filtering */ + for (i = 0; i < RX_DMAC_COUNT; i++) + bgx_reg_write(bgx, 0, BGX_CMR_RX_DMACX_CAM + (i * 8), 0x00); + + /* Disable MAC steering (NCSI traffic) */ + for (i = 0; i < RX_TRAFFIC_STEER_RULE_COUNT; i++) + bgx_reg_write(bgx, 0, BGX_CMR_RX_STREERING + (i * 8), 0x00); +} + +static void bgx_get_qlm_mode(struct bgx *bgx) +{ + struct device *dev = &bgx->pdev->dev; + int lmac_type; + int train_en; + + /* Read LMAC0 type to figure out QLM mode + * This is configured by low level firmware + */ + lmac_type = bgx_reg_read(bgx, 0, BGX_CMRX_CFG); + lmac_type = (lmac_type >> 8) & 0x07; + + train_en = bgx_reg_read(bgx, 0, BGX_SPUX_BR_PMD_CRTL) & + SPU_PMD_CRTL_TRAIN_EN; + + switch (lmac_type) { + case BGX_MODE_SGMII: + bgx->qlm_mode = QLM_MODE_SGMII; + dev_info(dev, "BGX%d QLM mode: SGMII\n", bgx->bgx_id); + break; + case BGX_MODE_XAUI: + bgx->qlm_mode = QLM_MODE_XAUI_1X4; + dev_info(dev, "BGX%d QLM mode: XAUI\n", bgx->bgx_id); + break; + case BGX_MODE_RXAUI: + bgx->qlm_mode = QLM_MODE_RXAUI_2X2; + dev_info(dev, "BGX%d QLM mode: RXAUI\n", bgx->bgx_id); + break; + case BGX_MODE_XFI: + if (!train_en) { + bgx->qlm_mode = QLM_MODE_XFI_4X1; + dev_info(dev, "BGX%d QLM mode: XFI\n", bgx->bgx_id); + } else { + bgx->qlm_mode = QLM_MODE_10G_KR_4X1; + dev_info(dev, "BGX%d QLM mode: 10G_KR\n", bgx->bgx_id); + } + break; + case BGX_MODE_XLAUI: + if (!train_en) { + bgx->qlm_mode = QLM_MODE_XLAUI_1X4; + dev_info(dev, "BGX%d QLM mode: XLAUI\n", bgx->bgx_id); + } else { + bgx->qlm_mode = QLM_MODE_40G_KR4_1X4; + dev_info(dev, "BGX%d QLM mode: 40G_KR4\n", bgx->bgx_id); + } + break; + default: + bgx->qlm_mode = QLM_MODE_SGMII; + dev_info(dev, "BGX%d QLM default mode: SGMII\n", bgx->bgx_id); + } +} + +#ifdef CONFIG_ACPI + +static int bgx_match_phy_id(struct device *dev, void *data) +{ + struct phy_device *phydev = to_phy_device(dev); + u32 *phy_id = data; + + if (phydev->addr == *phy_id) + return 1; + + return 0; +} + +static const char *addr_propnames[] = { + "mac-address", + "local-mac-address", + "address", +}; + +static int acpi_get_mac_address(struct acpi_device *adev, u8 *dst) +{ + u64 mac; + int i; + int ret; + + for (i = 0; i < ARRAY_SIZE(addr_propnames); i++) { + ret = acpi_dev_prop_read_single(adev, addr_propnames[i], + DEV_PROP_U64, &mac); + if (ret) + continue; + + if (mac & (~0ULL << 48)) + continue; /* more than 6 bytes */ + + if (!is_valid_ether_addr((u8 *)&mac)) + continue; + + ether_addr_copy(dst, (u8 *)&mac); + + return 0; + } + + return ret ? ret : -EINVAL; +} + +static acpi_status bgx_acpi_register_phy(acpi_handle handle, + u32 lvl, void *context, void **rv) +{ + struct acpi_reference_args args; + struct bgx *bgx = context; + struct acpi_device *adev; + struct device *phy_dev; + u32 phy_id; + + if (acpi_bus_get_device(handle, &adev)) + return AE_OK; + + if (acpi_dev_get_property_reference(adev, "phy-handle", 0, &args)) + return AE_OK; + + if (acpi_dev_prop_read_single(args.adev, "phy-channel", DEV_PROP_U32, + &phy_id)) + return AE_OK; + + phy_dev = bus_find_device(&mdio_bus_type, NULL, (void *)&phy_id, + bgx_match_phy_id); + if (!phy_dev) + return AE_OK; + + SET_NETDEV_DEV(&bgx->lmac[bgx->lmac_count].netdev, &bgx->pdev->dev); + bgx->lmac[bgx->lmac_count].phydev = to_phy_device(phy_dev); + + acpi_get_mac_address(adev, bgx->lmac[bgx->lmac_count].mac); + + bgx->lmac[bgx->lmac_count].lmacid = bgx->lmac_count; + bgx->lmac_count++; + + return AE_OK; +} + +static acpi_status bgx_acpi_match_id(acpi_handle handle, u32 lvl, + void *context, void **ret_val) +{ + struct acpi_buffer string = { ACPI_ALLOCATE_BUFFER, NULL }; + struct bgx *bgx = context; + char bgx_sel[5]; + + snprintf(bgx_sel, 5, "BGX%d", bgx->bgx_id); + if (ACPI_FAILURE(acpi_get_name(handle, ACPI_SINGLE_NAME, &string))) { + pr_warn("Invalid link device\n"); + return AE_OK; + } + + if (strncmp(string.pointer, bgx_sel, 4)) + return AE_OK; + + acpi_walk_namespace(ACPI_TYPE_DEVICE, handle, 1, + bgx_acpi_register_phy, NULL, bgx, NULL); + + kfree(string.pointer); + return AE_CTRL_TERMINATE; +} + +static int bgx_init_acpi_phy(struct bgx *bgx) +{ + acpi_get_devices(NULL, bgx_acpi_match_id, bgx, (void **)NULL); + return 0; +} + +#else + +static int bgx_init_acpi_phy(struct bgx *bgx) +{ + return -ENODEV; +} + +#endif /* CONFIG_ACPI */ + +#if IS_ENABLED(CONFIG_OF_MDIO) + +static int bgx_init_of_phy(struct bgx *bgx) +{ + struct device_node *np; + struct device_node *np_child; + u8 lmac = 0; + char bgx_sel[5]; + const char *mac; + + /* Get BGX node from DT */ + snprintf(bgx_sel, 5, "bgx%d", bgx->bgx_id); + np = of_find_node_by_name(NULL, bgx_sel); + if (!np) + return -ENODEV; + + for_each_child_of_node(np, np_child) { + struct device_node *phy_np = of_parse_phandle(np_child, + "phy-handle", 0); + if (!phy_np) + continue; + bgx->lmac[lmac].phydev = of_phy_find_device(phy_np); + + mac = of_get_mac_address(np_child); + if (mac) + ether_addr_copy(bgx->lmac[lmac].mac, mac); + + SET_NETDEV_DEV(&bgx->lmac[lmac].netdev, &bgx->pdev->dev); + bgx->lmac[lmac].lmacid = lmac; + lmac++; + if (lmac == MAX_LMAC_PER_BGX) + break; + } + return 0; +} + +#else + +static int bgx_init_of_phy(struct bgx *bgx) +{ + return -ENODEV; +} + +#endif /* CONFIG_OF_MDIO */ + +static int bgx_init_phy(struct bgx *bgx) +{ + int err = bgx_init_of_phy(bgx); + + if (err != -ENODEV) + return err; + + return bgx_init_acpi_phy(bgx); +} + +static int bgx_probe(struct pci_dev *pdev, const struct pci_device_id *ent) +{ + int err; + struct device *dev = &pdev->dev; + struct bgx *bgx = NULL; + u8 lmac; + + bgx = devm_kzalloc(dev, sizeof(*bgx), GFP_KERNEL); + if (!bgx) + return -ENOMEM; + bgx->pdev = pdev; + + pci_set_drvdata(pdev, bgx); + + err = pci_enable_device(pdev); + if (err) { + dev_err(dev, "Failed to enable PCI device\n"); + pci_set_drvdata(pdev, NULL); + return err; + } + + err = pci_request_regions(pdev, DRV_NAME); + if (err) { + dev_err(dev, "PCI request regions failed 0x%x\n", err); + goto err_disable_device; + } + + /* MAP configuration registers */ + bgx->reg_base = pcim_iomap(pdev, PCI_CFG_REG_BAR_NUM, 0); + if (!bgx->reg_base) { + dev_err(dev, "BGX: Cannot map CSR memory space, aborting\n"); + err = -ENOMEM; + goto err_release_regions; + } + bgx->bgx_id = (pci_resource_start(pdev, PCI_CFG_REG_BAR_NUM) >> 24) & 1; + bgx->bgx_id += nic_get_node_id(pdev) * MAX_BGX_PER_CN88XX; + + bgx_vnic[bgx->bgx_id] = bgx; + bgx_get_qlm_mode(bgx); + + err = bgx_init_phy(bgx); + if (err) + goto err_enable; + + bgx_init_hw(bgx); + + /* Enable all LMACs */ + for (lmac = 0; lmac < bgx->lmac_count; lmac++) { + err = bgx_lmac_enable(bgx, lmac); + if (err) { + dev_err(dev, "BGX%d failed to enable lmac%d\n", + bgx->bgx_id, lmac); + goto err_enable; + } + } + + return 0; + +err_enable: + bgx_vnic[bgx->bgx_id] = NULL; +err_release_regions: + pci_release_regions(pdev); +err_disable_device: + pci_disable_device(pdev); + pci_set_drvdata(pdev, NULL); + return err; +} + +static void bgx_remove(struct pci_dev *pdev) +{ + struct bgx *bgx = pci_get_drvdata(pdev); + u8 lmac; + + /* Disable all LMACs */ + for (lmac = 0; lmac < bgx->lmac_count; lmac++) + bgx_lmac_disable(bgx, lmac); + + bgx_vnic[bgx->bgx_id] = NULL; + pci_release_regions(pdev); + pci_disable_device(pdev); + pci_set_drvdata(pdev, NULL); +} + +static struct pci_driver bgx_driver = { + .name = DRV_NAME, + .id_table = bgx_id_table, + .probe = bgx_probe, + .remove = bgx_remove, +}; + +static int __init bgx_init_module(void) +{ + pr_info("%s, ver %s\n", DRV_NAME, DRV_VERSION); + + return pci_register_driver(&bgx_driver); +} + +static void __exit bgx_cleanup_module(void) +{ + pci_unregister_driver(&bgx_driver); +} + +module_init(bgx_init_module); +module_exit(bgx_cleanup_module); diff --git a/sys/dev/vnic/thunder_bgx.h b/sys/dev/vnic/thunder_bgx.h new file mode 100644 index 000000000000..b1c761866b55 --- /dev/null +++ b/sys/dev/vnic/thunder_bgx.h @@ -0,0 +1,248 @@ +/* + * Copyright (C) 2015 Cavium Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + * + */ + +#ifndef THUNDER_BGX_H +#define THUNDER_BGX_H + +#define MAX_BGX_THUNDER 8 /* Max 4 nodes, 2 per node */ +#define MAX_BGX_PER_CN88XX 2 +#define MAX_LMAC_PER_BGX 4 +#define MAX_BGX_CHANS_PER_LMAC 16 +#define MAX_DMAC_PER_LMAC 8 +#define MAX_FRAME_SIZE 9216 + +#define MAX_DMAC_PER_LMAC_TNS_BYPASS_MODE 2 + +#define MAX_LMAC (MAX_BGX_PER_CN88XX * MAX_LMAC_PER_BGX) + +/* Registers */ +#define BGX_CMRX_CFG 0x00 +#define CMR_PKT_TX_EN BIT_ULL(13) +#define CMR_PKT_RX_EN BIT_ULL(14) +#define CMR_EN BIT_ULL(15) +#define BGX_CMR_GLOBAL_CFG 0x08 +#define CMR_GLOBAL_CFG_FCS_STRIP BIT_ULL(6) +#define BGX_CMRX_RX_ID_MAP 0x60 +#define BGX_CMRX_RX_STAT0 0x70 +#define BGX_CMRX_RX_STAT1 0x78 +#define BGX_CMRX_RX_STAT2 0x80 +#define BGX_CMRX_RX_STAT3 0x88 +#define BGX_CMRX_RX_STAT4 0x90 +#define BGX_CMRX_RX_STAT5 0x98 +#define BGX_CMRX_RX_STAT6 0xA0 +#define BGX_CMRX_RX_STAT7 0xA8 +#define BGX_CMRX_RX_STAT8 0xB0 +#define BGX_CMRX_RX_STAT9 0xB8 +#define BGX_CMRX_RX_STAT10 0xC0 +#define BGX_CMRX_RX_BP_DROP 0xC8 +#define BGX_CMRX_RX_DMAC_CTL 0x0E8 +#define BGX_CMR_RX_DMACX_CAM 0x200 +#define RX_DMACX_CAM_EN BIT_ULL(48) +#define RX_DMACX_CAM_LMACID(x) (x << 49) +#define RX_DMAC_COUNT 32 +#define BGX_CMR_RX_STREERING 0x300 +#define RX_TRAFFIC_STEER_RULE_COUNT 8 +#define BGX_CMR_CHAN_MSK_AND 0x450 +#define BGX_CMR_BIST_STATUS 0x460 +#define BGX_CMR_RX_LMACS 0x468 +#define BGX_CMRX_TX_STAT0 0x600 +#define BGX_CMRX_TX_STAT1 0x608 +#define BGX_CMRX_TX_STAT2 0x610 +#define BGX_CMRX_TX_STAT3 0x618 +#define BGX_CMRX_TX_STAT4 0x620 +#define BGX_CMRX_TX_STAT5 0x628 +#define BGX_CMRX_TX_STAT6 0x630 +#define BGX_CMRX_TX_STAT7 0x638 +#define BGX_CMRX_TX_STAT8 0x640 +#define BGX_CMRX_TX_STAT9 0x648 +#define BGX_CMRX_TX_STAT10 0x650 +#define BGX_CMRX_TX_STAT11 0x658 +#define BGX_CMRX_TX_STAT12 0x660 +#define BGX_CMRX_TX_STAT13 0x668 +#define BGX_CMRX_TX_STAT14 0x670 +#define BGX_CMRX_TX_STAT15 0x678 +#define BGX_CMRX_TX_STAT16 0x680 +#define BGX_CMRX_TX_STAT17 0x688 +#define BGX_CMR_TX_LMACS 0x1000 + +#define BGX_SPUX_CONTROL1 0x10000 +#define SPU_CTL_LOW_POWER BIT_ULL(11) +#define SPU_CTL_LOOPBACK BIT_ULL(14) +#define SPU_CTL_RESET BIT_ULL(15) +#define BGX_SPUX_STATUS1 0x10008 +#define SPU_STATUS1_RCV_LNK BIT_ULL(2) +#define BGX_SPUX_STATUS2 0x10020 +#define SPU_STATUS2_RCVFLT BIT_ULL(10) +#define BGX_SPUX_BX_STATUS 0x10028 +#define SPU_BX_STATUS_RX_ALIGN BIT_ULL(12) +#define BGX_SPUX_BR_STATUS1 0x10030 +#define SPU_BR_STATUS_BLK_LOCK BIT_ULL(0) +#define SPU_BR_STATUS_RCV_LNK BIT_ULL(12) +#define BGX_SPUX_BR_PMD_CRTL 0x10068 +#define SPU_PMD_CRTL_TRAIN_EN BIT_ULL(1) +#define BGX_SPUX_BR_PMD_LP_CUP 0x10078 +#define BGX_SPUX_BR_PMD_LD_CUP 0x10088 +#define BGX_SPUX_BR_PMD_LD_REP 0x10090 +#define BGX_SPUX_FEC_CONTROL 0x100A0 +#define SPU_FEC_CTL_FEC_EN BIT_ULL(0) +#define SPU_FEC_CTL_ERR_EN BIT_ULL(1) +#define BGX_SPUX_AN_CONTROL 0x100C8 +#define SPU_AN_CTL_AN_EN BIT_ULL(12) +#define SPU_AN_CTL_XNP_EN BIT_ULL(13) +#define BGX_SPUX_AN_ADV 0x100D8 +#define BGX_SPUX_MISC_CONTROL 0x10218 +#define SPU_MISC_CTL_INTLV_RDISP BIT_ULL(10) +#define SPU_MISC_CTL_RX_DIS BIT_ULL(12) +#define BGX_SPUX_INT 0x10220 /* +(0..3) << 20 */ +#define BGX_SPUX_INT_W1S 0x10228 +#define BGX_SPUX_INT_ENA_W1C 0x10230 +#define BGX_SPUX_INT_ENA_W1S 0x10238 +#define BGX_SPU_DBG_CONTROL 0x10300 +#define SPU_DBG_CTL_AN_ARB_LINK_CHK_EN BIT_ULL(18) +#define SPU_DBG_CTL_AN_NONCE_MCT_DIS BIT_ULL(29) + +#define BGX_SMUX_RX_INT 0x20000 +#define BGX_SMUX_RX_JABBER 0x20030 +#define BGX_SMUX_RX_CTL 0x20048 +#define SMU_RX_CTL_STATUS (3ull << 0) +#define BGX_SMUX_TX_APPEND 0x20100 +#define SMU_TX_APPEND_FCS_D BIT_ULL(2) +#define BGX_SMUX_TX_MIN_PKT 0x20118 +#define BGX_SMUX_TX_INT 0x20140 +#define BGX_SMUX_TX_CTL 0x20178 +#define SMU_TX_CTL_DIC_EN BIT_ULL(0) +#define SMU_TX_CTL_UNI_EN BIT_ULL(1) +#define SMU_TX_CTL_LNK_STATUS (3ull << 4) +#define BGX_SMUX_TX_THRESH 0x20180 +#define BGX_SMUX_CTL 0x20200 +#define SMU_CTL_RX_IDLE BIT_ULL(0) +#define SMU_CTL_TX_IDLE BIT_ULL(1) + +#define BGX_GMP_PCS_MRX_CTL 0x30000 +#define PCS_MRX_CTL_RST_AN BIT_ULL(9) +#define PCS_MRX_CTL_PWR_DN BIT_ULL(11) +#define PCS_MRX_CTL_AN_EN BIT_ULL(12) +#define PCS_MRX_CTL_LOOPBACK1 BIT_ULL(14) +#define PCS_MRX_CTL_RESET BIT_ULL(15) +#define BGX_GMP_PCS_MRX_STATUS 0x30008 +#define PCS_MRX_STATUS_AN_CPT BIT_ULL(5) +#define BGX_GMP_PCS_ANX_AN_RESULTS 0x30020 +#define BGX_GMP_PCS_SGM_AN_ADV 0x30068 +#define BGX_GMP_PCS_MISCX_CTL 0x30078 +#define PCS_MISC_CTL_GMX_ENO BIT_ULL(11) +#define PCS_MISC_CTL_SAMP_PT_MASK 0x7Full +#define BGX_GMP_GMI_PRTX_CFG 0x38020 +#define GMI_PORT_CFG_SPEED BIT_ULL(1) +#define GMI_PORT_CFG_DUPLEX BIT_ULL(2) +#define GMI_PORT_CFG_SLOT_TIME BIT_ULL(3) +#define GMI_PORT_CFG_SPEED_MSB BIT_ULL(8) +#define BGX_GMP_GMI_RXX_JABBER 0x38038 +#define BGX_GMP_GMI_TXX_THRESH 0x38210 +#define BGX_GMP_GMI_TXX_APPEND 0x38218 +#define BGX_GMP_GMI_TXX_SLOT 0x38220 +#define BGX_GMP_GMI_TXX_BURST 0x38228 +#define BGX_GMP_GMI_TXX_MIN_PKT 0x38240 +#define BGX_GMP_GMI_TXX_SGMII_CTL 0x38300 + +#define BGX_MSIX_VEC_0_29_ADDR 0x400000 /* +(0..29) << 4 */ +#define BGX_MSIX_VEC_0_29_CTL 0x400008 +#define BGX_MSIX_PBA_0 0x4F0000 + +/* MSI-X interrupts */ +#define BGX_MSIX_VECTORS 30 +#define BGX_LMAC_VEC_OFFSET 7 +#define BGX_MSIX_VEC_SHIFT 4 + +#define CMRX_INT 0 +#define SPUX_INT 1 +#define SMUX_RX_INT 2 +#define SMUX_TX_INT 3 +#define GMPX_PCS_INT 4 +#define GMPX_GMI_RX_INT 5 +#define GMPX_GMI_TX_INT 6 +#define CMR_MEM_INT 28 +#define SPU_MEM_INT 29 + +#define LMAC_INTR_LINK_UP BIT(0) +#define LMAC_INTR_LINK_DOWN BIT(1) + +/* RX_DMAC_CTL configuration*/ +enum MCAST_MODE { + MCAST_MODE_REJECT, + MCAST_MODE_ACCEPT, + MCAST_MODE_CAM_FILTER, + RSVD +}; + +#define BCAST_ACCEPT 1 +#define CAM_ACCEPT 1 + +void octeon_mdiobus_force_mod_depencency(void); +void bgx_add_dmac_addr(u64 dmac, int node, int bgx_idx, int lmac); +unsigned bgx_get_map(int node); +int bgx_get_lmac_count(int node, int bgx); +const u8 *bgx_get_lmac_mac(int node, int bgx_idx, int lmacid); +void bgx_set_lmac_mac(int node, int bgx_idx, int lmacid, const u8 *mac); +void bgx_get_lmac_link_state(int node, int bgx_idx, int lmacid, void *status); +void bgx_lmac_internal_loopback(int node, int bgx_idx, + int lmac_idx, bool enable); +u64 bgx_get_rx_stats(int node, int bgx_idx, int lmac, int idx); +u64 bgx_get_tx_stats(int node, int bgx_idx, int lmac, int idx); +#define BGX_RX_STATS_COUNT 11 +#define BGX_TX_STATS_COUNT 18 + +struct bgx_stats { + u64 rx_stats[BGX_RX_STATS_COUNT]; + u64 tx_stats[BGX_TX_STATS_COUNT]; +}; + +#define BGX_IN_PROMISCUOUS_MODE 1 + +enum LMAC_TYPE { + BGX_MODE_SGMII = 0, /* 1 lane, 1.250 Gbaud */ + BGX_MODE_XAUI = 1, /* 4 lanes, 3.125 Gbaud */ + BGX_MODE_DXAUI = 1, /* 4 lanes, 6.250 Gbaud */ + BGX_MODE_RXAUI = 2, /* 2 lanes, 6.250 Gbaud */ + BGX_MODE_XFI = 3, /* 1 lane, 10.3125 Gbaud */ + BGX_MODE_XLAUI = 4, /* 4 lanes, 10.3125 Gbaud */ + BGX_MODE_10G_KR = 3,/* 1 lane, 10.3125 Gbaud */ + BGX_MODE_40G_KR = 4,/* 4 lanes, 10.3125 Gbaud */ +}; + +enum qlm_mode { + QLM_MODE_SGMII, /* SGMII, each lane independent */ + QLM_MODE_XAUI_1X4, /* 1 XAUI or DXAUI, 4 lanes */ + QLM_MODE_RXAUI_2X2, /* 2 RXAUI, 2 lanes each */ + QLM_MODE_XFI_4X1, /* 4 XFI, 1 lane each */ + QLM_MODE_XLAUI_1X4, /* 1 XLAUI, 4 lanes each */ + QLM_MODE_10G_KR_4X1, /* 4 10GBASE-KR, 1 lane each */ + QLM_MODE_40G_KR4_1X4, /* 1 40GBASE-KR4, 4 lanes each */ +}; + +#endif /* THUNDER_BGX_H */ |