aboutsummaryrefslogtreecommitdiff
path: root/sys/dev/netmap
diff options
context:
space:
mode:
authorLuigi Rizzo <luigi@FreeBSD.org>2011-11-17 12:17:39 +0000
committerLuigi Rizzo <luigi@FreeBSD.org>2011-11-17 12:17:39 +0000
commit68b8534bdfeb5078e84d668124e7585e43b03502 (patch)
tree8be7a4f824011375a281269e79b86fa172e84386 /sys/dev/netmap
parenta93c40bb620da1a24e2e07b9bc0734736c1dae77 (diff)
Notes
Diffstat (limited to 'sys/dev/netmap')
-rw-r--r--sys/dev/netmap/head.diff654
-rw-r--r--sys/dev/netmap/if_em_netmap.h383
-rw-r--r--sys/dev/netmap/if_igb_netmap.h378
-rw-r--r--sys/dev/netmap/if_lem_netmap.h344
-rw-r--r--sys/dev/netmap/if_re_netmap.h415
-rw-r--r--sys/dev/netmap/ixgbe_netmap.h376
-rw-r--r--sys/dev/netmap/netmap.c1762
-rw-r--r--sys/dev/netmap/netmap_kern.h221
8 files changed, 4533 insertions, 0 deletions
diff --git a/sys/dev/netmap/head.diff b/sys/dev/netmap/head.diff
new file mode 100644
index 0000000000000..51a8e34e74d12
--- /dev/null
+++ b/sys/dev/netmap/head.diff
@@ -0,0 +1,654 @@
+Index: conf/NOTES
+===================================================================
+--- conf/NOTES (revision 227552)
++++ conf/NOTES (working copy)
+@@ -799,6 +799,12 @@
+ # option. DHCP requires bpf.
+ device bpf
+
++# The `netmap' device implements memory-mapped access to network
++# devices from userspace, enabling wire-speed packet capture and
++# generation even at 10Gbit/s. Requires support in the device
++# driver. Supported drivers are ixgbe, e1000, re.
++device netmap
++
+ # The `disc' device implements a minimal network interface,
+ # which throws away all packets sent and never receives any. It is
+ # included for testing and benchmarking purposes.
+Index: conf/files
+===================================================================
+--- conf/files (revision 227552)
++++ conf/files (working copy)
+@@ -1507,6 +1507,7 @@
+ dev/my/if_my.c optional my
+ dev/ncv/ncr53c500.c optional ncv
+ dev/ncv/ncr53c500_pccard.c optional ncv pccard
++dev/netmap/netmap.c optional netmap
+ dev/nge/if_nge.c optional nge
+ dev/nxge/if_nxge.c optional nxge
+ dev/nxge/xgehal/xgehal-device.c optional nxge
+Index: conf/options
+===================================================================
+--- conf/options (revision 227552)
++++ conf/options (working copy)
+@@ -689,6 +689,7 @@
+
+ # various 'device presence' options.
+ DEV_BPF opt_bpf.h
++DEV_NETMAP opt_global.h
+ DEV_MCA opt_mca.h
+ DEV_CARP opt_carp.h
+ DEV_SPLASH opt_splash.h
+Index: dev/e1000/if_igb.c
+===================================================================
+--- dev/e1000/if_igb.c (revision 227552)
++++ dev/e1000/if_igb.c (working copy)
+@@ -369,6 +369,9 @@
+ &igb_rx_process_limit, 0,
+ "Maximum number of received packets to process at a time, -1 means unlimited");
+
++#ifdef DEV_NETMAP
++#include <dev/netmap/if_igb_netmap.h>
++#endif /* DEV_NETMAP */
+ /*********************************************************************
+ * Device identification routine
+ *
+@@ -664,6 +667,9 @@
+ adapter->led_dev = led_create(igb_led_func, adapter,
+ device_get_nameunit(dev));
+
++#ifdef DEV_NETMAP
++ igb_netmap_attach(adapter);
++#endif /* DEV_NETMAP */
+ INIT_DEBUGOUT("igb_attach: end");
+
+ return (0);
+@@ -742,6 +748,9 @@
+
+ callout_drain(&adapter->timer);
+
++#ifdef DEV_NETMAP
++ netmap_detach(adapter->ifp);
++#endif /* DEV_NETMAP */
+ igb_free_pci_resources(adapter);
+ bus_generic_detach(dev);
+ if_free(ifp);
+@@ -3212,6 +3221,10 @@
+ struct adapter *adapter = txr->adapter;
+ struct igb_tx_buffer *txbuf;
+ int i;
++#ifdef DEV_NETMAP
++ struct netmap_slot *slot = netmap_reset(NA(adapter->ifp),
++ NR_TX, txr->me, 0);
++#endif
+
+ /* Clear the old descriptor contents */
+ IGB_TX_LOCK(txr);
+@@ -3231,6 +3244,13 @@
+ m_freem(txbuf->m_head);
+ txbuf->m_head = NULL;
+ }
++#ifdef DEV_NETMAP
++ if (slot) {
++ netmap_load_map(txr->txtag, txbuf->map,
++ NMB(slot), adapter->rx_mbuf_sz);
++ slot++;
++ }
++#endif /* DEV_NETMAP */
+ /* clear the watch index */
+ txbuf->next_eop = -1;
+ }
+@@ -3626,6 +3646,19 @@
+
+ IGB_TX_LOCK_ASSERT(txr);
+
++#ifdef DEV_NETMAP
++ if (ifp->if_capenable & IFCAP_NETMAP) {
++ struct netmap_adapter *na = NA(ifp);
++
++ selwakeuppri(&na->tx_rings[txr->me].si, PI_NET);
++ IGB_TX_UNLOCK(txr);
++ IGB_CORE_LOCK(adapter);
++ selwakeuppri(&na->tx_rings[na->num_queues + 1].si, PI_NET);
++ IGB_CORE_UNLOCK(adapter);
++ IGB_TX_LOCK(txr); // the caller is supposed to own the lock
++ return FALSE;
++ }
++#endif /* DEV_NETMAP */
+ if (txr->tx_avail == adapter->num_tx_desc) {
+ txr->queue_status = IGB_QUEUE_IDLE;
+ return FALSE;
+@@ -3949,6 +3982,10 @@
+ bus_dma_segment_t pseg[1], hseg[1];
+ struct lro_ctrl *lro = &rxr->lro;
+ int rsize, nsegs, error = 0;
++#ifdef DEV_NETMAP
++ struct netmap_slot *slot = netmap_reset(NA(rxr->adapter->ifp),
++ NR_RX, rxr->me, 0);
++#endif
+
+ adapter = rxr->adapter;
+ dev = adapter->dev;
+@@ -3974,6 +4011,18 @@
+ struct mbuf *mh, *mp;
+
+ rxbuf = &rxr->rx_buffers[j];
++#ifdef DEV_NETMAP
++ if (slot) {
++ netmap_load_map(rxr->ptag,
++ rxbuf->pmap, NMB(slot),
++ adapter->rx_mbuf_sz);
++ /* Update descriptor */
++ rxr->rx_base[j].read.pkt_addr =
++ htole64(vtophys(NMB(slot)));
++ slot++;
++ continue;
++ }
++#endif /* DEV_NETMAP */
+ if (rxr->hdr_split == FALSE)
+ goto skip_head;
+
+@@ -4436,6 +4485,19 @@
+ bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
+ BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
+
++#ifdef DEV_NETMAP
++ if (ifp->if_capenable & IFCAP_NETMAP) {
++ struct netmap_adapter *na = NA(ifp);
++
++ selwakeuppri(&na->rx_rings[rxr->me].si, PI_NET);
++ IGB_RX_UNLOCK(rxr);
++ IGB_CORE_LOCK(adapter);
++ selwakeuppri(&na->rx_rings[na->num_queues + 1].si, PI_NET);
++ IGB_CORE_UNLOCK(adapter);
++ return (0);
++ }
++#endif /* DEV_NETMAP */
++
+ /* Main clean loop */
+ for (i = rxr->next_to_check; count != 0;) {
+ struct mbuf *sendmp, *mh, *mp;
+Index: dev/e1000/if_lem.c
+===================================================================
+--- dev/e1000/if_lem.c (revision 227552)
++++ dev/e1000/if_lem.c (working copy)
+@@ -316,6 +316,10 @@
+ /* Global used in WOL setup with multiport cards */
+ static int global_quad_port_a = 0;
+
++#ifdef DEV_NETMAP
++#include <dev/netmap/if_lem_netmap.h>
++#endif /* DEV_NETMAP */
++
+ /*********************************************************************
+ * Device identification routine
+ *
+@@ -646,6 +650,9 @@
+ adapter->led_dev = led_create(lem_led_func, adapter,
+ device_get_nameunit(dev));
+
++#ifdef DEV_NETMAP
++ lem_netmap_attach(adapter);
++#endif /* DEV_NETMAP */
+ INIT_DEBUGOUT("lem_attach: end");
+
+ return (0);
+@@ -724,6 +731,9 @@
+ callout_drain(&adapter->timer);
+ callout_drain(&adapter->tx_fifo_timer);
+
++#ifdef DEV_NETMAP
++ netmap_detach(ifp);
++#endif /* DEV_NETMAP */
+ lem_free_pci_resources(adapter);
+ bus_generic_detach(dev);
+ if_free(ifp);
+@@ -2637,6 +2647,9 @@
+ lem_setup_transmit_structures(struct adapter *adapter)
+ {
+ struct em_buffer *tx_buffer;
++#ifdef DEV_NETMAP
++ struct netmap_slot *slot = netmap_reset(NA(adapter->ifp), NR_TX, 0, 0);
++#endif
+
+ /* Clear the old ring contents */
+ bzero(adapter->tx_desc_base,
+@@ -2650,6 +2663,15 @@
+ bus_dmamap_unload(adapter->txtag, tx_buffer->map);
+ m_freem(tx_buffer->m_head);
+ tx_buffer->m_head = NULL;
++#ifdef DEV_NETMAP
++ if (slot) {
++ /* reload the map for netmap mode */
++ netmap_load_map(adapter->txtag,
++ tx_buffer->map, NMB(slot),
++ NA(adapter->ifp)->buff_size);
++ slot++;
++ }
++#endif /* DEV_NETMAP */
+ tx_buffer->next_eop = -1;
+ }
+
+@@ -2951,6 +2973,12 @@
+
+ EM_TX_LOCK_ASSERT(adapter);
+
++#ifdef DEV_NETMAP
++ if (ifp->if_capenable & IFCAP_NETMAP) {
++ selwakeuppri(&NA(ifp)->tx_rings[0].si, PI_NET);
++ return;
++ }
++#endif /* DEV_NETMAP */
+ if (adapter->num_tx_desc_avail == adapter->num_tx_desc)
+ return;
+
+@@ -3181,6 +3209,9 @@
+ {
+ struct em_buffer *rx_buffer;
+ int i, error;
++#ifdef DEV_NETMAP
++ struct netmap_slot *slot = netmap_reset(NA(adapter->ifp), NR_RX, 0, 0);
++#endif
+
+ /* Reset descriptor ring */
+ bzero(adapter->rx_desc_base,
+@@ -3200,6 +3231,18 @@
+
+ /* Allocate new ones. */
+ for (i = 0; i < adapter->num_rx_desc; i++) {
++#ifdef DEV_NETMAP
++ if (slot) {
++ netmap_load_map(adapter->rxtag,
++ rx_buffer->map, NMB(slot),
++ NA(adapter->ifp)->buff_size);
++ /* Update descriptor */
++ adapter->rx_desc_base[i].buffer_addr =
++ htole64(vtophys(NMB(slot)));
++ slot++;
++ continue;
++ }
++#endif /* DEV_NETMAP */
+ error = lem_get_buf(adapter, i);
+ if (error)
+ return (error);
+@@ -3407,6 +3450,14 @@
+ bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map,
+ BUS_DMASYNC_POSTREAD);
+
++#ifdef DEV_NETMAP
++ if (ifp->if_capenable & IFCAP_NETMAP) {
++ selwakeuppri(&NA(ifp)->rx_rings[0].si, PI_NET);
++ EM_RX_UNLOCK(adapter);
++ return (0);
++ }
++#endif /* DEV_NETMAP */
++
+ if (!((current_desc->status) & E1000_RXD_STAT_DD)) {
+ if (done != NULL)
+ *done = rx_sent;
+Index: dev/e1000/if_em.c
+===================================================================
+--- dev/e1000/if_em.c (revision 227552)
++++ dev/e1000/if_em.c (working copy)
+@@ -399,6 +399,10 @@
+ /* Global used in WOL setup with multiport cards */
+ static int global_quad_port_a = 0;
+
++#ifdef DEV_NETMAP
++#include <dev/netmap/if_em_netmap.h>
++#endif /* DEV_NETMAP */
++
+ /*********************************************************************
+ * Device identification routine
+ *
+@@ -714,6 +718,9 @@
+
+ adapter->led_dev = led_create(em_led_func, adapter,
+ device_get_nameunit(dev));
++#ifdef DEV_NETMAP
++ em_netmap_attach(adapter);
++#endif /* DEV_NETMAP */
+
+ INIT_DEBUGOUT("em_attach: end");
+
+@@ -785,6 +792,10 @@
+ ether_ifdetach(adapter->ifp);
+ callout_drain(&adapter->timer);
+
++#ifdef DEV_NETMAP
++ netmap_detach(ifp);
++#endif /* DEV_NETMAP */
++
+ em_free_pci_resources(adapter);
+ bus_generic_detach(dev);
+ if_free(ifp);
+@@ -3213,6 +3224,10 @@
+ struct adapter *adapter = txr->adapter;
+ struct em_buffer *txbuf;
+ int i;
++#ifdef DEV_NETMAP
++ struct netmap_slot *slot = netmap_reset(NA(adapter->ifp),
++ NR_TX, txr->me, 0);
++#endif
+
+ /* Clear the old descriptor contents */
+ EM_TX_LOCK(txr);
+@@ -3232,6 +3247,16 @@
+ m_freem(txbuf->m_head);
+ txbuf->m_head = NULL;
+ }
++#ifdef DEV_NETMAP
++ if (slot) {
++ /* reload the map for netmap mode */
++ netmap_load_map(txr->txtag,
++ txbuf->map, NMB(slot),
++ adapter->rx_mbuf_sz);
++ slot++;
++ }
++#endif /* DEV_NETMAP */
++
+ /* clear the watch index */
+ txbuf->next_eop = -1;
+ }
+@@ -3682,6 +3707,12 @@
+ struct ifnet *ifp = adapter->ifp;
+
+ EM_TX_LOCK_ASSERT(txr);
++#ifdef DEV_NETMAP
++ if (ifp->if_capenable & IFCAP_NETMAP) {
++ selwakeuppri(&NA(ifp)->tx_rings[txr->me].si, PI_NET);
++ return (FALSE);
++ }
++#endif /* DEV_NETMAP */
+
+ /* No work, make sure watchdog is off */
+ if (txr->tx_avail == adapter->num_tx_desc) {
+@@ -3978,6 +4009,33 @@
+ if (++j == adapter->num_rx_desc)
+ j = 0;
+ }
++#ifdef DEV_NETMAP
++ {
++ /* slot is NULL if we are not in netmap mode */
++ struct netmap_slot *slot = netmap_reset(NA(adapter->ifp),
++ NR_RX, rxr->me, rxr->next_to_check);
++ /*
++ * we need to restore all buffer addresses in the ring as they might
++ * be in the wrong state if we are exiting from netmap mode.
++ */
++ for (j = 0; j != adapter->num_rx_desc; ++j) {
++ void *addr;
++ rxbuf = &rxr->rx_buffers[j];
++ if (rxbuf->m_head == NULL && !slot)
++ continue;
++ addr = slot ? NMB(slot) : rxbuf->m_head->m_data;
++ // XXX load or reload ?
++ netmap_load_map(rxr->rxtag, rxbuf->map, addr, adapter->rx_mbuf_sz);
++ /* Update descriptor */
++ rxr->rx_base[j].buffer_addr = htole64(vtophys(addr));
++ bus_dmamap_sync(rxr->rxtag, rxbuf->map, BUS_DMASYNC_PREREAD);
++ if (slot)
++ slot++;
++ }
++ /* Setup our descriptor indices */
++ NA(adapter->ifp)->rx_rings[rxr->me].nr_hwcur = rxr->next_to_check;
++ }
++#endif /* DEV_NETMAP */
+
+ fail:
+ rxr->next_to_refresh = i;
+@@ -4247,6 +4305,14 @@
+
+ EM_RX_LOCK(rxr);
+
++#ifdef DEV_NETMAP
++ if (ifp->if_capenable & IFCAP_NETMAP) {
++ selwakeuppri(&NA(ifp)->rx_rings[rxr->me].si, PI_NET);
++ EM_RX_UNLOCK(rxr);
++ return (0);
++ }
++#endif /* DEV_NETMAP */
++
+ for (i = rxr->next_to_check, processed = 0; count != 0;) {
+
+ if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
+Index: dev/re/if_re.c
+===================================================================
+--- dev/re/if_re.c (revision 227552)
++++ dev/re/if_re.c (working copy)
+@@ -291,6 +291,10 @@
+ static void re_setwol (struct rl_softc *);
+ static void re_clrwol (struct rl_softc *);
+
++#ifdef DEV_NETMAP
++#include <dev/netmap/if_re_netmap.h>
++#endif /* DEV_NETMAP */
++
+ #ifdef RE_DIAG
+ static int re_diag (struct rl_softc *);
+ #endif
+@@ -1583,6 +1587,9 @@
+ */
+ ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
+
++#ifdef DEV_NETMAP
++ re_netmap_attach(sc);
++#endif /* DEV_NETMAP */
+ #ifdef RE_DIAG
+ /*
+ * Perform hardware diagnostic on the original RTL8169.
+@@ -1778,6 +1785,9 @@
+ bus_dma_tag_destroy(sc->rl_ldata.rl_stag);
+ }
+
++#ifdef DEV_NETMAP
++ netmap_detach(ifp);
++#endif /* DEV_NETMAP */
+ if (sc->rl_parent_tag)
+ bus_dma_tag_destroy(sc->rl_parent_tag);
+
+@@ -1952,6 +1962,9 @@
+ sc->rl_ldata.rl_tx_desc_cnt * sizeof(struct rl_desc));
+ for (i = 0; i < sc->rl_ldata.rl_tx_desc_cnt; i++)
+ sc->rl_ldata.rl_tx_desc[i].tx_m = NULL;
++#ifdef DEV_NETMAP
++ re_netmap_tx_init(sc);
++#endif /* DEV_NETMAP */
+ /* Set EOR. */
+ desc = &sc->rl_ldata.rl_tx_list[sc->rl_ldata.rl_tx_desc_cnt - 1];
+ desc->rl_cmdstat |= htole32(RL_TDESC_CMD_EOR);
+@@ -1979,6 +1992,9 @@
+ if ((error = re_newbuf(sc, i)) != 0)
+ return (error);
+ }
++#ifdef DEV_NETMAP
++ re_netmap_rx_init(sc);
++#endif /* DEV_NETMAP */
+
+ /* Flush the RX descriptors */
+
+@@ -2035,6 +2051,12 @@
+ RL_LOCK_ASSERT(sc);
+
+ ifp = sc->rl_ifp;
++#ifdef DEV_NETMAP
++ if (ifp->if_capenable & IFCAP_NETMAP) {
++ selwakeuppri(&NA(ifp)->rx_rings->si, PI_NET);
++ return 0;
++ }
++#endif /* DEV_NETMAP */
+ if (ifp->if_mtu > RL_MTU && (sc->rl_flags & RL_FLAG_JUMBOV2) != 0)
+ jumbo = 1;
+ else
+@@ -2276,6 +2298,12 @@
+ return;
+
+ ifp = sc->rl_ifp;
++#ifdef DEV_NETMAP
++ if (ifp->if_capenable & IFCAP_NETMAP) {
++ selwakeuppri(&NA(ifp)->tx_rings[0].si, PI_NET);
++ return;
++ }
++#endif /* DEV_NETMAP */
+ /* Invalidate the TX descriptor list */
+ bus_dmamap_sync(sc->rl_ldata.rl_tx_list_tag,
+ sc->rl_ldata.rl_tx_list_map,
+@@ -2794,6 +2822,20 @@
+
+ sc = ifp->if_softc;
+
++#ifdef DEV_NETMAP
++ if (ifp->if_capenable & IFCAP_NETMAP) {
++ struct netmap_kring *kring = &NA(ifp)->tx_rings[0];
++ if (sc->rl_ldata.rl_tx_prodidx != kring->nr_hwcur) {
++ /* kick the tx unit */
++ CSR_WRITE_1(sc, sc->rl_txstart, RL_TXSTART_START);
++#ifdef RE_TX_MODERATION
++ CSR_WRITE_4(sc, RL_TIMERCNT, 1);
++#endif
++ sc->rl_watchdog_timer = 5;
++ }
++ return;
++ }
++#endif /* DEV_NETMAP */
+ if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
+ IFF_DRV_RUNNING || (sc->rl_flags & RL_FLAG_LINK) == 0)
+ return;
+Index: dev/ixgbe/ixgbe.c
+===================================================================
+--- dev/ixgbe/ixgbe.c (revision 227552)
++++ dev/ixgbe/ixgbe.c (working copy)
+@@ -313,6 +313,10 @@
+ static int fdir_pballoc = 1;
+ #endif
+
++#ifdef DEV_NETMAP
++#include <dev/netmap/ixgbe_netmap.h>
++#endif /* DEV_NETMAP */
++
+ /*********************************************************************
+ * Device identification routine
+ *
+@@ -578,6 +582,9 @@
+
+ ixgbe_add_hw_stats(adapter);
+
++#ifdef DEV_NETMAP
++ ixgbe_netmap_attach(adapter);
++#endif /* DEV_NETMAP */
+ INIT_DEBUGOUT("ixgbe_attach: end");
+ return (0);
+ err_late:
+@@ -652,6 +659,9 @@
+
+ ether_ifdetach(adapter->ifp);
+ callout_drain(&adapter->timer);
++#ifdef DEV_NETMAP
++ netmap_detach(adapter->ifp);
++#endif /* DEV_NETMAP */
+ ixgbe_free_pci_resources(adapter);
+ bus_generic_detach(dev);
+ if_free(adapter->ifp);
+@@ -1719,6 +1729,7 @@
+ if (++i == adapter->num_tx_desc)
+ i = 0;
+
++ // XXX should we sync each buffer ?
+ txbuf->m_head = NULL;
+ txbuf->eop_index = -1;
+ }
+@@ -2813,6 +2824,10 @@
+ struct adapter *adapter = txr->adapter;
+ struct ixgbe_tx_buf *txbuf;
+ int i;
++#ifdef DEV_NETMAP
++ struct netmap_slot *slot = netmap_reset(NA(adapter->ifp),
++ NR_TX, txr->me, 0);
++#endif
+
+ /* Clear the old ring contents */
+ IXGBE_TX_LOCK(txr);
+@@ -2832,6 +2847,13 @@
+ m_freem(txbuf->m_head);
+ txbuf->m_head = NULL;
+ }
++#ifdef DEV_NETMAP
++ if (slot) {
++ netmap_load_map(txr->txtag, txbuf->map,
++ NMB(slot), adapter->rx_mbuf_sz);
++ slot++;
++ }
++#endif /* DEV_NETMAP */
+ /* Clear the EOP index */
+ txbuf->eop_index = -1;
+ }
+@@ -3310,6 +3332,20 @@
+
+ mtx_assert(&txr->tx_mtx, MA_OWNED);
+
++#ifdef DEV_NETMAP
++ if (ifp->if_capenable & IFCAP_NETMAP) {
++ struct netmap_adapter *na = NA(ifp);
++
++ selwakeuppri(&na->tx_rings[txr->me].si, PI_NET);
++ IXGBE_TX_UNLOCK(txr);
++ IXGBE_CORE_LOCK(adapter);
++ selwakeuppri(&na->tx_rings[na->num_queues + 1].si, PI_NET);
++ IXGBE_CORE_UNLOCK(adapter);
++ IXGBE_TX_LOCK(txr); // the caller is supposed to own the lock
++ return (FALSE);
++ }
++#endif /* DEV_NETMAP */
++
+ if (txr->tx_avail == adapter->num_tx_desc) {
+ txr->queue_status = IXGBE_QUEUE_IDLE;
+ return FALSE;
+@@ -3698,6 +3734,10 @@
+ bus_dma_segment_t pseg[1], hseg[1];
+ struct lro_ctrl *lro = &rxr->lro;
+ int rsize, nsegs, error = 0;
++#ifdef DEV_NETMAP
++ struct netmap_slot *slot = netmap_reset(NA(rxr->adapter->ifp),
++ NR_RX, rxr->me, 0);
++#endif /* DEV_NETMAP */
+
+ adapter = rxr->adapter;
+ ifp = adapter->ifp;
+@@ -3721,6 +3761,18 @@
+ struct mbuf *mh, *mp;
+
+ rxbuf = &rxr->rx_buffers[j];
++#ifdef DEV_NETMAP
++ if (slot) {
++ netmap_load_map(rxr->ptag,
++ rxbuf->pmap, NMB(slot),
++ adapter->rx_mbuf_sz);
++ /* Update descriptor */
++ rxr->rx_base[j].read.pkt_addr =
++ htole64(vtophys(NMB(slot)));
++ slot++;
++ continue;
++ }
++#endif /* DEV_NETMAP */
+ /*
+ ** Don't allocate mbufs if not
+ ** doing header split, its wasteful
+@@ -4148,6 +4200,18 @@
+
+ IXGBE_RX_LOCK(rxr);
+
++#ifdef DEV_NETMAP
++ if (ifp->if_capenable & IFCAP_NETMAP) {
++ struct netmap_adapter *na = NA(ifp);
++
++ selwakeuppri(&na->rx_rings[rxr->me].si, PI_NET);
++ IXGBE_RX_UNLOCK(rxr);
++ IXGBE_CORE_LOCK(adapter);
++ selwakeuppri(&na->rx_rings[na->num_queues + 1].si, PI_NET);
++ IXGBE_CORE_UNLOCK(adapter);
++ return (0);
++ }
++#endif /* DEV_NETMAP */
+ for (i = rxr->next_to_check; count != 0;) {
+ struct mbuf *sendmp, *mh, *mp;
+ u32 rsc, ptype;
diff --git a/sys/dev/netmap/if_em_netmap.h b/sys/dev/netmap/if_em_netmap.h
new file mode 100644
index 0000000000000..0e220e755d68d
--- /dev/null
+++ b/sys/dev/netmap/if_em_netmap.h
@@ -0,0 +1,383 @@
+/*
+ * Copyright (C) 2011 Matteo Landi, Luigi Rizzo. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * $FreeBSD$
+ * $Id: if_em_netmap.h 9662 2011-11-16 13:18:06Z luigi $
+ *
+ * netmap changes for if_em.
+ */
+
+#include <net/netmap.h>
+#include <sys/selinfo.h>
+#include <vm/vm.h>
+#include <vm/pmap.h> /* vtophys ? */
+#include <dev/netmap/netmap_kern.h>
+
+static void em_netmap_block_tasks(struct adapter *);
+static void em_netmap_unblock_tasks(struct adapter *);
+static int em_netmap_reg(struct ifnet *, int onoff);
+static int em_netmap_txsync(void *, u_int, int);
+static int em_netmap_rxsync(void *, u_int, int);
+static void em_netmap_lock_wrapper(void *, int, u_int);
+
+static void
+em_netmap_attach(struct adapter *adapter)
+{
+	struct netmap_adapter desc;
+
+	/* Start from an all-zero descriptor; only the fields below are set. */
+	bzero(&desc, sizeof(desc));
+
+	/* Driver entry points handed to netmap_attach(). */
+	desc.nm_register = em_netmap_reg;
+	desc.nm_lock = em_netmap_lock_wrapper;
+	desc.nm_txsync = em_netmap_txsync;
+	desc.nm_rxsync = em_netmap_rxsync;
+
+	/* Interface identity and ring geometry, copied from the softc. */
+	desc.ifp = adapter->ifp;
+	desc.num_tx_desc = adapter->num_tx_desc;
+	desc.num_rx_desc = adapter->num_rx_desc;
+	desc.separate_locks = 1;
+
+	/*
+	 * adapter->rx_mbuf_sz follows MTU changes, but in netmap mode the
+	 * buffers are allocated on the first register.  A fixed buffer size
+	 * is therefore advertised here, and an MTU change must be refused
+	 * while if_capenable has IFCAP_NETMAP set.
+	 */
+	desc.buff_size = MCLBYTES;
+
+	netmap_attach(&desc, adapter->num_queues);
+}
+
+
+/*
+ * Lock dispatcher exported to the generic netmap code.
+ *
+ * _a       the driver softc (struct adapter *), passed as an opaque pointer
+ * what     one of the NETMAP_{CORE,TX,RX}_{LOCK,UNLOCK} requests
+ * queueid  ring the TX/RX requests apply to; must be < num_queues
+ *
+ * Any other value of 'what' is silently ignored.
+ */
+static void
+em_netmap_lock_wrapper(void *_a, int what, u_int queueid)
+{
+	struct adapter *adapter = _a;
+
+	ASSERT(queueid < adapter->num_queues);
+
+	if (what == NETMAP_CORE_LOCK) {
+		EM_CORE_LOCK(adapter);
+	} else if (what == NETMAP_CORE_UNLOCK) {
+		EM_CORE_UNLOCK(adapter);
+	} else if (what == NETMAP_TX_LOCK) {
+		EM_TX_LOCK(&adapter->tx_rings[queueid]);
+	} else if (what == NETMAP_TX_UNLOCK) {
+		EM_TX_UNLOCK(&adapter->tx_rings[queueid]);
+	} else if (what == NETMAP_RX_LOCK) {
+		EM_RX_LOCK(&adapter->rx_rings[queueid]);
+	} else if (what == NETMAP_RX_UNLOCK) {
+		EM_RX_UNLOCK(&adapter->rx_rings[queueid]);
+	}
+}
+
+
+/*
+ * Block the driver taskqueues and drain the tasks queued on them.
+ * With MSI-X (adapter->msix > 1) every queue has its own TX and RX
+ * taskqueue; otherwise a single adapter-wide taskqueue is used.
+ */
+static void
+em_netmap_block_tasks(struct adapter *adapter)
+{
+	int q;
+
+	if (!(adapter->msix > 1)) {	/* legacy */
+		taskqueue_block(adapter->tq);
+		taskqueue_drain(adapter->tq, &adapter->link_task);
+		taskqueue_drain(adapter->tq, &adapter->que_task);
+		return;
+	}
+
+	/* MSIX: one TX and one RX taskqueue per hardware queue */
+	for (q = 0; q < adapter->num_queues; q++) {
+		struct tx_ring *txr = &adapter->tx_rings[q];
+		struct rx_ring *rxr = &adapter->rx_rings[q];
+
+		taskqueue_block(txr->tq);
+		taskqueue_drain(txr->tq, &txr->tx_task);
+		taskqueue_block(rxr->tq);
+		taskqueue_drain(rxr->tq, &rxr->rx_task);
+	}
+}
+
+
+/*
+ * Undo em_netmap_block_tasks(): unblock the driver taskqueues.
+ * With MSI-X (adapter->msix > 1) the TX and RX taskqueue of every
+ * queue is unblocked; otherwise only the adapter-wide taskqueue is.
+ */
+static void
+em_netmap_unblock_tasks(struct adapter *adapter)
+{
+	if (adapter->msix > 1) {
+		struct tx_ring *txr = adapter->tx_rings;
+		struct rx_ring *rxr = adapter->rx_rings;
+		int i;
+
+		/*
+		 * Advance txr/rxr together with i, exactly as the blocking
+		 * loop does; otherwise only queue 0 is ever unblocked.
+		 */
+		for (i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
+			taskqueue_unblock(txr->tq);
+			taskqueue_unblock(rxr->tq);
+		}
+	} else { /* legacy */
+		taskqueue_unblock(adapter->tq);
+	}
+}
+
+/*
+ * Switch the interface into (onoff != 0) or out of (onoff == 0)
+ * netmap mode.
+ *
+ * Interrupts are disabled, the interface is marked not running and the
+ * driver tasks are blocked while the mode changes; em_init_locked()
+ * then reinitializes the adapter in the new mode.
+ *
+ * Returns 0 on success, EINVAL if the interface has no netmap adapter,
+ * or ENOMEM if, after em_init_locked(), neither IFF_DRV_RUNNING nor
+ * IFF_DRV_OACTIVE is set while entering netmap mode.  In the ENOMEM
+ * case the interface is put back into normal mode before returning.
+ */
+static int
+em_netmap_reg(struct ifnet *ifp, int onoff)
+{
+	struct adapter *adapter = ifp->if_softc;
+	struct netmap_adapter *na = NA(ifp);
+	int rc = 0;
+
+	if (na == NULL)
+		return EINVAL;	/* no netmap support here */
+
+	em_disable_intr(adapter);
+
+	/* Tell the stack that the interface is no longer active */
+	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
+
+	em_netmap_block_tasks(adapter);
+
+	if (onoff) {
+		ifp->if_capenable |= IFCAP_NETMAP;
+
+		/*
+		 * Save if_transmit so it can be restored on exit.
+		 * XXX also if_start and if_qflush ?
+		 */
+		na->if_transmit = ifp->if_transmit;
+		ifp->if_transmit = netmap_start;
+
+		em_init_locked(adapter);
+		if ((ifp->if_drv_flags &
+		    (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) == 0)
+			rc = ENOMEM;
+	}
+
+	/* Leaving netmap mode, either on request or because entry failed. */
+	if (!onoff || rc != 0) {
+		/* restore if_transmit */
+		ifp->if_transmit = na->if_transmit;
+		ifp->if_capenable &= ~IFCAP_NETMAP;
+		em_init_locked(adapter);	/* also enable intr */
+	}
+
+	em_netmap_unblock_tasks(adapter);
+	return (rc);
+}
+
+/*
+ * Reconcile hardware and user view of the transmit ring, see
+ * ixgbe.c for details.
+ *
+ * a        the driver softc (struct adapter *)
+ * ring_nr  index of the TX ring to synchronize
+ * do_lock  if non-zero, take and release EM_TX_LOCK around the work
+ *
+ * Returns 0 on success, or the result of netmap_ring_reinit() when the
+ * ring is flagged NR_REINIT, ring->cur is out of range, or a slot
+ * carries an invalid buffer or length.
+ */
+static int
+em_netmap_txsync(void *a, u_int ring_nr, int do_lock)
+{
+	struct adapter *adapter = a;
+	struct tx_ring *txr = &adapter->tx_rings[ring_nr];
+	struct netmap_adapter *na = NA(adapter->ifp);
+	struct netmap_kring *kring = &na->tx_rings[ring_nr];
+	struct netmap_ring *ring = kring->ring;
+	int j, k, n, lim = kring->nkr_num_slots - 1;
+
+	/* generate an interrupt approximately every half ring */
+	int report_frequency = kring->nkr_num_slots >> 1;
+
+	/* ring->cur is the first slot the caller has not yet filled */
+	k = ring->cur;
+	if ( (kring->nr_kflags & NR_REINIT) || k > lim)
+		return netmap_ring_reinit(kring);
+
+	if (do_lock)
+		EM_TX_LOCK(txr);
+	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
+	    BUS_DMASYNC_POSTREAD);
+
+	/* record completed transmissions TODO
+	 *
+	 * instead of using TDH, we could read the transmitted status bit.
+	 */
+	j = E1000_READ_REG(&adapter->hw, E1000_TDH(ring_nr));
+	if (j >= kring->nkr_num_slots) { /* XXX can happen */
+		D("TDH wrap %d", j);
+		j -= kring->nkr_num_slots;
+	}
+	int delta = j - txr->next_to_clean;
+	if (delta) {
+		/* new transmissions were completed, increment
+		   kring->nr_hwavail. */
+		if (delta < 0)
+			delta += kring->nkr_num_slots;
+		txr->next_to_clean = j;
+		kring->nr_hwavail += delta;
+	}
+
+	/* update avail to what the hardware knows */
+	ring->avail = kring->nr_hwavail;
+
+	j = kring->nr_hwcur;
+	if (j != k) {	/* we have packets to send */
+		n = 0;
+		while (j != k) {
+			struct netmap_slot *slot = &ring->slot[j];
+			struct e1000_tx_desc *curr = &txr->tx_base[j];
+			struct em_buffer *txbuf = &txr->tx_buffers[j];
+			/*
+			 * Ask for a status report on request (NS_REPORT)
+			 * and at slot 0 and slot report_frequency.
+			 */
+			int flags = ((slot->flags & NS_REPORT) ||
+				j == 0 || j == report_frequency) ?
+					E1000_TXD_CMD_RS : 0;
+			void *addr = NMB(slot);
+			int len = slot->len;
+			if (addr == netmap_buffer_base || len > NETMAP_BUF_SIZE) {
+				/* bad buffer or length: reset the ring */
+				if (do_lock)
+					EM_TX_UNLOCK(txr);
+				return netmap_ring_reinit(kring);
+			}
+
+			slot->flags &= ~NS_REPORT;
+			curr->upper.data = 0;
+			/*
+			 * Program the length that was validated above
+			 * rather than fetching slot->len a second time:
+			 * the slot is part of the ring shared with the
+			 * user and could change between the two reads.
+			 */
+			curr->lower.data =
+			    htole32(
+				adapter->txd_cmd |
+				(E1000_TXD_CMD_EOP | flags) |
+				len);
+			if (slot->flags & NS_BUF_CHANGED) {
+				curr->buffer_addr = htole64(vtophys(addr));
+				/* buffer has changed, unload and reload map */
+				netmap_reload_map(txr->txtag, txbuf->map,
+				    addr, na->buff_size);
+				slot->flags &= ~NS_BUF_CHANGED;
+			}
+
+			bus_dmamap_sync(txr->txtag, txbuf->map,
+			    BUS_DMASYNC_PREWRITE);
+			j = (j == lim) ? 0 : j + 1;
+			n++;
+		}
+		kring->nr_hwcur = ring->cur;
+
+		/* decrease avail by number of sent packets */
+		ring->avail -= n;
+		kring->nr_hwavail = ring->avail;
+
+		bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
+		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
+
+		/* hand the new descriptors to the NIC by moving the tail */
+		E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me),
+		    ring->cur);
+	}
+	if (do_lock)
+		EM_TX_UNLOCK(txr);
+	return 0;
+}
+
+/*
+ * Reconcile kernel and user view of the receive ring, see ixgbe.c
+ *
+ * a        the driver softc (struct adapter *)
+ * ring_nr  index of the RX ring to synchronize
+ * do_lock  if non-zero, take and release EM_RX_LOCK around the work
+ *
+ * Returns 0 on success, or the result of netmap_ring_reinit() when the
+ * ring is flagged NR_REINIT, ring->cur is out of range, or a released
+ * slot carries an invalid buffer.
+ */
+static int
+em_netmap_rxsync(void *a, u_int ring_nr, int do_lock)
+{
+	struct adapter *adapter = a;
+	struct rx_ring *rxr = &adapter->rx_rings[ring_nr];
+	struct netmap_adapter *na = NA(adapter->ifp);
+	struct netmap_kring *kring = &na->rx_rings[ring_nr];
+	struct netmap_ring *ring = kring->ring;
+	int j, k, n, lim = kring->nkr_num_slots - 1;
+
+	k = ring->cur;
+	if ( (kring->nr_kflags & NR_REINIT) || k > lim)
+		return netmap_ring_reinit(kring);
+
+	if (do_lock)
+		EM_RX_LOCK(rxr);
+	/* XXX check sync modes */
+	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
+	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
+
+	/*
+	 * acknowledge all the received packets: walk the descriptors
+	 * from next_to_check until one without the DD status bit.
+	 */
+	j = rxr->next_to_check;
+	for (n = 0; ; n++) {
+		struct e1000_rx_desc *curr = &rxr->rx_base[j];
+
+		if ((curr->status & E1000_RXD_STAT_DD) == 0)
+			break;
+		ring->slot[j].len = le16toh(curr->length);
+		/*
+		 * The buffer maps must be synced with rxr->rxtag, the
+		 * same tag used for rxbuf->map when buffers are released
+		 * below.
+		 */
+		bus_dmamap_sync(rxr->rxtag, rxr->rx_buffers[j].map,
+		    BUS_DMASYNC_POSTREAD);
+		j = (j == lim) ? 0 : j + 1;
+	}
+	if (n) {
+		rxr->next_to_check = j;
+		kring->nr_hwavail += n;
+	}
+
+	/* skip past packets that userspace has already processed:
+	 * making them available for reception.
+	 * advance nr_hwcur and issue a bus_dmamap_sync on the
+	 * buffers so it is safe to write to them.
+	 * Also decrease nr_hwavail
+	 */
+	j = kring->nr_hwcur;
+	if (j != k) {	/* userspace has read some packets. */
+		n = 0;
+		while (j != k) {
+			struct netmap_slot *slot = &ring->slot[j];
+			struct e1000_rx_desc *curr = &rxr->rx_base[j];
+			struct em_buffer *rxbuf = &rxr->rx_buffers[j];
+			void *addr = NMB(slot);
+
+			if (addr == netmap_buffer_base) { /* bad buf */
+				if (do_lock)
+					EM_RX_UNLOCK(rxr);
+				return netmap_ring_reinit(kring);
+			}
+
+			/* clear DD so the slot reads as "not received" */
+			curr->status = 0;
+			if (slot->flags & NS_BUF_CHANGED) {
+				curr->buffer_addr = htole64(vtophys(addr));
+				/* buffer has changed, unload and reload map */
+				netmap_reload_map(rxr->rxtag, rxbuf->map,
+				    addr, na->buff_size);
+				slot->flags &= ~NS_BUF_CHANGED;
+			}
+
+			bus_dmamap_sync(rxr->rxtag, rxbuf->map,
+			    BUS_DMASYNC_PREREAD);
+
+			j = (j == lim) ? 0 : j + 1;
+			n++;
+		}
+		kring->nr_hwavail -= n;
+		kring->nr_hwcur = ring->cur;
+		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
+		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
+		/*
+		 * IMPORTANT: we must leave one free slot in the ring,
+		 * so move j back by one unit
+		 */
+		j = (j == 0) ? lim : j - 1;
+		E1000_WRITE_REG(&adapter->hw, E1000_RDT(rxr->me), j);
+	}
+	/* tell userspace that there are new packets */
+	ring->avail = kring->nr_hwavail ;
+	if (do_lock)
+		EM_RX_UNLOCK(rxr);
+	return 0;
+}
diff --git a/sys/dev/netmap/if_igb_netmap.h b/sys/dev/netmap/if_igb_netmap.h
new file mode 100644
index 0000000000000..0c147063b2112
--- /dev/null
+++ b/sys/dev/netmap/if_igb_netmap.h
@@ -0,0 +1,378 @@
+/*
+ * Copyright (C) 2011 Universita` di Pisa. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * $FreeBSD$
+ * $Id: if_igb_netmap.h 9662 2011-11-16 13:18:06Z luigi $
+ *
+ * netmap modifications for igb
+ * contributed by Ahmed Kooli
+ */
+
+#include <net/netmap.h>
+#include <sys/selinfo.h>
+#include <vm/vm.h>
+#include <vm/pmap.h> /* vtophys ? */
+#include <dev/netmap/netmap_kern.h>
+
+static int igb_netmap_reg(struct ifnet *, int onoff);
+static int igb_netmap_txsync(void *, u_int, int);
+static int igb_netmap_rxsync(void *, u_int, int);
+static void igb_netmap_lock_wrapper(void *, int, u_int);
+
+
+static void
+igb_netmap_attach(struct adapter *adapter)
+{
+	struct netmap_adapter desc;	/* filled in, then passed to netmap_attach() */
+
+	bzero(&desc, sizeof(desc));
+
+	desc.nm_register = igb_netmap_reg;
+	desc.nm_lock = igb_netmap_lock_wrapper;
+	desc.nm_txsync = igb_netmap_txsync;
+	desc.nm_rxsync = igb_netmap_rxsync;
+	desc.separate_locks = 1;
+	desc.ifp = adapter->ifp;
+	desc.num_tx_desc = adapter->num_tx_desc;
+	desc.num_rx_desc = adapter->num_rx_desc;
+	/*
+	 * SIOCSETMTU sets adapter->rx_mbuf_sz, whereas in netmap mode the
+	 * buffers are allocated on the first register: SIOCSETMTU must be
+	 * disallowed while IFCAP_NETMAP is set in if_capenable.
+	 */
+	desc.buff_size = MCLBYTES;
+	netmap_attach(&desc, adapter->num_queues);
+}
+
+
+/*
+ * wrapper to export locks to the generic code
+ */
+static void
+igb_netmap_lock_wrapper(void *_a, int what, u_int queueid)
+{
+	struct adapter *sc = _a;
+
+	ASSERT(queueid < sc->num_queues);
+	switch (what) {		/* any other value of 'what' is ignored */
+	case NETMAP_CORE_LOCK:
+		IGB_CORE_LOCK(sc);
+		return;
+	case NETMAP_CORE_UNLOCK:
+		IGB_CORE_UNLOCK(sc);
+		return;
+	case NETMAP_TX_LOCK:	/* lock of tx ring 'queueid' */
+		IGB_TX_LOCK(&sc->tx_rings[queueid]);
+		return;
+	case NETMAP_TX_UNLOCK:
+		IGB_TX_UNLOCK(&sc->tx_rings[queueid]);
+		return;
+	case NETMAP_RX_LOCK:	/* lock of rx ring 'queueid' */
+		IGB_RX_LOCK(&sc->rx_rings[queueid]);
+		return;
+	case NETMAP_RX_UNLOCK:
+		IGB_RX_UNLOCK(&sc->rx_rings[queueid]);
+		return;
+	}
+}
+
+
+/*
+ * support for netmap register/unregister. We are already under core lock.
+ * only called on the first init or the last unregister.
+ */
+static int
+igb_netmap_reg(struct ifnet *ifp, int onoff)
+{
+	struct netmap_adapter *na = NA(ifp);
+	struct adapter *sc = ifp->if_softc;
+	int rv = 0;
+
+	if (!na)
+		return EINVAL;
+
+	igb_disable_intr(sc);
+
+	/* mark the interface as not running for the rest of the stack */
+	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
+
+	if (onoff) {
+		ifp->if_capenable |= IFCAP_NETMAP;
+		/* remember if_transmit so that it can be put back later */
+		na->if_transmit = ifp->if_transmit;
+		ifp->if_transmit = netmap_start;
+
+		igb_init_locked(sc);
+		/* neither flag set after the init: report ENOMEM */
+		if (!(ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)))
+			rv = ENOMEM;
+	}
+
+	if (!onoff || rv != 0) {
+		/* unregister, or undo a register that failed */
+		ifp->if_transmit = na->if_transmit;
+		ifp->if_capenable &= ~IFCAP_NETMAP;
+		igb_init_locked(sc);	/* also enables intr */
+	}
+
+	return (rv);
+}
+
+
+/*
+ * Reconcile kernel and user view of the transmit ring.
+ *
+ * Userspace has filled tx slots up to cur (excluded).
+ * The last unused slot previously known to the kernel was nr_hwcur,
+ * and the last interrupt reported nr_hwavail slots available
+ * (using the special value -1 to indicate idle transmit ring).
+ * The function must first update avail to what the kernel
+ * knows, subtract the newly used slots (cur - nr_hwcur)
+ * from both avail and nr_hwavail, and set nr_hwcur = cur
+ * issuing a dmamap_sync on all slots.
+ *
+ * Check parameters in the struct netmap_ring.
+ * We don't use avail, only check for bogus values.
+ * Make sure cur is valid, and same goes for buffer indexes and lengths.
+ * To avoid races, read the values once, and never use those from
+ * the ring afterwards.
+ */
+static int
+igb_netmap_txsync(void *a, u_int ring_nr, int do_lock)
+{
+	struct adapter *adapter = a;
+	struct tx_ring *txr = &adapter->tx_rings[ring_nr];
+	struct netmap_adapter *na = NA(adapter->ifp);
+	struct netmap_kring *kring = &na->tx_rings[ring_nr];
+	struct netmap_ring *ring = kring->ring;
+	int j, k, n, lim = kring->nkr_num_slots - 1;
+	/* do_lock != 0: lock txr here. Returns 0 or netmap_ring_reinit()'s value. */
+	/* generate an interrupt approximately every half ring */
+	int report_frequency = kring->nkr_num_slots >> 1;
+	/* Read cur once. A negative k would pass 'k > lim' and never match j below. */
+	k = ring->cur; /* ring is not protected by any lock */
+	if ((kring->nr_kflags & NR_REINIT) || k < 0 || k > lim)
+		return netmap_ring_reinit(kring);
+
+	if (do_lock)
+		IGB_TX_LOCK(txr);
+	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
+	    BUS_DMASYNC_POSTREAD);
+
+	/* record completed transmissions. TODO
+	 *
+	 * Instead of reading from the TDH register, we could try to check
+	 * the status bit of descriptor packets.
+	 */
+	j = E1000_READ_REG(&adapter->hw, E1000_TDH(ring_nr));
+	if (j >= kring->nkr_num_slots) /* XXX can it happen ? */
+		j -= kring->nkr_num_slots;
+	int delta = j - txr->next_to_clean;
+	if (delta) {
+		/* new tx were completed; delta is taken modulo the ring size */
+		if (delta < 0)
+			delta += kring->nkr_num_slots;
+		txr->next_to_clean = j;
+		kring->nr_hwavail += delta;
+	}
+
+	/* update avail to what the hardware knows */
+	ring->avail = kring->nr_hwavail;
+
+	j = kring->nr_hwcur;
+	if (j != k) { /* we have new packets to send */
+		u32 olinfo_status = 0;
+		n = 0;
+
+		/* 82575 needs the queue index added */
+		if (adapter->hw.mac.type == e1000_82575)
+			olinfo_status |= txr->me << 4;
+
+		while (j != k) {
+			struct netmap_slot *slot = &ring->slot[j];
+			struct igb_tx_buffer *txbuf = &txr->tx_buffers[j];
+			union e1000_adv_tx_desc *curr =
+			    (union e1000_adv_tx_desc *)&txr->tx_base[j];
+			void *addr = NMB(slot);
+			int flags = ((slot->flags & NS_REPORT) ||
+			    j == 0 || j == report_frequency) ?
+			    E1000_ADVTXD_DCMD_RS : 0;
+			int len = slot->len;
+
+			if (addr == netmap_buffer_base || len > NETMAP_BUF_SIZE) {
+				if (do_lock)
+					IGB_TX_UNLOCK(txr);
+				return netmap_ring_reinit(kring);
+			}
+
+			slot->flags &= ~NS_REPORT;
+			curr->read.buffer_addr = htole64(vtophys(addr));
+			curr->read.olinfo_status =
+			    htole32(olinfo_status |
+				(len << E1000_ADVTXD_PAYLEN_SHIFT));
+			curr->read.cmd_type_len =
+			    htole32(len | E1000_ADVTXD_DTYP_DATA |
+				E1000_ADVTXD_DCMD_IFCS |
+				E1000_ADVTXD_DCMD_DEXT |
+				E1000_ADVTXD_DCMD_EOP | flags);
+			if (slot->flags & NS_BUF_CHANGED) {
+				/* buffer has changed, unload and reload map */
+				netmap_reload_map(txr->txtag, txbuf->map,
+				    addr, na->buff_size);
+				slot->flags &= ~NS_BUF_CHANGED;
+			}
+
+			bus_dmamap_sync(txr->txtag, txbuf->map,
+			    BUS_DMASYNC_PREWRITE);
+			j = (j == lim) ? 0 : j + 1;
+			n++;
+		}
+		kring->nr_hwcur = k;
+
+		/* update the kernel count, then publish it; never read avail back */
+		kring->nr_hwavail -= n;
+		ring->avail = kring->nr_hwavail;
+
+		/* Set the watchdog */
+		txr->queue_status = IGB_QUEUE_WORKING;
+		txr->watchdog_time = ticks;
+
+		bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
+		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
+
+		E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), k);
+	}
+	if (do_lock)
+		IGB_TX_UNLOCK(txr);
+	return 0;
+}
+
+
+/*
+ * Reconcile kernel and user view of the receive ring.
+ *
+ * Userspace has read rx slots up to cur (excluded).
+ * The last unread slot previously known to the kernel was nr_hwcur,
+ * and the last interrupt reported nr_hwavail slots available.
+ * We must subtract the newly consumed slots (cur - nr_hwcur)
+ * from nr_hwavail, clearing the descriptors for the next
+ * read, tell the hardware that they are available,
+ * and set nr_hwcur = cur and avail = nr_hwavail.
+ * issuing a dmamap_sync on all slots.
+ */
+static int
+igb_netmap_rxsync(void *a, u_int ring_nr, int do_lock)
+{
+	struct adapter *adapter = a;
+	struct rx_ring *rxr = &adapter->rx_rings[ring_nr];
+	struct netmap_adapter *na = NA(adapter->ifp);
+	struct netmap_kring *kring = &na->rx_rings[ring_nr];
+	struct netmap_ring *ring = kring->ring;
+	int j, k, n, lim = kring->nkr_num_slots - 1;
+	/* Read cur once. A negative k would pass 'k > lim' and never match j below. */
+	k = ring->cur; /* ring is not protected by any lock */
+	if ((kring->nr_kflags & NR_REINIT) || k < 0 || k > lim)
+		return netmap_ring_reinit(kring);
+	/* do_lock != 0: lock rxr here. Returns 0 or netmap_ring_reinit()'s value. */
+	if (do_lock)
+		IGB_RX_LOCK(rxr);
+
+	/* Sync the ring. */
+	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
+	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
+
+	j = rxr->next_to_check;
+	for (n = 0; ; n++) {	/* n counts descriptors found with DD set */
+		union e1000_adv_rx_desc *curr = &rxr->rx_base[j];
+		uint32_t staterr = le32toh(curr->wb.upper.status_error);
+
+		if ((staterr & E1000_RXD_STAT_DD) == 0)
+			break;
+		ring->slot[j].len = le16toh(curr->wb.upper.length);
+
+		bus_dmamap_sync(rxr->ptag,
+		    rxr->rx_buffers[j].pmap, BUS_DMASYNC_POSTREAD);
+		j = (j == lim) ? 0 : j + 1;
+	}
+	if (n) {
+		rxr->next_to_check = j;
+		kring->nr_hwavail += n;
+		if (kring->nr_hwavail >= lim - 10) {
+			ND("rx ring %d almost full %d", ring_nr, kring->nr_hwavail);
+		}
+	}
+
+	/* skip past packets that userspace has already processed,
+	 * making them available for reception.
+	 * advance nr_hwcur and issue a bus_dmamap_sync on the
+	 * buffers so it is safe to write to them.
+	 * Also decrease nr_hwavail
+	 */
+	j = kring->nr_hwcur;
+	if (j != k) { /* userspace has read some packets. */
+		n = 0;
+		while (j != k) {
+			struct netmap_slot *slot = ring->slot + j;
+			union e1000_adv_rx_desc *curr = &rxr->rx_base[j];
+			struct igb_rx_buf *rxbuf = rxr->rx_buffers + j;
+			void *addr = NMB(slot);
+
+			if (addr == netmap_buffer_base) { /* bad buf */
+				if (do_lock)
+					IGB_RX_UNLOCK(rxr);
+				return netmap_ring_reinit(kring);
+			}
+
+			curr->wb.upper.status_error = 0;
+			curr->read.pkt_addr = htole64(vtophys(addr));
+			if (slot->flags & NS_BUF_CHANGED) {
+				netmap_reload_map(rxr->ptag, rxbuf->pmap,
+				    addr, na->buff_size);
+				slot->flags &= ~NS_BUF_CHANGED;
+			}
+
+			bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
+			    BUS_DMASYNC_PREREAD);
+
+			j = (j == lim) ? 0 : j + 1;
+			n++;
+		}
+		kring->nr_hwavail -= n;
+		kring->nr_hwcur = k;	/* the validated copy, not ring->cur again */
+		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
+		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
+		/* IMPORTANT: we must leave one free slot in the ring,
+		 * so move j back by one unit
+		 */
+		j = (j == 0) ? lim : j - 1;
+		E1000_WRITE_REG(&adapter->hw, E1000_RDT(rxr->me), j);
+	}
+	/* tell userspace that there are new packets */
+	ring->avail = kring->nr_hwavail;
+	if (do_lock)
+		IGB_RX_UNLOCK(rxr);
+	return 0;
+}
diff --git a/sys/dev/netmap/if_lem_netmap.h b/sys/dev/netmap/if_lem_netmap.h
new file mode 100644
index 0000000000000..a8f34989bcc4c
--- /dev/null
+++ b/sys/dev/netmap/if_lem_netmap.h
@@ -0,0 +1,344 @@
+/*
+ * Copyright (C) 2011 Matteo Landi, Luigi Rizzo. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * $FreeBSD$
+ * $Id: if_lem_netmap.h 9662 2011-11-16 13:18:06Z luigi $
+ *
+ * netmap support for if_lem.c
+ */
+
+#include <net/netmap.h>
+#include <sys/selinfo.h>
+#include <vm/vm.h>
+#include <vm/pmap.h> /* vtophys ? */
+#include <dev/netmap/netmap_kern.h>
+
+static int lem_netmap_reg(struct ifnet *, int onoff);
+static int lem_netmap_txsync(void *, u_int, int);
+static int lem_netmap_rxsync(void *, u_int, int);
+static void lem_netmap_lock_wrapper(void *, int, u_int);
+
+
+SYSCTL_NODE(_dev, OID_AUTO, lem, CTLFLAG_RW, 0, "lem card");
+
+static void
+lem_netmap_attach(struct adapter *adapter)
+{
+	struct netmap_adapter desc;	/* filled in, then passed to netmap_attach() */
+
+	bzero(&desc, sizeof(desc));
+
+	desc.nm_register = lem_netmap_reg;
+	desc.nm_lock = lem_netmap_lock_wrapper;
+	desc.nm_txsync = lem_netmap_txsync;
+	desc.nm_rxsync = lem_netmap_rxsync;
+	desc.separate_locks = 1;
+	desc.buff_size = MCLBYTES;
+	desc.ifp = adapter->ifp;
+	desc.num_tx_desc = adapter->num_tx_desc;
+	desc.num_rx_desc = adapter->num_rx_desc;
+	netmap_attach(&desc, 1);
+}
+
+
+static void
+lem_netmap_lock_wrapper(void *_a, int what, u_int ringid)
+{
+	struct adapter *sc = _a;
+
+	/* ringid is ignored: there is only one ring here */
+	switch (what) {		/* any other value of 'what' is ignored */
+	case NETMAP_CORE_LOCK:
+		EM_CORE_LOCK(sc);
+		return;
+	case NETMAP_CORE_UNLOCK:
+		EM_CORE_UNLOCK(sc);
+		return;
+	case NETMAP_TX_LOCK:
+		EM_TX_LOCK(sc);
+		return;
+	case NETMAP_TX_UNLOCK:
+		EM_TX_UNLOCK(sc);
+		return;
+	case NETMAP_RX_LOCK:
+		EM_RX_LOCK(sc);
+		return;
+	case NETMAP_RX_UNLOCK:
+		EM_RX_UNLOCK(sc);
+		return;
+	}
+}
+
+
+/*
+ * Reconcile kernel and user view of the transmit ring. see ixgbe.c
+ */
+static int
+lem_netmap_txsync(void *a, u_int ring_nr, int do_lock)
+{
+	struct adapter *adapter = a;
+	struct netmap_adapter *na = NA(adapter->ifp);
+	struct netmap_kring *kring = &na->tx_rings[0];	/* ring_nr is not used */
+	struct netmap_ring *ring = kring->ring;
+	int j, k, n, lim = kring->nkr_num_slots - 1;
+	/* do_lock != 0: take the tx lock here. Returns 0 or the reinit result. */
+	/* generate an interrupt approximately every half ring */
+	int report_frequency = kring->nkr_num_slots >> 1;
+	/* Read cur once. A negative k would pass 'k > lim' and never match j below. */
+	k = ring->cur;
+	if ((kring->nr_kflags & NR_REINIT) || k < 0 || k > lim)
+		return netmap_ring_reinit(kring);
+
+	if (do_lock)
+		EM_TX_LOCK(adapter);
+	bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
+	    BUS_DMASYNC_POSTREAD);
+
+	/* record completed transmissions TODO
+	 *
+	 * instead of using TDH, we could read the transmitted status bit.
+	 */
+	j = E1000_READ_REG(&adapter->hw, E1000_TDH(0));
+	if (j >= kring->nkr_num_slots) { /* can it happen ? */
+		D("bad TDH %d", j);
+		j -= kring->nkr_num_slots;
+	}
+	int delta = j - adapter->next_tx_to_clean;
+	if (delta) {	/* delta is taken modulo the ring size */
+		if (delta < 0)
+			delta += kring->nkr_num_slots;
+		adapter->next_tx_to_clean = j;
+		kring->nr_hwavail += delta;
+	}
+
+	/* update avail to what the hardware knows */
+	ring->avail = kring->nr_hwavail;
+
+	j = kring->nr_hwcur;
+	if (j != k) { /* we have new packets to send */
+		n = 0;
+		while (j != k) {
+			struct netmap_slot *slot = &ring->slot[j];
+			struct e1000_tx_desc *curr = &adapter->tx_desc_base[j];
+			struct em_buffer *txbuf = &adapter->tx_buffer_area[j];
+			void *addr = NMB(slot);
+			int flags = ((slot->flags & NS_REPORT) ||
+			    j == 0 || j == report_frequency) ?
+			    E1000_TXD_CMD_RS : 0;
+			int len = slot->len;
+
+			if (addr == netmap_buffer_base || len > NETMAP_BUF_SIZE) {
+				if (do_lock)
+					EM_TX_UNLOCK(adapter);
+				return netmap_ring_reinit(kring);
+			}
+
+			curr->upper.data = 0;
+			/* always interrupt. XXX make it conditional */
+			curr->lower.data =
+			    htole32( adapter->txd_cmd | len |
+				(E1000_TXD_CMD_EOP | flags) );
+			if (slot->flags & NS_BUF_CHANGED) {
+				curr->buffer_addr = htole64(vtophys(addr));
+				/* buffer has changed, unload and reload map */
+				netmap_reload_map(adapter->txtag, txbuf->map,
+				    addr, na->buff_size);
+				slot->flags &= ~NS_BUF_CHANGED;
+			}
+
+			bus_dmamap_sync(adapter->txtag, txbuf->map,
+			    BUS_DMASYNC_PREWRITE);
+			j = (j == lim) ? 0 : j + 1;
+			n++;
+		}
+		kring->nr_hwcur = k;	/* the validated copy, not ring->cur again */
+
+		/* update the kernel count, then publish it; never read avail back */
+		kring->nr_hwavail -= n;
+		ring->avail = kring->nr_hwavail;
+
+		bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
+		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
+
+		E1000_WRITE_REG(&adapter->hw, E1000_TDT(0), k);
+	}
+	if (do_lock)
+		EM_TX_UNLOCK(adapter);
+	return 0;
+}
+
+
+/*
+ * Reconcile kernel and user view of the receive ring. see ixgbe.c
+ */
+static int
+lem_netmap_rxsync(void *a, u_int ring_nr, int do_lock)
+{
+	struct adapter *adapter = a;
+	struct netmap_adapter *na = NA(adapter->ifp);
+	struct netmap_kring *kring = &na->rx_rings[0];	/* ring_nr is not used */
+	struct netmap_ring *ring = kring->ring;
+	int j, k, n, lim = kring->nkr_num_slots - 1;
+	/* Read cur once. A negative k would pass 'k > lim' and never match j below. */
+	k = ring->cur;
+	if ((kring->nr_kflags & NR_REINIT) || k < 0 || k > lim)
+		return netmap_ring_reinit(kring);
+	/* do_lock != 0: take the rx lock here. Returns 0 or the reinit result. */
+	if (do_lock)
+		EM_RX_LOCK(adapter);
+	/* XXX check sync modes */
+	bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map,
+	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
+
+	/* acknowledge all the received packets. */
+	j = adapter->next_rx_desc_to_check;
+	for (n = 0; ; n++) {	/* n counts descriptors found with DD set */
+		struct e1000_rx_desc *curr = &adapter->rx_desc_base[j];
+		int len = le16toh(curr->length) - 4; // CRC
+
+		if ((curr->status & E1000_RXD_STAT_DD) == 0)
+			break;
+
+		if (len < 0) {
+			D("bogus pkt size at %d", j);
+			len = 0;
+		}
+		ring->slot[j].len = len;
+		bus_dmamap_sync(adapter->rxtag, adapter->rx_buffer_area[j].map,
+		    BUS_DMASYNC_POSTREAD);
+		j = (j == lim) ? 0 : j + 1;
+	}
+	if (n) {
+		adapter->next_rx_desc_to_check = j;
+		kring->nr_hwavail += n;
+	}
+
+	/* skip past packets that userspace has already processed,
+	 * making them available for reception. We don't need to set
+	 * the length as it is the same for all slots.
+	 */
+	j = kring->nr_hwcur;
+	if (j != k) { /* userspace has read some packets. */
+		n = 0;
+		while (j != k) {
+			struct netmap_slot *slot = &ring->slot[j];
+			struct e1000_rx_desc *curr = &adapter->rx_desc_base[j];
+			struct em_buffer *rxbuf = &adapter->rx_buffer_area[j];
+			void *addr = NMB(slot);
+
+			if (addr == netmap_buffer_base) { /* bad buf */
+				if (do_lock)
+					EM_RX_UNLOCK(adapter);
+				return netmap_ring_reinit(kring);
+			}
+			/* clear the status so that DD is not seen again */
+			curr->status = 0;
+			if (slot->flags & NS_BUF_CHANGED) {
+				curr->buffer_addr = htole64(vtophys(addr));
+				/* buffer has changed, unload and reload map */
+				netmap_reload_map(adapter->rxtag, rxbuf->map,
+				    addr, na->buff_size);
+				slot->flags &= ~NS_BUF_CHANGED;
+			}
+
+			bus_dmamap_sync(adapter->rxtag, rxbuf->map,
+			    BUS_DMASYNC_PREREAD);
+
+			j = (j == lim) ? 0 : j + 1;
+			n++;
+		}
+		kring->nr_hwavail -= n;
+		kring->nr_hwcur = k;	/* the validated copy, not ring->cur again */
+		bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map,
+		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
+		/*
+		 * IMPORTANT: we must leave one free slot in the ring,
+		 * so move j back by one unit
+		 */
+		j = (j == 0) ? lim : j - 1;
+		E1000_WRITE_REG(&adapter->hw, E1000_RDT(0), j);
+	}
+
+	/* tell userspace that there are new packets */
+	ring->avail = kring->nr_hwavail;
+	if (do_lock)
+		EM_RX_UNLOCK(adapter);
+	return 0;
+}
+
+
+/*
+ * Register/unregister routine
+ */
+static int
+lem_netmap_reg(struct ifnet *ifp, int onoff)
+{
+	struct netmap_adapter *na = NA(ifp);
+	struct adapter *sc = ifp->if_softc;
+	int rv = 0;
+
+	if (!na)
+		return EINVAL;
+
+	lem_disable_intr(sc);
+
+	/* mark the interface as not running for the rest of the stack */
+	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
+
+	/* lem_netmap_block_tasks(adapter); */
+#ifndef EM_LEGACY_IRQ
+	taskqueue_block(sc->tq);
+	taskqueue_drain(sc->tq, &sc->rxtx_task);
+	taskqueue_drain(sc->tq, &sc->link_task);
+#endif /* !EM_LEGACY_IRQ */
+	if (onoff) {
+		ifp->if_capenable |= IFCAP_NETMAP;
+
+		/*
+		 * Remember if_transmit so that it can be put back on exit.
+		 * XXX what about if_start and if_qflush ?
+		 */
+		na->if_transmit = ifp->if_transmit;
+		ifp->if_transmit = netmap_start;
+
+		lem_init_locked(sc);
+		/* neither flag set after the init: report ENOMEM */
+		if (!(ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)))
+			rv = ENOMEM;
+	}
+	if (!onoff || rv != 0) {
+		/* unregister, or undo a failed register: leave netmap mode */
+		ifp->if_transmit = na->if_transmit;
+		ifp->if_capenable &= ~IFCAP_NETMAP;
+		lem_init_locked(sc);	/* also enables intr */
+	}
+
+#ifndef EM_LEGACY_IRQ
+	taskqueue_unblock(sc->tq);
+#endif /* !EM_LEGACY_IRQ */
+
+	return (rv);
+}
diff --git a/sys/dev/netmap/if_re_netmap.h b/sys/dev/netmap/if_re_netmap.h
new file mode 100644
index 0000000000000..efccf3a795bc7
--- /dev/null
+++ b/sys/dev/netmap/if_re_netmap.h
@@ -0,0 +1,415 @@
+/*
+ * Copyright (C) 2011 Luigi Rizzo. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * $FreeBSD$
+ * $Id: if_re_netmap.h 9662 2011-11-16 13:18:06Z luigi $
+ *
+ * netmap support for if_re
+ */
+
+#include <net/netmap.h>
+#include <sys/selinfo.h>
+#include <vm/vm.h>
+#include <vm/pmap.h> /* vtophys ? */
+#include <dev/netmap/netmap_kern.h>
+
+static int re_netmap_reg(struct ifnet *, int onoff);
+static int re_netmap_txsync(void *, u_int, int);
+static int re_netmap_rxsync(void *, u_int, int);
+static void re_netmap_lock_wrapper(void *, int, u_int);
+
+static void
+re_netmap_attach(struct rl_softc *sc)
+{
+	struct netmap_adapter desc;	/* filled in, then passed to netmap_attach() */
+
+	bzero(&desc, sizeof(desc));
+
+	desc.nm_register = re_netmap_reg;
+	desc.nm_lock = re_netmap_lock_wrapper;
+	desc.nm_txsync = re_netmap_txsync;
+	desc.nm_rxsync = re_netmap_rxsync;
+	desc.separate_locks = 0;
+	desc.buff_size = MCLBYTES;
+	desc.ifp = sc->rl_ifp;
+	desc.num_tx_desc = sc->rl_ldata.rl_tx_desc_cnt;
+	desc.num_rx_desc = sc->rl_ldata.rl_rx_desc_cnt;
+	netmap_attach(&desc, 1);
+}
+
+
+/*
+ * wrapper to export locks to the generic code
+ * We should not use the tx/rx locks
+ */
+static void
+re_netmap_lock_wrapper(void *_a, int what, u_int queueid)
+{
+	struct rl_softc *sc = _a;
+
+	if (what == NETMAP_CORE_LOCK) {
+		RL_LOCK(sc);
+		return;
+	}
+	if (what == NETMAP_CORE_UNLOCK) {
+		RL_UNLOCK(sc);
+		return;
+	}
+
+	/* tx/rx lock requests are only reported, nothing is locked */
+	if (what == NETMAP_TX_LOCK || what == NETMAP_RX_LOCK ||
+	    what == NETMAP_TX_UNLOCK || what == NETMAP_RX_UNLOCK) {
+		D("invalid lock call %d, no tx/rx locks here", what);
+		return;
+	}
+}
+
+
+/*
+ * support for netmap register/unregister. We are already under core lock.
+ * only called on the first register or the last unregister.
+ */
+static int
+re_netmap_reg(struct ifnet *ifp, int onoff)
+{
+	struct netmap_adapter *na = NA(ifp);
+	struct rl_softc *sc = ifp->if_softc;
+	int rv = 0;
+
+	if (!na)
+		return EINVAL;
+
+	/* mark the interface as not running for the rest of the stack */
+	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
+
+	re_stop(sc);
+
+	if (onoff) {
+		ifp->if_capenable |= IFCAP_NETMAP;
+
+		/* remember if_transmit so that it can be put back later */
+		na->if_transmit = ifp->if_transmit;
+		/* XXX if_start and if_qflush ??? */
+		ifp->if_transmit = netmap_start;
+
+		re_init_locked(sc);
+
+		/* neither flag set after the init: report ENOMEM */
+		if (!(ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)))
+			rv = ENOMEM;
+	}
+
+	if (!onoff || rv != 0) {
+		/* unregister, or undo a register that failed */
+		ifp->if_transmit = na->if_transmit;
+		ifp->if_capenable &= ~IFCAP_NETMAP;
+		re_init_locked(sc);	/* also enables intr */
+	}
+	return (rv);
+}
+
+
+/*
+ * Reconcile kernel and user view of the transmit ring.
+ *
+ * Userspace has filled tx slots up to cur (excluded).
+ * The last unused slot previously known to the kernel was nr_hwcur,
+ * and the last interrupt reported nr_hwavail slots available
+ * (using the special value -1 to indicate idle transmit ring).
+ * The function must first update avail to what the kernel
+ * knows (translating the -1 to nkr_num_slots - 1),
+ * subtract the newly used slots (cur - nr_hwcur)
+ * from both avail and nr_hwavail, and set nr_hwcur = cur
+ * issuing a dmamap_sync on all slots.
+ */
+static int
+re_netmap_txsync(void *a, u_int ring_nr, int do_lock)
+{
+	struct rl_softc *sc = a;
+	struct rl_txdesc *txd = sc->rl_ldata.rl_tx_desc;
+	struct netmap_adapter *na = NA(sc->rl_ifp);
+	struct netmap_kring *kring = &na->tx_rings[ring_nr];
+	struct netmap_ring *ring = kring->ring;
+	int j, k, n, lim = kring->nkr_num_slots - 1;
+	/* Read cur once. A negative k would pass 'k > lim' and never match j below. */
+	k = ring->cur;
+	if ((kring->nr_kflags & NR_REINIT) || k < 0 || k > lim)
+		return netmap_ring_reinit(kring);
+	/* do_lock != 0: take RL_LOCK here. Returns 0 or the reinit result. */
+	if (do_lock)
+		RL_LOCK(sc);
+
+	/* Sync the TX descriptor list */
+	bus_dmamap_sync(sc->rl_ldata.rl_tx_list_tag,
+	    sc->rl_ldata.rl_tx_list_map,
+	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
+
+	/* record completed transmissions: stop at the first slot with OWN set */
+	for (n = 0, j = sc->rl_ldata.rl_tx_considx;
+	    j != sc->rl_ldata.rl_tx_prodidx;
+	    n++, j = RL_TX_DESC_NXT(sc, j)) {
+		uint32_t cmdstat =
+		    le32toh(sc->rl_ldata.rl_tx_list[j].rl_cmdstat);
+		if (cmdstat & RL_TDESC_STAT_OWN)
+			break;
+	}
+	if (n > 0) {
+		sc->rl_ldata.rl_tx_considx = j;
+		sc->rl_ldata.rl_tx_free += n;
+		kring->nr_hwavail += n;
+	}
+
+	/* update avail to what the hardware knows */
+	ring->avail = kring->nr_hwavail;
+
+	/* we trust prodidx, not hwcur */
+	j = kring->nr_hwcur = sc->rl_ldata.rl_tx_prodidx;
+	if (j != k) { /* we have new packets to send */
+		n = 0;
+		while (j != k) {
+			struct netmap_slot *slot = &ring->slot[j];
+			struct rl_desc *desc = &sc->rl_ldata.rl_tx_list[j];
+			void *addr = NMB(slot);
+			int len = slot->len;	/* read once: checked, then used */
+			int cmd = len | RL_TDESC_CMD_EOF |
+			    RL_TDESC_CMD_OWN | RL_TDESC_CMD_SOF;
+
+			if (addr == netmap_buffer_base || len > NETMAP_BUF_SIZE) {
+				if (do_lock)
+					RL_UNLOCK(sc);
+				return netmap_ring_reinit(kring);
+			}
+
+			if (j == lim)	/* mark end of ring */
+				cmd |= RL_TDESC_CMD_EOR;
+
+			if (slot->flags & NS_BUF_CHANGED) {
+				uint64_t paddr = vtophys(addr);
+				desc->rl_bufaddr_lo = htole32(RL_ADDR_LO(paddr));
+				desc->rl_bufaddr_hi = htole32(RL_ADDR_HI(paddr));
+				/* buffer has changed, unload and reload map */
+				netmap_reload_map(sc->rl_ldata.rl_tx_mtag,
+				    txd[j].tx_dmamap, addr, na->buff_size);
+				slot->flags &= ~NS_BUF_CHANGED;
+			}
+			slot->flags &= ~NS_REPORT;
+			desc->rl_cmdstat = htole32(cmd);
+			bus_dmamap_sync(sc->rl_ldata.rl_tx_mtag,
+			    txd[j].tx_dmamap, BUS_DMASYNC_PREWRITE);
+			j = (j == lim) ? 0 : j + 1;
+			n++;
+		}
+		sc->rl_ldata.rl_tx_prodidx = kring->nr_hwcur = k;
+
+		/* update the kernel count, then publish it; never read avail back */
+		kring->nr_hwavail -= n;
+		ring->avail = kring->nr_hwavail;
+
+		bus_dmamap_sync(sc->rl_ldata.rl_tx_list_tag,
+		    sc->rl_ldata.rl_tx_list_map,
+		    BUS_DMASYNC_PREWRITE|BUS_DMASYNC_PREREAD);
+
+		/* start ? */
+		CSR_WRITE_1(sc, sc->rl_txstart, RL_TXSTART_START);
+	}
+	if (do_lock)
+		RL_UNLOCK(sc);
+	return 0;
+}
+
+
+/*
+ * Reconcile kernel and user view of the receive ring.
+ *
+ * Userspace has read rx slots up to cur (excluded).
+ * The last unread slot previously known to the kernel was nr_hwcur,
+ * and the last interrupt reported nr_hwavail slots available.
+ * We must subtract the newly consumed slots (cur - nr_hwcur)
+ * from nr_hwavail, clearing the descriptors for the next
+ * read, tell the hardware that they are available,
+ * and set nr_hwcur = cur and avail = nr_hwavail.
+ * issuing a dmamap_sync on all slots.
+ */
+static int
+re_netmap_rxsync(void *a, u_int ring_nr, int do_lock)
+{
+	struct rl_softc *sc = a;
+	struct rl_rxdesc *rxd = sc->rl_ldata.rl_rx_desc;
+	struct netmap_adapter *na = NA(sc->rl_ifp);
+	struct netmap_kring *kring = &na->rx_rings[ring_nr];
+	struct netmap_ring *ring = kring->ring;
+	int j, k, n, lim = kring->nkr_num_slots - 1;
+	/* Read cur once. A negative k would pass 'k > lim' and never match j below. */
+	k = ring->cur;
+	if ((kring->nr_kflags & NR_REINIT) || k < 0 || k > lim)
+		return netmap_ring_reinit(kring);
+	/* do_lock != 0: take RL_LOCK here. Returns 0 or the reinit result. */
+	if (do_lock)
+		RL_LOCK(sc);
+	/* XXX check sync modes */
+	bus_dmamap_sync(sc->rl_ldata.rl_rx_list_tag,
+	    sc->rl_ldata.rl_rx_list_map,
+	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
+
+	/*
+	 * The device uses all the buffers in the ring, so we need
+	 * another termination condition in addition to RL_RDESC_STAT_OWN
+	 * cleared (all buffers could have it cleared). The easiest one
+	 * is to limit the amount of data reported up to 'lim'
+	 */
+	j = sc->rl_ldata.rl_rx_prodidx;
+	for (n = kring->nr_hwavail; n < lim ; n++) {
+		struct rl_desc *cur_rx = &sc->rl_ldata.rl_rx_list[j];
+		uint32_t rxstat = le32toh(cur_rx->rl_cmdstat);
+		uint32_t total_len;
+
+		if ((rxstat & RL_RDESC_STAT_OWN) != 0)
+			break;
+		total_len = rxstat & sc->rl_rxlenmask;
+		/* XXX subtract crc */
+		total_len = (total_len < 4) ? 0 : total_len - 4;
+		kring->ring->slot[j].len = total_len;
+		/* sync was in re_newbuf() */
+		bus_dmamap_sync(sc->rl_ldata.rl_rx_mtag,
+		    rxd[j].rx_dmamap, BUS_DMASYNC_POSTREAD);
+		j = RL_RX_DESC_NXT(sc, j);
+	}
+	if (n != kring->nr_hwavail) {
+		sc->rl_ldata.rl_rx_prodidx = j;
+		sc->rl_ifp->if_ipackets += n - kring->nr_hwavail;
+		kring->nr_hwavail = n;
+	}
+
+	/* skip past packets that userspace has already processed,
+	 * making them available for reception.
+	 * advance nr_hwcur and issue a bus_dmamap_sync on the
+	 * buffers so it is safe to write to them.
+	 * Also decrease nr_hwavail
+	 */
+	j = kring->nr_hwcur;
+	if (j != k) { /* userspace has read some packets. */
+		n = 0;
+		while (j != k) {
+			struct netmap_slot *slot = ring->slot + j;
+			struct rl_desc *desc = &sc->rl_ldata.rl_rx_list[j];
+			int cmd = na->buff_size | RL_RDESC_CMD_OWN;
+			void *addr = NMB(slot);
+
+			if (addr == netmap_buffer_base) { /* bad buf */
+				if (do_lock)
+					RL_UNLOCK(sc);
+				return netmap_ring_reinit(kring);
+			}
+
+			if (j == lim)	/* mark end of ring */
+				cmd |= RL_RDESC_CMD_EOR;
+			/* address first: OWN in rl_cmdstat gives the slot to the NIC */
+			slot->flags &= ~NS_REPORT;
+			if (slot->flags & NS_BUF_CHANGED) {
+				uint64_t paddr = vtophys(addr);
+				desc->rl_bufaddr_lo = htole32(RL_ADDR_LO(paddr));
+				desc->rl_bufaddr_hi = htole32(RL_ADDR_HI(paddr));
+				netmap_reload_map(sc->rl_ldata.rl_rx_mtag,
+				    rxd[j].rx_dmamap, addr, na->buff_size);
+				slot->flags &= ~NS_BUF_CHANGED;
+			}
+			desc->rl_cmdstat = htole32(cmd);
+			bus_dmamap_sync(sc->rl_ldata.rl_rx_mtag,
+			    rxd[j].rx_dmamap, BUS_DMASYNC_PREREAD);
+			j = (j == lim) ? 0 : j + 1;
+			n++;
+		}
+		kring->nr_hwavail -= n;
+		kring->nr_hwcur = k;
+		/* Flush the RX DMA ring */
+
+		bus_dmamap_sync(sc->rl_ldata.rl_rx_list_tag,
+		    sc->rl_ldata.rl_rx_list_map,
+		    BUS_DMASYNC_PREWRITE|BUS_DMASYNC_PREREAD);
+	}
+	/* tell userspace that there are new packets */
+	ring->avail = kring->nr_hwavail;
+	if (do_lock)
+		RL_UNLOCK(sc);
+	return 0;
+}
+
+static void
+re_netmap_tx_init(struct rl_softc *sc)
+{
+	struct netmap_adapter *na = NA(sc->rl_ifp);
+	struct netmap_slot *slot = netmap_reset(na, NR_TX, 0, 0);
+	struct rl_txdesc *txd = sc->rl_ldata.rl_tx_desc;
+	struct rl_desc *desc = sc->rl_ldata.rl_tx_list;
+	int i = 0;
+
+	/* a NULL slot means that we are not in netmap mode */
+	if (!slot)
+		return;
+
+	/* in netmap mode, overwrite addresses and maps */
+	while (i < sc->rl_ldata.rl_tx_desc_cnt) {
+		void *addr = NMB(&slot[i]);
+		uint64_t paddr = vtophys(addr);
+
+		desc[i].rl_bufaddr_lo = htole32(RL_ADDR_LO(paddr));
+		desc[i].rl_bufaddr_hi = htole32(RL_ADDR_HI(paddr));
+		netmap_load_map(sc->rl_ldata.rl_tx_mtag,
+		    txd[i].tx_dmamap, addr, na->buff_size);
+
+		i++;
+	}
+}
+
+static void
+re_netmap_rx_init(struct rl_softc *sc)
+{
+	struct netmap_adapter *na = NA(sc->rl_ifp);
+	struct netmap_slot *slot = netmap_reset(na, NR_RX, 0, 0);
+	struct rl_desc *desc = sc->rl_ldata.rl_rx_list;
+	int i;
+
+	/* a NULL slot means that we are not in netmap mode */
+	if (!slot)
+		return;
+
+	for (i = 0; i < sc->rl_ldata.rl_rx_desc_cnt; i++) {
+		void *addr = NMB(&slot[i]);
+		uint64_t paddr = vtophys(addr);
+		uint32_t cmdstat;
+
+		desc[i].rl_bufaddr_lo = htole32(RL_ADDR_LO(paddr));
+		desc[i].rl_bufaddr_hi = htole32(RL_ADDR_HI(paddr));
+		cmdstat = slot[i].len = na->buff_size; // XXX
+		cmdstat |= RL_RDESC_CMD_OWN;
+		if (i == sc->rl_ldata.rl_rx_desc_cnt - 1) /* mark end of ring */
+			cmdstat |= RL_RDESC_CMD_EOR;
+		desc[i].rl_cmdstat = htole32(cmdstat);
+
+		netmap_reload_map(sc->rl_ldata.rl_rx_mtag,
+		    sc->rl_ldata.rl_rx_desc[i].rx_dmamap, addr, na->buff_size);
+	}
+}
diff --git a/sys/dev/netmap/ixgbe_netmap.h b/sys/dev/netmap/ixgbe_netmap.h
new file mode 100644
index 0000000000000..a4d5491d67f12
--- /dev/null
+++ b/sys/dev/netmap/ixgbe_netmap.h
@@ -0,0 +1,376 @@
+/*
+ * Copyright (C) 2011 Matteo Landi, Luigi Rizzo. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * $FreeBSD$
+ * $Id: ixgbe_netmap.h 9662 2011-11-16 13:18:06Z luigi $
+ *
+ * netmap modifications for ixgbe
+ */
+
+#include <net/netmap.h>
+#include <sys/selinfo.h>
+// #include <vm/vm.h>
+// #include <vm/pmap.h> /* vtophys ? */
+#include <dev/netmap/netmap_kern.h>
+
+static int ixgbe_netmap_reg(struct ifnet *, int onoff);
+static int ixgbe_netmap_txsync(void *, u_int, int);
+static int ixgbe_netmap_rxsync(void *, u_int, int);
+static void ixgbe_netmap_lock_wrapper(void *, int, u_int);
+
+
+SYSCTL_NODE(_dev, OID_AUTO, ixgbe, CTLFLAG_RW, 0, "ixgbe card");
+
+/*
+ * Describe this adapter to the netmap core.
+ *
+ * A temporary netmap_adapter is zeroed, filled with the ring geometry
+ * and the driver callbacks, and handed to netmap_attach() together
+ * with the number of hardware queues.
+ */
+static void
+ixgbe_netmap_attach(struct adapter *adapter)
+{
+	struct netmap_adapter na;
+
+	bzero(&na, sizeof(na));
+
+	/* callbacks used by the generic code */
+	na.nm_register = ixgbe_netmap_reg;
+	na.nm_lock = ixgbe_netmap_lock_wrapper;
+	na.nm_txsync = ixgbe_netmap_txsync;
+	na.nm_rxsync = ixgbe_netmap_rxsync;
+
+	/* interface and ring geometry */
+	na.ifp = adapter->ifp;
+	na.num_tx_desc = adapter->num_tx_desc;
+	na.num_rx_desc = adapter->num_rx_desc;
+	na.separate_locks = 1;
+
+	/*
+	 * adapter->rx_mbuf_sz is set by SIOCSETMTU, but in netmap mode
+	 * we allocate the buffers on the first register. So we must
+	 * disallow a SIOCSETMTU when if_capenable & IFCAP_NETMAP is set.
+	 */
+	na.buff_size = MCLBYTES;
+
+	netmap_attach(&na, adapter->num_queues);
+}
+
+
+/*
+ * Export the driver locks to the generic netmap code.
+ *
+ * _a is the struct adapter, what selects the operation (one of the
+ * NETMAP_*_LOCK / NETMAP_*_UNLOCK codes) and queueid picks the ring
+ * for the per-ring TX and RX locks. Unknown codes are ignored.
+ */
+static void
+ixgbe_netmap_lock_wrapper(void *_a, int what, u_int queueid)
+{
+	struct adapter *sc = _a;
+
+	ASSERT(queueid < sc->num_queues);
+
+	switch (what) {
+	/* adapter-wide lock */
+	case NETMAP_CORE_LOCK:
+		IXGBE_CORE_LOCK(sc);
+		break;
+	case NETMAP_CORE_UNLOCK:
+		IXGBE_CORE_UNLOCK(sc);
+		break;
+
+	/* per-ring transmit lock */
+	case NETMAP_TX_LOCK:
+		IXGBE_TX_LOCK(&sc->tx_rings[queueid]);
+		break;
+	case NETMAP_TX_UNLOCK:
+		IXGBE_TX_UNLOCK(&sc->tx_rings[queueid]);
+		break;
+
+	/* per-ring receive lock */
+	case NETMAP_RX_LOCK:
+		IXGBE_RX_LOCK(&sc->rx_rings[queueid]);
+		break;
+	case NETMAP_RX_UNLOCK:
+		IXGBE_RX_UNLOCK(&sc->rx_rings[queueid]);
+		break;
+
+	default:
+		break;
+	}
+}
+
+
+/*
+ * Support for netmap register/unregister. We are already under core lock.
+ * Only called on the first init or the last unregister.
+ *
+ * With onoff != 0 the interface is switched to netmap mode: the
+ * IFCAP_NETMAP capability is enabled, if_transmit is saved and replaced
+ * by netmap_start, and the adapter is reinitialized. If the adapter does
+ * not come back up, or with onoff == 0, the saved if_transmit is put
+ * back, IFCAP_NETMAP is cleared and the adapter is reinitialized again.
+ *
+ * Returns 0 on success, EINVAL if the interface has no netmap adapter,
+ * ENOMEM if the adapter did not restart in netmap mode.
+ */
+static int
+ixgbe_netmap_reg(struct ifnet *ifp, int onoff)
+{
+	struct adapter *adapter = ifp->if_softc;
+	struct netmap_adapter *na = NA(ifp);
+	int error = 0;
+
+	if (na == NULL)
+		return EINVAL;
+
+	ixgbe_disable_intr(adapter);
+
+	/* Tell the stack that the interface is no longer active */
+	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
+
+	if (onoff) {
+		ifp->if_capenable |= IFCAP_NETMAP;
+
+		/* save if_transmit to restore it later */
+		na->if_transmit = ifp->if_transmit;
+		ifp->if_transmit = netmap_start;
+
+		ixgbe_init_locked(adapter);
+		if (ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE))
+			return (error);	/* up and running in netmap mode */
+
+		/* init failed: report it and fall back to normal mode */
+		error = ENOMEM;
+	}
+
+	/* leave netmap mode: restore if_transmit */
+	ifp->if_transmit = na->if_transmit;
+	ifp->if_capenable &= ~IFCAP_NETMAP;
+	ixgbe_init_locked(adapter);	/* also enables intr */
+
+	return (error);
+}
+
+
+/*
+ * Reconcile kernel and user view of the transmit ring.
+ *
+ * Userspace has filled tx slots up to cur (excluded).
+ * The last unused slot previously known to the kernel was nr_hwcur,
+ * and the last interrupt reported nr_hwavail slots available
+ * (using the special value -1 to indicate idle transmit ring).
+ * The function must first update avail to what the kernel
+ * knows, subtract the newly used slots (cur - nr_hwcur)
+ * from both avail and nr_hwavail, and set nr_hwcur = cur
+ * issuing a dmamap_sync on all slots.
+ *
+ * Check parameters in the struct netmap_ring.
+ * We don't use avail, only check for bogus values.
+ * Make sure cur is valid, and same goes for buffer indexes and lengths.
+ * To avoid races, read the values once, and never use those from
+ * the ring afterwards.
+ *
+ * a is the struct adapter, ring_nr the index of the tx queue, and
+ * do_lock tells whether the per-ring TX lock must be taken here.
+ * Returns 0 on success, or the value of netmap_ring_reinit() when the
+ * ring content supplied by userspace is not valid.
+ */
+static int
+ixgbe_netmap_txsync(void *a, u_int ring_nr, int do_lock)
+{
+	struct adapter *adapter = a;
+	struct tx_ring *txr = &adapter->tx_rings[ring_nr];
+	struct netmap_adapter *na = NA(adapter->ifp);
+	struct netmap_kring *kring = &na->tx_rings[ring_nr];
+	struct netmap_ring *ring = kring->ring;
+	int j, k, n = 0, lim = kring->nkr_num_slots - 1;
+
+	/* generate an interrupt approximately every half ring */
+	int report_frequency = kring->nkr_num_slots >> 1;
+
+	k = ring->cur; /* ring is not protected by any lock */
+	/*
+	 * k is a signed copy of a value written by userspace: a huge cur
+	 * shows up as a negative k, which must be rejected too, otherwise
+	 * the (j != k) loop below would never terminate.
+	 */
+	if ( (kring->nr_kflags & NR_REINIT) || k < 0 || k > lim)
+		return netmap_ring_reinit(kring);
+
+	if (do_lock)
+		IXGBE_TX_LOCK(txr);
+	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
+	    BUS_DMASYNC_POSTREAD);
+
+	/* update avail to what the hardware knows */
+	ring->avail = kring->nr_hwavail;
+
+	j = kring->nr_hwcur;
+	if (j != k) {	/* we have new packets to send */
+		while (j != k) {
+			struct netmap_slot *slot = &ring->slot[j];
+			struct ixgbe_tx_buf *txbuf = &txr->tx_buffers[j];
+			union ixgbe_adv_tx_desc *curr = &txr->tx_base[j];
+			void *addr = NMB(slot);
+			/* request a status report when asked to, and
+			 * at the start and at the middle of the ring */
+			int flags = ((slot->flags & NS_REPORT) ||
+			    j == 0 || j == report_frequency) ?
+			    IXGBE_TXD_CMD_RS : 0;
+			int len = slot->len;
+
+			if (addr == netmap_buffer_base || len > NETMAP_BUF_SIZE) {
+				if (do_lock)
+					IXGBE_TX_UNLOCK(txr);
+				return netmap_ring_reinit(kring);
+			}
+
+			slot->flags &= ~NS_REPORT;
+			curr->read.buffer_addr = htole64(vtophys(addr));
+			curr->read.olinfo_status = 0;
+			curr->read.cmd_type_len =
+			    htole32(txr->txd_cmd | len |
+				(IXGBE_ADVTXD_DTYP_DATA |
+				IXGBE_ADVTXD_DCMD_IFCS |
+				IXGBE_TXD_CMD_EOP | flags) );
+			if (slot->flags & NS_BUF_CHANGED) {
+				/* buffer has changed, unload and reload map */
+				netmap_reload_map(txr->txtag, txbuf->map,
+				    addr, na->buff_size);
+				slot->flags &= ~NS_BUF_CHANGED;
+			}
+
+			bus_dmamap_sync(txr->txtag, txbuf->map,
+			    BUS_DMASYNC_PREWRITE);
+			j = (j == lim) ? 0 : j + 1;
+			n++;
+		}
+		kring->nr_hwcur = k;
+
+		/*
+		 * Decrease avail by the number of sent packets. The kernel
+		 * counter is the authoritative one: ring->avail lives in
+		 * memory that userspace can write at any time, so it is
+		 * only ever written here, never read back.
+		 */
+		kring->nr_hwavail -= n;
+		ring->avail = kring->nr_hwavail;
+
+		bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
+		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
+
+		IXGBE_WRITE_REG(&adapter->hw, IXGBE_TDT(txr->me), k);
+	}
+
+	if (n == 0 || kring->nr_hwavail < 1) {
+		int delta;
+
+		/* record completed transmissions. TODO
+		 *
+		 * The datasheet discourages the use of TDH to find out the
+		 * number of sent packets; the right way to do so, is to check
+		 * the DD bit inside the status of a packet descriptor. On the
+		 * other hand, we avoid setting the `report status' bit for
+		 * *all* outgoing packets (kind of interrupt mitigation),
+		 * consequently the DD bit is not guaranteed to be set for all
+		 * the packets: that is why, for the moment, we continue to
+		 * use TDH.
+		 */
+		j = IXGBE_READ_REG(&adapter->hw, IXGBE_TDH(ring_nr));
+		if (j >= kring->nkr_num_slots) { /* XXX can happen */
+			D("TDH wrap %d", j);
+			j -= kring->nkr_num_slots;
+		}
+		delta = j - txr->next_to_clean;
+		if (delta) {
+			/* new transmissions were completed, increment
+			   kring->nr_hwavail. */
+			if (delta < 0)
+				delta += kring->nkr_num_slots;
+			txr->next_to_clean = j;
+			kring->nr_hwavail += delta;
+			ring->avail = kring->nr_hwavail;
+		}
+	}
+
+	if (do_lock)
+		IXGBE_TX_UNLOCK(txr);
+	return 0;
+}
+
+
+/*
+ * Reconcile kernel and user view of the receive ring.
+ *
+ * Userspace has read rx slots up to cur (excluded).
+ * The last unread slot previously known to the kernel was nr_hwcur,
+ * and the last interrupt reported nr_hwavail slots available.
+ * We must subtract the newly consumed slots (cur - nr_hwcur)
+ * from nr_hwavail, clearing the descriptors for the next
+ * read, tell the hardware that they are available,
+ * and set nr_hwcur = cur and avail = nr_hwavail,
+ * issuing a dmamap_sync on all slots.
+ *
+ * a is the struct adapter, ring_nr the index of the rx queue, and
+ * do_lock tells whether the per-ring RX lock must be taken here.
+ * Returns 0 on success, or the value of netmap_ring_reinit() when the
+ * ring content supplied by userspace is not valid.
+ */
+static int
+ixgbe_netmap_rxsync(void *a, u_int ring_nr, int do_lock)
+{
+	struct adapter *adapter = a;
+	struct rx_ring *rxr = &adapter->rx_rings[ring_nr];
+	struct netmap_adapter *na = NA(adapter->ifp);
+	struct netmap_kring *kring = &na->rx_rings[ring_nr];
+	struct netmap_ring *ring = kring->ring;
+	int j, k, n, lim = kring->nkr_num_slots - 1;
+
+	k = ring->cur; /* ring is not protected by any lock */
+	/*
+	 * k is a signed copy of a value written by userspace: a huge cur
+	 * shows up as a negative k, which must be rejected too, otherwise
+	 * the (j != k) loop below would never terminate.
+	 */
+	if ( (kring->nr_kflags & NR_REINIT) || k < 0 || k > lim)
+		return netmap_ring_reinit(kring);
+
+	if (do_lock)
+		IXGBE_RX_LOCK(rxr);
+	/* XXX check sync modes */
+	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
+	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
+
+	/*
+	 * Import newly received packets: walk the descriptors starting
+	 * at next_to_check for as long as the DD status bit is set,
+	 * copying the length into the matching netmap slot.
+	 */
+	j = rxr->next_to_check;
+	for (n = 0; ; n++) {
+		union ixgbe_adv_rx_desc *curr = &rxr->rx_base[j];
+		uint32_t staterr = le32toh(curr->wb.upper.status_error);
+
+		if ((staterr & IXGBE_RXD_STAT_DD) == 0)
+			break;
+		ring->slot[j].len = le16toh(curr->wb.upper.length);
+		bus_dmamap_sync(rxr->ptag,
+		    rxr->rx_buffers[j].pmap, BUS_DMASYNC_POSTREAD);
+		j = (j == lim) ? 0 : j + 1;
+	}
+	if (n) {
+		rxr->next_to_check = j;
+		kring->nr_hwavail += n;
+		if (kring->nr_hwavail >= lim - 10) {
+			ND("rx ring %d almost full %d", ring_nr, kring->nr_hwavail);
+		}
+	}
+
+	/* skip past packets that userspace has already processed,
+	 * making them available for reception.
+	 * advance nr_hwcur and issue a bus_dmamap_sync on the
+	 * buffers so it is safe to write to them.
+	 * Also decrease nr_hwavail
+	 */
+	j = kring->nr_hwcur;
+	if (j != k) {	/* userspace has read some packets. */
+		n = 0;
+		while (j != k) {
+			struct netmap_slot *slot = ring->slot + j;
+			union ixgbe_adv_rx_desc *curr = &rxr->rx_base[j];
+			struct ixgbe_rx_buf *rxbuf = rxr->rx_buffers + j;
+			void *addr = NMB(slot);
+
+			if (addr == netmap_buffer_base) { /* bad buf */
+				if (do_lock)
+					IXGBE_RX_UNLOCK(rxr);
+				return netmap_ring_reinit(kring);
+			}
+
+			curr->wb.upper.status_error = 0;
+			curr->read.pkt_addr = htole64(vtophys(addr));
+			if (slot->flags & NS_BUF_CHANGED) {
+				netmap_reload_map(rxr->ptag, rxbuf->pmap,
+				    addr, na->buff_size);
+				slot->flags &= ~NS_BUF_CHANGED;
+			}
+
+			bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
+			    BUS_DMASYNC_PREREAD);
+
+			j = (j == lim) ? 0 : j + 1;
+			n++;
+		}
+		kring->nr_hwavail -= n;
+		/*
+		 * Store the validated copy, not ring->cur: userspace may
+		 * have changed the shared field since it was checked.
+		 */
+		kring->nr_hwcur = k;
+		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
+		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
+		/* IMPORTANT: we must leave one free slot in the ring,
+		 * so move j back by one unit
+		 */
+		j = (j == 0) ? lim : j - 1;
+		IXGBE_WRITE_REG(&adapter->hw, IXGBE_RDT(rxr->me), j);
+	}
+	/* tell userspace that there are new packets */
+	ring->avail = kring->nr_hwavail ;
+	if (do_lock)
+		IXGBE_RX_UNLOCK(rxr);
+	return 0;
+}
diff --git a/sys/dev/netmap/netmap.c b/sys/dev/netmap/netmap.c
new file mode 100644
index 0000000000000..7645a4e6e32bd
--- /dev/null
+++ b/sys/dev/netmap/netmap.c
@@ -0,0 +1,1762 @@
+/*
+ * Copyright (C) 2011 Matteo Landi, Luigi Rizzo. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * $FreeBSD$
+ * $Id: netmap.c 9662 2011-11-16 13:18:06Z luigi $
+ *
+ * This module supports memory mapped access to network devices,
+ * see netmap(4).
+ *
+ * The module uses a large memory pool allocated by the kernel
+ * and accessible as mmapped memory by multiple userspace threads/processes.
+ * The memory pool contains packet buffers and "netmap rings",
+ * i.e. user-accessible copies of the interface's queues.
+ *
+ * Access to the network card works like this:
+ * 1. a process/thread issues one or more open() on /dev/netmap, to create
+ * select()able file descriptors on which events are reported.
+ * 2. on each descriptor, the process issues an ioctl() to identify
+ * the interface that should report events to the file descriptor.
+ * 3. on each descriptor, the process issues an mmap() request to
+ * map the shared memory region within the process' address space.
+ * The list of interesting queues is indicated by a location in
+ * the shared memory region.
+ * 4. using the functions in the netmap(4) userspace API, a process
+ * can look up the occupation state of a queue, access memory buffers,
+ * and retrieve received packets or enqueue packets to transmit.
+ * 5. using some ioctl()s the process can synchronize the userspace view
+ * of the queue with the actual status in the kernel. This includes both
+ * receiving the notification of new packets, and transmitting new
+ * packets on the output interface.
+ * 6. select() or poll() can be used to wait for events on individual
+ * transmit or receive queues (or all queues for a given interface).
+ */
+
+#include <sys/cdefs.h> /* prerequisite */
+__FBSDID("$FreeBSD$");
+
+#include <sys/types.h>
+#include <sys/module.h>
+#include <sys/errno.h>
+#include <sys/param.h> /* defines used in kernel.h */
+#include <sys/kernel.h> /* types used in module initialization */
+#include <sys/conf.h> /* cdevsw struct */
+#include <sys/uio.h> /* uio struct */
+#include <sys/sockio.h>
+#include <sys/socketvar.h> /* struct socket */
+#include <sys/malloc.h>
+#include <sys/mman.h> /* PROT_EXEC */
+#include <sys/poll.h>
+#include <vm/vm.h> /* vtophys */
+#include <vm/pmap.h> /* vtophys */
+#include <sys/socket.h> /* sockaddrs */
+#include <machine/bus.h>
+#include <sys/selinfo.h>
+#include <sys/sysctl.h>
+#include <net/if.h>
+#include <net/bpf.h> /* BIOCIMMEDIATE */
+#include <net/netmap.h>
+#include <dev/netmap/netmap_kern.h>
+#include <machine/bus.h> /* bus_dmamap_* */
+
+MALLOC_DEFINE(M_NETMAP, "netmap", "Network memory map");
+
+/*
+ * lock and unlock for the netmap memory allocator.
+ * The macros expand to a single expression without a trailing
+ * semicolon, so that "NMA_LOCK();" is exactly one statement and
+ * can be used safely in unbraced if/else bodies.
+ */
+#define NMA_LOCK()	mtx_lock(&netmap_mem_d->nm_mtx)
+#define NMA_UNLOCK()	mtx_unlock(&netmap_mem_d->nm_mtx)
+
+/*
+ * Default amount of memory pre-allocated by the module.
+ * We start with a large size and then shrink our demand
+ * according to what is available when the module is loaded.
+ * At the moment the block is contiguous, but we can easily
+ * restrict our demand to smaller units (16..64k)
+ */
+#define NETMAP_MEMORY_SIZE (64 * 1024 * PAGE_SIZE)
+static void * netmap_malloc(size_t size, const char *msg);
+static void netmap_free(void *addr, const char *msg);
+
+/*
+ * Allocator for a pool of packet buffers. For each buffer we have
+ * one entry in the bitmap to signal the state. Allocation scans
+ * the bitmap, but since this is done only on attach, we are not
+ * too worried about performance
+ * XXX if we need to allocate small blocks, a translation
+ * table is used both for kernel virtual address and physical
+ * addresses.
+ */
+struct netmap_buf_pool {
+ u_int total_buffers; /* total buffers; valid indexes are below this */
+ u_int free; /* buffers currently free; kept in step with bitmap */
+ u_int bufsize; /* copied into slot->len when a buffer is allocated */
+ char *base; /* buffer base address */
+ uint32_t *bitmap; /* one bit per buffer, 1 means free */
+};
+struct netmap_buf_pool nm_buf_pool;
+/* XXX move these two vars back into netmap_buf_pool */
+u_int netmap_total_buffers;
+char *netmap_buffer_base;
+
+/* user-controlled variables */
+int netmap_verbose;
+
+static int no_timestamp; /* don't timestamp on rxsync */
+
+/*
+ * dev.netmap sysctl tree: two writable knobs (verbose, no_timestamp)
+ * and two read-only counters exporting the state of the buffer pool.
+ * The counters are u_int, hence SYSCTL_UINT.
+ */
+SYSCTL_NODE(_dev, OID_AUTO, netmap, CTLFLAG_RW, 0, "Netmap args");
+SYSCTL_INT(_dev_netmap, OID_AUTO, verbose,
+    CTLFLAG_RW, &netmap_verbose, 0, "Verbose mode");
+SYSCTL_INT(_dev_netmap, OID_AUTO, no_timestamp,
+    CTLFLAG_RW, &no_timestamp, 0, "no_timestamp");
+SYSCTL_UINT(_dev_netmap, OID_AUTO, total_buffers,
+    CTLFLAG_RD, &nm_buf_pool.total_buffers, 0, "total_buffers");
+SYSCTL_UINT(_dev_netmap, OID_AUTO, free_buffers,
+    CTLFLAG_RD, &nm_buf_pool.free, 0, "free_buffers");
+
+/*
+ * Allocate n buffers from the pool p, and fill the first n entries
+ * of slot[] with their index, the pool buffer size as length, and the
+ * NS_BUF_CHANGED flag.
+ * Buffer 0 is the 'junk' buffer.
+ *
+ * The request is all-or-nothing with respect to the free counter: if
+ * fewer than n buffers are free nothing is allocated and the slots are
+ * left untouched. The function returns no status, so callers are
+ * expected to check p->free beforehand.
+ */
+static void
+netmap_new_bufs(struct netmap_buf_pool *p, struct netmap_slot *slot, u_int n)
+{
+	uint32_t bi = 0;		/* index in the bitmap */
+	uint32_t mask, j, i = 0;	/* slot counter */
+	/* number of 32-bit words needed to hold one bit per buffer */
+	uint32_t nwords = (p->total_buffers + 31) / 32;
+
+	if (n > p->free) {
+		D("only %d out of %d buffers available", p->free, n);
+		return;
+	}
+	/*
+	 * Termination is normally guaranteed by p->free; the bound on
+	 * bi only matters if the counter and the bitmap ever disagree,
+	 * and keeps the scan inside the bitmap in that case.
+	 */
+	while (i < n && p->free > 0 && bi < nwords) {
+		uint32_t cur = p->bitmap[bi];
+
+		if (cur == 0) {	/* bitmask is fully used */
+			bi++;
+			continue;
+		}
+		/* locate a slot: lowest bit set in the current word */
+		for (j = 0, mask = 1; (cur & mask) == 0; j++, mask <<= 1)
+			;
+		p->bitmap[bi] &= ~mask;		/* slot in use */
+		p->free--;
+		slot[i].buf_idx = bi*32+j;
+		slot[i].len = p->bufsize;
+		slot[i].flags = NS_BUF_CHANGED;
+		i++;
+	}
+	if (i < n)
+		D("only %d out of %d buffers available", i, n);
+	ND("allocated %d buffers, %d available", n, p->free);
+}
+
+
+/*
+ * Return buffer i to the pool p.
+ *
+ * Indexes outside the pool are rejected, and so are indexes 0 and 1:
+ * netmap_ring_reinit() treats every index below 2 as invalid and
+ * resets bad slots to index 0, so such a slot may reach this function
+ * when the rings are torn down and must not put a reserved buffer on
+ * the free list. Freeing a buffer that is already free is reported
+ * and ignored.
+ */
+static void
+netmap_free_buf(struct netmap_buf_pool *p, uint32_t i)
+{
+	uint32_t pos, mask;
+
+	if (i < 2 || i >= p->total_buffers) {
+		D("invalid free index %d", i);
+		return;
+	}
+	pos = i / 32;
+	/* unsigned constant: shifting a signed 1 by 31 is undefined */
+	mask = (uint32_t)1 << (i % 32);
+	if (p->bitmap[pos] & mask) {
+		D("slot %d already free", i);
+		return;
+	}
+	p->bitmap[pos] |= mask;
+	p->free++;
+}
+
+
+/*
+ * Descriptor of the memory objects handled by our memory allocator.
+ * Descriptors are linked in a TAILQ through nmo_next.
+ */
+struct netmap_mem_obj {
+ TAILQ_ENTRY(netmap_mem_obj) nmo_next; /* next object in the
+ chain. */
+ int nmo_used; /* flag set on used memory objects. */
+ size_t nmo_size; /* size of the memory area reserved for the
+ object. */
+ void *nmo_data; /* pointer to the memory area. */
+};
+
+/* Wrap our memory objects to make them ``chainable``. */
+TAILQ_HEAD(netmap_mem_obj_h, netmap_mem_obj);
+
+
+/*
+ * Descriptor of our custom memory allocator.
+ * nm_mtx is the mutex taken by NMA_LOCK()/NMA_UNLOCK(); nm_buffer is
+ * the address that netmap_mmap() translates into a physical address.
+ */
+struct netmap_mem_d {
+ struct mtx nm_mtx; /* lock used to handle the chain of memory
+ objects. */
+ struct netmap_mem_obj_h nm_molist; /* list of memory objects */
+ size_t nm_size; /* total amount of memory used for rings etc. */
+ size_t nm_totalsize; /* total amount of allocated memory
+ (the difference is used for buffers) */
+ size_t nm_buf_start; /* offset of packet buffers.
+ This is page-aligned. */
+ size_t nm_buf_len; /* total memory for buffers */
+ void *nm_buffer; /* pointer to the whole pre-allocated memory
+ area. */
+};
+
+
+/*
+ * Structure associated to each thread which registered an interface.
+ * It is the argument of netmap_dtor(), which releases np_nifp and the
+ * reference on np_ifp and then frees the structure itself.
+ */
+struct netmap_priv_d {
+ struct netmap_if *np_nifp; /* netmap interface descriptor. */
+
+ struct ifnet *np_ifp; /* device for which we hold a reference */
+ int np_ringid; /* from the ioctl */
+ u_int np_qfirst, np_qlast; /* range of rings to scan */
+ uint16_t np_txpoll; /* NOTE(review): presumably a tx-poll request flag derived from the ring id; confirm where it is set */
+};
+
+
+static struct cdev *netmap_dev; /* /dev/netmap character device. */
+static struct netmap_mem_d *netmap_mem_d; /* Our memory allocator. */
+
+
+static d_mmap_t netmap_mmap;
+static d_ioctl_t netmap_ioctl;
+static d_poll_t netmap_poll;
+
+#ifdef NETMAP_KEVENT
+static d_kqfilter_t netmap_kqfilter;
+#endif
+
+/*
+ * Entry points of the "netmap" character device: mmap, ioctl and poll
+ * are always provided, the kqueue filter only when NETMAP_KEVENT is
+ * defined.
+ */
+static struct cdevsw netmap_cdevsw = {
+ .d_version = D_VERSION,
+ .d_name = "netmap",
+ .d_mmap = netmap_mmap,
+ .d_ioctl = netmap_ioctl,
+ .d_poll = netmap_poll,
+#ifdef NETMAP_KEVENT
+ .d_kqfilter = netmap_kqfilter,
+#endif
+};
+
+#ifdef NETMAP_KEVENT
+/*
+ * Optional kevent() support, compiled only when NETMAP_KEVENT is defined.
+ *
+ * NOTE(review): this section looks like an unfinished stub and does not
+ * appear to be compilable as written: the D() calls in netmap_kqfilter()
+ * lack the comma between the format string and its argument, and the
+ * names ifp and netmap_sc are not declared in that function.
+ */
+static int netmap_kqread(struct knote *, long);
+static int netmap_kqwrite(struct knote *, long);
+static void netmap_kqdetach(struct knote *);
+
+/* filter operations installed for EVFILT_READ */
+static struct filterops netmap_read_filterops = {
+ .f_isfd = 1,
+ .f_attach = NULL,
+ .f_detach = netmap_kqdetach,
+ .f_event = netmap_kqread,
+};
+
+/* filter operations installed for EVFILT_WRITE */
+static struct filterops netmap_write_filterops = {
+ .f_isfd = 1,
+ .f_attach = NULL,
+ .f_detach = netmap_kqdetach,
+ .f_event = netmap_kqwrite,
+};
+
+/*
+ * support for the kevent() system call.
+ *
+ * This is the kevent filter, and is executed each time a new event
+ * is triggered on the device. This function executes some operation
+ * depending on the received filter.
+ *
+ * The implementation should test the filters and should implement
+ * filter operations we are interested in (a full list in /sys/event.h).
+ *
+ * On a match we should:
+ * - set kn->kn_fop
+ * - set kn->kn_hook
+ * - call knlist_add() to deliver the event to the application.
+ *
+ * Return 0 if the event should be delivered to the application,
+ * EINVAL for filters other than EVFILT_READ and EVFILT_WRITE.
+ */
+static int
+netmap_kqfilter(struct cdev *dev, struct knote *kn)
+{
+ /* declare variables needed to read/write */
+
+ switch(kn->kn_filter) {
+ case EVFILT_READ:
+ if (netmap_verbose)
+ D("%s kqfilter: EVFILT_READ" ifp->if_xname);
+
+ /* read operations */
+ kn->kn_fop = &netmap_read_filterops;
+ break;
+
+ case EVFILT_WRITE:
+ if (netmap_verbose)
+ D("%s kqfilter: EVFILT_WRITE" ifp->if_xname);
+
+ /* write operations */
+ kn->kn_fop = &netmap_write_filterops;
+ break;
+
+ default:
+ if (netmap_verbose)
+ D("%s kqfilter: invalid filter" ifp->if_xname);
+ return(EINVAL);
+ }
+
+ kn->kn_hook = 0;//
+ /* NOTE(review): netmap_sc is not declared anywhere in this function */
+ knlist_add(&netmap_sc->tun_rsel.si_note, kn, 0);
+
+ return (0);
+}
+#endif /* NETMAP_KEVENT */
+
+/*
+ * File descriptor's private data destructor.
+ *
+ * Call nm_register(ifp,0) to stop netmap mode on the interface and
+ * revert to normal operation. We expect that np_ifp has not gone.
+ *
+ * data is the struct netmap_priv_d of the descriptor. The adapter
+ * reference count is dropped under the core lock; when it reaches zero
+ * netmap mode is switched off, sleepers on the rings are woken up, all
+ * ring buffers go back to the pool and the shadow rings are freed.
+ * In all cases the netmap_if of this descriptor is freed, the reference
+ * on the ifnet is released and the private structure is freed.
+ */
+static void
+netmap_dtor(void *data)
+{
+ struct netmap_priv_d *priv = data;
+ struct ifnet *ifp = priv->np_ifp;
+ struct netmap_adapter *na = NA(ifp);
+ struct netmap_if *nifp = priv->np_nifp;
+
+ /* disabled debugging aid. NOTE(review): the NULL test on priv
+ * comes after priv has already been dereferenced above. */
+ if (0)
+ printf("%s starting for %p ifp %p\n", __FUNCTION__, priv,
+ priv ? priv->np_ifp : NULL);
+
+ na->nm_lock(ifp->if_softc, NETMAP_CORE_LOCK, 0);
+
+ na->refcount--;
+ if (na->refcount <= 0) { /* last instance */
+ u_int i;
+
+ D("deleting last netmap instance for %s", ifp->if_xname);
+ /*
+ * there is a race here with *_netmap_task() and
+ * netmap_poll(), which don't run under NETMAP_CORE_LOCK.
+ * na->refcount == 0 && na->ifp->if_capenable & IFCAP_NETMAP
+ * (aka NETMAP_DELETING(na)) are a unique marker that the
+ * device is dying.
+ * Before destroying stuff we sleep a bit, and then complete
+ * the job. NIOCREG should realize the condition and
+ * loop until they can continue; the other routines
+ * should check the condition at entry and quit if
+ * they cannot run.
+ */
+ /* drop the core lock while sleeping (timeout of 4 ticks) */
+ na->nm_lock(ifp->if_softc, NETMAP_CORE_UNLOCK, 0);
+ tsleep(na, 0, "NIOCUNREG", 4);
+ na->nm_lock(ifp->if_softc, NETMAP_CORE_LOCK, 0);
+ na->nm_register(ifp, 0); /* off, clear IFCAP_NETMAP */
+ /* Wake up any sleeping threads. netmap_poll will
+ * then return POLLERR
+ */
+ /* NOTE(review): this loop covers num_queues + 2 krings while
+ * the buffer release below covers num_queues + 1; presumably
+ * the kring arrays have num_queues + 2 entries, confirm
+ * against the allocation in netmap_attach(). */
+ for (i = 0; i < na->num_queues + 2; i++) {
+ selwakeuppri(&na->tx_rings[i].si, PI_NET);
+ selwakeuppri(&na->rx_rings[i].si, PI_NET);
+ }
+ /* release all buffers */
+ NMA_LOCK();
+ for (i = 0; i < na->num_queues + 1; i++) {
+ int j, lim;
+ struct netmap_ring *ring;
+
+ ND("tx queue %d", i);
+ ring = na->tx_rings[i].ring;
+ lim = na->tx_rings[i].nkr_num_slots;
+ for (j = 0; j < lim; j++)
+ netmap_free_buf(&nm_buf_pool,
+ ring->slot[j].buf_idx);
+
+ ND("rx queue %d", i);
+ ring = na->rx_rings[i].ring;
+ lim = na->rx_rings[i].nkr_num_slots;
+ for (j = 0; j < lim; j++)
+ netmap_free_buf(&nm_buf_pool,
+ ring->slot[j].buf_idx);
+ }
+ NMA_UNLOCK();
+ /* the first tx ring is used as the handle for freeing the
+ * "shadow rings" allocation */
+ netmap_free(na->tx_rings[0].ring, "shadow rings");
+ /* wake up whoever is sleeping on na */
+ wakeup(na);
+ }
+ netmap_free(nifp, "nifp");
+
+ na->nm_lock(ifp->if_softc, NETMAP_CORE_UNLOCK, 0);
+
+ /* drop the reference on the interface held by this descriptor */
+ if_rele(ifp);
+
+ bzero(priv, sizeof(*priv)); /* XXX for safety */
+ free(priv, M_DEVBUF);
+}
+
+
+
+/*
+ * Create and return a new ``netmap_if`` object, and possibly also
+ * rings and packet buffers.
+ *
+ * ifname is copied into the descriptor; na is the adapter the
+ * descriptor refers to. The adapter reference count is incremented;
+ * on the first reference the shadow rings for all hardware queues plus
+ * the host stack queue are allocated in one block, initialized, and
+ * populated with buffers from the pool. In all cases the descriptor is
+ * then filled with the offsets from itself to each tx and rx ring.
+ *
+ * Return NULL on failure (no memory for the descriptor or the rings,
+ * or not enough free buffers); the reference count is then unchanged.
+ */
+static void *
+netmap_if_new(const char *ifname, struct netmap_adapter *na)
+{
+	struct netmap_if *nifp;
+	struct netmap_ring *ring;
+	char *buff;
+	u_int i, len, ofs;
+	u_int n = na->num_queues + 1; /* shorthand, include stack queue */
+
+	/*
+	 * the descriptor is followed inline by an array of offsets
+	 * to the tx and rx rings in the shared memory region.
+	 */
+	len = sizeof(struct netmap_if) + 2 * n * sizeof(ssize_t);
+	nifp = netmap_malloc(len, "nifp");
+	if (nifp == NULL)
+		return (NULL);
+
+	/* initialize base fields (ni_num_queues is const for users) */
+	*(int *)(uintptr_t)&nifp->ni_num_queues = na->num_queues;
+	strncpy(nifp->ni_name, ifname, IFNAMSIZ);
+	/* strncpy does not terminate the string when ifname is too long */
+	nifp->ni_name[IFNAMSIZ - 1] = '\0';
+
+	(na->refcount)++;	/* XXX atomic ? we are under lock */
+	if (na->refcount > 1)
+		goto final;
+
+	/*
+	 * If this is the first instance, allocate the shadow rings and
+	 * buffers for this card (one for each hw queue, one for the host).
+	 * The rings are contiguous, but have variable size.
+	 * The entire block is reachable at
+	 *	na->tx_rings[0].ring
+	 */
+
+	len = n * (2 * sizeof(struct netmap_ring) +
+	    (na->num_tx_desc + na->num_rx_desc) *
+	    sizeof(struct netmap_slot) );
+	buff = netmap_malloc(len, "shadow rings");
+	if (buff == NULL) {
+		D("failed to allocate %d bytes for %s shadow ring",
+		    len, ifname);
+		goto error;
+	}
+	/* do we have the buffers ? we are in need of num_tx_desc buffers for
+	 * each tx ring and num_rx_desc buffers for each rx ring. */
+	len = n * (na->num_tx_desc + na->num_rx_desc);
+	NMA_LOCK();
+	if (nm_buf_pool.free < len) {
+		NMA_UNLOCK();
+		netmap_free(buff, "not enough bufs");
+		goto error;
+	}
+	/*
+	 * in the kring, store the pointers to the shared rings
+	 * and initialize the rings. We are under NMA_LOCK().
+	 */
+	ofs = 0;
+	for (i = 0; i < n; i++) {
+		struct netmap_kring *kring;
+		int numdesc;
+
+		/* Transmit rings */
+		kring = &na->tx_rings[i];
+		numdesc = na->num_tx_desc;
+		bzero(kring, sizeof(*kring));
+		kring->na = na;
+
+		ring = kring->ring = (struct netmap_ring *)(buff + ofs);
+		/* offset from the ring to the start of the buffers */
+		*(ssize_t *)(uintptr_t)&ring->buf_ofs =
+		    nm_buf_pool.base - (char *)ring;
+		ND("txring[%d] at %p ofs %d", i, ring, ring->buf_ofs);
+		*(int *)(uintptr_t)&ring->num_slots =
+		    kring->nkr_num_slots = numdesc;
+
+		/*
+		 * IMPORTANT:
+		 * Always keep one slot empty, so we can detect new
+		 * transmissions comparing cur and nr_hwcur (they are
+		 * the same only if there are no new transmissions).
+		 */
+		ring->avail = kring->nr_hwavail = numdesc - 1;
+		ring->cur = kring->nr_hwcur = 0;
+		netmap_new_bufs(&nm_buf_pool, ring->slot, numdesc);
+
+		ofs += sizeof(struct netmap_ring) +
+		    numdesc * sizeof(struct netmap_slot);
+
+		/* Receive rings */
+		kring = &na->rx_rings[i];
+		numdesc = na->num_rx_desc;
+		bzero(kring, sizeof(*kring));
+		kring->na = na;
+
+		ring = kring->ring = (struct netmap_ring *)(buff + ofs);
+		*(ssize_t *)(uintptr_t)&ring->buf_ofs =
+		    nm_buf_pool.base - (char *)ring;
+		ND("rxring[%d] at %p offset %d", i, ring, ring->buf_ofs);
+		*(int *)(uintptr_t)&ring->num_slots =
+		    kring->nkr_num_slots = numdesc;
+		ring->cur = kring->nr_hwcur = 0;
+		ring->avail = kring->nr_hwavail = 0; /* empty */
+		netmap_new_bufs(&nm_buf_pool, ring->slot, numdesc);
+		ofs += sizeof(struct netmap_ring) +
+		    numdesc * sizeof(struct netmap_slot);
+	}
+	NMA_UNLOCK();
+	/* XXX the selrecord structs are not initialized here. */
+final:
+	/*
+	 * fill the slots for the rx and tx queues. They contain the offset
+	 * between the ring and nifp, so the information is usable in
+	 * userspace to reach the ring from the nifp.
+	 */
+	for (i = 0; i < n; i++) {
+		char *base = (char *)nifp;
+
+		*(ssize_t *)(uintptr_t)&nifp->ring_ofs[i] =
+		    (char *)na->tx_rings[i].ring - base;
+		*(ssize_t *)(uintptr_t)&nifp->ring_ofs[i+n] =
+		    (char *)na->rx_rings[i].ring - base;
+	}
+	return (nifp);
+
+error:
+	/* undo the reference taken above and release the descriptor */
+	(na->refcount)--;
+	netmap_free(nifp, "nifp, rings failed");
+	return (NULL);
+}
+
+
+/*
+ * mmap(2) support for the "netmap" device.
+ *
+ * Expose all the memory previously allocated by our custom memory
+ * allocator: this way the user has only to issue a single mmap(2), and
+ * can work on all the data structures flawlessly.
+ *
+ * offset is the position of the requested page within the region;
+ * the matching physical address is stored in *paddr. The translation
+ * adds offset to the physical address of the start of the region, so
+ * it relies on the region being physically contiguous.
+ *
+ * Return 0 on success, -1 otherwise (executable mapping requested, or
+ * offset outside the allocated region).
+ */
+static int
+#if __FreeBSD_version < 900000
+netmap_mmap(__unused struct cdev *dev, vm_offset_t offset, vm_paddr_t *paddr,
+    int nprot)
+#else
+netmap_mmap(__unused struct cdev *dev, vm_ooffset_t offset, vm_paddr_t *paddr,
+    int nprot, __unused vm_memattr_t *memattr)
+#endif
+{
+	if (nprot & PROT_EXEC)
+		return (-1);	// XXX -1 or EINVAL ?
+	/*
+	 * Never hand out physical pages beyond our own region. The
+	 * unsigned comparison also rejects negative offsets where the
+	 * offset type is signed.
+	 */
+	if ((uint64_t)offset >= (uint64_t)netmap_mem_d->nm_totalsize)
+		return (-1);
+	ND("request for offset 0x%x", (uint32_t)offset);
+	*paddr = vtophys(netmap_mem_d->nm_buffer) + offset;
+
+	return (0);
+}
+
+
+/*
+ * handler for synchronization of the queues from/to the host.
+ *
+ * Packets that userspace has queued on the host tx ring (the ring at
+ * index num_queues), from nr_hwcur up to cur (excluded), are copied
+ * into mbufs under the core lock and then passed to the interface
+ * input routine outside the lock.
+ *
+ * cur and the slot lengths live in memory shared with userspace: each
+ * is read once into a local and validated before use. An invalid cur
+ * triggers netmap_ring_reinit() and nothing is sent.
+ */
+static void
+netmap_sync_to_host(struct netmap_adapter *na)
+{
+	struct netmap_kring *kring = &na->tx_rings[na->num_queues];
+	struct netmap_ring *ring = kring->ring;
+	struct mbuf *head = NULL, *tail = NULL, *m;
+	u_int k, n, lim = kring->nkr_num_slots - 1;
+
+	k = ring->cur;	/* ring is not protected by any lock */
+	if (k > lim) {
+		/* the loop below would never reach an out-of-range cur */
+		netmap_ring_reinit(kring);
+		return;
+	}
+
+	na->nm_lock(na->ifp->if_softc, NETMAP_CORE_LOCK, 0);
+
+	/* Take packets from hwcur to cur and pass them up.
+	 * In case of no buffers we give up. At the end of the loop,
+	 * the queue is drained in all cases.
+	 */
+	for (n = kring->nr_hwcur; n != k;) {
+		struct netmap_slot *slot = &ring->slot[n];
+		u_int pos = n;		/* index of this slot, for logging */
+		u_int len = slot->len;	/* read once, then validated */
+
+		n = (n == lim) ? 0 : n + 1;
+		if (len < 14 || len > NETMAP_BUF_SIZE) {
+			D("bad pkt at %d len %d", pos, len);
+			continue;
+		}
+		m = m_devget(NMB(slot), len, 0, na->ifp, NULL);
+
+		if (m == NULL)
+			break;
+		if (tail)
+			tail->m_nextpkt = m;
+		else
+			head = m;
+		tail = m;
+		m->m_nextpkt = NULL;
+	}
+	kring->nr_hwcur = k;
+	kring->nr_hwavail = lim;
+	ring->avail = lim;
+	na->nm_lock(na->ifp->if_softc, NETMAP_CORE_UNLOCK, 0);
+
+	/* send packets up, outside the lock */
+	while ((m = head) != NULL) {
+		head = head->m_nextpkt;
+		m->m_nextpkt = NULL;
+		m->m_pkthdr.rcvif = na->ifp;
+		if (netmap_verbose & NM_VERB_HOST)
+			D("sending up pkt %p size %d", m, m->m_pkthdr.len);
+		(na->ifp->if_input)(na->ifp, m);
+	}
+}
+
+/*
+ * Synchronize the host rx ring (the ring at index num_queues) with
+ * what userspace has consumed: slots from nr_hwcur up to cur
+ * (excluded) are released, and avail is refreshed from nr_hwavail.
+ *
+ * This routine also does the selrecord if called from the poll handler
+ * (we know because td != NULL).
+ *
+ * cur lives in memory shared with userspace: it is read once and
+ * validated; an out-of-range value triggers netmap_ring_reinit() and
+ * the kernel state is left untouched.
+ */
+static void
+netmap_sync_from_host(struct netmap_adapter *na, struct thread *td)
+{
+	struct netmap_kring *kring = &na->rx_rings[na->num_queues];
+	struct netmap_ring *ring = kring->ring;
+	u_int k, lim = kring->nkr_num_slots - 1;
+	int delta;
+
+	k = ring->cur;	/* ring is not protected by any lock */
+	if (k > lim) {
+		netmap_ring_reinit(kring);
+		return;
+	}
+
+	na->nm_lock(na->ifp->if_softc, NETMAP_CORE_LOCK, 0);
+
+	/* skip past packets processed by userspace,
+	 * and then sync cur/avail with hwcur/hwavail
+	 */
+	delta = (int)k - (int)kring->nr_hwcur;
+	if (delta < 0)
+		delta += kring->nkr_num_slots;
+	kring->nr_hwavail -= delta;
+	kring->nr_hwcur = k;
+	ring->avail = kring->nr_hwavail;
+	/* nothing to read: register for a wakeup on this ring */
+	if (kring->nr_hwavail == 0 && td)
+		selrecord(td, &kring->si);
+	if (kring->nr_hwavail && (netmap_verbose & NM_VERB_HOST))
+		D("%d pkts from stack", kring->nr_hwavail);
+	na->nm_lock(na->ifp->if_softc, NETMAP_CORE_UNLOCK, 0);
+}
+
+
+/*
+ * Look up an interface by name and take a reference on it.
+ *
+ * On success (return 0) *ifp points to the interface and the caller
+ * owns the reference. Returns ENXIO if no such interface exists, and
+ * EINVAL (after dropping the reference) if the interface does not
+ * advertise IFCAP_NETMAP or has no netmap adapter attached.
+ */
+static int
+get_ifp(const char *name, struct ifnet **ifp)
+{
+	struct ifnet *found = ifunit_ref(name);
+
+	*ifp = found;
+	if (found == NULL)
+		return (ENXIO);
+
+	/* usable only if the capability is there AND the adapter exists */
+	if ((found->if_capabilities & IFCAP_NETMAP) == 0 || !NA(found)) {
+		if_rele(found);
+		return EINVAL; // not NETMAP capable
+	}
+	return 0;	/* valid pointer, we hold the refcount */
+}
+
+
+/*
+ * Recovery path used when txsync/rxsync find an inconsistent ring.
+ * Every slot is validated (buffer index and length); offending slots
+ * are zeroed. If anything was wrong, cur/avail are rolled back to the
+ * kernel copies (hwcur/hwavail) and NR_REINIT is raised on both the
+ * ring and the adapter.
+ * Return 1 if the ring was reinitialized, 0 if it was found clean.
+ */
+int
+netmap_ring_reinit(struct netmap_kring *kring)
+{
+	struct netmap_adapter *na = kring->na;
+	struct netmap_ring *ring = kring->ring;
+	u_int i, lim = kring->nkr_num_slots - 1;
+	int errors = 0;
+	int pos, n;
+
+	D("called for %s", na->ifp->if_xname);
+	if (ring->cur > lim)
+		errors++;
+
+	for (i = 0; i <= lim; i++) {
+		struct netmap_slot *slot = &ring->slot[i];
+		u_int idx = slot->buf_idx;
+		u_int len = slot->len;
+
+		if (idx < 2 || idx >= netmap_total_buffers) {
+			/* only the very first error is reported in detail */
+			if (errors == 0)
+				D("bad buffer at slot %d idx %d len %d ", i, idx, len);
+			errors++;
+			slot->buf_idx = 0;
+			slot->len = 0;
+		} else if (len > NETMAP_BUF_SIZE) {
+			slot->len = 0;
+			if (errors == 0)
+				D("bad len %d at slot %d idx %d",
+				    len, i, idx);
+			errors++;
+		}
+	}
+
+	if (errors == 0)
+		return (0);
+
+	/*
+	 * tx and rx krings live in one array, n entries each, tx first:
+	 * the offset from tx_rings tells us direction and ring number.
+	 */
+	pos = kring - na->tx_rings;
+	n = na->num_queues + 2;
+
+	D("total %d errors", errors);
+	D("%s %s[%d] reinit, cur %d -> %d avail %d -> %d",
+	    na->ifp->if_xname,
+	    pos < n ? "TX" : "RX", pos < n ? pos : pos - n,
+	    ring->cur, kring->nr_hwcur,
+	    ring->avail, kring->nr_hwavail);
+	ring->cur = kring->nr_hwcur;
+	ring->avail = kring->nr_hwavail;
+	ring->flags |= NR_REINIT;
+	na->flags |= NR_REINIT;
+	return (1);
+}
+
+/*
+ * Acknowledge a pending reinit: drop NR_REINIT from the adapter and
+ * from the rings.
+ * XXX at the moment the flag is cleared on all rings, not only on
+ * the ones owned by the caller.
+ */
+static void
+netmap_clean_reinit(struct netmap_adapter *na)
+{
+	u_int ring_nr;
+
+	na->flags &= ~NR_REINIT;
+	D("--- NR_REINIT reset on %s", na->ifp->if_xname);
+
+	/* indexes 0..num_queues, i.e. including the ring at num_queues */
+	for (ring_nr = 0; ring_nr < na->num_queues + 1; ring_nr++) {
+		struct netmap_kring *txk = &na->tx_rings[ring_nr];
+		struct netmap_kring *rxk = &na->rx_rings[ring_nr];
+
+		txk->ring->flags &= ~NR_REINIT;
+		rxk->ring->flags &= ~NR_REINIT;
+	}
+}
+
+/*
+ * Select which ring(s) a descriptor is bound to.
+ *
+ * ringid carries a ring number (NETMAP_RING_MASK) plus flags:
+ *   NETMAP_SW_RING     bind to the host ring (index num_queues);
+ *   NETMAP_HW_RING     bind to the single hardware ring given;
+ *   neither            bind to all hardware rings;
+ *   NETMAP_NO_TX_POLL  clears np_txpoll.
+ * For devices with a single queue, a request for all rings is the
+ * same as a single ring.
+ * Return 0 on success, EINVAL if a hardware ring number is out of range.
+ */
+static int
+netmap_set_ringid(struct netmap_priv_d *priv, u_int ringid)
+{
+	struct ifnet *ifp = priv->np_ifp;
+	struct netmap_adapter *na = NA(ifp);
+	void *softc = na->ifp->if_softc;
+	u_int ring_nr = ringid & NETMAP_RING_MASK;
+	int want_sw = (ringid & NETMAP_SW_RING) != 0;
+	int want_hw = (ringid & NETMAP_HW_RING) != 0;
+	/* qfirst == qlast only before the first call: no lock needed then */
+	int locked = (priv->np_qfirst != priv->np_qlast);
+
+	if (want_hw && ring_nr >= na->num_queues) {
+		D("invalid ring id %d", ring_nr);
+		return (EINVAL);
+	}
+
+	if (locked)
+		na->nm_lock(softc, NETMAP_CORE_LOCK, 0);
+	priv->np_ringid = ringid;
+	if (want_sw) {
+		priv->np_qfirst = na->num_queues;
+		priv->np_qlast = na->num_queues + 1;
+	} else if (want_hw) {
+		priv->np_qfirst = ring_nr;
+		priv->np_qlast = ring_nr + 1;
+	} else {
+		priv->np_qfirst = 0;
+		priv->np_qlast = na->num_queues;
+	}
+	priv->np_txpoll = (ringid & NETMAP_NO_TX_POLL) ? 0 : 1;
+	if (locked)
+		na->nm_lock(softc, NETMAP_CORE_UNLOCK, 0);
+
+	/* report the outcome, outside the lock */
+	if (want_sw) {
+		D("ringid %s set to SW RING", ifp->if_xname);
+	} else if (want_hw) {
+		D("ringid %s set to HW RING %d", ifp->if_xname,
+		    priv->np_qfirst);
+	} else {
+		D("ringid %s set to all %d HW RINGS", ifp->if_xname,
+		    priv->np_qlast);
+	}
+	return 0;
+}
+
+/*
+ * ioctl(2) support for the "netmap" device.
+ *
+ * Accepted commands:
+ * - NIOCGINFO		report the size of the shared memory region and,
+ *			if nr_name is set, ring and slot counts of that
+ *			interface;
+ * - NIOCREGIF		bind the descriptor to the interface in nr_name
+ *			and put it in netmap mode; on an already bound
+ *			descriptor only the ring id is changed;
+ * - NIOCUNREGIF	drop the binding (the work is done by the cdevpriv
+ *			destructor);
+ * - NIOCTXSYNC
+ * - NIOCRXSYNC		sync the tx/rx rings the descriptor is bound to;
+ * - BIOCIMMEDIATE, BIOCGHDRCMPLT, BIOCSHDRCMPLT, BIOCSSEESENT
+ *			accepted and ignored;
+ * - anything else (e.g. SIOCGIFADDR, just for convenience) is handed
+ *   to ifioctl() on behalf of the interface named in nr_name.
+ *
+ * Return 0 on success, errno otherwise.
+ */
+static int
+netmap_ioctl(__unused struct cdev *dev, u_long cmd, caddr_t data,
+	__unused int fflag, struct thread *td)
+{
+	struct netmap_priv_d *priv = NULL;
+	struct ifnet *ifp;
+	struct nmreq *nmr = (struct nmreq *) data;
+	struct netmap_adapter *na;
+	void *adapter;
+	int error;
+	u_int i;
+	struct netmap_if *nifp;
+
+	/* ENOENT only means "not registered yet": priv stays NULL */
+	error = devfs_get_cdevpriv((void **)&priv);
+	if (error != ENOENT && error != 0)
+		return (error);
+
+	error = 0;	/* Could be ENOENT */
+	switch (cmd) {
+	case NIOCGINFO:		/* return capabilities etc */
+		/* memsize is always valid */
+		nmr->nr_memsize = netmap_mem_d->nm_totalsize;
+		nmr->nr_offset = 0;
+		nmr->nr_numrings = 0;
+		nmr->nr_numslots = 0;
+		if (nmr->nr_name[0] == '\0')	/* just get memory info */
+			break;
+		error = get_ifp(nmr->nr_name, &ifp); /* get a refcount */
+		if (error)
+			break;
+		na = NA(ifp); /* retrieve netmap_adapter */
+		nmr->nr_numrings = na->num_queues;
+		nmr->nr_numslots = na->num_tx_desc;
+		if_rele(ifp);	/* return the refcount */
+		break;
+
+	case NIOCREGIF:
+		if (priv != NULL)	/* thread already registered */
+			return netmap_set_ringid(priv, nmr->nr_ringid);
+		/* find the interface and a reference */
+		error = get_ifp(nmr->nr_name, &ifp); /* keep reference */
+		if (error)
+			break;
+		na = NA(ifp); /* retrieve netmap adapter */
+		adapter = na->ifp->if_softc;	/* shorthand */
+		/*
+		 * Allocate the private per-thread structure.
+		 * XXX perhaps we can use a blocking malloc ?
+		 */
+		priv = malloc(sizeof(struct netmap_priv_d), M_DEVBUF,
+		    M_NOWAIT | M_ZERO);
+		if (priv == NULL) {
+			error = ENOMEM;
+			if_rele(ifp);	/* return the refcount */
+			break;
+		}
+
+		/*
+		 * Wait (up to 10 rounds of hz/10 ticks) for a teardown
+		 * in progress to finish. On exit from the loop through
+		 * the break the core lock is held; if the loop runs out
+		 * (i == 0) it is not.
+		 */
+		for (i = 10; i > 0; i--) {
+			na->nm_lock(adapter, NETMAP_CORE_LOCK, 0);
+			if (!NETMAP_DELETING(na))
+				break;
+			na->nm_lock(adapter, NETMAP_CORE_UNLOCK, 0);
+			tsleep(na, 0, "NIOCREGIF", hz/10);
+		}
+		if (i == 0) {
+			D("too many NIOCREGIF attempts, give up");
+			error = EINVAL;
+			free(priv, M_DEVBUF);
+			if_rele(ifp);	/* return the refcount */
+			break;
+		}
+
+		priv->np_ifp = ifp;	/* store the reference */
+		error = netmap_set_ringid(priv, nmr->nr_ringid);
+		if (error)
+			goto error;
+		priv->np_nifp = nifp = netmap_if_new(nmr->nr_name, na);
+		if (nifp == NULL) { /* allocation failed */
+			error = ENOMEM;
+		} else if (ifp->if_capenable & IFCAP_NETMAP) {
+			/* was already set */
+		} else {
+			/* Otherwise set the card in netmap mode
+			 * and make it use the shared buffers.
+			 */
+			error = na->nm_register(ifp, 1); /* mode on */
+			if (error) {
+				/*
+				 * do something similar to netmap_dtor().
+				 */
+				netmap_free(na->tx_rings[0].ring, "rings, reg.failed");
+				free(na->tx_rings, M_DEVBUF);
+				na->tx_rings = na->rx_rings = NULL;
+				na->refcount--;
+				netmap_free(nifp, "nifp, rings failed");
+				nifp = NULL;
+			}
+		}
+
+		if (error) {	/* reg. failed, release priv and ref */
+error:
+			na->nm_lock(adapter, NETMAP_CORE_UNLOCK, 0);
+			free(priv, M_DEVBUF);
+			if_rele(ifp);	/* return the refcount */
+			break;
+		}
+
+		na->nm_lock(adapter, NETMAP_CORE_UNLOCK, 0);
+		error = devfs_set_cdevpriv(priv, netmap_dtor);
+
+		if (error != 0) {
+			/* could not assign the private storage for the
+			 * thread, call the destructor explicitly.
+			 */
+			netmap_dtor(priv);
+			break;
+		}
+
+		/* return the offset of the netmap_if object */
+		nmr->nr_numrings = na->num_queues;
+		nmr->nr_numslots = na->num_tx_desc;
+		nmr->nr_memsize = netmap_mem_d->nm_totalsize;
+		nmr->nr_offset =
+		    ((char *) nifp - (char *) netmap_mem_d->nm_buffer);
+		break;
+
+	case NIOCUNREGIF:
+		if (priv == NULL)
+			return (ENXIO);
+
+		/* the interface is unregistered inside the
+		   destructor of the private data. */
+		devfs_clear_cdevpriv();
+		break;
+
+	case NIOCTXSYNC:
+	case NIOCRXSYNC:
+		if (priv == NULL)
+			return (ENXIO);
+		ifp = priv->np_ifp;	/* we have a reference */
+		na = NA(ifp); /* retrieve netmap adapter */
+		adapter = ifp->if_softc;	/* shorthand */
+
+		if (na->flags & NR_REINIT)
+			netmap_clean_reinit(na);
+
+		if (priv->np_qfirst == na->num_queues) {
+			/* queues to/from host */
+			if (cmd == NIOCTXSYNC)
+				netmap_sync_to_host(na);
+			else
+				netmap_sync_from_host(na, NULL);
+			return error;
+		}
+
+		for (i = priv->np_qfirst; i < priv->np_qlast; i++) {
+			if (cmd == NIOCTXSYNC) {
+				struct netmap_kring *kring = &na->tx_rings[i];
+				if (netmap_verbose & NM_VERB_TXSYNC)
+					D("sync tx ring %d cur %d hwcur %d",
+					    i, kring->ring->cur,
+					    kring->nr_hwcur);
+				na->nm_txsync(adapter, i, 1 /* do lock */);
+				if (netmap_verbose & NM_VERB_TXSYNC)
+					D("after sync tx ring %d cur %d hwcur %d",
+					    i, kring->ring->cur,
+					    kring->nr_hwcur);
+			} else {
+				na->nm_rxsync(adapter, i, 1 /* do lock */);
+				microtime(&na->rx_rings[i].ring->ts);
+			}
+		}
+
+		break;
+
+	case BIOCIMMEDIATE:
+	case BIOCGHDRCMPLT:
+	case BIOCSHDRCMPLT:
+	case BIOCSSEESENT:
+		/* the message lists exactly the four commands matched above */
+		D("ignore BIOCIMMEDIATE/BIOCGHDRCMPLT/BIOCSHDRCMPLT/BIOCSSEESENT");
+		break;
+
+	default:
+	    {
+		/*
+		 * allow device calls: build a dummy socket that only
+		 * carries the vnet of the interface and pass the
+		 * request on to ifioctl().
+		 */
+		struct socket so;
+		bzero(&so, sizeof(so));
+		error = get_ifp(nmr->nr_name, &ifp); /* keep reference */
+		if (error)
+			break;
+		so.so_vnet = ifp->if_vnet;
+		// so->so_proto not null.
+		error = ifioctl(&so, cmd, data, td);
+		if_rele(ifp);
+	    }
+	}
+
+	return (error);
+}
+
+
+/*
+ * select(2) and poll(2) handlers for the "netmap" device.
+ *
+ * Can be called for one or more queues.
+ * Return the event mask corresponding to ready events.
+ * If there are no ready events, do a selrecord on either individual
+ * selfd or on the global one.
+ * Device-dependent parts (locking and sync of tx/rx rings)
+ * are done through callbacks.
+ *
+ * POLLERR is returned if the descriptor has no private data attached,
+ * if the interface is not in netmap mode, and is added to the result
+ * if a reinit is pending or a txsync/rxsync callback reports an error.
+ */
+static int
+netmap_poll(__unused struct cdev *dev, int events, struct thread *td)
+{
+ struct netmap_priv_d *priv = NULL;
+ struct netmap_adapter *na;
+ struct ifnet *ifp;
+ struct netmap_kring *kring;
+ u_int i, check_all, want_tx, want_rx, revents = 0;
+ void *adapter;
+
+ if (devfs_get_cdevpriv((void **)&priv) != 0 || priv == NULL)
+ return POLLERR; /* no private data attached to this descriptor */
+
+ ifp = priv->np_ifp;
+ // XXX check for deleting() ?
+ if ( (ifp->if_capenable & IFCAP_NETMAP) == 0)
+ return POLLERR; /* interface is not in netmap mode */
+
+ if (netmap_verbose & 0x8000)
+ D("device %s events 0x%x", ifp->if_xname, events);
+ want_tx = events & (POLLOUT | POLLWRNORM); /* caller's write events */
+ want_rx = events & (POLLIN | POLLRDNORM); /* caller's read events */
+
+ adapter = ifp->if_softc;
+ na = NA(ifp); /* retrieve netmap adapter */
+
+ /* pending reinit, report up as a poll error. Pending
+ * reads and writes are lost.
+ */
+ if (na->flags & NR_REINIT) {
+ netmap_clean_reinit(na);
+ revents |= POLLERR;
+ }
+ /* how many queues we are scanning */
+ i = priv->np_qfirst;
+ if (i == na->num_queues) { /* from/to host */
+ if (priv->np_txpoll || want_tx) {
+ /* push any packets up, then we are always ready */
+ kring = &na->tx_rings[i];
+ netmap_sync_to_host(na);
+ revents |= want_tx;
+ }
+ if (want_rx) {
+ kring = &na->rx_rings[i];
+ if (kring->ring->avail == 0)
+ netmap_sync_from_host(na, td); /* passing td lets it selrecord if still empty */
+ if (kring->ring->avail > 0) {
+ revents |= want_rx;
+ }
+ }
+ return (revents);
+ }
+
+ /*
+ * check_all is set if the card has more than one queue and
+ * the client is polling all of them. If true, we sleep on
+ * the "global" selfd, otherwise we sleep on individual selfd
+ * (we can only sleep on one of them per direction).
+ * The interrupt routine in the driver should always wake on
+ * the individual selfd, and also on the global one if the card
+ * has more than one ring.
+ *
+ * If the card has only one lock, we just use that.
+ * If the card has separate ring locks, we just use those
+ * unless we are doing check_all, in which case the whole
+ * loop is wrapped by the global lock.
+ * We acquire locks only when necessary: if poll is called
+ * when buffers are available, we can just return without locks.
+ *
+ * rxsync() is only called if we run out of buffers on a POLLIN.
+ * txsync() is called if we run out of buffers on POLLOUT, or
+ * there are pending packets to send. The latter can be disabled
+ * passing NETMAP_NO_TX_POLL in the NIOCREGIF call.
+ */
+ check_all = (i + 1 != priv->np_qlast); /* set unless the range is exactly one ring */
+
+ /*
+ * core_lock indicates what to do with the core lock.
+ * The core lock is used when either the card has no individual
+ * locks, or it has individual locks but we are checking all
+ * rings so we need the core lock to avoid missing wakeup events.
+ *
+ * It has three possible states:
+ * NO_CL we don't need to use the core lock, e.g.
+ * because we are protected by individual locks.
+ * NEED_CL we need the core lock. In this case, when we
+ * call the lock routine, move to LOCKED_CL
+ * to remember to release the lock once done.
+ * LOCKED_CL core lock is set, so we need to release it.
+ */
+ enum {NO_CL, NEED_CL, LOCKED_CL };
+ int core_lock = (check_all || !na->separate_locks) ?
+ NEED_CL:NO_CL;
+ /*
+ * We start with a lock free round which is good if we have
+ * data available. If this fails, then lock and call the sync
+ * routines.
+ */
+ for (i = priv->np_qfirst; want_rx && i < priv->np_qlast; i++) {
+ kring = &na->rx_rings[i];
+ if (kring->ring->avail > 0) {
+ revents |= want_rx;
+ want_rx = 0; /* also breaks the loop */
+ }
+ }
+ for (i = priv->np_qfirst; want_tx && i < priv->np_qlast; i++) {
+ kring = &na->tx_rings[i];
+ if (kring->ring->avail > 0) {
+ revents |= want_tx;
+ want_tx = 0; /* also breaks the loop */
+ }
+ }
+
+ /*
+ * If we need to push packets out (priv->np_txpoll) or want_tx is
+ * still set, we do need to run the txsync calls (on all rings,
+ * to avoid that the tx rings stall).
+ */
+ if (priv->np_txpoll || want_tx) {
+ for (i = priv->np_qfirst; i < priv->np_qlast; i++) {
+ kring = &na->tx_rings[i];
+ if (!want_tx && kring->ring->cur == kring->nr_hwcur)
+ continue; /* no POLLOUT wanted and nothing queued on this ring */
+ if (core_lock == NEED_CL) { /* taken lazily, at most once per call */
+ na->nm_lock(adapter, NETMAP_CORE_LOCK, 0);
+ core_lock = LOCKED_CL;
+ }
+ if (na->separate_locks)
+ na->nm_lock(adapter, NETMAP_TX_LOCK, i);
+ if (netmap_verbose & NM_VERB_TXSYNC)
+ D("send %d on %s %d",
+ kring->ring->cur,
+ ifp->if_xname, i);
+ if (na->nm_txsync(adapter, i, 0 /* no lock */))
+ revents |= POLLERR;
+
+ if (want_tx) {
+ if (kring->ring->avail > 0) {
+ /* stop at the first ring. We don't risk
+ * starvation.
+ */
+ revents |= want_tx;
+ want_tx = 0;
+ } else if (!check_all)
+ selrecord(td, &kring->si); /* single ring: sleep on its own selinfo */
+ }
+ if (na->separate_locks)
+ na->nm_lock(adapter, NETMAP_TX_UNLOCK, i);
+ }
+ }
+
+ /*
+ * now if want_rx is still set we need to lock and rxsync.
+ * Do it on all rings because otherwise we starve.
+ */
+ if (want_rx) {
+ for (i = priv->np_qfirst; i < priv->np_qlast; i++) {
+ kring = &na->rx_rings[i];
+ if (core_lock == NEED_CL) {
+ na->nm_lock(adapter, NETMAP_CORE_LOCK, 0);
+ core_lock = LOCKED_CL;
+ }
+ if (na->separate_locks)
+ na->nm_lock(adapter, NETMAP_RX_LOCK, i);
+
+ if (na->nm_rxsync(adapter, i, 0 /* no lock */))
+ revents |= POLLERR;
+ if (no_timestamp == 0 ||
+ kring->ring->flags & NR_TIMESTAMP)
+ microtime(&kring->ring->ts);
+
+ if (kring->ring->avail > 0)
+ revents |= want_rx; /* want_rx is not cleared: the remaining rings are still synced */
+ else if (!check_all)
+ selrecord(td, &kring->si);
+ if (na->separate_locks)
+ na->nm_lock(adapter, NETMAP_RX_UNLOCK, i);
+ }
+ }
+ if (check_all && revents == 0) {
+ i = na->num_queues + 1; /* the global queue */
+ if (want_tx)
+ selrecord(td, &na->tx_rings[i].si);
+ if (want_rx)
+ selrecord(td, &na->rx_rings[i].si);
+ }
+ if (core_lock == LOCKED_CL)
+ na->nm_lock(adapter, NETMAP_CORE_UNLOCK, 0);
+
+ return (revents);
+}
+
+/*------- driver support routines ------*/
+
+/*
+ * Initialize a ``netmap_adapter`` object created by driver on attach.
+ *
+ * One zeroed block is allocated holding a copy of *na followed by two
+ * arrays (tx first, then rx) of N+2 struct netmap_kring, where N is
+ * the number of hardware rings:
+ *	krings 0..N-1 are for the hardware queues.
+ *	kring N is for the host stack queue
+ *	kring N+1 is only used for the selinfo for all queues.
+ * The block is hooked to ifp->if_pspare[0] and IFCAP_NETMAP is added
+ * to the interface capabilities.
+ * Return 0 on success, EINVAL if na->ifp is not set, ENOMEM if the
+ * allocation fails.
+ */
+int
+netmap_attach(struct netmap_adapter *na, int num_queues)
+{
+	struct ifnet *ifp = na->ifp;
+	int nkr = num_queues + 2;	/* krings per direction */
+	int size = sizeof(*na) + 2 * nkr * sizeof(struct netmap_kring);
+	char *mem;
+
+	if (ifp == NULL) {
+		D("ifp not set, giving up");
+		return EINVAL;
+	}
+	na->refcount = 0;
+	na->num_queues = num_queues;
+
+	mem = malloc(size, M_DEVBUF, M_NOWAIT | M_ZERO);
+	if (mem == NULL) {
+		D("%s for %s", "failed", ifp->if_xname);
+		return (ENOMEM);
+	}
+
+	ifp->if_pspare[0] = mem;
+	/* the kring arrays sit right after the adapter copy */
+	na->tx_rings = (void *)(mem + sizeof(*na));
+	na->rx_rings = na->tx_rings + nkr;
+	/* copy only now, so the copy carries the ring pointers too */
+	bcopy(na, mem, sizeof(*na));
+	ifp->if_capabilities |= IFCAP_NETMAP;
+	D("%s for %s", "ok", ifp->if_xname);
+
+	return (0);
+}
+
+
+/*
+ * Undo netmap_attach(): destroy the knlists of all the krings, wipe
+ * the adapter, unhook it from ifp->if_pspare[0] and free the memory.
+ * A no-op if the interface has no adapter.
+ */
+void
+netmap_detach(struct ifnet *ifp)
+{
+	struct netmap_adapter *na = NA(ifp);
+	u_int k;
+
+	if (na == NULL)
+		return;
+
+	/* all N+2 krings in each direction */
+	for (k = 0; k < na->num_queues + 2; k++) {
+		struct netmap_kring *txk = &na->tx_rings[k];
+		struct netmap_kring *rxk = &na->rx_rings[k];
+
+		knlist_destroy(&txk->si.si_note);
+		knlist_destroy(&rxk->si.si_note);
+	}
+	bzero(na, sizeof(*na));
+	ifp->if_pspare[0] = NULL;
+	free(na, M_DEVBUF);
+}
+
+
+/*
+ * intercept packets coming from the network stack and present
+ * them to netmap as incoming packets on a separate ring
+ * (the rx ring at index num_queues).
+ * We are not locked when called.
+ *
+ * The mbuf is always consumed (freed), whatever the outcome.
+ * Return 0 if the packet was copied into the ring, EBUSY if the ring
+ * is full or the packet is larger than a netmap buffer.
+ */
+int
+netmap_start(struct ifnet *ifp, struct mbuf *m)
+{
+	struct netmap_adapter *na = NA(ifp);
+	u_int i, len, n = na->num_queues;
+	int error = EBUSY;
+	struct netmap_kring *kring = &na->rx_rings[n];
+	struct netmap_slot *slot;
+
+	len = m->m_pkthdr.len;
+	if (netmap_verbose & NM_VERB_HOST)
+		D("%s packet %d len %d from the stack", ifp->if_xname,
+		    kring->nr_hwcur + kring->nr_hwavail, len);
+	na->nm_lock(ifp->if_softc, NETMAP_CORE_LOCK, 0);
+	if (kring->nr_hwavail >= (int)kring->nkr_num_slots - 1) {
+		/* no "\n" here: D() already terminates the line */
+		D("stack ring %s full", ifp->if_xname);
+		goto done;	/* no space */
+	}
+	if (len > na->buff_size) {
+		D("drop packet size %d > %d", len, na->buff_size);
+		goto done;	/* too long for us */
+	}
+
+	/* compute the insert position: first slot past the full ones */
+	i = kring->nr_hwcur + kring->nr_hwavail;
+	if (i >= kring->nkr_num_slots)
+		i -= kring->nkr_num_slots;
+	slot = &kring->ring->slot[i];
+	m_copydata(m, 0, len, NMB(slot));
+	slot->len = len;
+	kring->nr_hwavail++;
+	if (netmap_verbose & NM_VERB_HOST)
+		D("wake up host ring %s %d", na->ifp->if_xname, na->num_queues);
+	selwakeuppri(&kring->si, PI_NET);
+	error = 0;
+done:
+	na->nm_lock(ifp->if_softc, NETMAP_CORE_UNLOCK, 0);
+
+	/* release the mbuf in either case, success or failure. As an
+	 * alternative, put the mbuf in a free list and free the list
+	 * only when really necessary.
+	 */
+	m_freem(m);
+
+	return (error);
+}
+
+
+/*
+ * netmap_reset() is called by the driver routines when reinitializing
+ * a ring. The driver is in charge of locking to protect the kring.
+ * If netmap mode is not set just return NULL.
+ * Otherwise set NR_REINIT (in the ring and in na) to signal
+ * that a ring has been reinitialized,
+ * set cur = hwcur = 0 and avail = hwavail = num_slots - 1 .
+ * IT IS IMPORTANT to leave one slot free even in the tx ring because
+ * we rely on cur=hwcur only for empty rings.
+ * These are good defaults but can be overridden later in the device
+ * specific code if, after a reinit, the ring does not start from 0
+ * (e.g. if_em.c does this).
+ *
+ * XXX we shouldn't be touching the ring, but there is a
+ * race anyways and this is our best option.
+ *
+ * XXX setting na->flags makes the syscall code faster, as there is
+ * only one place to check. On the other hand, we will need a better
+ * way to notify multiple threads that rings have been reset.
+ * One way is to increment na->rst_count at each ring reset.
+ * Each thread in its own priv structure will keep a matching counter,
+ * and on a reset will acknowledge and clean its own rings.
+ */
+struct netmap_slot *
+netmap_reset(struct netmap_adapter *na, enum txrx tx, int n,
+	u_int new_cur)
+{
+	struct netmap_kring *kring;
+	struct netmap_ring *ring;
+	struct netmap_slot *slots;
+	u_int k, fresh_avail;
+	int is_tx = (tx == NR_TX);
+
+	if (na == NULL)
+		return NULL;	/* no netmap support here */
+	if ((na->ifp->if_capenable & IFCAP_NETMAP) == 0)
+		return NULL;	/* nothing to reinitialize */
+
+	kring = (is_tx ? na->tx_rings : na->rx_rings) + n;
+	ring = kring->ring;
+
+	if (is_tx) {
+		/*
+		 * new_cur is the new value of next_to_clean.
+		 *
+		 * A TX ring holds P pending transmissions (from
+		 * next_to_clean to nr_hwcur) followed by nr_hwavail free
+		 * slots, so modulo the ring size
+		 *	next_to_clean == nr_hwcur + nr_hwavail
+		 * The reset is harmless only if the ring was idle (all
+		 * usable slots free) and the position does not move;
+		 * otherwise pending or newly injected packets may be lost.
+		 */
+		if (new_cur == kring->nr_hwcur &&
+		    kring->nr_hwavail == kring->nkr_num_slots - 1) {
+			/* all ok */
+			D("+++ NR_REINIT ok on %s TX[%d]", na->ifp->if_xname, n);
+		} else {
+			D("+++ NR_REINIT set on %s TX[%d]", na->ifp->if_xname, n);
+		}
+		/* every slot but one is free again */
+		fresh_avail = kring->nkr_num_slots - 1;
+	} else {
+		/*
+		 * new_cur is the next free slot.
+		 * An RX ring holds nr_hwavail full buffers starting at
+		 * nr_hwcur: the reset is harmless only if there were
+		 * none and the position does not move, otherwise we might
+		 * be in trouble as the buffers are changing.
+		 */
+		if (new_cur == kring->nr_hwcur && kring->nr_hwavail == 0) {
+			/* all ok */
+			D("+++ NR_REINIT ok on %s RX[%d]", na->ifp->if_xname, n);
+		} else {
+			D("+++ NR_REINIT set on %s RX[%d]", na->ifp->if_xname, n);
+		}
+		fresh_avail = 0;	/* no data */
+	}
+
+	/* the flag is raised in both cases, whatever was logged above */
+	ring->flags |= NR_REINIT;
+	na->flags |= NR_REINIT;
+	kring->nr_hwavail = fresh_avail;
+	ring->avail = kring->nr_hwavail;
+	kring->nr_hwcur = new_cur;
+	ring->cur = kring->nr_hwcur;
+
+	/*
+	 * Sanitize the slots. A bogus buffer index cannot really be
+	 * recovered; the best we can do is (probably) to reset it
+	 * every time we pass from here.
+	 */
+	slots = ring->slot;
+	for (k = 0; k < kring->nkr_num_slots; k++) {
+		struct netmap_slot *s = &slots[k];
+
+		if (s->buf_idx >= netmap_total_buffers) {
+			D("invalid buf_idx %d at slot %d", s->buf_idx, k);
+			s->buf_idx = 0;	/* XXX reset */
+		}
+		/* XXX we don't really need to set the length */
+		s->len = 0;
+	}
+
+	/*
+	 * Wake up possible waiters, both on this ring and on the global
+	 * selinfo (kring num_queues + 1 of the same array, hence the
+	 * offset relative to ring n). Perhaps a bit early, but the device
+	 * specific routine is locked so hopefully we won't have a race.
+	 */
+	selwakeuppri(&kring->si, PI_NET);
+	selwakeuppri(&kring[na->num_queues + 1 - n].si, PI_NET);
+	return slots;
+}
+
+static void
+ns_dmamap_cb(__unused void *arg, __unused bus_dma_segment_t * segs,
+ __unused int nseg, __unused int error)
+{ /* deliberately empty: used as the callback of bus_dmamap_load(),
+ * all of its arguments (segments and error included) are ignored.
+ */
+}
+
+/*
+ * Rebind a DMA map to a different buffer: the current mapping is
+ * unloaded and the map is loaded again on buf. Used when the buffer
+ * in the slot is changed.
+ * The result of the load is not checked.
+ * XXX buflen is probably not needed, buffers have constant size.
+ */
+void
+netmap_reload_map(bus_dma_tag_t tag, bus_dmamap_t map,
+	void *buf, bus_size_t buflen)
+{
+	bus_addr_t cb_arg;	/* only handed to the (empty) callback */
+
+	bus_dmamap_unload(tag, map);
+	bus_dmamap_load(tag, map, buf, buflen,
+	    ns_dmamap_cb, &cb_arg, BUS_DMA_NOWAIT);
+}
+
+void
+netmap_load_map(bus_dma_tag_t tag, bus_dmamap_t map,
+	void *buf, bus_size_t buflen)
+{
+	bus_addr_t cb_arg;	/* only handed to the (empty) callback */
+
+	/* load the map on buf without sleeping; the result is not checked */
+	bus_dmamap_load(tag, map, buf, buflen,
+	    ns_dmamap_cb, &cb_arg, BUS_DMA_NOWAIT);
+}
+
+/*------ netmap memory allocator -------*/
+/*
+ * Request for a chunk of memory.
+ *
+ * Memory objects are arranged into a list, hence we need to walk this
+ * list until we find a free object with the needed amount of data
+ * (first fit). The chunk is carved from the beginning of that object
+ * and zeroed. This sounds like a completely inefficient implementation,
+ * but data allocation is done once so it does not matter.
+ *
+ * The descriptor for the new chunk is allocated with M_WAITOK, which
+ * may sleep: this is done BEFORE taking the allocator lock (which
+ * netmap_memory_init() creates as an MTX_DEF mutex, assuming NMA_LOCK
+ * wraps that mutex), and the descriptor is released again if no
+ * suitable chunk is found.
+ *
+ * msg is only used for (disabled) debugging output.
+ * Return the address of the chunk, NULL on failure.
+ */
+static void *
+netmap_malloc(size_t size, __unused const char *msg)
+{
+	struct netmap_mem_obj *mem_obj, *new_mem_obj;
+	void *ret = NULL;
+
+	/* may sleep, so it must happen with no lock held */
+	new_mem_obj = malloc(sizeof(struct netmap_mem_obj), M_NETMAP,
+	    M_WAITOK | M_ZERO);
+
+	NMA_LOCK();
+	TAILQ_FOREACH(mem_obj, &netmap_mem_d->nm_molist, nmo_next) {
+		if (mem_obj->nmo_used != 0 || mem_obj->nmo_size < size)
+			continue;
+
+		/* the new chunk takes the head of the free one */
+		TAILQ_INSERT_BEFORE(mem_obj, new_mem_obj, nmo_next);
+
+		new_mem_obj->nmo_used = 1;
+		new_mem_obj->nmo_size = size;
+		new_mem_obj->nmo_data = mem_obj->nmo_data;
+		memset(new_mem_obj->nmo_data, 0, new_mem_obj->nmo_size);
+
+		/* shrink the free chunk, drop it if nothing is left */
+		mem_obj->nmo_size -= size;
+		mem_obj->nmo_data = (char *) mem_obj->nmo_data + size;
+		if (mem_obj->nmo_size == 0) {
+			TAILQ_REMOVE(&netmap_mem_d->nm_molist, mem_obj,
+			    nmo_next);
+			free(mem_obj, M_NETMAP);
+		}
+
+		ret = new_mem_obj->nmo_data;
+		new_mem_obj = NULL;	/* now owned by the list */
+
+		break;
+	}
+	NMA_UNLOCK();
+
+	/* no chunk was large enough: the descriptor was not used */
+	if (new_mem_obj != NULL)
+		free(new_mem_obj, M_NETMAP);
+	ND("%s: %d bytes at %p", msg, size, ret);
+
+	return (ret);
+}
+
+/*
+ * Return the memory to the allocator.
+ *
+ * addr must be the start of a chunk handed out by netmap_malloc();
+ * a NULL or unknown address is only logged. msg identifies the caller
+ * in the log messages.
+ * The chunk is marked free and then merged with the previous and the
+ * next chunk, when they are free too, to reduce fragmentation.
+ */
+static void
+netmap_free(void *addr, const char *msg)
+{
+	struct netmap_mem_obj *obj, *before, *after;
+	size_t size;
+
+	if (addr == NULL) {
+		D("NULL addr for %s", msg);
+		return;
+	}
+
+	NMA_LOCK();
+	/* look for the in-use chunk starting exactly at addr */
+	for (obj = TAILQ_FIRST(&netmap_mem_d->nm_molist); obj != NULL;
+	    obj = TAILQ_NEXT(obj, nmo_next)) {
+		if (obj->nmo_used && obj->nmo_data == addr)
+			break;
+	}
+	if (obj == NULL) {
+		NMA_UNLOCK();
+		D("invalid addr %s %p", msg, addr);
+		return;
+	}
+
+	size = obj->nmo_size;
+	obj->nmo_used = 0;
+
+	/* fold the chunk into the previous one, if that is free */
+	before = TAILQ_PREV(obj, netmap_mem_obj_h, nmo_next);
+	if (before != NULL && before->nmo_used == 0) {
+		TAILQ_REMOVE(&netmap_mem_d->nm_molist, obj, nmo_next);
+		before->nmo_size += obj->nmo_size;
+		free(obj, M_NETMAP);
+		obj = before;
+	}
+
+	/* then absorb the following one, if that is free */
+	after = TAILQ_NEXT(obj, nmo_next);
+	if (after != NULL && after->nmo_used == 0) {
+		TAILQ_REMOVE(&netmap_mem_d->nm_molist, after, nmo_next);
+		obj->nmo_size += after->nmo_size;
+		free(after, M_NETMAP);
+	}
+	NMA_UNLOCK();
+	ND("freed %s %d bytes at %p", msg, size, addr);
+}
+
+
+/*
+ * Initialize the memory allocator.
+ *
+ * Create the descriptor for the memory, allocate the pool of memory
+ * and initialize the list of memory objects with a single chunk
+ * containing the whole pre-allocated ring area marked as free.
+ *
+ * Start with a large size, then halve as needed if we fail to
+ * allocate the block. At each attempt some extra space is added
+ * because buffers 0 and 1 are used for special purposes.
+ * The loop is left as soon as an allocation succeeds, so that sz
+ * still describes the block actually obtained: nm_totalsize must
+ * match it, it is what netmap_memory_fini() hands to contigfree().
+ *
+ * Return 0 on success, ENOMEM if not even the smallest block (1 MB)
+ * can be allocated.
+ */
+static int
+netmap_memory_init(void)
+{
+	struct netmap_mem_obj *mem_obj;
+	void *buf = NULL;
+	int i, n, tail, sz = NETMAP_MEMORY_SIZE;
+	int extra_sz = 0; // space for rings and two spare buffers
+
+	for (; sz >= 1<<20; sz >>= 1) {
+		extra_sz = sz/200;
+		extra_sz = (extra_sz + 2*PAGE_SIZE - 1) & ~(PAGE_SIZE-1);
+		buf = contigmalloc(sz + extra_sz,
+		    M_NETMAP,
+		    M_WAITOK | M_ZERO,
+		    0,		/* low address */
+		    -1UL,	/* high address */
+		    PAGE_SIZE,	/* alignment */
+		    0		/* boundary */
+		    );
+		if (buf != NULL)
+			break;	/* do not halve sz after a success */
+	}
+	if (buf == NULL)
+		return (ENOMEM);
+	sz += extra_sz;
+	netmap_mem_d = malloc(sizeof(struct netmap_mem_d), M_NETMAP,
+	    M_WAITOK | M_ZERO);
+	mtx_init(&netmap_mem_d->nm_mtx, "netmap memory allocator lock", NULL,
+	    MTX_DEF);
+	TAILQ_INIT(&netmap_mem_d->nm_molist);
+	netmap_mem_d->nm_buffer = buf;
+	netmap_mem_d->nm_totalsize = sz;
+
+	/*
+	 * A buffer takes 2k, a slot takes 8 bytes + ring overhead,
+	 * so the ratio is 200:1. In other words, we can use 1/200 of
+	 * the memory for the rings, and the rest for the buffers,
+	 * and be sure we never run out.
+	 */
+	netmap_mem_d->nm_size = sz/200;
+	/* buffers start at the first page boundary past the ring area */
+	netmap_mem_d->nm_buf_start =
+	    (netmap_mem_d->nm_size + PAGE_SIZE - 1) & ~(PAGE_SIZE-1);
+	netmap_mem_d->nm_buf_len = sz - netmap_mem_d->nm_buf_start;
+
+	nm_buf_pool.base = netmap_mem_d->nm_buffer;
+	nm_buf_pool.base += netmap_mem_d->nm_buf_start;
+	netmap_buffer_base = nm_buf_pool.base;
+	D("netmap_buffer_base %p (offset %d)",
+	    netmap_buffer_base, netmap_mem_d->nm_buf_start);
+	/* number of buffers, they all start as free */
+
+	netmap_total_buffers = nm_buf_pool.total_buffers =
+	    netmap_mem_d->nm_buf_len / NETMAP_BUF_SIZE;
+	nm_buf_pool.bufsize = NETMAP_BUF_SIZE;
+
+	D("Have %d MB, use %dKB for rings, %d buffers at %p",
+	    (sz >> 20), (netmap_mem_d->nm_size >> 10),
+	    nm_buf_pool.total_buffers, nm_buf_pool.base);
+
+	/* allocate and initialize the bitmap (one bit per buffer, a bit
+	 * set means free). Entries 0 and 1 are considered always busy
+	 * (0 is used as default when there are no buffers left).
+	 */
+	n = (nm_buf_pool.total_buffers + 31) / 32;
+	nm_buf_pool.bitmap = malloc(sizeof(uint32_t) * n, M_NETMAP,
+	    M_WAITOK | M_ZERO);
+	nm_buf_pool.bitmap[0] = ~3; /* slot 0 and 1 always busy */
+	for (i = 1; i < n; i++)
+		nm_buf_pool.bitmap[i] = ~0;
+	/*
+	 * The bits of the last word past total_buffers do not correspond
+	 * to any buffer: mark them busy so they can never be handed out.
+	 */
+	tail = nm_buf_pool.total_buffers % 32;
+	if (tail != 0)
+		nm_buf_pool.bitmap[n - 1] &= (1U << tail) - 1;
+	nm_buf_pool.free = nm_buf_pool.total_buffers - 2;
+
+	/* the ring area starts as one single free chunk */
+	mem_obj = malloc(sizeof(struct netmap_mem_obj), M_NETMAP,
+	    M_WAITOK | M_ZERO);
+	TAILQ_INSERT_HEAD(&netmap_mem_d->nm_molist, mem_obj, nmo_next);
+	mem_obj->nmo_used = 0;
+	mem_obj->nmo_size = netmap_mem_d->nm_size;
+	mem_obj->nmo_data = netmap_mem_d->nm_buffer;
+
+	return (0);
+}
+
+
+/*
+ * Finalize the memory allocator.
+ *
+ * Free all the memory objects contained inside the list (reporting
+ * the chunks still in use as leaks), the buffer bitmap and the pool
+ * of memory; finally destroy the allocator lock and free the memory
+ * allocator descriptor.
+ */
+static void
+netmap_memory_fini(void)
+{
+	struct netmap_mem_obj *mem_obj;
+
+	while (!TAILQ_EMPTY(&netmap_mem_d->nm_molist)) {
+		mem_obj = TAILQ_FIRST(&netmap_mem_d->nm_molist);
+		TAILQ_REMOVE(&netmap_mem_d->nm_molist, mem_obj, nmo_next);
+		if (mem_obj->nmo_used == 1) {
+			printf("netmap: leaked %d bytes at %p\n",
+			    mem_obj->nmo_size,
+			    mem_obj->nmo_data);
+		}
+		free(mem_obj, M_NETMAP);
+	}
+	/* the bitmap was allocated by netmap_memory_init() */
+	free(nm_buf_pool.bitmap, M_NETMAP);
+	nm_buf_pool.bitmap = NULL;
+	contigfree(netmap_mem_d->nm_buffer, netmap_mem_d->nm_totalsize, M_NETMAP);
+	/* the lock lives inside the descriptor: destroy it before the free */
+	mtx_destroy(&netmap_mem_d->nm_mtx);
+	free(netmap_mem_d, M_NETMAP);
+}
+
+
+/*
+ * Module loader.
+ *
+ * Initialize the memory allocator, then create the /dev/netmap
+ * device (root:wheel, mode 0660).
+ *
+ * Return 0 on success, the errno from netmap_memory_init() on failure.
+ */
+static int
+netmap_init(void)
+{
+	int error;
+
+	error = netmap_memory_init();
+	if (error != 0) {
+		/* terminate the line, like the other messages here */
+		printf("netmap: unable to initialize the memory allocator.\n");
+		return (error);
+	}
+	printf("netmap: loaded module with %d Mbytes\n",
+	    netmap_mem_d->nm_totalsize >> 20);
+
+	netmap_dev = make_dev(&netmap_cdevsw, 0, UID_ROOT, GID_WHEEL, 0660,
+	    "netmap");
+
+	return (0);
+}
+
+
+/*
+ * Module unloader.
+ *
+ * Destroy the ``/dev/netmap`` device first, then release the memory
+ * allocator state through netmap_memory_fini(), and log the unload.
+ */
+static void
+netmap_fini(void)
+{
+ destroy_dev(netmap_dev);
+
+ netmap_memory_fini();
+
+ printf("netmap: unloaded module.\n");
+}
+
+
+/*
+ * Kernel entry point: module event handler.
+ *
+ * MOD_LOAD runs netmap_init() and returns its result, MOD_UNLOAD runs
+ * netmap_fini() and returns 0; any other event is refused with
+ * EOPNOTSUPP.
+ */
+static int
+netmap_loader(__unused struct module *module, int event, __unused void *arg)
+{
+	if (event == MOD_LOAD)
+		return (netmap_init());
+
+	if (event == MOD_UNLOAD) {
+		netmap_fini();
+		return (0);
+	}
+
+	return (EOPNOTSUPP);
+}
+
+
+DEV_MODULE(netmap, netmap_loader, NULL); /* declare module "netmap" with netmap_loader as its event handler, no extra argument */
diff --git a/sys/dev/netmap/netmap_kern.h b/sys/dev/netmap/netmap_kern.h
new file mode 100644
index 0000000000000..5434609c447b1
--- /dev/null
+++ b/sys/dev/netmap/netmap_kern.h
@@ -0,0 +1,221 @@
+/*
+ * Copyright (C) 2011 Matteo Landi, Luigi Rizzo. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * $FreeBSD$
+ * $Id: netmap_kern.h 9662 2011-11-16 13:18:06Z luigi $
+ *
+ * The header contains the definitions of constants and function
+ * prototypes used only in kernelspace.
+ */
+
+#ifndef _NET_NETMAP_KERN_H_
+#define _NET_NETMAP_KERN_H_
+
+#ifdef MALLOC_DECLARE
+MALLOC_DECLARE(M_NETMAP);
+#endif
+
+#define ND(format, ...) /* expands to nothing: a disabled D() */
+#define D(format, ...) \
+ do { \
+ struct timeval __xxts; \
+ microtime(&__xxts); \
+ printf("%03d.%06d %s [%d] " format "\n",\
+ (int)__xxts.tv_sec % 1000, (int)__xxts.tv_usec, \
+ __FUNCTION__, __LINE__, ##__VA_ARGS__); \
+ } while (0) /* D(): printf prefixed with sec%1000.usec, function name and line */
+
+struct netmap_adapter;
+
+/*
+ * private, kernel view of a ring.
+ *
+ * XXX 20110627-todo
+ * The index in the NIC and netmap ring is offset by nkr_hwofs slots.
+ * This is so that, on a reset, buffers owned by userspace are not
+ * modified by the kernel. In particular:
+ * RX rings: the next empty buffer (hwcur + hwavail + hwofs) coincides with
+ * the next empty buffer as known by the hardware (next_to_check or so).
+ * TX rings: hwcur + hwofs coincides with next_to_send
+ */
+struct netmap_kring {
+ struct netmap_ring *ring; /* the ring this kernel view refers to */
+ u_int nr_hwcur; /* "hwcur" in the note above; presumably the kernel's current slot -- confirm */
+ int nr_hwavail; /* "hwavail" in the note above; presumably a slot count -- confirm */
+ u_int nr_kflags; /* flag bits; no values are defined in this header */
+ u_int nkr_num_slots; /* presumably the number of slots in the ring -- confirm */
+
+ u_int nkr_hwofs; /* offset between NIC and netmap ring */
+ struct netmap_adapter *na; // debugging
+ struct selinfo si; /* poll/select wait queue */
+};
+
+/*
+ * This struct is part of and extends the 'struct adapter' (or
+ * equivalent) device descriptor. It contains all fields needed to
+ * support netmap operation.
+ */
+struct netmap_adapter {
+ int refcount; /* number of user-space descriptors using this
+ interface, which is equal to the number of
+ struct netmap_if objs in the mapped region. */
+
+ int separate_locks; /* set if the interface supports different
+ locks for rx, tx and core. */
+
+ u_int num_queues; /* number of tx/rx queue pairs: this is
+ a duplicate field needed to simplify the
+ signature of ``netmap_detach``. */
+
+ u_int num_tx_desc; /* number of descriptors in each queue */
+ u_int num_rx_desc; /* presumably the rx counterpart of num_tx_desc -- confirm */
+ u_int buff_size; /* presumably the size of a packet buffer -- confirm */
+
+ u_int flags; /* NR_REINIT */
+ /* tx_rings and rx_rings are private but allocated
+ * as a contiguous chunk of memory. Each array has
+ * N+1 entries, for the adapter queues and for the host queue.
+ */
+ struct netmap_kring *tx_rings; /* array of TX rings. */
+ struct netmap_kring *rx_rings; /* array of RX rings. */
+
+ /* copy of if_qflush and if_transmit pointers, to intercept
+ * packets from the network stack when netmap is active.
+ * XXX probably if_qflush is not necessary.
+ */
+ void (*if_qflush)(struct ifnet *);
+ int (*if_transmit)(struct ifnet *, struct mbuf *);
+
+ /* references to the ifnet and device routines, used by
+ * the generic netmap functions.
+ */
+ struct ifnet *ifp; /* adapter is ifp->if_softc */
+
+ int (*nm_register)(struct ifnet *, int onoff);
+ void (*nm_lock)(void *, int what, u_int ringid); /* what: a NETMAP_*_LOCK/UNLOCK code */
+ int (*nm_txsync)(void *, u_int ring, int lock);
+ int (*nm_rxsync)(void *, u_int ring, int lock);
+};
+
+/*
+ * The combination of "enable" (ifp->if_capenable & IFCAP_NETMAP)
+ * and refcount gives the status of the interface, namely:
+ *
+ * enable refcount Status
+ *
+ * FALSE 0 normal operation
+ * FALSE != 0 -- (impossible)
+ * TRUE 1 netmap mode
+ * TRUE 0 being deleted.
+ */
+
+#define NETMAP_DELETING(_na)						\
+	((_na)->refcount == 0 && ((_na)->ifp->if_capenable & IFCAP_NETMAP))
+
+/*
+ * parameters for (*nm_lock)(adapter, what, index)
+ */
+enum {
+ NETMAP_NO_LOCK = 0, /* explicit 0; the names below count up from 1 */
+ NETMAP_CORE_LOCK, NETMAP_CORE_UNLOCK, /* 1, 2 */
+ NETMAP_TX_LOCK, NETMAP_TX_UNLOCK, /* 3, 4 */
+ NETMAP_RX_LOCK, NETMAP_RX_UNLOCK, /* 5, 6 */
+};
+
+/*
+ * The following are support routines used by individual drivers to
+ * support netmap operation.
+ *
+ * netmap_attach() initializes a struct netmap_adapter, allocating the
+ * struct netmap_ring's and the struct selinfo.
+ *
+ * netmap_detach() frees the memory allocated by netmap_attach().
+ *
+ * netmap_start() replaces the if_transmit routine of the interface,
+ * and is used to intercept packets coming from the stack.
+ *
+ * netmap_load_map/netmap_reload_map are helper routines to set/reset
+ * the dmamap for a packet buffer
+ *
+ * netmap_reset() is a helper routine to be called in the driver
+ * when reinitializing a ring.
+ */
+int netmap_attach(struct netmap_adapter *, int);
+void netmap_detach(struct ifnet *);
+int netmap_start(struct ifnet *, struct mbuf *);
+enum txrx { NR_RX = 0, NR_TX = 1 };
+struct netmap_slot *netmap_reset(struct netmap_adapter *na,
+ enum txrx tx, int n, u_int new_cur);
+void netmap_load_map(bus_dma_tag_t tag, bus_dmamap_t map,
+ void *buf, bus_size_t buflen);
+void netmap_reload_map(bus_dma_tag_t tag, bus_dmamap_t map,
+ void *buf, bus_size_t buflen);
+int netmap_ring_reinit(struct netmap_kring *);
+
+/*
+ * XXX eventually, get rid of netmap_total_buffers and netmap_buffer_base
+ * in favour of the structure
+ */
+// struct netmap_buf_pool;
+// extern struct netmap_buf_pool nm_buf_pool;
+extern u_int netmap_total_buffers;
+extern char *netmap_buffer_base;
+extern int netmap_verbose; // XXX debugging
+enum { /* verbose flags: distinct bits, presumably tested against netmap_verbose -- confirm */
+ NM_VERB_ON = 1, /* generic verbose */
+ NM_VERB_HOST = 0x2, /* verbose host stack */
+ NM_VERB_RXSYNC = 0x10, /* verbose on rxsync/txsync */
+ NM_VERB_TXSYNC = 0x20,
+ NM_VERB_RXINTR = 0x100, /* verbose on rx/tx intr (driver) */
+ NM_VERB_TXINTR = 0x200,
+ NM_VERB_NIC_RXSYNC = 0x1000, /* presumably verbose on NIC rxsync/txsync (driver) -- confirm */
+ NM_VERB_NIC_TXSYNC = 0x2000,
+};
+
+/*
+ * return a pointer to the struct netmap adapter from the ifp
+ */
+#define NA(_ifp) ((struct netmap_adapter *)(_ifp)->if_pspare[0]) /* adapter pointer is read from if_pspare[0] */
+
+
+/*
+ * Return the address of the buffer selected by slot->buf_idx; an out-of-range
+ * index yields netmap_buffer_base, which is detected as a bad pointer.
+ */
+static inline char *
+NMB(struct netmap_slot *slot)
+{
+	uint32_t idx = slot->buf_idx;	/* read the index only once */
+	if (idx >= netmap_total_buffers)
+		return (netmap_buffer_base);
+#if NETMAP_BUF_SIZE == 2048
+	return (netmap_buffer_base + (idx << 11)); /* XXX hardwired 2k bufs */
+#else
+	return (netmap_buffer_base + (idx * NETMAP_BUF_SIZE));
+#endif
+}
+
+#endif /* _NET_NETMAP_KERN_H_ */