From 2ff91c175eca50b7d0d9da6b31eae4109c034137 Mon Sep 17 00:00:00 2001 From: Vincenzo Maffione Date: Thu, 12 Apr 2018 07:20:50 +0000 Subject: netmap: align codebase to the current upstream (commit id 3fb001303718146) Changelist: - Turn tx_rings and rx_rings arrays into arrays of pointers to kring structs. This patch includes fixes for ixv, ixl, ix, re, cxgbe, iflib, vtnet and ptnet drivers to cope with the change. - Generalize the nm_config() callback to accept a struct containing many parameters. - Introduce NKR_FAKERING to support buffer sharing (used for netmap pipes) - Improved API for external VALE modules. - Various bug fixes and improvements to the netmap memory allocator, including support for externally (userspace) allocated memory. - Refactoring of netmap pipes: now linked rings share the same netmap buffers, with a separate set of kring pointers (rhead, rcur, rtail). Buffer swapping does not need to happen anymore. - Large refactoring of the control API towards an extensible solution; the goal is to allow the addition of more commands and extension of existing ones (with new options) without the need of hacks or the risk of running out of configuration space. A new NIOCCTRL ioctl has been added to handle all the requests of the new control API, which cover all the functionalities so far supported. The netmap API bumps from 11 to 12 with this patch. Full backward compatibility is provided for the old control command (NIOCREGIF), by means of a new netmap_legacy module. Many parts of the old netmap.h header have now been moved to netmap_legacy.h (included by netmap.h). 
Approved by: hrs (mentor) --- sys/dev/netmap/if_ptnet.c | 34 +- sys/dev/netmap/if_re_netmap.h | 6 +- sys/dev/netmap/if_vtnet_netmap.h | 26 +- sys/dev/netmap/netmap.c | 1077 ++++++++++++++++++++++++++------------ sys/dev/netmap/netmap_freebsd.c | 120 ++++- sys/dev/netmap/netmap_generic.c | 6 +- sys/dev/netmap/netmap_kern.h | 155 ++++-- sys/dev/netmap/netmap_legacy.c | 428 +++++++++++++++ sys/dev/netmap/netmap_mem2.c | 564 +++++++++----------- sys/dev/netmap/netmap_mem2.h | 21 +- sys/dev/netmap/netmap_monitor.c | 56 +- sys/dev/netmap/netmap_pipe.c | 323 ++++++++---- sys/dev/netmap/netmap_pt.c | 195 +++---- sys/dev/netmap/netmap_vale.c | 933 ++++++++++++++++++++------------- 14 files changed, 2575 insertions(+), 1369 deletions(-) create mode 100644 sys/dev/netmap/netmap_legacy.c (limited to 'sys/dev/netmap') diff --git a/sys/dev/netmap/if_ptnet.c b/sys/dev/netmap/if_ptnet.c index 1805a7f31e48d..b6059dc55cfad 100644 --- a/sys/dev/netmap/if_ptnet.c +++ b/sys/dev/netmap/if_ptnet.c @@ -210,8 +210,8 @@ static int ptnet_irqs_init(struct ptnet_softc *sc); static void ptnet_irqs_fini(struct ptnet_softc *sc); static uint32_t ptnet_nm_ptctl(if_t ifp, uint32_t cmd); -static int ptnet_nm_config(struct netmap_adapter *na, unsigned *txr, - unsigned *txd, unsigned *rxr, unsigned *rxd); +static int ptnet_nm_config(struct netmap_adapter *na, + struct nm_config_info *info); static void ptnet_update_vnet_hdr(struct ptnet_softc *sc); static int ptnet_nm_register(struct netmap_adapter *na, int onoff); static int ptnet_nm_txsync(struct netmap_kring *kring, int flags); @@ -1104,18 +1104,20 @@ ptnet_nm_ptctl(if_t ifp, uint32_t cmd) } static int -ptnet_nm_config(struct netmap_adapter *na, unsigned *txr, unsigned *txd, - unsigned *rxr, unsigned *rxd) +ptnet_nm_config(struct netmap_adapter *na, struct nm_config_info *info) { struct ptnet_softc *sc = if_getsoftc(na->ifp); - *txr = bus_read_4(sc->iomem, PTNET_IO_NUM_TX_RINGS); - *rxr = bus_read_4(sc->iomem, PTNET_IO_NUM_RX_RINGS); - *txd = 
bus_read_4(sc->iomem, PTNET_IO_NUM_TX_SLOTS); - *rxd = bus_read_4(sc->iomem, PTNET_IO_NUM_RX_SLOTS); + info->num_tx_rings = bus_read_4(sc->iomem, PTNET_IO_NUM_TX_RINGS); + info->num_rx_rings = bus_read_4(sc->iomem, PTNET_IO_NUM_RX_RINGS); + info->num_tx_descs = bus_read_4(sc->iomem, PTNET_IO_NUM_TX_SLOTS); + info->num_rx_descs = bus_read_4(sc->iomem, PTNET_IO_NUM_RX_SLOTS); + info->rx_buf_maxsize = NETMAP_BUF_SIZE(na); - device_printf(sc->dev, "txr %u, rxr %u, txd %u, rxd %u\n", - *txr, *rxr, *txd, *rxd); + device_printf(sc->dev, "txr %u, rxr %u, txd %u, rxd %u, rxbufsz %u\n", + info->num_tx_rings, info->num_rx_rings, + info->num_tx_descs, info->num_rx_descs, + info->rx_buf_maxsize); return 0; } @@ -1133,9 +1135,9 @@ ptnet_sync_from_csb(struct ptnet_softc *sc, struct netmap_adapter *na) struct netmap_kring *kring; if (i < na->num_tx_rings) { - kring = na->tx_rings + i; + kring = na->tx_rings[i]; } else { - kring = na->rx_rings + i - na->num_tx_rings; + kring = na->rx_rings[i - na->num_tx_rings]; } kring->rhead = kring->ring->head = ptgh->head; kring->rcur = kring->ring->cur = ptgh->cur; @@ -1228,7 +1230,7 @@ ptnet_nm_register(struct netmap_adapter *na, int onoff) if (native) { for_rx_tx(t) { for (i = 0; i <= nma_get_nrings(na, t); i++) { - struct netmap_kring *kring = &NMR(na, t)[i]; + struct netmap_kring *kring = NMR(na, t)[i]; if (nm_kring_pending_on(kring)) { kring->nr_mode = NKR_NETMAP_ON; @@ -1243,7 +1245,7 @@ ptnet_nm_register(struct netmap_adapter *na, int onoff) nm_clear_native_flags(na); for_rx_tx(t) { for (i = 0; i <= nma_get_nrings(na, t); i++) { - struct netmap_kring *kring = &NMR(na, t)[i]; + struct netmap_kring *kring = NMR(na, t)[i]; if (nm_kring_pending_off(kring)) { kring->nr_mode = NKR_NETMAP_OFF; @@ -1758,7 +1760,7 @@ ptnet_drain_transmit_queue(struct ptnet_queue *pq, unsigned int budget, ptgh = pq->ptgh; pthg = pq->pthg; - kring = na->tx_rings + pq->kring_id; + kring = na->tx_rings[pq->kring_id]; ring = kring->ring; lim = kring->nkr_num_slots - 
1; head = ring->head; @@ -2021,7 +2023,7 @@ ptnet_rx_eof(struct ptnet_queue *pq, unsigned int budget, bool may_resched) struct ptnet_csb_gh *ptgh = pq->ptgh; struct ptnet_csb_hg *pthg = pq->pthg; struct netmap_adapter *na = &sc->ptna->dr.up; - struct netmap_kring *kring = na->rx_rings + pq->kring_id; + struct netmap_kring *kring = na->rx_rings[pq->kring_id]; struct netmap_ring *ring = kring->ring; unsigned int const lim = kring->nkr_num_slots - 1; unsigned int batch_count = 0; diff --git a/sys/dev/netmap/if_re_netmap.h b/sys/dev/netmap/if_re_netmap.h index e7dd087acc676..0e56a731ac6aa 100644 --- a/sys/dev/netmap/if_re_netmap.h +++ b/sys/dev/netmap/if_re_netmap.h @@ -304,7 +304,7 @@ re_netmap_tx_init(struct rl_softc *sc) /* l points in the netmap ring, i points in the NIC ring */ for (i = 0; i < n; i++) { uint64_t paddr; - int l = netmap_idx_n2k(&na->tx_rings[0], i); + int l = netmap_idx_n2k(na->tx_rings[0], i); void *addr = PNMB(na, slot + l, &paddr); desc[i].rl_bufaddr_lo = htole32(RL_ADDR_LO(paddr)); @@ -330,11 +330,11 @@ re_netmap_rx_init(struct rl_softc *sc) * Do not release the slots owned by userspace, * and also keep one empty. 
*/ - max_avail = n - 1 - nm_kr_rxspace(&na->rx_rings[0]); + max_avail = n - 1 - nm_kr_rxspace(na->rx_rings[0]); for (nic_i = 0; nic_i < n; nic_i++) { void *addr; uint64_t paddr; - uint32_t nm_i = netmap_idx_n2k(&na->rx_rings[0], nic_i); + uint32_t nm_i = netmap_idx_n2k(na->rx_rings[0], nic_i); addr = PNMB(na, slot + nm_i, &paddr); diff --git a/sys/dev/netmap/if_vtnet_netmap.h b/sys/dev/netmap/if_vtnet_netmap.h index 10789c53d1f06..e4ab64d2ed976 100644 --- a/sys/dev/netmap/if_vtnet_netmap.h +++ b/sys/dev/netmap/if_vtnet_netmap.h @@ -383,7 +383,7 @@ vtnet_netmap_init_rx_buffers(struct SOFTC_T *sc) if (!nm_native_on(na)) return 0; for (r = 0; r < na->num_rx_rings; r++) { - struct netmap_kring *kring = &na->rx_rings[r]; + struct netmap_kring *kring = na->rx_rings[r]; struct vtnet_rxq *rxq = &sc->vtnet_rxqs[r]; struct virtqueue *vq = rxq->vtnrx_vq; struct netmap_slot* slot; @@ -407,29 +407,6 @@ vtnet_netmap_init_rx_buffers(struct SOFTC_T *sc) return 1; } -/* Update the virtio-net device configurations. Number of queues can - * change dinamically, by 'ethtool --set-channels $IFNAME combined $N'. - * This is actually the only way virtio-net can currently enable - * the multiqueue mode. 
- * XXX note that we seem to lose packets if the netmap ring has more - * slots than the queue - */ -static int -vtnet_netmap_config(struct netmap_adapter *na, u_int *txr, u_int *txd, - u_int *rxr, u_int *rxd) -{ - struct ifnet *ifp = na->ifp; - struct SOFTC_T *sc = ifp->if_softc; - - *txr = *rxr = sc->vtnet_max_vq_pairs; - *rxd = 512; // sc->vtnet_rx_nmbufs; - *txd = *rxd; // XXX - D("vtnet config txq=%d, txd=%d rxq=%d, rxd=%d", - *txr, *txd, *rxr, *rxd); - - return 0; -} - static void vtnet_netmap_attach(struct SOFTC_T *sc) { @@ -443,7 +420,6 @@ vtnet_netmap_attach(struct SOFTC_T *sc) na.nm_register = vtnet_netmap_reg; na.nm_txsync = vtnet_netmap_txsync; na.nm_rxsync = vtnet_netmap_rxsync; - na.nm_config = vtnet_netmap_config; na.nm_intr = vtnet_netmap_intr; na.num_tx_rings = na.num_rx_rings = sc->vtnet_max_vq_pairs; D("max rings %d", sc->vtnet_max_vq_pairs); diff --git a/sys/dev/netmap/netmap.c b/sys/dev/netmap/netmap.c index 3c5551bad1569..d6230dfb8ebe1 100644 --- a/sys/dev/netmap/netmap.c +++ b/sys/dev/netmap/netmap.c @@ -262,7 +262,7 @@ ports attached to the switch) * * Any network interface known to the system (including a persistent VALE * port) can be attached to a VALE switch by issuing the - * NETMAP_BDG_ATTACH subcommand. After the attachment, persistent VALE ports + * NETMAP_REQ_VALE_ATTACH command. After the attachment, persistent VALE ports * look exactly like ephemeral VALE ports (as created in step 2 above). The * attachment of other interfaces, instead, requires the creation of a * netmap_bwrap_adapter. 
Moreover, the attached interface must be put in @@ -591,9 +591,9 @@ void netmap_set_ring(struct netmap_adapter *na, u_int ring_id, enum txrx t, int stopped) { if (stopped) - netmap_disable_ring(NMR(na, t) + ring_id, stopped); + netmap_disable_ring(NMR(na, t)[ring_id], stopped); else - NMR(na, t)[ring_id].nkr_stopped = 0; + NMR(na, t)[ring_id]->nkr_stopped = 0; } @@ -745,39 +745,42 @@ nm_dump_buf(char *p, int len, int lim, char *dst) int netmap_update_config(struct netmap_adapter *na) { - u_int txr, txd, rxr, rxd; + struct nm_config_info info; - txr = txd = rxr = rxd = 0; + bzero(&info, sizeof(info)); if (na->nm_config == NULL || - na->nm_config(na, &txr, &txd, &rxr, &rxd)) - { + na->nm_config(na, &info)) { /* take whatever we had at init time */ - txr = na->num_tx_rings; - txd = na->num_tx_desc; - rxr = na->num_rx_rings; - rxd = na->num_rx_desc; - } - - if (na->num_tx_rings == txr && na->num_tx_desc == txd && - na->num_rx_rings == rxr && na->num_rx_desc == rxd) + info.num_tx_rings = na->num_tx_rings; + info.num_tx_descs = na->num_tx_desc; + info.num_rx_rings = na->num_rx_rings; + info.num_rx_descs = na->num_rx_desc; + info.rx_buf_maxsize = na->rx_buf_maxsize; + } + + if (na->num_tx_rings == info.num_tx_rings && + na->num_tx_desc == info.num_tx_descs && + na->num_rx_rings == info.num_rx_rings && + na->num_rx_desc == info.num_rx_descs && + na->rx_buf_maxsize == info.rx_buf_maxsize) return 0; /* nothing changed */ - if (netmap_verbose || na->active_fds > 0) { - D("stored config %s: txring %d x %d, rxring %d x %d", - na->name, - na->num_tx_rings, na->num_tx_desc, - na->num_rx_rings, na->num_rx_desc); - D("new config %s: txring %d x %d, rxring %d x %d", - na->name, txr, txd, rxr, rxd); - } if (na->active_fds == 0) { - D("configuration changed (but fine)"); - na->num_tx_rings = txr; - na->num_tx_desc = txd; - na->num_rx_rings = rxr; - na->num_rx_desc = rxd; + D("configuration changed for %s: txring %d x %d, " + "rxring %d x %d, rxbufsz %d", + na->name, na->num_tx_rings, 
na->num_tx_desc, + na->num_rx_rings, na->num_rx_desc, na->rx_buf_maxsize); + na->num_tx_rings = info.num_tx_rings; + na->num_tx_desc = info.num_tx_descs; + na->num_rx_rings = info.num_rx_rings; + na->num_rx_desc = info.num_rx_descs; + na->rx_buf_maxsize = info.rx_buf_maxsize; return 0; } - D("configuration changed while active, this is bad..."); + D("WARNING: configuration changed for %s while active: " + "txring %d x %d, rxring %d x %d, rxbufsz %d", + na->name, info.num_tx_rings, info.num_tx_descs, + info.num_rx_rings, info.num_rx_descs, + info.rx_buf_maxsize); return 1; } @@ -827,7 +830,9 @@ netmap_krings_create(struct netmap_adapter *na, u_int tailroom) n[NR_TX] = na->num_tx_rings + 1; n[NR_RX] = na->num_rx_rings + 1; - len = (n[NR_TX] + n[NR_RX]) * sizeof(struct netmap_kring) + tailroom; + len = (n[NR_TX] + n[NR_RX]) * + (sizeof(struct netmap_kring) + sizeof(struct netmap_kring *)) + + tailroom; na->tx_rings = nm_os_malloc((size_t)len); if (na->tx_rings == NULL) { @@ -835,6 +840,14 @@ netmap_krings_create(struct netmap_adapter *na, u_int tailroom) return ENOMEM; } na->rx_rings = na->tx_rings + n[NR_TX]; + na->tailroom = na->rx_rings + n[NR_RX]; + + /* link the krings in the krings array */ + kring = (struct netmap_kring *)((char *)na->tailroom + tailroom); + for (i = 0; i < n[NR_TX] + n[NR_RX]; i++) { + na->tx_rings[i] = kring; + kring++; + } /* * All fields in krings are 0 except the one initialized below. @@ -843,9 +856,10 @@ netmap_krings_create(struct netmap_adapter *na, u_int tailroom) for_rx_tx(t) { ndesc = nma_get_ndesc(na, t); for (i = 0; i < n[t]; i++) { - kring = &NMR(na, t)[i]; + kring = NMR(na, t)[i]; bzero(kring, sizeof(*kring)); kring->na = na; + kring->notify_na = na; kring->ring_id = i; kring->tx = t; kring->nkr_num_slots = ndesc; @@ -854,6 +868,8 @@ netmap_krings_create(struct netmap_adapter *na, u_int tailroom) if (i < nma_get_nrings(na, t)) { kring->nm_sync = (t == NR_TX ? 
na->nm_txsync : na->nm_rxsync); } else { + if (!(na->na_flags & NAF_HOST_RINGS)) + kring->nr_kflags |= NKR_FAKERING; kring->nm_sync = (t == NR_TX ? netmap_txsync_to_host: netmap_rxsync_from_host); @@ -874,7 +890,6 @@ netmap_krings_create(struct netmap_adapter *na, u_int tailroom) nm_os_selinfo_init(&na->si[t]); } - na->tailroom = na->rx_rings + n[NR_RX]; return 0; } @@ -885,7 +900,7 @@ netmap_krings_create(struct netmap_adapter *na, u_int tailroom) void netmap_krings_delete(struct netmap_adapter *na) { - struct netmap_kring *kring = na->tx_rings; + struct netmap_kring **kring = na->tx_rings; enum txrx t; if (na->tx_rings == NULL) { @@ -898,8 +913,8 @@ netmap_krings_delete(struct netmap_adapter *na) /* we rely on the krings layout described above */ for ( ; kring != na->tailroom; kring++) { - mtx_destroy(&kring->q_lock); - nm_os_selinfo_uninit(&kring->si); + mtx_destroy(&(*kring)->q_lock); + nm_os_selinfo_uninit(&(*kring)->si); } nm_os_free(na->tx_rings); na->tx_rings = na->rx_rings = na->tailroom = NULL; @@ -915,7 +930,7 @@ netmap_krings_delete(struct netmap_adapter *na) void netmap_hw_krings_delete(struct netmap_adapter *na) { - struct mbq *q = &na->rx_rings[na->num_rx_rings].rx_queue; + struct mbq *q = &na->rx_rings[na->num_rx_rings]->rx_queue; ND("destroy sw mbq with len %d", mbq_len(q)); mbq_purge(q); @@ -1196,7 +1211,7 @@ nm_may_forward_down(struct netmap_kring *kring, int sync_flags) static u_int netmap_sw_to_nic(struct netmap_adapter *na) { - struct netmap_kring *kring = &na->rx_rings[na->num_rx_rings]; + struct netmap_kring *kring = na->rx_rings[na->num_rx_rings]; struct netmap_slot *rxslot = kring->ring->slot; u_int i, rxcur = kring->nr_hwcur; u_int const head = kring->rhead; @@ -1205,7 +1220,7 @@ netmap_sw_to_nic(struct netmap_adapter *na) /* scan rings to find space, then fill as much as possible */ for (i = 0; i < na->num_tx_rings; i++) { - struct netmap_kring *kdst = &na->tx_rings[i]; + struct netmap_kring *kdst = na->tx_rings[i]; struct netmap_ring 
*rdst = kdst->ring; u_int const dst_lim = kdst->nkr_num_slots - 1; @@ -1443,7 +1458,7 @@ assign_mem: * MUST BE CALLED UNDER NMG_LOCK() * * Get a refcounted reference to a netmap adapter attached - * to the interface specified by nmr. + * to the interface specified by req. * This is always called in the execution of an ioctl(). * * Return ENXIO if the interface specified by the request does @@ -1453,13 +1468,15 @@ assign_mem: * could not be allocated. * If successful, hold a reference to the netmap adapter. * - * If the interface specified by nmr is a system one, also keep + * If the interface specified by req is a system one, also keep * a reference to it and return a valid *ifp. */ int -netmap_get_na(struct nmreq *nmr, struct netmap_adapter **na, - struct ifnet **ifp, struct netmap_mem_d *nmd, int create) +netmap_get_na(struct nmreq_header *hdr, + struct netmap_adapter **na, struct ifnet **ifp, + struct netmap_mem_d *nmd, int create) { + struct nmreq_register *req = (struct nmreq_register *)hdr->nr_body; int error = 0; struct netmap_adapter *ret = NULL; int nmd_ref = 0; @@ -1467,13 +1484,24 @@ netmap_get_na(struct nmreq *nmr, struct netmap_adapter **na, *na = NULL; /* default return value */ *ifp = NULL; + if (hdr->nr_reqtype != NETMAP_REQ_REGISTER) { + return EINVAL; + } + + if (req->nr_mode == NR_REG_PIPE_MASTER || + req->nr_mode == NR_REG_PIPE_SLAVE) { + /* Do not accept deprecated pipe modes. 
*/ + D("Deprecated pipe nr_mode, use xx{yy or xx}yy syntax"); + return EINVAL; + } + NMG_LOCK_ASSERT(); /* if the request contain a memid, try to find the * corresponding memory region */ - if (nmd == NULL && nmr->nr_arg2) { - nmd = netmap_mem_find(nmr->nr_arg2); + if (nmd == NULL && req->nr_mem_id) { + nmd = netmap_mem_find(req->nr_mem_id); if (nmd == NULL) return EINVAL; /* keep the rereference */ @@ -1492,22 +1520,22 @@ netmap_get_na(struct nmreq *nmr, struct netmap_adapter **na, */ /* try to see if this is a ptnetmap port */ - error = netmap_get_pt_host_na(nmr, na, nmd, create); + error = netmap_get_pt_host_na(hdr, na, nmd, create); if (error || *na != NULL) goto out; /* try to see if this is a monitor port */ - error = netmap_get_monitor_na(nmr, na, nmd, create); + error = netmap_get_monitor_na(hdr, na, nmd, create); if (error || *na != NULL) goto out; /* try to see if this is a pipe port */ - error = netmap_get_pipe_na(nmr, na, nmd, create); + error = netmap_get_pipe_na(hdr, na, nmd, create); if (error || *na != NULL) goto out; /* try to see if this is a bridge port */ - error = netmap_get_bdg_na(nmr, na, nmd, create); + error = netmap_get_bdg_na(hdr, na, nmd, create); if (error) goto out; @@ -1520,7 +1548,7 @@ netmap_get_na(struct nmreq *nmr, struct netmap_adapter **na, * This may still be a tap, a veth/epair, or even a * persistent VALE port. 
*/ - *ifp = ifunit_ref(nmr->nr_name); + *ifp = ifunit_ref(hdr->nr_name); if (*ifp == NULL) { error = ENXIO; goto out; @@ -1765,42 +1793,27 @@ netmap_ring_reinit(struct netmap_kring *kring) * */ int -netmap_interp_ringid(struct netmap_priv_d *priv, uint16_t ringid, uint32_t flags) +netmap_interp_ringid(struct netmap_priv_d *priv, uint32_t nr_mode, + uint16_t nr_ringid, uint64_t nr_flags) { struct netmap_adapter *na = priv->np_na; - u_int j, i = ringid & NETMAP_RING_MASK; - u_int reg = flags & NR_REG_MASK; int excluded_direction[] = { NR_TX_RINGS_ONLY, NR_RX_RINGS_ONLY }; enum txrx t; + u_int j; - if (reg == NR_REG_DEFAULT) { - /* convert from old ringid to flags */ - if (ringid & NETMAP_SW_RING) { - reg = NR_REG_SW; - } else if (ringid & NETMAP_HW_RING) { - reg = NR_REG_ONE_NIC; - } else { - reg = NR_REG_ALL_NIC; - } - D("deprecated API, old ringid 0x%x -> ringid %x reg %d", ringid, i, reg); - } - - if ((flags & NR_PTNETMAP_HOST) && ((reg != NR_REG_ALL_NIC && - reg != NR_REG_PIPE_MASTER && reg != NR_REG_PIPE_SLAVE) || - flags & (NR_RX_RINGS_ONLY|NR_TX_RINGS_ONLY))) { + if ((nr_flags & NR_PTNETMAP_HOST) && ((nr_mode != NR_REG_ALL_NIC) || + nr_flags & (NR_RX_RINGS_ONLY|NR_TX_RINGS_ONLY))) { D("Error: only NR_REG_ALL_NIC supported with netmap passthrough"); return EINVAL; } for_rx_tx(t) { - if (flags & excluded_direction[t]) { + if (nr_flags & excluded_direction[t]) { priv->np_qfirst[t] = priv->np_qlast[t] = 0; continue; } - switch (reg) { + switch (nr_mode) { case NR_REG_ALL_NIC: - case NR_REG_PIPE_MASTER: - case NR_REG_PIPE_SLAVE: priv->np_qfirst[t] = 0; priv->np_qlast[t] = nma_get_nrings(na, t); ND("ALL/PIPE: %s %d %d", nm_txrx2str(t), @@ -1812,20 +1825,21 @@ netmap_interp_ringid(struct netmap_priv_d *priv, uint16_t ringid, uint32_t flags D("host rings not supported"); return EINVAL; } - priv->np_qfirst[t] = (reg == NR_REG_SW ? + priv->np_qfirst[t] = (nr_mode == NR_REG_SW ? 
nma_get_nrings(na, t) : 0); priv->np_qlast[t] = nma_get_nrings(na, t) + 1; - ND("%s: %s %d %d", reg == NR_REG_SW ? "SW" : "NIC+SW", + ND("%s: %s %d %d", nr_mode == NR_REG_SW ? "SW" : "NIC+SW", nm_txrx2str(t), priv->np_qfirst[t], priv->np_qlast[t]); break; case NR_REG_ONE_NIC: - if (i >= na->num_tx_rings && i >= na->num_rx_rings) { - D("invalid ring id %d", i); + if (nr_ringid >= na->num_tx_rings && + nr_ringid >= na->num_rx_rings) { + D("invalid ring id %d", nr_ringid); return EINVAL; } /* if not enough rings, use the first one */ - j = i; + j = nr_ringid; if (j >= nma_get_nrings(na, t)) j = 0; priv->np_qfirst[t] = j; @@ -1834,11 +1848,11 @@ netmap_interp_ringid(struct netmap_priv_d *priv, uint16_t ringid, uint32_t flags priv->np_qfirst[t], priv->np_qlast[t]); break; default: - D("invalid regif type %d", reg); + D("invalid regif type %d", nr_mode); return EINVAL; } } - priv->np_flags = (flags & ~NR_REG_MASK) | reg; + priv->np_flags = nr_flags | nr_mode; // TODO /* Allow transparent forwarding mode in the host --> nic * direction only if all the TX hw rings have been opened. */ @@ -1854,7 +1868,7 @@ netmap_interp_ringid(struct netmap_priv_d *priv, uint16_t ringid, uint32_t flags priv->np_qlast[NR_TX], priv->np_qfirst[NR_RX], priv->np_qlast[NR_RX], - i); + nr_ringid); } return 0; } @@ -1865,18 +1879,19 @@ netmap_interp_ringid(struct netmap_priv_d *priv, uint16_t ringid, uint32_t flags * for all rings is the same as a single ring. */ static int -netmap_set_ringid(struct netmap_priv_d *priv, uint16_t ringid, uint32_t flags) +netmap_set_ringid(struct netmap_priv_d *priv, uint32_t nr_mode, + uint16_t nr_ringid, uint64_t nr_flags) { struct netmap_adapter *na = priv->np_na; int error; enum txrx t; - error = netmap_interp_ringid(priv, ringid, flags); + error = netmap_interp_ringid(priv, nr_mode, nr_ringid, nr_flags); if (error) { return error; } - priv->np_txpoll = (ringid & NETMAP_NO_TX_POLL) ? 0 : 1; + priv->np_txpoll = (nr_flags & NR_NO_TX_POLL) ? 
0 : 1; /* optimization: count the users registered for more than * one ring, which are the ones sleeping on the global queue. @@ -1933,7 +1948,7 @@ netmap_krings_get(struct netmap_priv_d *priv) */ for_rx_tx(t) { for (i = priv->np_qfirst[t]; i < priv->np_qlast[t]; i++) { - kring = &NMR(na, t)[i]; + kring = NMR(na, t)[i]; if ((kring->nr_kflags & NKR_EXCLUSIVE) || (kring->users && excl)) { @@ -1948,7 +1963,7 @@ netmap_krings_get(struct netmap_priv_d *priv) */ for_rx_tx(t) { for (i = priv->np_qfirst[t]; i < priv->np_qlast[t]; i++) { - kring = &NMR(na, t)[i]; + kring = NMR(na, t)[i]; kring->users++; if (excl) kring->nr_kflags |= NKR_EXCLUSIVE; @@ -1979,10 +1994,9 @@ netmap_krings_put(struct netmap_priv_d *priv) priv->np_qfirst[NR_RX], priv->np_qlast[MR_RX]); - for_rx_tx(t) { for (i = priv->np_qfirst[t]; i < priv->np_qlast[t]; i++) { - kring = &NMR(na, t)[i]; + kring = NMR(na, t)[i]; if (excl) kring->nr_kflags &= ~NKR_EXCLUSIVE; kring->users--; @@ -1992,6 +2006,12 @@ netmap_krings_put(struct netmap_priv_d *priv) } } +static int +nm_priv_rx_enabled(struct netmap_priv_d *priv) +{ + return (priv->np_qfirst[NR_RX] != priv->np_qlast[NR_RX]); +} + /* * possibly move the interface to netmap-mode. * If success it returns a pointer to netmap_if, otherwise NULL. 
@@ -2064,16 +2084,14 @@ netmap_krings_put(struct netmap_priv_d *priv) */ int netmap_do_regif(struct netmap_priv_d *priv, struct netmap_adapter *na, - uint16_t ringid, uint32_t flags) + uint32_t nr_mode, uint16_t nr_ringid, uint64_t nr_flags) { struct netmap_if *nifp = NULL; int error; NMG_LOCK_ASSERT(); - /* ring configuration may have changed, fetch from the card */ - netmap_update_config(na); priv->np_na = na; /* store the reference */ - error = netmap_set_ringid(priv, ringid, flags); + error = netmap_set_ringid(priv, nr_mode, nr_ringid, nr_flags); if (error) goto err; error = netmap_mem_finalize(na->nm_mem, na); @@ -2081,27 +2099,38 @@ netmap_do_regif(struct netmap_priv_d *priv, struct netmap_adapter *na, goto err; if (na->active_fds == 0) { + + /* cache the allocator info in the na */ + error = netmap_mem_get_lut(na->nm_mem, &na->na_lut); + if (error) + goto err_drop_mem; + ND("lut %p bufs %u size %u", na->na_lut.lut, na->na_lut.objtotal, + na->na_lut.objsize); + + /* ring configuration may have changed, fetch from the card */ + netmap_update_config(na); + /* * If this is the first registration of the adapter, * perform sanity checks and create the in-kernel view * of the netmap rings (the netmap krings). */ - if (na->ifp) { + if (na->ifp && nm_priv_rx_enabled(priv)) { /* This netmap adapter is attached to an ifnet. */ unsigned nbs = netmap_mem_bufsize(na->nm_mem); unsigned mtu = nm_os_ifnet_mtu(na->ifp); - /* The maximum amount of bytes that a single - * receive or transmit NIC descriptor can hold. */ - unsigned hw_max_slot_len = 4096; - if (mtu <= hw_max_slot_len) { + ND("mtu %d rx_buf_maxsize %d netmap_buf_size %d", + mtu, na->rx_buf_maxsize, nbs); + + if (mtu <= na->rx_buf_maxsize) { /* The MTU fits a single NIC slot. We only * Need to check that netmap buffers are * large enough to hold an MTU. NS_MOREFRAG * cannot be used in this case. 
*/ if (nbs < mtu) { nm_prerr("error: netmap buf size (%u) " - "< device MTU (%u)", nbs, mtu); + "< device MTU (%u)\n", nbs, mtu); error = EINVAL; goto err_drop_mem; } @@ -2114,22 +2143,22 @@ netmap_do_regif(struct netmap_priv_d *priv, struct netmap_adapter *na, if (!(na->na_flags & NAF_MOREFRAG)) { nm_prerr("error: large MTU (%d) needed " "but %s does not support " - "NS_MOREFRAG", mtu, + "NS_MOREFRAG\n", mtu, na->ifp->if_xname); error = EINVAL; goto err_drop_mem; - } else if (nbs < hw_max_slot_len) { + } else if (nbs < na->rx_buf_maxsize) { nm_prerr("error: using NS_MOREFRAG on " "%s requires netmap buf size " - ">= %u", na->ifp->if_xname, - hw_max_slot_len); + ">= %u\n", na->ifp->if_xname, + na->rx_buf_maxsize); error = EINVAL; goto err_drop_mem; } else { nm_prinf("info: netmap application on " "%s needs to support " "NS_MOREFRAG " - "(MTU=%u,netmap_buf_size=%u)", + "(MTU=%u,netmap_buf_size=%u)\n", na->ifp->if_xname, mtu, nbs); } } @@ -2141,7 +2170,7 @@ netmap_do_regif(struct netmap_priv_d *priv, struct netmap_adapter *na, */ error = na->nm_krings_create(na); if (error) - goto err_drop_mem; + goto err_put_lut; } @@ -2165,21 +2194,12 @@ netmap_do_regif(struct netmap_priv_d *priv, struct netmap_adapter *na, goto err_del_rings; } - if (na->active_fds == 0) { - /* cache the allocator info in the na */ - error = netmap_mem_get_lut(na->nm_mem, &na->na_lut); - if (error) - goto err_del_if; - ND("lut %p bufs %u size %u", na->na_lut.lut, na->na_lut.objtotal, - na->na_lut.objsize); - } - if (nm_kring_pending(priv)) { /* Some kring is switching mode, tell the adapter to * react on this. */ error = na->nm_register(na, 1); if (error) - goto err_put_lut; + goto err_del_if; } /* Commit the reference. 
*/ @@ -2195,9 +2215,6 @@ netmap_do_regif(struct netmap_priv_d *priv, struct netmap_adapter *na, return 0; -err_put_lut: - if (na->active_fds == 0) - memset(&na->na_lut, 0, sizeof(na->na_lut)); err_del_if: netmap_mem_if_delete(na, nifp); err_del_rings: @@ -2207,6 +2224,9 @@ err_rel_excl: err_del_krings: if (na->active_fds == 0) na->nm_krings_delete(na); +err_put_lut: + if (na->active_fds == 0) + memset(&na->na_lut, 0, sizeof(na->na_lut)); err_drop_mem: netmap_mem_drop(na); err: @@ -2242,246 +2262,367 @@ ring_timestamp_set(struct netmap_ring *ring) } } +static int nmreq_copyin(struct nmreq_header *, int); +static int nmreq_copyout(struct nmreq_header *, int); +static int nmreq_checkoptions(struct nmreq_header *); /* * ioctl(2) support for the "netmap" device. * * Following a list of accepted commands: - * - NIOCGINFO + * - NIOCCTRL device control API + * - NIOCTXSYNC sync TX rings + * - NIOCRXSYNC sync RX rings * - SIOCGIFADDR just for convenience - * - NIOCREGIF - * - NIOCTXSYNC - * - NIOCRXSYNC + * - NIOCGINFO deprecated (legacy API) + * - NIOCREGIF deprecated (legacy API) * * Return 0 on success, errno otherwise. 
*/ int -netmap_ioctl(struct netmap_priv_d *priv, u_long cmd, caddr_t data, struct thread *td) +netmap_ioctl(struct netmap_priv_d *priv, u_long cmd, caddr_t data, + struct thread *td, int nr_body_is_user) { struct mbq q; /* packets from RX hw queues to host stack */ - struct nmreq *nmr = (struct nmreq *) data; struct netmap_adapter *na = NULL; struct netmap_mem_d *nmd = NULL; struct ifnet *ifp = NULL; int error = 0; u_int i, qfirst, qlast; struct netmap_if *nifp; - struct netmap_kring *krings; + struct netmap_kring **krings; int sync_flags; enum txrx t; - if (cmd == NIOCGINFO || cmd == NIOCREGIF) { - /* truncate name */ - nmr->nr_name[sizeof(nmr->nr_name) - 1] = '\0'; - if (nmr->nr_version != NETMAP_API) { - D("API mismatch for %s got %d need %d", - nmr->nr_name, - nmr->nr_version, NETMAP_API); - nmr->nr_version = NETMAP_API; + switch (cmd) { + case NIOCCTRL: { + struct nmreq_header *hdr = (struct nmreq_header *)data; + + if (hdr->nr_version != NETMAP_API) { + D("API mismatch for reqtype %d: got %d need %d", + hdr->nr_version, + hdr->nr_version, NETMAP_API); + hdr->nr_version = NETMAP_API; } - if (nmr->nr_version < NETMAP_MIN_API || - nmr->nr_version > NETMAP_MAX_API) { + if (hdr->nr_version < NETMAP_MIN_API || + hdr->nr_version > NETMAP_MAX_API) { return EINVAL; } - } - switch (cmd) { - case NIOCGINFO: /* return capabilities etc */ - if (nmr->nr_cmd == NETMAP_BDG_LIST) { - error = netmap_bdg_ctl(nmr, NULL); - break; + /* Make a kernel-space copy of the user-space nr_body. + * For convenience, the nr_body pointer and the pointers + * in the options list will be replaced with their + * kernel-space counterparts. The original pointers are + * saved internally and later restored by nmreq_copyout + */ + error = nmreq_copyin(hdr, nr_body_is_user); + if (error) { + return error; } - NMG_LOCK(); - do { - /* memsize is always valid */ - u_int memflags; - uint64_t memsize; + /* Sanitize hdr->nr_name. 
*/ + hdr->nr_name[sizeof(hdr->nr_name) - 1] = '\0'; + + switch (hdr->nr_reqtype) { + case NETMAP_REQ_REGISTER: { + struct nmreq_register *req = + (struct nmreq_register *)hdr->nr_body; + /* Protect access to priv from concurrent requests. */ + NMG_LOCK(); + do { + u_int memflags; +#ifdef WITH_EXTMEM + struct nmreq_option *opt; +#endif /* WITH_EXTMEM */ + + if (priv->np_nifp != NULL) { /* thread already registered */ + error = EBUSY; + break; + } + +#ifdef WITH_EXTMEM + opt = nmreq_findoption((struct nmreq_option *)hdr->nr_options, + NETMAP_REQ_OPT_EXTMEM); + if (opt != NULL) { + struct nmreq_opt_extmem *e = + (struct nmreq_opt_extmem *)opt; + + error = nmreq_checkduplicate(opt); + if (error) { + opt->nro_status = error; + break; + } + nmd = netmap_mem_ext_create(e->nro_usrptr, + &e->nro_info, &error); + opt->nro_status = error; + if (nmd == NULL) + break; + } +#endif /* WITH_EXTMEM */ + + if (nmd == NULL && req->nr_mem_id) { + /* find the allocator and get a reference */ + nmd = netmap_mem_find(req->nr_mem_id); + if (nmd == NULL) { + error = EINVAL; + break; + } + } + /* find the interface and a reference */ + error = netmap_get_na(hdr, &na, &ifp, nmd, + 1 /* create */); /* keep reference */ + if (error) + break; + if (NETMAP_OWNED_BY_KERN(na)) { + error = EBUSY; + break; + } - if (nmr->nr_name[0] != '\0') { + if (na->virt_hdr_len && !(req->nr_flags & NR_ACCEPT_VNET_HDR)) { + error = EIO; + break; + } - /* get a refcount */ - error = netmap_get_na(nmr, &na, &ifp, NULL, 1 /* create */); + error = netmap_do_regif(priv, na, req->nr_mode, + req->nr_ringid, req->nr_flags); + if (error) { /* reg. 
failed, release priv and ref */ + break; + } + nifp = priv->np_nifp; + priv->np_td = td; /* for debugging purposes */ + + /* return the offset of the netmap_if object */ + req->nr_rx_rings = na->num_rx_rings; + req->nr_tx_rings = na->num_tx_rings; + req->nr_rx_slots = na->num_rx_desc; + req->nr_tx_slots = na->num_tx_desc; + error = netmap_mem_get_info(na->nm_mem, &req->nr_memsize, &memflags, + &req->nr_mem_id); if (error) { - na = NULL; - ifp = NULL; + netmap_do_unregif(priv); break; } - nmd = na->nm_mem; /* get memory allocator */ - } else { - nmd = netmap_mem_find(nmr->nr_arg2 ? nmr->nr_arg2 : 1); - if (nmd == NULL) { - error = EINVAL; + if (memflags & NETMAP_MEM_PRIVATE) { + *(uint32_t *)(uintptr_t)&nifp->ni_flags |= NI_PRIV_MEM; + } + for_rx_tx(t) { + priv->np_si[t] = nm_si_user(priv, t) ? + &na->si[t] : &NMR(na, t)[priv->np_qfirst[t]]->si; + } + + if (req->nr_extra_bufs) { + if (netmap_verbose) + D("requested %d extra buffers", + req->nr_extra_bufs); + req->nr_extra_bufs = netmap_extra_alloc(na, + &nifp->ni_bufs_head, req->nr_extra_bufs); + if (netmap_verbose) + D("got %d extra buffers", req->nr_extra_bufs); + } + req->nr_offset = netmap_mem_if_offset(na->nm_mem, nifp); + + error = nmreq_checkoptions(hdr); + if (error) { + netmap_do_unregif(priv); break; } + + /* store ifp reference so that priv destructor may release it */ + priv->np_ifp = ifp; + } while (0); + if (error) { + netmap_unget_na(na, ifp); } + /* release the reference from netmap_mem_find() or + * netmap_mem_ext_create() + */ + if (nmd) + netmap_mem_put(nmd); + NMG_UNLOCK(); + break; + } - error = netmap_mem_get_info(nmd, &memsize, &memflags, - &nmr->nr_arg2); - if (error) - break; - nmr->nr_memsize = (uint32_t)memsize; - if (na == NULL) /* only memory info */ - break; - nmr->nr_offset = 0; - nmr->nr_rx_slots = nmr->nr_tx_slots = 0; - netmap_update_config(na); - nmr->nr_rx_rings = na->num_rx_rings; - nmr->nr_tx_rings = na->num_tx_rings; - nmr->nr_rx_slots = na->num_rx_desc; - nmr->nr_tx_slots = 
na->num_tx_desc; - } while (0); - netmap_unget_na(na, ifp); - NMG_UNLOCK(); - break; + case NETMAP_REQ_PORT_INFO_GET: { + struct nmreq_port_info_get *req = + (struct nmreq_port_info_get *)hdr->nr_body; - case NIOCREGIF: - /* - * If nmr->nr_cmd is not zero, this NIOCREGIF is not really - * a regif operation, but a different one, specified by the - * value of nmr->nr_cmd. - */ - i = nmr->nr_cmd; - if (i == NETMAP_BDG_ATTACH || i == NETMAP_BDG_DETACH - || i == NETMAP_BDG_VNET_HDR - || i == NETMAP_BDG_NEWIF - || i == NETMAP_BDG_DELIF - || i == NETMAP_BDG_POLLING_ON - || i == NETMAP_BDG_POLLING_OFF) { - /* possibly attach/detach NIC and VALE switch */ - error = netmap_bdg_ctl(nmr, NULL); + NMG_LOCK(); + do { + u_int memflags; + + if (hdr->nr_name[0] != '\0') { + /* Build a nmreq_register out of the nmreq_port_info_get, + * so that we can call netmap_get_na(). */ + struct nmreq_register regreq; + bzero(&regreq, sizeof(regreq)); + regreq.nr_tx_slots = req->nr_tx_slots; + regreq.nr_rx_slots = req->nr_rx_slots; + regreq.nr_tx_rings = req->nr_tx_rings; + regreq.nr_rx_rings = req->nr_rx_rings; + regreq.nr_mem_id = req->nr_mem_id; + + /* get a refcount */ + hdr->nr_reqtype = NETMAP_REQ_REGISTER; + hdr->nr_body = (uint64_t)&regreq; + error = netmap_get_na(hdr, &na, &ifp, NULL, 1 /* create */); + hdr->nr_reqtype = NETMAP_REQ_PORT_INFO_GET; /* reset type */ + hdr->nr_body = (uint64_t)req; /* reset nr_body */ + if (error) { + na = NULL; + ifp = NULL; + break; + } + nmd = na->nm_mem; /* get memory allocator */ + } else { + nmd = netmap_mem_find(req->nr_mem_id ? 
req->nr_mem_id : 1); + if (nmd == NULL) { + error = EINVAL; + break; + } + } + + error = netmap_mem_get_info(nmd, &req->nr_memsize, &memflags, + &req->nr_mem_id); + if (error) + break; + if (na == NULL) /* only memory info */ + break; + req->nr_offset = 0; + req->nr_rx_slots = req->nr_tx_slots = 0; + netmap_update_config(na); + req->nr_rx_rings = na->num_rx_rings; + req->nr_tx_rings = na->num_tx_rings; + req->nr_rx_slots = na->num_rx_desc; + req->nr_tx_slots = na->num_tx_desc; + } while (0); + netmap_unget_na(na, ifp); + NMG_UNLOCK(); + break; + } +#ifdef WITH_VALE + case NETMAP_REQ_VALE_ATTACH: { + error = nm_bdg_ctl_attach(hdr, NULL /* userspace request */); + break; + } + + case NETMAP_REQ_VALE_DETACH: { + error = nm_bdg_ctl_detach(hdr, NULL /* userspace request */); + break; + } + + case NETMAP_REQ_VALE_LIST: { + error = netmap_bdg_list(hdr); break; - } else if (i == NETMAP_PT_HOST_CREATE || i == NETMAP_PT_HOST_DELETE) { - /* forward the command to the ptnetmap subsystem */ - error = ptnetmap_ctl(nmr, priv->np_na); + } + + case NETMAP_REQ_PORT_HDR_SET: { + struct nmreq_port_hdr *req = + (struct nmreq_port_hdr *)hdr->nr_body; + /* Build a nmreq_register out of the nmreq_port_hdr, + * so that we can call netmap_get_bdg_na(). */ + struct nmreq_register regreq; + bzero(&regreq, sizeof(regreq)); + /* For now we only support virtio-net headers, and only for + * VALE ports, but this may change in future. Valid lengths + * for the virtio-net header are 0 (no header), 10 and 12. 
*/ + if (req->nr_hdr_len != 0 && + req->nr_hdr_len != sizeof(struct nm_vnet_hdr) && + req->nr_hdr_len != 12) { + error = EINVAL; + break; + } + NMG_LOCK(); + hdr->nr_reqtype = NETMAP_REQ_REGISTER; + hdr->nr_body = (uint64_t)&regreq; + error = netmap_get_bdg_na(hdr, &na, NULL, 0); + hdr->nr_reqtype = NETMAP_REQ_PORT_HDR_SET; + hdr->nr_body = (uint64_t)req; + if (na && !error) { + struct netmap_vp_adapter *vpna = + (struct netmap_vp_adapter *)na; + na->virt_hdr_len = req->nr_hdr_len; + if (na->virt_hdr_len) { + vpna->mfs = NETMAP_BUF_SIZE(na); + } + D("Using vnet_hdr_len %d for %p", na->virt_hdr_len, na); + netmap_adapter_put(na); + } else if (!na) { + error = ENXIO; + } + NMG_UNLOCK(); break; - } else if (i == NETMAP_VNET_HDR_GET) { - /* get vnet-header length for this netmap port */ + } + + case NETMAP_REQ_PORT_HDR_GET: { + /* Get vnet-header length for this netmap port */ + struct nmreq_port_hdr *req = + (struct nmreq_port_hdr *)hdr->nr_body; + /* Build a nmreq_register out of the nmreq_port_hdr, + * so that we can call netmap_get_bdg_na(). 
*/ + struct nmreq_register regreq; struct ifnet *ifp; + bzero(&regreq, sizeof(regreq)); NMG_LOCK(); - error = netmap_get_na(nmr, &na, &ifp, NULL, 0); + hdr->nr_reqtype = NETMAP_REQ_REGISTER; + hdr->nr_body = (uint64_t)&regreq; + error = netmap_get_na(hdr, &na, &ifp, NULL, 0); + hdr->nr_reqtype = NETMAP_REQ_PORT_HDR_GET; + hdr->nr_body = (uint64_t)req; if (na && !error) { - nmr->nr_arg1 = na->virt_hdr_len; + req->nr_hdr_len = na->virt_hdr_len; } netmap_unget_na(na, ifp); NMG_UNLOCK(); break; - } else if (i == NETMAP_POOLS_INFO_GET) { - /* get information from the memory allocator */ + } + + case NETMAP_REQ_VALE_NEWIF: { + error = nm_vi_create(hdr); + break; + } + + case NETMAP_REQ_VALE_DELIF: { + error = nm_vi_destroy(hdr->nr_name); + break; + } + + case NETMAP_REQ_VALE_POLLING_ENABLE: + case NETMAP_REQ_VALE_POLLING_DISABLE: { + error = nm_bdg_polling(hdr); + break; + } +#endif /* WITH_VALE */ + case NETMAP_REQ_POOLS_INFO_GET: { + struct nmreq_pools_info *req = + (struct nmreq_pools_info *)hdr->nr_body; + /* Get information from the memory allocator. This + * netmap device must already be bound to a port. + * Note that hdr->nr_name is ignored. 
*/ NMG_LOCK(); if (priv->np_na && priv->np_na->nm_mem) { struct netmap_mem_d *nmd = priv->np_na->nm_mem; - error = netmap_mem_pools_info_get(nmr, nmd); + error = netmap_mem_pools_info_get(req, nmd); } else { error = EINVAL; } NMG_UNLOCK(); break; - } else if (i == NETMAP_POOLS_CREATE) { - nmd = netmap_mem_ext_create(nmr, &error); - if (nmd == NULL) - break; - /* reset the fields used by POOLS_CREATE to - * avoid confusing the rest of the code - */ - nmr->nr_cmd = 0; - nmr->nr_arg1 = 0; - nmr->nr_arg2 = 0; - nmr->nr_arg3 = 0; - } else if (i != 0) { - D("nr_cmd must be 0 not %d", i); + } + + default: { error = EINVAL; break; } - - /* protect access to priv from concurrent NIOCREGIF */ - NMG_LOCK(); - do { - u_int memflags; - uint64_t memsize; - - if (priv->np_nifp != NULL) { /* thread already registered */ - error = EBUSY; - break; - } - - if (nmr->nr_arg2) { - /* find the allocator and get a reference */ - nmd = netmap_mem_find(nmr->nr_arg2); - if (nmd == NULL) { - error = EINVAL; - break; - } - } - /* find the interface and a reference */ - error = netmap_get_na(nmr, &na, &ifp, nmd, - 1 /* create */); /* keep reference */ - if (error) - break; - if (NETMAP_OWNED_BY_KERN(na)) { - error = EBUSY; - break; - } - - if (na->virt_hdr_len && !(nmr->nr_flags & NR_ACCEPT_VNET_HDR)) { - error = EIO; - break; - } - - error = netmap_do_regif(priv, na, nmr->nr_ringid, nmr->nr_flags); - if (error) { /* reg. 
failed, release priv and ref */ - break; - } - nifp = priv->np_nifp; - priv->np_td = td; // XXX kqueue, debugging only - - /* return the offset of the netmap_if object */ - nmr->nr_rx_rings = na->num_rx_rings; - nmr->nr_tx_rings = na->num_tx_rings; - nmr->nr_rx_slots = na->num_rx_desc; - nmr->nr_tx_slots = na->num_tx_desc; - error = netmap_mem_get_info(na->nm_mem, &memsize, &memflags, - &nmr->nr_arg2); - if (error) { - netmap_do_unregif(priv); - break; - } - nmr->nr_memsize = (uint32_t)memsize; - if (memflags & NETMAP_MEM_PRIVATE) { - *(uint32_t *)(uintptr_t)&nifp->ni_flags |= NI_PRIV_MEM; - } - for_rx_tx(t) { - priv->np_si[t] = nm_si_user(priv, t) ? - &na->si[t] : &NMR(na, t)[priv->np_qfirst[t]].si; - } - - if (nmr->nr_arg3) { - if (netmap_verbose) - D("requested %d extra buffers", nmr->nr_arg3); - nmr->nr_arg3 = netmap_extra_alloc(na, - &nifp->ni_bufs_head, nmr->nr_arg3); - if (netmap_verbose) - D("got %d extra buffers", nmr->nr_arg3); - } - nmr->nr_offset = netmap_mem_if_offset(na->nm_mem, nifp); - - /* store ifp reference so that priv destructor may release it */ - priv->np_ifp = ifp; - } while (0); - if (error) { - netmap_unget_na(na, ifp); } - /* release the reference from netmap_mem_find() or - * netmap_mem_ext_create() - */ - if (nmd) - netmap_mem_put(nmd); - NMG_UNLOCK(); + /* Write back request body to userspace and reset the + * user-space pointer. 
*/ + error = nmreq_copyout(hdr, error); break; + } case NIOCTXSYNC: - case NIOCRXSYNC: + case NIOCRXSYNC: { nifp = priv->np_nifp; if (nifp == NULL) { @@ -2506,7 +2647,7 @@ netmap_ioctl(struct netmap_priv_d *priv, u_long cmd, caddr_t data, struct thread sync_flags = priv->np_sync_flags; for (i = qfirst; i < qlast; i++) { - struct netmap_kring *kring = krings + i; + struct netmap_kring *kring = krings[i]; struct netmap_ring *ring = kring->ring; if (unlikely(nm_kr_tryget(kring, 1, &error))) { @@ -2549,51 +2690,292 @@ netmap_ioctl(struct netmap_priv_d *priv, u_long cmd, caddr_t data, struct thread } break; + } -#ifdef WITH_VALE - case NIOCCONFIG: - error = netmap_bdg_config(nmr); - break; -#endif -#ifdef __FreeBSD__ - case FIONBIO: - case FIOASYNC: - ND("FIONBIO/FIOASYNC are no-ops"); + default: { + return netmap_ioctl_legacy(priv, cmd, data, td); break; + } + } + + return (error); +} + +size_t +nmreq_size_by_type(uint16_t nr_reqtype) +{ + switch (nr_reqtype) { + case NETMAP_REQ_REGISTER: + return sizeof(struct nmreq_register); + case NETMAP_REQ_PORT_INFO_GET: + return sizeof(struct nmreq_port_info_get); + case NETMAP_REQ_VALE_ATTACH: + return sizeof(struct nmreq_vale_attach); + case NETMAP_REQ_VALE_DETACH: + return sizeof(struct nmreq_vale_detach); + case NETMAP_REQ_VALE_LIST: + return sizeof(struct nmreq_vale_list); + case NETMAP_REQ_PORT_HDR_SET: + case NETMAP_REQ_PORT_HDR_GET: + return sizeof(struct nmreq_port_hdr); + case NETMAP_REQ_VALE_NEWIF: + return sizeof(struct nmreq_vale_newif); + case NETMAP_REQ_VALE_DELIF: + return 0; + case NETMAP_REQ_VALE_POLLING_ENABLE: + case NETMAP_REQ_VALE_POLLING_DISABLE: + return sizeof(struct nmreq_vale_polling); + case NETMAP_REQ_POOLS_INFO_GET: + return sizeof(struct nmreq_pools_info); + } + return 0; +} - case BIOCIMMEDIATE: - case BIOCGHDRCMPLT: - case BIOCSHDRCMPLT: - case BIOCSSEESENT: - D("ignore BIOCIMMEDIATE/BIOCSHDRCMPLT/BIOCSHDRCMPLT/BIOCSSEESENT"); +static size_t +nmreq_opt_size_by_type(uint16_t nro_reqtype) +{ + 
size_t rv = sizeof(struct nmreq_option); +#ifdef NETMAP_REQ_OPT_DEBUG + if (nro_reqtype & NETMAP_REQ_OPT_DEBUG) + return (nro_reqtype & ~NETMAP_REQ_OPT_DEBUG); +#endif /* NETMAP_REQ_OPT_DEBUG */ + switch (nro_reqtype) { +#ifdef WITH_EXTMEM + case NETMAP_REQ_OPT_EXTMEM: + rv = sizeof(struct nmreq_opt_extmem); break; +#endif /* WITH_EXTMEM */ + } + /* subtract the common header */ + return rv - sizeof(struct nmreq_option); +} - default: /* allow device-specific ioctls */ - { - struct ifnet *ifp = ifunit_ref(nmr->nr_name); - if (ifp == NULL) { - error = ENXIO; - } else { - struct socket so; +int +nmreq_copyin(struct nmreq_header *hdr, int nr_body_is_user) +{ + size_t rqsz, optsz, bufsz; + int error; + char *ker = NULL, *p; + struct nmreq_option **next, *src; + struct nmreq_option buf; + uint64_t *ptrs; + + if (hdr->nr_reserved) + return EINVAL; - bzero(&so, sizeof(so)); - so.so_vnet = ifp->if_vnet; - // so->so_proto not null. - error = ifioctl(&so, cmd, data, td); - if_rele(ifp); + if (!nr_body_is_user) + return 0; + + hdr->nr_reserved = nr_body_is_user; + + /* compute the total size of the buffer */ + rqsz = nmreq_size_by_type(hdr->nr_reqtype); + if (rqsz > NETMAP_REQ_MAXSIZE) { + error = EMSGSIZE; + goto out_err; + } + if ((rqsz && hdr->nr_body == (uint64_t)NULL) || + (!rqsz && hdr->nr_body != (uint64_t)NULL)) { + /* Request body expected, but not found; or + * request body found but unexpected. 
*/ + error = EINVAL; + goto out_err; + } + + bufsz = 2 * sizeof(void *) + rqsz; + optsz = 0; + for (src = (struct nmreq_option *)hdr->nr_options; src; + src = (struct nmreq_option *)buf.nro_next) + { + error = copyin(src, &buf, sizeof(*src)); + if (error) + goto out_err; + optsz += sizeof(*src); + optsz += nmreq_opt_size_by_type(buf.nro_reqtype); + if (rqsz + optsz > NETMAP_REQ_MAXSIZE) { + error = EMSGSIZE; + goto out_err; } - break; - } + bufsz += optsz + sizeof(void *); + } -#else /* linux */ - default: - error = EOPNOTSUPP; -#endif /* linux */ + ker = nm_os_malloc(bufsz); + if (ker == NULL) { + error = ENOMEM; + goto out_err; } + p = ker; - return (error); + /* make a copy of the user pointers */ + ptrs = (uint64_t*)p; + *ptrs++ = hdr->nr_body; + *ptrs++ = hdr->nr_options; + p = (char *)ptrs; + + /* copy the body */ + error = copyin((void *)hdr->nr_body, p, rqsz); + if (error) + goto out_restore; + /* overwrite the user pointer with the in-kernel one */ + hdr->nr_body = (uint64_t)p; + p += rqsz; + + /* copy the options */ + next = (struct nmreq_option **)&hdr->nr_options; + src = *next; + while (src) { + struct nmreq_option *opt; + + /* copy the option header */ + ptrs = (uint64_t *)p; + opt = (struct nmreq_option *)(ptrs + 1); + error = copyin(src, opt, sizeof(*src)); + if (error) + goto out_restore; + /* make a copy of the user next pointer */ + *ptrs = opt->nro_next; + /* overwrite the user pointer with the in-kernel one */ + *next = opt; + + /* initialize the option as not supported. + * Recognized options will update this field. 
+ */ + opt->nro_status = EOPNOTSUPP; + + p = (char *)(opt + 1); + + /* copy the option body */ + optsz = nmreq_opt_size_by_type(opt->nro_reqtype); + if (optsz) { + /* the option body follows the option header */ + error = copyin(src + 1, p, optsz); + if (error) + goto out_restore; + p += optsz; + } + + /* move to next option */ + next = (struct nmreq_option **)&opt->nro_next; + src = *next; + } + return 0; + +out_restore: + ptrs = (uint64_t *)ker; + hdr->nr_body = *ptrs++; + hdr->nr_options = *ptrs++; + hdr->nr_reserved = 0; + nm_os_free(ker); +out_err: + return error; +} + +static int +nmreq_copyout(struct nmreq_header *hdr, int rerror) +{ + struct nmreq_option *src, *dst; + void *ker = (void *)hdr->nr_body, *bufstart; + uint64_t *ptrs; + size_t bodysz; + int error; + + if (!hdr->nr_reserved) + return rerror; + + /* restore the user pointers in the header */ + ptrs = (uint64_t *)ker - 2; + bufstart = ptrs; + hdr->nr_body = *ptrs++; + src = (struct nmreq_option *)hdr->nr_options; + hdr->nr_options = *ptrs; + + if (!rerror) { + /* copy the body */ + bodysz = nmreq_size_by_type(hdr->nr_reqtype); + error = copyout(ker, (void *)hdr->nr_body, bodysz); + if (error) { + rerror = error; + goto out; + } + } + + /* copy the options */ + dst = (struct nmreq_option *)hdr->nr_options; + while (src) { + size_t optsz; + uint64_t next; + + /* restore the user pointer */ + next = src->nro_next; + ptrs = (uint64_t *)src - 1; + src->nro_next = *ptrs; + + /* always copy the option header */ + error = copyout(src, dst, sizeof(*src)); + if (error) { + rerror = error; + goto out; + } + + /* copy the option body only if there was no error */ + if (!rerror && !src->nro_status) { + optsz = nmreq_opt_size_by_type(src->nro_reqtype); + if (optsz) { + error = copyout(src + 1, dst + 1, optsz); + if (error) { + rerror = error; + goto out; + } + } + } + src = (struct nmreq_option *)next; + dst = (struct nmreq_option *)*ptrs; + } + + +out: + hdr->nr_reserved = 0; + nm_os_free(bufstart); + return 
rerror; +} + +struct nmreq_option * +nmreq_findoption(struct nmreq_option *opt, uint16_t reqtype) +{ + for ( ; opt; opt = (struct nmreq_option *)opt->nro_next) + if (opt->nro_reqtype == reqtype) + return opt; + return NULL; +} + +int +nmreq_checkduplicate(struct nmreq_option *opt) { + uint16_t type = opt->nro_reqtype; + int dup = 0; + + while ((opt = nmreq_findoption((struct nmreq_option *)opt->nro_next, + type))) { + dup++; + opt->nro_status = EINVAL; + } + return (dup ? EINVAL : 0); } +static int +nmreq_checkoptions(struct nmreq_header *hdr) +{ + struct nmreq_option *opt; + /* return error if there is still any option + * marked as not supported + */ + + for (opt = (struct nmreq_option *)hdr->nr_options; opt; + opt = (struct nmreq_option *)opt->nro_next) + if (opt->nro_status == EOPNOTSUPP) + return EOPNOTSUPP; + + return 0; +} /* * select(2) and poll(2) handlers for the "netmap" device. @@ -2680,7 +3062,7 @@ netmap_poll(struct netmap_priv_d *priv, int events, NM_SELRECORD_T *sr) if (want_tx) { enum txrx t = NR_TX; for (i = priv->np_qfirst[t]; want[t] && i < priv->np_qlast[t]; i++) { - kring = &NMR(na, t)[i]; + kring = NMR(na, t)[i]; /* XXX compare ring->cur and kring->tail */ if (!nm_ring_empty(kring->ring)) { revents |= want[t]; @@ -2692,7 +3074,7 @@ netmap_poll(struct netmap_priv_d *priv, int events, NM_SELRECORD_T *sr) enum txrx t = NR_RX; want_rx = 0; /* look for a reason to run the handlers */ for (i = priv->np_qfirst[t]; i < priv->np_qlast[t]; i++) { - kring = &NMR(na, t)[i]; + kring = NMR(na, t)[i]; if (kring->ring->cur == kring->ring->tail /* try fetch new buffers */ || kring->rhead != kring->ring->head /* release buffers */) { want_rx = 1; @@ -2706,9 +3088,9 @@ netmap_poll(struct netmap_priv_d *priv, int events, NM_SELRECORD_T *sr) #ifdef linux /* The selrecord must be unconditional on linux. */ nm_os_selrecord(sr, check_all_tx ? 
- &na->si[NR_TX] : &na->tx_rings[priv->np_qfirst[NR_TX]].si); + &na->si[NR_TX] : &na->tx_rings[priv->np_qfirst[NR_TX]]->si); nm_os_selrecord(sr, check_all_rx ? - &na->si[NR_RX] : &na->rx_rings[priv->np_qfirst[NR_RX]].si); + &na->si[NR_RX] : &na->rx_rings[priv->np_qfirst[NR_RX]]->si); #endif /* linux */ /* @@ -2728,16 +3110,16 @@ flush_tx: for (i = priv->np_qfirst[NR_TX]; i < priv->np_qlast[NR_TX]; i++) { int found = 0; - kring = &na->tx_rings[i]; + kring = na->tx_rings[i]; ring = kring->ring; /* * Don't try to txsync this TX ring if we already found some * space in some of the TX rings (want_tx == 0) and there are no * TX slots in this ring that need to be flushed to the NIC - * (cur == hwcur). + * (head == hwcur). */ - if (!send_down && !want_tx && ring->cur == kring->nr_hwcur) + if (!send_down && !want_tx && ring->head == kring->nr_hwcur) continue; if (nm_kr_tryget(kring, 1, &revents)) @@ -2774,7 +3156,7 @@ flush_tx: if (want_tx && retry_tx && sr) { #ifndef linux nm_os_selrecord(sr, check_all_tx ? - &na->si[NR_TX] : &na->tx_rings[priv->np_qfirst[NR_TX]].si); + &na->si[NR_TX] : &na->tx_rings[priv->np_qfirst[NR_TX]]->si); #endif /* !linux */ retry_tx = 0; goto flush_tx; @@ -2791,7 +3173,7 @@ do_retry_rx: for (i = priv->np_qfirst[NR_RX]; i < priv->np_qlast[NR_RX]; i++) { int found = 0; - kring = &na->rx_rings[i]; + kring = na->rx_rings[i]; ring = kring->ring; if (unlikely(nm_kr_tryget(kring, 1, &revents))) @@ -2835,7 +3217,7 @@ do_retry_rx: #ifndef linux if (retry_rx && sr) { nm_os_selrecord(sr, check_all_rx ? 
- &na->si[NR_RX] : &na->rx_rings[priv->np_qfirst[NR_RX]].si); + &na->si[NR_RX] : &na->rx_rings[priv->np_qfirst[NR_RX]]->si); } #endif /* !linux */ if (send_down || retry_rx) { @@ -2871,7 +3253,7 @@ nma_intr_enable(struct netmap_adapter *na, int onoff) for_rx_tx(t) { for (i = 0; i < nma_get_nrings(na, t); i++) { - struct netmap_kring *kring = &NMR(na, t)[i]; + struct netmap_kring *kring = NMR(na, t)[i]; int on = !(kring->nr_kflags & NKR_NOINTR); if (!!onoff != !!on) { @@ -2907,7 +3289,7 @@ nma_intr_enable(struct netmap_adapter *na, int onoff) static int netmap_notify(struct netmap_kring *kring, int flags) { - struct netmap_adapter *na = kring->na; + struct netmap_adapter *na = kring->notify_na; enum txrx t = kring->tx; nm_os_selwakeup(&kring->si); @@ -2934,6 +3316,11 @@ netmap_attach_common(struct netmap_adapter *na) return EINVAL; } + if (!na->rx_buf_maxsize) { + /* Set a conservative default (larger is safer). */ + na->rx_buf_maxsize = PAGE_SIZE; + } + #ifdef __FreeBSD__ if (na->na_flags & NAF_HOST_RINGS && na->ifp) { na->if_input = na->ifp->if_input; /* for netmap_send_up */ @@ -3149,7 +3536,7 @@ netmap_hw_krings_create(struct netmap_adapter *na) int ret = netmap_krings_create(na, 0); if (ret == 0) { /* initialize the mbq for the sw rx ring */ - mbq_safe_init(&na->rx_rings[na->num_rx_rings].rx_queue); + mbq_safe_init(&na->rx_rings[na->num_rx_rings]->rx_queue); ND("initialized sw rx queue %d", na->num_rx_rings); } return ret; @@ -3213,7 +3600,7 @@ netmap_transmit(struct ifnet *ifp, struct mbuf *m) struct mbq *q; int busy; - kring = &na->rx_rings[na->num_rx_rings]; + kring = na->rx_rings[na->num_rx_rings]; // XXX [Linux] we do not need this lock // if we follow the down/configure/up protocol -gl // mtx_lock(&na->core_lock); @@ -3228,7 +3615,7 @@ netmap_transmit(struct ifnet *ifp, struct mbuf *m) if (txr >= na->num_tx_rings) { txr %= na->num_tx_rings; } - tx_kring = &NMR(na, NR_TX)[txr]; + tx_kring = NMR(na, NR_TX)[txr]; if (tx_kring->nr_mode == NKR_NETMAP_OFF) { 
return MBUF_TRANSMIT(na, ifp, m); @@ -3316,7 +3703,7 @@ netmap_reset(struct netmap_adapter *na, enum txrx tx, u_int n, if (n >= na->num_tx_rings) return NULL; - kring = na->tx_rings + n; + kring = na->tx_rings[n]; if (kring->nr_pending_mode == NKR_NETMAP_OFF) { kring->nr_mode = NKR_NETMAP_OFF; @@ -3328,7 +3715,7 @@ netmap_reset(struct netmap_adapter *na, enum txrx tx, u_int n, } else { if (n >= na->num_rx_rings) return NULL; - kring = na->rx_rings + n; + kring = na->rx_rings[n]; if (kring->nr_pending_mode == NKR_NETMAP_OFF) { kring->nr_mode = NKR_NETMAP_OFF; @@ -3396,7 +3783,7 @@ netmap_common_irq(struct netmap_adapter *na, u_int q, u_int *work_done) if (q >= nma_get_nrings(na, t)) return NM_IRQ_PASS; // not a physical queue - kring = NMR(na, t) + q; + kring = NMR(na, t)[q]; if (kring->nr_mode == NKR_NETMAP_OFF) { return NM_IRQ_PASS; diff --git a/sys/dev/netmap/netmap_freebsd.c b/sys/dev/netmap/netmap_freebsd.c index c122dc64bed26..cc63b4b478617 100644 --- a/sys/dev/netmap/netmap_freebsd.c +++ b/sys/dev/netmap/netmap_freebsd.c @@ -619,6 +619,116 @@ nm_os_vi_detach(struct ifnet *ifp) if_free(ifp); } +#ifdef WITH_EXTMEM +#include +#include +struct nm_os_extmem { + vm_object_t obj; + vm_offset_t kva; + vm_offset_t size; + vm_pindex_t scan; +}; + +void +nm_os_extmem_delete(struct nm_os_extmem *e) +{ + D("freeing %lx bytes", e->size); + vm_map_remove(kernel_map, e->kva, e->kva + e->size); + nm_os_free(e); +} + +char * +nm_os_extmem_nextpage(struct nm_os_extmem *e) +{ + char *rv = NULL; + if (e->scan < e->kva + e->size) { + rv = (char *)e->scan; + e->scan += PAGE_SIZE; + } + return rv; +} + +int +nm_os_extmem_isequal(struct nm_os_extmem *e1, struct nm_os_extmem *e2) +{ + return (e1->obj == e1->obj); +} + +int +nm_os_extmem_nr_pages(struct nm_os_extmem *e) +{ + return e->size >> PAGE_SHIFT; +} + +struct nm_os_extmem * +nm_os_extmem_create(unsigned long p, struct nmreq_pools_info *pi, int *perror) +{ + vm_map_t map; + vm_map_entry_t entry; + vm_object_t obj; + vm_prot_t 
prot; + vm_pindex_t index; + boolean_t wired; + struct nm_os_extmem *e = NULL; + int rv, error = 0; + + e = nm_os_malloc(sizeof(*e)); + if (e == NULL) { + error = ENOMEM; + goto out; + } + + map = &curthread->td_proc->p_vmspace->vm_map; + rv = vm_map_lookup(&map, p, VM_PROT_RW, &entry, + &obj, &index, &prot, &wired); + if (rv != KERN_SUCCESS) { + D("address %lx not found", p); + goto out_free; + } + /* check that we are given the whole vm_object ? */ + vm_map_lookup_done(map, entry); + + // XXX can we really use obj after releasing the map lock? + e->obj = obj; + vm_object_reference(obj); + /* wire the memory and add the vm_object to the kernel map, + * to make sure that it is not fred even if the processes that + * are mmap()ing it all exit + */ + e->kva = vm_map_min(kernel_map); + e->size = obj->size << PAGE_SHIFT; + rv = vm_map_find(kernel_map, obj, 0, &e->kva, e->size, 0, + VMFS_OPTIMAL_SPACE, VM_PROT_READ | VM_PROT_WRITE, + VM_PROT_READ | VM_PROT_WRITE, 0); + if (rv != KERN_SUCCESS) { + D("vm_map_find(%lx) failed", e->size); + goto out_rel; + } + rv = vm_map_wire(kernel_map, e->kva, e->kva + e->size, + VM_MAP_WIRE_SYSTEM | VM_MAP_WIRE_NOHOLES); + if (rv != KERN_SUCCESS) { + D("vm_map_wire failed"); + goto out_rem; + } + + e->scan = e->kva; + + return e; + +out_rem: + vm_map_remove(kernel_map, e->kva, e->kva + e->size); + e->obj = NULL; +out_rel: + vm_object_deallocate(e->obj); +out_free: + nm_os_free(e); +out: + if (perror) + *perror = error; + return NULL; +} +#endif /* WITH_EXTMEM */ + /* ======================== PTNETMAP SUPPORT ========================== */ #ifdef WITH_PTNETMAP_GUEST @@ -1151,16 +1261,10 @@ nm_os_kctx_worker_setaff(struct nm_kctx *nmk, int affinity) } struct nm_kctx * -nm_os_kctx_create(struct nm_kctx_cfg *cfg, unsigned int cfgtype, - void *opaque) +nm_os_kctx_create(struct nm_kctx_cfg *cfg, void *opaque) { struct nm_kctx *nmk = NULL; - if (cfgtype != PTNETMAP_CFGTYPE_BHYVE) { - D("Unsupported cfgtype %u", cfgtype); - return NULL; - } - 
nmk = malloc(sizeof(*nmk), M_DEVBUF, M_NOWAIT | M_ZERO); if (!nmk) return NULL; @@ -1429,7 +1533,7 @@ freebsd_netmap_ioctl(struct cdev *dev __unused, u_long cmd, caddr_t data, error = ENXIO; goto out; } - error = netmap_ioctl(priv, cmd, data, td); + error = netmap_ioctl(priv, cmd, data, td, /*nr_body_is_user=*/1); out: CURVNET_RESTORE(); diff --git a/sys/dev/netmap/netmap_generic.c b/sys/dev/netmap/netmap_generic.c index 2ed251a557756..24d36d5db1b30 100644 --- a/sys/dev/netmap/netmap_generic.c +++ b/sys/dev/netmap/netmap_generic.c @@ -232,7 +232,7 @@ nm_os_get_mbuf(struct ifnet *ifp, int len) #define for_each_kring_n(_i, _k, _karr, _n) \ - for (_k=_karr, _i = 0; _i < _n; (_k)++, (_i)++) + for ((_k)=*(_karr), (_i) = 0; (_i) < (_n); (_i)++, (_k) = (_karr)[(_i)]) #define for_each_tx_kring(_i, _k, _na) \ for_each_kring_n(_i, _k, (_na)->tx_rings, (_na)->num_tx_rings) @@ -589,7 +589,7 @@ generic_mbuf_destructor(struct mbuf *m) for (;;) { bool match = false; - kring = &na->tx_rings[r]; + kring = na->tx_rings[r]; mtx_lock_spin(&kring->tx_event_lock); if (kring->tx_event == m) { kring->tx_event = NULL; @@ -953,7 +953,7 @@ generic_rx_handler(struct ifnet *ifp, struct mbuf *m) r = r % na->num_rx_rings; } - kring = &na->rx_rings[r]; + kring = na->rx_rings[r]; if (kring->nr_mode == NKR_NETMAP_OFF) { /* We must not intercept this mbuf. */ diff --git a/sys/dev/netmap/netmap_kern.h b/sys/dev/netmap/netmap_kern.h index 3e64510913242..8fc71b8e820ef 100644 --- a/sys/dev/netmap/netmap_kern.h +++ b/sys/dev/netmap/netmap_kern.h @@ -77,7 +77,7 @@ #define WITH_GENERIC #define WITH_PTNETMAP_HOST /* ptnetmap host support */ #define WITH_PTNETMAP_GUEST /* ptnetmap guest support */ - +#define WITH_EXTMEM #endif #if defined(__FreeBSD__) @@ -367,9 +367,6 @@ struct netmap_zmon_list { * the next empty buffer as known by the hardware (next_to_check or so). 
* TX rings: hwcur + hwofs coincides with next_to_send * - * For received packets, slot->flags is set to nkr_slot_flags - * so we can provide a proper initial value. - * * The following fields are used to implement lock-free copy of packets * from input to output ports in VALE switch: * nkr_hwlease buffer after the last one being copied. @@ -401,7 +398,7 @@ struct netmap_zmon_list { struct netmap_kring { struct netmap_ring *ring; - uint32_t nr_hwcur; + uint32_t nr_hwcur; /* should be nr_hwhead */ uint32_t nr_hwtail; /* @@ -424,6 +421,7 @@ struct netmap_kring { * by ptnetmap host ports) */ #define NKR_NOINTR 0x10 /* don't use interrupts on this ring */ +#define NKR_FAKERING 0x20 /* don't allocate/free buffers */ uint32_t nr_mode; uint32_t nr_pending_mode; @@ -450,7 +448,14 @@ struct netmap_kring { NM_LOCK_T q_lock; /* protects kring and ring. */ NM_ATOMIC_T nr_busy; /* prevent concurrent syscalls */ + /* the adapter the owns this kring */ struct netmap_adapter *na; + + /* the adapter that wants to be notified when this kring has + * new slots avaialable. This is usually the same as the above, + * but wrappers may let it point to themselves + */ + struct netmap_adapter *notify_na; /* The following fields are for VALE switch support */ struct nm_bdg_fwd *nkr_ft; @@ -630,6 +635,15 @@ struct netmap_lut { struct netmap_vp_adapter; // forward +/* Struct to be filled by nm_config callbacks. */ +struct nm_config_info { + unsigned num_tx_rings; + unsigned num_rx_rings; + unsigned num_tx_descs; + unsigned num_rx_descs; + unsigned rx_buf_maxsize; +}; + /* * The "struct netmap_adapter" extends the "struct adapter" * (or equivalent) device descriptor. @@ -690,8 +704,8 @@ struct netmap_adapter { * as a contiguous chunk of memory. Each array has * N+1 entries, for the adapter queues and for the host queue. */ - struct netmap_kring *tx_rings; /* array of TX rings. */ - struct netmap_kring *rx_rings; /* array of RX rings. */ + struct netmap_kring **tx_rings; /* array of TX rings. 
*/ + struct netmap_kring **rx_rings; /* array of RX rings. */ void *tailroom; /* space below the rings array */ /* (used for leases) */ @@ -766,8 +780,7 @@ struct netmap_adapter { #define NAF_FORCE_RECLAIM 2 #define NAF_CAN_FORWARD_DOWN 4 /* return configuration information */ - int (*nm_config)(struct netmap_adapter *, - u_int *txr, u_int *txd, u_int *rxr, u_int *rxd); + int (*nm_config)(struct netmap_adapter *, struct nm_config_info *info); int (*nm_krings_create)(struct netmap_adapter *); void (*nm_krings_delete)(struct netmap_adapter *); #ifdef WITH_VALE @@ -787,7 +800,7 @@ struct netmap_adapter { * Called with NMG_LOCK held. */ int (*nm_bdg_attach)(const char *bdg_name, struct netmap_adapter *); - int (*nm_bdg_ctl)(struct netmap_adapter *, struct nmreq *, int); + int (*nm_bdg_ctl)(struct nmreq_header *, struct netmap_adapter *); /* adapter used to attach this adapter to a VALE switch (if any) */ struct netmap_vp_adapter *na_vp; @@ -823,7 +836,13 @@ struct netmap_adapter { /* Offset of ethernet header for each packet. */ u_int virt_hdr_len; - char name[64]; + /* Max number of bytes that the NIC can store in the buffer + * referenced by each RX descriptor. This translates to the maximum + * bytes that a single netmap slot can reference. Larger packets + * require NS_MOREFRAG support. */ + unsigned rx_buf_maxsize; + + char name[NETMAP_REQ_IFNAMSIZ]; /* used at least by pipes */ }; static __inline u_int @@ -856,7 +875,7 @@ nma_set_nrings(struct netmap_adapter *na, enum txrx t, u_int v) na->num_rx_rings = v; } -static __inline struct netmap_kring* +static __inline struct netmap_kring** NMR(struct netmap_adapter *na, enum txrx t) { return (t == NR_TX ? 
na->tx_rings : na->rx_rings); @@ -1011,12 +1030,22 @@ struct netmap_bwrap_adapter { */ struct netmap_priv_d *na_kpriv; struct nm_bdg_polling_state *na_polling_state; + /* we overwrite the hwna->na_vp pointer, so we save + * here its original value, to be restored at detach + */ + struct netmap_vp_adapter *saved_na_vp; }; +int nm_bdg_ctl_attach(struct nmreq_header *hdr, void *auth_token); +int nm_bdg_ctl_detach(struct nmreq_header *hdr, void *auth_token); +int nm_bdg_polling(struct nmreq_header *hdr); int netmap_bwrap_attach(const char *name, struct netmap_adapter *); -int netmap_vi_create(struct nmreq *, int); +int netmap_vi_create(struct nmreq_header *hdr, int); +int nm_vi_create(struct nmreq_header *); +int nm_vi_destroy(const char *name); +int netmap_bdg_list(struct nmreq_header *hdr); #else /* !WITH_VALE */ -#define netmap_vi_create(nmr, a) (EOPNOTSUPP) +#define netmap_vi_create(hdr, a) (EOPNOTSUPP) #endif /* WITH_VALE */ #ifdef WITH_PIPES @@ -1024,10 +1053,12 @@ int netmap_vi_create(struct nmreq *, int); #define NM_MAXPIPES 64 /* max number of pipes per adapter */ struct netmap_pipe_adapter { + /* pipe identifier is up.name */ struct netmap_adapter up; - u_int id; /* pipe identifier */ - int role; /* either NR_REG_PIPE_MASTER or NR_REG_PIPE_SLAVE */ +#define NM_PIPE_ROLE_MASTER 0x1 +#define NM_PIPE_ROLE_SLAVE 0x2 + int role; /* either NM_PIPE_ROLE_MASTER or NM_PIPE_ROLE_SLAVE */ struct netmap_adapter *parent; /* adapter that owns the memory */ struct netmap_pipe_adapter *peer; /* the other end of the pipe */ @@ -1195,6 +1226,7 @@ int netmap_transmit(struct ifnet *, struct mbuf *); struct netmap_slot *netmap_reset(struct netmap_adapter *na, enum txrx tx, u_int n, u_int new_cur); int netmap_ring_reinit(struct netmap_kring *); +int netmap_rings_config_get(struct netmap_adapter *, struct nm_config_info *); /* Return codes for netmap_*x_irq. 
*/ enum { @@ -1255,10 +1287,10 @@ static inline void nm_update_hostrings_mode(struct netmap_adapter *na) { /* Process nr_mode and nr_pending_mode for host rings. */ - na->tx_rings[na->num_tx_rings].nr_mode = - na->tx_rings[na->num_tx_rings].nr_pending_mode; - na->rx_rings[na->num_rx_rings].nr_mode = - na->rx_rings[na->num_rx_rings].nr_pending_mode; + na->tx_rings[na->num_tx_rings]->nr_mode = + na->tx_rings[na->num_tx_rings]->nr_pending_mode; + na->rx_rings[na->num_rx_rings]->nr_mode = + na->rx_rings[na->num_rx_rings]->nr_pending_mode; } /* set/clear native flags and if_transmit/netdev_ops */ @@ -1318,6 +1350,11 @@ nm_clear_native_flags(struct netmap_adapter *na) #endif } +#ifdef linux +int netmap_linux_config(struct netmap_adapter *na, + struct nm_config_info *info); +#endif /* linux */ + /* * nm_*sync_prologue() functions are used in ioctl/poll and ptnetmap * kthreads. @@ -1373,9 +1410,10 @@ uint32_t nm_rxsync_prologue(struct netmap_kring *, struct netmap_ring *); */ int netmap_attach_common(struct netmap_adapter *); /* fill priv->np_[tr]xq{first,last} using the ringid and flags information - * coming from a struct nmreq + * coming from a struct nmreq_register */ -int netmap_interp_ringid(struct netmap_priv_d *priv, uint16_t ringid, uint32_t flags); +int netmap_interp_ringid(struct netmap_priv_d *priv, uint32_t nr_mode, + uint16_t nr_ringid, uint64_t nr_flags); /* update the ring parameters (number and size of tx and rx rings). * It calls the nm_config callback, if available. 
*/ @@ -1409,12 +1447,12 @@ void netmap_disable_all_rings(struct ifnet *); void netmap_enable_all_rings(struct ifnet *); int netmap_do_regif(struct netmap_priv_d *priv, struct netmap_adapter *na, - uint16_t ringid, uint32_t flags); + uint32_t nr_mode, uint16_t nr_ringid, uint64_t nr_flags); void netmap_do_unregif(struct netmap_priv_d *priv); u_int nm_bound_var(u_int *v, u_int dflt, u_int lo, u_int hi, const char *msg); -int netmap_get_na(struct nmreq *nmr, struct netmap_adapter **na, - struct ifnet **ifp, struct netmap_mem_d *nmd, int create); +int netmap_get_na(struct nmreq_header *hdr, struct netmap_adapter **na, + struct ifnet **ifp, struct netmap_mem_d *nmd, int create); void netmap_unget_na(struct netmap_adapter *na, struct ifnet *ifp); int netmap_get_hw_na(struct ifnet *ifp, struct netmap_mem_d *nmd, struct netmap_adapter **na); @@ -1430,18 +1468,19 @@ int netmap_get_hw_na(struct ifnet *ifp, * NM_BDG_MAXPORTS for broadcast, NM_BDG_MAXPORTS+1 to indicate * drop. */ -typedef u_int (*bdg_lookup_fn_t)(struct nm_bdg_fwd *ft, uint8_t *ring_nr, - struct netmap_vp_adapter *); +typedef uint32_t (*bdg_lookup_fn_t)(struct nm_bdg_fwd *ft, uint8_t *ring_nr, + struct netmap_vp_adapter *, void *private_data); typedef int (*bdg_config_fn_t)(struct nm_ifreq *); typedef void (*bdg_dtor_fn_t)(const struct netmap_vp_adapter *); +typedef void *(*bdg_update_private_data_fn_t)(void *private_data, void *callback_data, int *error); struct netmap_bdg_ops { bdg_lookup_fn_t lookup; bdg_config_fn_t config; bdg_dtor_fn_t dtor; }; -u_int netmap_bdg_learning(struct nm_bdg_fwd *ft, uint8_t *dst_ring, - struct netmap_vp_adapter *); +uint32_t netmap_bdg_learning(struct nm_bdg_fwd *ft, uint8_t *dst_ring, + struct netmap_vp_adapter *, void *private_data); #define NM_BRIDGES 8 /* number of bridges */ #define NM_BDG_MAXPORTS 254 /* up to 254 */ @@ -1449,45 +1488,47 @@ u_int netmap_bdg_learning(struct nm_bdg_fwd *ft, uint8_t *dst_ring, #define NM_BDG_NOPORT (NM_BDG_MAXPORTS+1) /* these are redefined 
in case of no VALE support */ -int netmap_get_bdg_na(struct nmreq *nmr, struct netmap_adapter **na, +int netmap_get_bdg_na(struct nmreq_header *hdr, struct netmap_adapter **na, struct netmap_mem_d *nmd, int create); struct nm_bridge *netmap_init_bridges2(u_int); void netmap_uninit_bridges2(struct nm_bridge *, u_int); int netmap_init_bridges(void); void netmap_uninit_bridges(void); -int netmap_bdg_ctl(struct nmreq *nmr, struct netmap_bdg_ops *bdg_ops); -int netmap_bdg_config(struct nmreq *nmr); +int netmap_bdg_regops(const char *name, struct netmap_bdg_ops *bdg_ops, void *private_data, void *auth_token); +int nm_bdg_update_private_data(const char *name, bdg_update_private_data_fn_t callback, + void *callback_data, void *auth_token); +int netmap_bdg_config(struct nm_ifreq *nifr); +void *netmap_bdg_create(const char *bdg_name, int *return_status); +int netmap_bdg_destroy(const char *bdg_name, void *auth_token); #else /* !WITH_VALE */ #define netmap_get_bdg_na(_1, _2, _3, _4) 0 #define netmap_init_bridges(_1) 0 #define netmap_uninit_bridges() -#define netmap_bdg_ctl(_1, _2) EINVAL +#define netmap_bdg_regops(_1, _2) EINVAL #endif /* !WITH_VALE */ #ifdef WITH_PIPES /* max number of pipes per device */ #define NM_MAXPIPES 64 /* XXX this should probably be a sysctl */ void netmap_pipe_dealloc(struct netmap_adapter *); -int netmap_get_pipe_na(struct nmreq *nmr, struct netmap_adapter **na, - struct netmap_mem_d *nmd, int create); +int netmap_get_pipe_na(struct nmreq_header *hdr, struct netmap_adapter **na, + struct netmap_mem_d *nmd, int create); #else /* !WITH_PIPES */ #define NM_MAXPIPES 0 #define netmap_pipe_alloc(_1, _2) 0 #define netmap_pipe_dealloc(_1) -#define netmap_get_pipe_na(nmr, _2, _3, _4) \ - ({ int role__ = (nmr)->nr_flags & NR_REG_MASK; \ - (role__ == NR_REG_PIPE_MASTER || \ - role__ == NR_REG_PIPE_SLAVE) ? EOPNOTSUPP : 0; }) +#define netmap_get_pipe_na(hdr, _2, _3, _4) \ + ((strchr(hdr->nr_name, '{') != NULL || strchr(hdr->nr_name, '}') != NULL) ? 
EOPNOTSUPP : 0) #endif #ifdef WITH_MONITOR -int netmap_get_monitor_na(struct nmreq *nmr, struct netmap_adapter **na, +int netmap_get_monitor_na(struct nmreq_header *hdr, struct netmap_adapter **na, struct netmap_mem_d *nmd, int create); void netmap_monitor_stop(struct netmap_adapter *na); #else -#define netmap_get_monitor_na(nmr, _2, _3, _4) \ - ((nmr)->nr_flags & (NR_MONITOR_TX | NR_MONITOR_RX) ? EOPNOTSUPP : 0) +#define netmap_get_monitor_na(hdr, _2, _3, _4) \ + (((struct nmreq_register *)hdr->nr_body)->nr_flags & (NR_MONITOR_TX | NR_MONITOR_RX) ? EOPNOTSUPP : 0) #endif #ifdef CONFIG_NET_NS @@ -1508,7 +1549,11 @@ void netmap_fini(void); int netmap_get_memory(struct netmap_priv_d* p); void netmap_dtor(void *data); -int netmap_ioctl(struct netmap_priv_d *priv, u_long cmd, caddr_t data, struct thread *); +int netmap_ioctl(struct netmap_priv_d *priv, u_long cmd, caddr_t data, + struct thread *, int nr_body_is_user); +int netmap_ioctl_legacy(struct netmap_priv_d *priv, u_long cmd, caddr_t data, + struct thread *td); +size_t nmreq_size_by_type(uint16_t nr_reqtype); /* netmap_adapter creation/destruction */ @@ -1871,7 +1916,7 @@ static inline int nm_kring_pending(struct netmap_priv_d *np) for_rx_tx(t) { for (i = np->np_qfirst[t]; i < np->np_qlast[t]; i++) { - struct netmap_kring *kring = &NMR(na, t)[i]; + struct netmap_kring *kring = NMR(na, t)[i]; if (kring->nr_mode != kring->nr_pending_mode) { return 1; } @@ -1980,7 +2025,7 @@ void nm_os_mitigation_cleanup(struct nm_generic_mit *mit); struct nm_bdg_fwd { /* forwarding entry for a bridge */ void *ft_buf; /* netmap or indirect buffer */ uint8_t ft_frags; /* how many fragments (only on 1st frag) */ - uint8_t _ft_port; /* dst port (unused) */ + uint16_t ft_offset; /* dst port (unused) */ uint16_t ft_flags; /* flags, e.g. 
indirect */ uint16_t ft_len; /* src fragment len */ uint16_t ft_next; /* next packet to same destination */ @@ -2094,7 +2139,6 @@ struct nm_kctx_cfg { }; /* kthread configuration */ struct nm_kctx *nm_os_kctx_create(struct nm_kctx_cfg *cfg, - unsigned int cfgtype, void *opaque); int nm_os_kctx_worker_start(struct nm_kctx *); void nm_os_kctx_worker_stop(struct nm_kctx *); @@ -2120,19 +2164,21 @@ struct netmap_pt_host_adapter { int (*parent_nm_notify)(struct netmap_kring *kring, int flags); void *ptns; }; -/* ptnetmap HOST routines */ -int netmap_get_pt_host_na(struct nmreq *nmr, struct netmap_adapter **na, - struct netmap_mem_d * nmd, int create); -int ptnetmap_ctl(struct nmreq *nmr, struct netmap_adapter *na); + +/* ptnetmap host-side routines */ +int netmap_get_pt_host_na(struct nmreq_header *hdr, struct netmap_adapter **na, + struct netmap_mem_d * nmd, int create); +int ptnetmap_ctl(const char *nr_name, int create, struct netmap_adapter *na); + static inline int nm_ptnetmap_host_on(struct netmap_adapter *na) { return na && na->na_flags & NAF_PTNETMAP_HOST; } #else /* !WITH_PTNETMAP_HOST */ -#define netmap_get_pt_host_na(nmr, _2, _3, _4) \ - ((nmr)->nr_flags & (NR_PTNETMAP_HOST) ? EOPNOTSUPP : 0) -#define ptnetmap_ctl(_1, _2) EINVAL +#define netmap_get_pt_host_na(hdr, _2, _3, _4) \ + (((struct nmreq_register *)hdr->nr_body)->nr_flags & (NR_PTNETMAP_HOST) ? 
EOPNOTSUPP : 0) +#define ptnetmap_ctl(_1, _2, _3) EINVAL #define nm_ptnetmap_host_on(_1) EINVAL #endif /* !WITH_PTNETMAP_HOST */ @@ -2175,4 +2221,7 @@ void ptnet_nm_krings_delete(struct netmap_adapter *na); void ptnet_nm_dtor(struct netmap_adapter *na); #endif /* WITH_PTNETMAP_GUEST */ +struct nmreq_option * nmreq_findoption(struct nmreq_option *, uint16_t); +int nmreq_checkduplicate(struct nmreq_option *); + #endif /* _NET_NETMAP_KERN_H_ */ diff --git a/sys/dev/netmap/netmap_legacy.c b/sys/dev/netmap/netmap_legacy.c new file mode 100644 index 0000000000000..da0d622958d9b --- /dev/null +++ b/sys/dev/netmap/netmap_legacy.c @@ -0,0 +1,428 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (C) 2018 Vincenzo Maffione + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* $FreeBSD$ */ + +#if defined(__FreeBSD__) +#include /* prerequisite */ +#include +#include /* defines used in kernel.h */ +#include /* FIONBIO */ +#include +#include /* struct socket */ +#include /* sockaddrs */ +#include +#include +#include +#include /* BIOCIMMEDIATE */ +#include /* bus_dmamap_* */ +#include +#elif defined(linux) +#include "bsd_glue.h" +#elif defined(__APPLE__) +#warning OSX support is only partial +#include "osx_glue.h" +#elif defined (_WIN32) +#include "win_glue.h" +#endif + +/* + * common headers + */ +#include +#include + +static int +nmreq_register_from_legacy(struct nmreq *nmr, struct nmreq_header *hdr, + struct nmreq_register *req) +{ + req->nr_offset = nmr->nr_offset; + req->nr_memsize = nmr->nr_memsize; + req->nr_tx_slots = nmr->nr_tx_slots; + req->nr_rx_slots = nmr->nr_rx_slots; + req->nr_tx_rings = nmr->nr_tx_rings; + req->nr_rx_rings = nmr->nr_rx_rings; + req->nr_mem_id = nmr->nr_arg2; + req->nr_ringid = nmr->nr_ringid & NETMAP_RING_MASK; + if ((nmr->nr_flags & NR_REG_MASK) == NR_REG_DEFAULT) { + /* Convert the older nmr->nr_ringid (original + * netmap control API) to nmr->nr_flags. 
*/ + u_int regmode = NR_REG_DEFAULT; + if (req->nr_ringid & NETMAP_SW_RING) { + regmode = NR_REG_SW; + } else if (req->nr_ringid & NETMAP_HW_RING) { + regmode = NR_REG_ONE_NIC; + } else { + regmode = NR_REG_ALL_NIC; + } + nmr->nr_flags = regmode | + (nmr->nr_flags & (~NR_REG_MASK)); + } + req->nr_mode = nmr->nr_flags & NR_REG_MASK; + /* Fix nr_name, nr_mode and nr_ringid to handle pipe requests. */ + if (req->nr_mode == NR_REG_PIPE_MASTER || + req->nr_mode == NR_REG_PIPE_SLAVE) { + char suffix[10]; + snprintf(suffix, sizeof(suffix), "%c%d", + (req->nr_mode == NR_REG_PIPE_MASTER ? '{' : '}'), + req->nr_ringid); + if (strlen(hdr->nr_name) + strlen(suffix) + >= sizeof(hdr->nr_name)) { + /* No space for the pipe suffix. */ + return ENOBUFS; + } + strncat(hdr->nr_name, suffix, strlen(suffix)); + req->nr_mode = NR_REG_ALL_NIC; + req->nr_ringid = 0; + } + req->nr_flags = nmr->nr_flags & (~NR_REG_MASK); + if (nmr->nr_ringid & NETMAP_NO_TX_POLL) { + req->nr_flags |= NR_NO_TX_POLL; + } + if (nmr->nr_ringid & NETMAP_DO_RX_POLL) { + req->nr_flags |= NR_DO_RX_POLL; + } + /* nmr->nr_arg1 (nr_pipes) ignored */ + req->nr_extra_bufs = nmr->nr_arg3; + + return 0; +} + +/* Convert the legacy 'nmr' struct into one of the nmreq_xyz structs + * (new API). The new struct is dynamically allocated. */ +static struct nmreq_header * +nmreq_from_legacy(struct nmreq *nmr, u_long ioctl_cmd) +{ + struct nmreq_header *hdr = nm_os_malloc(sizeof(*hdr)); + + if (hdr == NULL) { + goto oom; + } + + /* Sanitize nmr->nr_name by adding the string terminator. */ + if (ioctl_cmd == NIOCGINFO || ioctl_cmd == NIOCREGIF) { + nmr->nr_name[sizeof(nmr->nr_name) - 1] = '\0'; + } + + /* First prepare the request header. */ + hdr->nr_version = NETMAP_API; /* new API */ + strncpy(hdr->nr_name, nmr->nr_name, sizeof(nmr->nr_name)); + hdr->nr_options = (uint64_t)NULL; + hdr->nr_body = (uint64_t)NULL; + + switch (ioctl_cmd) { + case NIOCREGIF: { + switch (nmr->nr_cmd) { + case 0: { + /* Regular NIOCREGIF operation. 
*/ + struct nmreq_register *req = nm_os_malloc(sizeof(*req)); + if (!req) { goto oom; } + hdr->nr_body = (uint64_t)req; + hdr->nr_reqtype = NETMAP_REQ_REGISTER; + if (nmreq_register_from_legacy(nmr, hdr, req)) { + goto oom; + } + break; + } + case NETMAP_BDG_ATTACH: { + struct nmreq_vale_attach *req = nm_os_malloc(sizeof(*req)); + if (!req) { goto oom; } + hdr->nr_body = (uint64_t)req; + hdr->nr_reqtype = NETMAP_REQ_VALE_ATTACH; + if (nmreq_register_from_legacy(nmr, hdr, &req->reg)) { + goto oom; + } + /* Fix nr_mode, starting from nr_arg1. */ + if (nmr->nr_arg1 & NETMAP_BDG_HOST) { + req->reg.nr_mode = NR_REG_NIC_SW; + } else { + req->reg.nr_mode = NR_REG_ALL_NIC; + } + break; + } + case NETMAP_BDG_DETACH: { + hdr->nr_reqtype = NETMAP_REQ_VALE_DETACH; + hdr->nr_body = (uint64_t)nm_os_malloc(sizeof(struct nmreq_vale_detach)); + break; + } + case NETMAP_BDG_VNET_HDR: + case NETMAP_VNET_HDR_GET: { + struct nmreq_port_hdr *req = nm_os_malloc(sizeof(*req)); + if (!req) { goto oom; } + hdr->nr_body = (uint64_t)req; + hdr->nr_reqtype = (nmr->nr_cmd == NETMAP_BDG_VNET_HDR) ? + NETMAP_REQ_PORT_HDR_SET : NETMAP_REQ_PORT_HDR_GET; + req->nr_hdr_len = nmr->nr_arg1; + break; + } + case NETMAP_BDG_NEWIF : { + struct nmreq_vale_newif *req = nm_os_malloc(sizeof(*req)); + if (!req) { goto oom; } + hdr->nr_body = (uint64_t)req; + hdr->nr_reqtype = NETMAP_REQ_VALE_NEWIF; + req->nr_tx_slots = nmr->nr_tx_slots; + req->nr_rx_slots = nmr->nr_rx_slots; + req->nr_tx_rings = nmr->nr_tx_rings; + req->nr_rx_rings = nmr->nr_rx_rings; + req->nr_mem_id = nmr->nr_arg2; + break; + } + case NETMAP_BDG_DELIF: { + hdr->nr_reqtype = NETMAP_REQ_VALE_DELIF; + break; + } + case NETMAP_BDG_POLLING_ON: + case NETMAP_BDG_POLLING_OFF: { + struct nmreq_vale_polling *req = nm_os_malloc(sizeof(*req)); + if (!req) { goto oom; } + hdr->nr_body = (uint64_t)req; + hdr->nr_reqtype = (nmr->nr_cmd == NETMAP_BDG_POLLING_ON) ? 
+ NETMAP_REQ_VALE_POLLING_ENABLE : + NETMAP_REQ_VALE_POLLING_DISABLE; + switch (nmr->nr_flags & NR_REG_MASK) { + default: + req->nr_mode = 0; /* invalid */ + break; + case NR_REG_ONE_NIC: + req->nr_mode = NETMAP_POLLING_MODE_MULTI_CPU; + break; + case NR_REG_ALL_NIC: + req->nr_mode = NETMAP_POLLING_MODE_SINGLE_CPU; + break; + } + req->nr_first_cpu_id = nmr->nr_ringid & NETMAP_RING_MASK; + req->nr_num_polling_cpus = nmr->nr_arg1; + break; + } + case NETMAP_PT_HOST_CREATE: + case NETMAP_PT_HOST_DELETE: { + D("Netmap passthrough not supported yet"); + return NULL; + break; + } + } + break; + } + case NIOCGINFO: { + if (nmr->nr_cmd == NETMAP_BDG_LIST) { + struct nmreq_vale_list *req = nm_os_malloc(sizeof(*req)); + if (!req) { goto oom; } + hdr->nr_body = (uint64_t)req; + hdr->nr_reqtype = NETMAP_REQ_VALE_LIST; + req->nr_bridge_idx = nmr->nr_arg1; + req->nr_port_idx = nmr->nr_arg2; + } else { + /* Regular NIOCGINFO. */ + struct nmreq_port_info_get *req = nm_os_malloc(sizeof(*req)); + if (!req) { goto oom; } + hdr->nr_body = (uint64_t)req; + hdr->nr_reqtype = NETMAP_REQ_PORT_INFO_GET; + req->nr_offset = nmr->nr_offset; + req->nr_memsize = nmr->nr_memsize; + req->nr_tx_slots = nmr->nr_tx_slots; + req->nr_rx_slots = nmr->nr_rx_slots; + req->nr_tx_rings = nmr->nr_tx_rings; + req->nr_rx_rings = nmr->nr_rx_rings; + req->nr_mem_id = nmr->nr_arg2; + } + break; + } + } + + return hdr; +oom: + if (hdr) { + if (hdr->nr_body) { + nm_os_free((void *)hdr->nr_body); + } + nm_os_free(hdr); + } + D("Failed to allocate memory for nmreq_xyz struct"); + + return NULL; +} + +static void +nmreq_register_to_legacy(const struct nmreq_register *req, struct nmreq *nmr) +{ + nmr->nr_offset = req->nr_offset; + nmr->nr_memsize = req->nr_memsize; + nmr->nr_tx_slots = req->nr_tx_slots; + nmr->nr_rx_slots = req->nr_rx_slots; + nmr->nr_tx_rings = req->nr_tx_rings; + nmr->nr_rx_rings = req->nr_rx_rings; + nmr->nr_arg2 = req->nr_mem_id; + nmr->nr_arg3 = req->nr_extra_bufs; +} + +/* Convert a nmreq_xyz 
struct (new API) to the legacy 'nmr' struct. + * It does not free the nmreq_xyz struct allocated by + * nmreq_from_legacy(); the caller is responsible for that. */ +static int +nmreq_to_legacy(struct nmreq_header *hdr, struct nmreq *nmr) +{ + int ret = 0; + + /* We only write-back the fields that the user expects to be + * written back. */ + switch (hdr->nr_reqtype) { + case NETMAP_REQ_REGISTER: { + struct nmreq_register *req = + (struct nmreq_register *)hdr->nr_body; + nmreq_register_to_legacy(req, nmr); + break; + } + case NETMAP_REQ_PORT_INFO_GET: { + struct nmreq_port_info_get *req = + (struct nmreq_port_info_get *)hdr->nr_body; + nmr->nr_offset = req->nr_offset; + nmr->nr_memsize = req->nr_memsize; + nmr->nr_tx_slots = req->nr_tx_slots; + nmr->nr_rx_slots = req->nr_rx_slots; + nmr->nr_tx_rings = req->nr_tx_rings; + nmr->nr_rx_rings = req->nr_rx_rings; + nmr->nr_arg2 = req->nr_mem_id; + break; + } + case NETMAP_REQ_VALE_ATTACH: { + struct nmreq_vale_attach *req = + (struct nmreq_vale_attach *)hdr->nr_body; + nmreq_register_to_legacy(&req->reg, nmr); + break; + } + case NETMAP_REQ_VALE_DETACH: { + break; + } + case NETMAP_REQ_VALE_LIST: { + struct nmreq_vale_list *req = + (struct nmreq_vale_list *)hdr->nr_body; + strncpy(nmr->nr_name, hdr->nr_name, sizeof(nmr->nr_name)); + nmr->nr_arg1 = req->nr_bridge_idx; + nmr->nr_arg2 = req->nr_port_idx; + break; + } + case NETMAP_REQ_PORT_HDR_SET: + case NETMAP_REQ_PORT_HDR_GET: { + struct nmreq_port_hdr *req = + (struct nmreq_port_hdr *)hdr->nr_body; + nmr->nr_arg1 = req->nr_hdr_len; + break; + } + case NETMAP_REQ_VALE_NEWIF: { + struct nmreq_vale_newif *req = + (struct nmreq_vale_newif *)hdr->nr_body; + nmr->nr_tx_slots = req->nr_tx_slots; + nmr->nr_rx_slots = req->nr_rx_slots; + nmr->nr_tx_rings = req->nr_tx_rings; + nmr->nr_rx_rings = req->nr_rx_rings; + nmr->nr_arg2 = req->nr_mem_id; + break; + } + case NETMAP_REQ_VALE_DELIF: + case NETMAP_REQ_VALE_POLLING_ENABLE: + case NETMAP_REQ_VALE_POLLING_DISABLE: { + break; + } + } + + return ret; +} +
+int +netmap_ioctl_legacy(struct netmap_priv_d *priv, u_long cmd, caddr_t data, + struct thread *td) +{ + int error = 0; + + switch (cmd) { + case NIOCGINFO: + case NIOCREGIF: { + /* Request for the legacy control API. Convert it to a + * NIOCCTRL request. */ + struct nmreq *nmr = (struct nmreq *) data; + struct nmreq_header *hdr = nmreq_from_legacy(nmr, cmd); + if (hdr == NULL) { /* out of memory */ + return ENOMEM; + } + error = netmap_ioctl(priv, NIOCCTRL, (caddr_t)hdr, td, + /*nr_body_is_user=*/0); + if (error == 0) { + nmreq_to_legacy(hdr, nmr); + } + if (hdr->nr_body) { + nm_os_free((void *)hdr->nr_body); + } + nm_os_free(hdr); + break; + } +#ifdef WITH_VALE + case NIOCCONFIG: { + struct nm_ifreq *nr = (struct nm_ifreq *)data; + error = netmap_bdg_config(nr); + break; + } +#endif +#ifdef __FreeBSD__ + case FIONBIO: + case FIOASYNC: + ND("FIONBIO/FIOASYNC are no-ops"); + break; + + case BIOCIMMEDIATE: + case BIOCGHDRCMPLT: + case BIOCSHDRCMPLT: + case BIOCSSEESENT: + D("ignore BIOCIMMEDIATE/BIOCSHDRCMPLT/BIOCSHDRCMPLT/BIOCSSEESENT"); + break; + + default: /* allow device-specific ioctls */ + { + struct nmreq *nmr = (struct nmreq *)data; + struct ifnet *ifp = ifunit_ref(nmr->nr_name); + if (ifp == NULL) { + error = ENXIO; + } else { + struct socket so; + + bzero(&so, sizeof(so)); + so.so_vnet = ifp->if_vnet; + // so->so_proto not null. 
+ error = ifioctl(&so, cmd, data, td); + if_rele(ifp); + } + break; + } + +#else /* linux */ + default: + error = EOPNOTSUPP; +#endif /* linux */ + } + + return error; +} diff --git a/sys/dev/netmap/netmap_mem2.c b/sys/dev/netmap/netmap_mem2.c index 1f206a1b02927..b6d2d7e817d61 100644 --- a/sys/dev/netmap/netmap_mem2.c +++ b/sys/dev/netmap/netmap_mem2.c @@ -110,6 +110,7 @@ struct netmap_obj_pool { uint32_t *bitmap; /* one bit per buffer, 1 means free */ uint32_t *invalid_bitmap;/* one bit per buffer, 1 means invalid */ uint32_t bitmap_slots; /* number of uint32 entries in bitmap */ + int alloc_done; /* we have allocated the memory */ /* ---------------------------------------------------*/ /* limits */ @@ -131,7 +132,11 @@ struct netmap_obj_pool { }; #define NMA_LOCK_T NM_MTX_T - +#define NMA_LOCK_INIT(n) NM_MTX_INIT((n)->nm_mtx) +#define NMA_LOCK_DESTROY(n) NM_MTX_DESTROY((n)->nm_mtx) +#define NMA_LOCK(n) NM_MTX_LOCK((n)->nm_mtx) +#define NMA_SPINLOCK(n) NM_MTX_SPINLOCK((n)->nm_mtx) +#define NMA_UNLOCK(n) NM_MTX_UNLOCK((n)->nm_mtx) struct netmap_mem_ops { int (*nmd_get_lut)(struct netmap_mem_d *, struct netmap_lut*); @@ -179,56 +184,126 @@ struct netmap_mem_d { char name[NM_MEM_NAMESZ]; }; -/* - * XXX need to fix the case of t0 == void - */ -#define NMD_DEFCB(t0, name) \ -t0 \ -netmap_mem_##name(struct netmap_mem_d *nmd) \ -{ \ - return nmd->ops->nmd_##name(nmd); \ +int +netmap_mem_get_lut(struct netmap_mem_d *nmd, struct netmap_lut *lut) +{ + int rv; + + NMA_LOCK(nmd); + rv = nmd->ops->nmd_get_lut(nmd, lut); + NMA_UNLOCK(nmd); + + return rv; } -#define NMD_DEFCB1(t0, name, t1) \ -t0 \ -netmap_mem_##name(struct netmap_mem_d *nmd, t1 a1) \ -{ \ - return nmd->ops->nmd_##name(nmd, a1); \ +int +netmap_mem_get_info(struct netmap_mem_d *nmd, uint64_t *size, + u_int *memflags, nm_memid_t *memid) +{ + int rv; + + NMA_LOCK(nmd); + rv = nmd->ops->nmd_get_info(nmd, size, memflags, memid); + NMA_UNLOCK(nmd); + + return rv; } -#define NMD_DEFCB3(t0, name, t1, t2, t3) \ -t0 \ 
-netmap_mem_##name(struct netmap_mem_d *nmd, t1 a1, t2 a2, t3 a3) \ -{ \ - return nmd->ops->nmd_##name(nmd, a1, a2, a3); \ +vm_paddr_t +netmap_mem_ofstophys(struct netmap_mem_d *nmd, vm_ooffset_t off) +{ + vm_paddr_t pa; + +#if defined(__FreeBSD__) + /* This function is called by netmap_dev_pager_fault(), which holds a + * non-sleepable lock since FreeBSD 12. Since we cannot sleep, we + * spin on the trylock. */ + NMA_SPINLOCK(nmd); +#else + NMA_LOCK(nmd); +#endif + pa = nmd->ops->nmd_ofstophys(nmd, off); + NMA_UNLOCK(nmd); + + return pa; } -#define NMD_DEFNACB(t0, name) \ -t0 \ -netmap_mem_##name(struct netmap_adapter *na) \ -{ \ - return na->nm_mem->ops->nmd_##name(na); \ +static int +netmap_mem_config(struct netmap_mem_d *nmd) +{ + if (nmd->active) { + /* already in use. Not fatal, but we + * cannot change the configuration + */ + return 0; + } + + return nmd->ops->nmd_config(nmd); } -#define NMD_DEFNACB1(t0, name, t1) \ -t0 \ -netmap_mem_##name(struct netmap_adapter *na, t1 a1) \ -{ \ - return na->nm_mem->ops->nmd_##name(na, a1); \ +ssize_t +netmap_mem_if_offset(struct netmap_mem_d *nmd, const void *off) +{ + ssize_t rv; + + NMA_LOCK(nmd); + rv = nmd->ops->nmd_if_offset(nmd, off); + NMA_UNLOCK(nmd); + + return rv; } -NMD_DEFCB1(int, get_lut, struct netmap_lut *); -NMD_DEFCB3(int, get_info, uint64_t *, u_int *, uint16_t *); -NMD_DEFCB1(vm_paddr_t, ofstophys, vm_ooffset_t); -static int netmap_mem_config(struct netmap_mem_d *); -NMD_DEFCB(int, config); -NMD_DEFCB1(ssize_t, if_offset, const void *); -NMD_DEFCB(void, delete); +static void +netmap_mem_delete(struct netmap_mem_d *nmd) +{ + nmd->ops->nmd_delete(nmd); +} -NMD_DEFNACB1(struct netmap_if *, if_new, struct netmap_priv_d *); -NMD_DEFNACB1(void, if_delete, struct netmap_if *); -NMD_DEFNACB(int, rings_create); -NMD_DEFNACB(void, rings_delete); +struct netmap_if * +netmap_mem_if_new(struct netmap_adapter *na, struct netmap_priv_d *priv) +{ + struct netmap_if *nifp; + struct netmap_mem_d *nmd = na->nm_mem; + + 
NMA_LOCK(nmd); + nifp = nmd->ops->nmd_if_new(na, priv); + NMA_UNLOCK(nmd); + + return nifp; +} + +void +netmap_mem_if_delete(struct netmap_adapter *na, struct netmap_if *nif) +{ + struct netmap_mem_d *nmd = na->nm_mem; + + NMA_LOCK(nmd); + nmd->ops->nmd_if_delete(na, nif); + NMA_UNLOCK(nmd); +} + +int +netmap_mem_rings_create(struct netmap_adapter *na) +{ + int rv; + struct netmap_mem_d *nmd = na->nm_mem; + + NMA_LOCK(nmd); + rv = nmd->ops->nmd_rings_create(na); + NMA_UNLOCK(nmd); + + return rv; +} + +void +netmap_mem_rings_delete(struct netmap_adapter *na) +{ + struct netmap_mem_d *nmd = na->nm_mem; + + NMA_LOCK(nmd); + nmd->ops->nmd_rings_delete(na); + NMA_UNLOCK(nmd); +} static int netmap_mem_map(struct netmap_obj_pool *, struct netmap_adapter *); static int netmap_mem_unmap(struct netmap_obj_pool *, struct netmap_adapter *); @@ -241,12 +316,6 @@ netmap_mem_get_id(struct netmap_mem_d *nmd) return nmd->nm_id; } -#define NMA_LOCK_INIT(n) NM_MTX_INIT((n)->nm_mtx) -#define NMA_LOCK_DESTROY(n) NM_MTX_DESTROY((n)->nm_mtx) -#define NMA_LOCK(n) NM_MTX_LOCK((n)->nm_mtx) -#define NMA_SPINLOCK(n) NM_MTX_SPINLOCK((n)->nm_mtx) -#define NMA_UNLOCK(n) NM_MTX_UNLOCK((n)->nm_mtx) - #ifdef NM_DEBUG_MEM_PUTGET #define NM_DBG_REFC(nmd, func, line) \ nm_prinf("%s:%d mem[%d] -> %d\n", func, line, (nmd)->nm_id, (nmd)->refcount); @@ -285,22 +354,32 @@ __netmap_mem_put(struct netmap_mem_d *nmd, const char *func, int line) int netmap_mem_finalize(struct netmap_mem_d *nmd, struct netmap_adapter *na) { + int lasterr = 0; if (nm_mem_assign_group(nmd, na->pdev) < 0) { return ENOMEM; - } else { - NMA_LOCK(nmd); - nmd->lasterr = nmd->ops->nmd_finalize(nmd); - NMA_UNLOCK(nmd); } + NMA_LOCK(nmd); + + if (netmap_mem_config(nmd)) + goto out; + + nmd->active++; + + nmd->lasterr = nmd->ops->nmd_finalize(nmd); + if (!nmd->lasterr && na->pdev) { nmd->lasterr = netmap_mem_map(&nmd->pools[NETMAP_BUF_POOL], na); - if (nmd->lasterr) { - netmap_mem_deref(nmd, na); - } } - return nmd->lasterr; +out: + 
lasterr = nmd->lasterr; + NMA_UNLOCK(nmd); + + if (lasterr) + netmap_mem_deref(nmd, na); + + return lasterr; } static int @@ -400,6 +479,10 @@ netmap_mem_deref(struct netmap_mem_d *nmd, struct netmap_adapter *na) } nmd->ops->nmd_deref(nmd); + nmd->active--; + if (!nmd->active) + nmd->nm_grp = -1; + NMA_UNLOCK(nmd); return last_user; } @@ -706,14 +789,6 @@ netmap_mem2_ofstophys(struct netmap_mem_d* nmd, vm_ooffset_t offset) vm_paddr_t pa; struct netmap_obj_pool *p; -#if defined(__FreeBSD__) - /* This function is called by netmap_dev_pager_fault(), which holds a - * non-sleepable lock since FreeBSD 12. Since we cannot sleep, we - * spin on the trylock. */ - NMA_SPINLOCK(nmd); -#else - NMA_LOCK(nmd); -#endif p = nmd->pools; for (i = 0; i < NETMAP_POOLS_NR; offset -= p[i].memtotal, i++) { @@ -727,7 +802,6 @@ netmap_mem2_ofstophys(struct netmap_mem_d* nmd, vm_ooffset_t offset) pa = vtophys(p[i].lut[offset / p[i]._objsize].vaddr); pa.QuadPart += offset % p[i]._objsize; #endif - NMA_UNLOCK(nmd); return pa; } /* this is only in case of errors */ @@ -738,7 +812,6 @@ netmap_mem2_ofstophys(struct netmap_mem_d* nmd, vm_ooffset_t offset) p[NETMAP_IF_POOL].memtotal + p[NETMAP_RING_POOL].memtotal + p[NETMAP_BUF_POOL].memtotal); - NMA_UNLOCK(nmd); #ifndef _WIN32 return 0; /* bad address */ #else @@ -775,10 +848,10 @@ netmap_mem2_ofstophys(struct netmap_mem_d* nmd, vm_ooffset_t offset) PMDL win32_build_user_vm_map(struct netmap_mem_d* nmd) { - int i, j; - size_t memsize; u_int memflags, ofs = 0; PMDL mainMdl, tempMdl; + uint64_t memsize; + int i, j; if (netmap_mem_get_info(nmd, &memsize, &memflags, NULL)) { D("memory not finalised yet"); @@ -847,11 +920,10 @@ netmap_mem2_get_pool_info(struct netmap_mem_d* nmd, u_int pool, u_int *clustsize } static int -netmap_mem2_get_info(struct netmap_mem_d* nmd, uint64_t* size, u_int *memflags, - nm_memid_t *id) +netmap_mem2_get_info(struct netmap_mem_d* nmd, uint64_t* size, + u_int *memflags, nm_memid_t *id) { int error = 0; - NMA_LOCK(nmd); 
error = netmap_mem_config(nmd); if (error) goto out; @@ -872,7 +944,6 @@ netmap_mem2_get_info(struct netmap_mem_d* nmd, uint64_t* size, u_int *memflags, if (id) *id = nmd->nm_id; out: - NMA_UNLOCK(nmd); return error; } @@ -916,11 +987,7 @@ netmap_obj_offset(struct netmap_obj_pool *p, const void *vaddr) static ssize_t netmap_mem2_if_offset(struct netmap_mem_d *nmd, const void *addr) { - ssize_t v; - NMA_LOCK(nmd); - v = netmap_if_offset(nmd, addr); - NMA_UNLOCK(nmd); - return v; + return netmap_if_offset(nmd, addr); } /* @@ -1118,7 +1185,7 @@ netmap_new_bufs(struct netmap_mem_d *nmd, struct netmap_slot *slot, u_int n) slot[i].ptr = 0; } - ND("allocated %d buffers, %d available, first at %d", n, p->objfree, pos); + ND("%s: allocated %d buffers, %d available, first at %d", p->name, n, p->objfree, pos); return (0); cleanup: @@ -1163,9 +1230,11 @@ netmap_free_bufs(struct netmap_mem_d *nmd, struct netmap_slot *slot, u_int n) u_int i; for (i = 0; i < n; i++) { - if (slot[i].buf_idx > 2) + if (slot[i].buf_idx > 1) netmap_free_buf(nmd, slot[i].buf_idx); } + ND("%s: released some buffers, available: %u", + p->name, p->objfree); } static void @@ -1180,6 +1249,12 @@ netmap_reset_obj_allocator(struct netmap_obj_pool *p) if (p->invalid_bitmap) nm_os_free(p->invalid_bitmap); p->invalid_bitmap = NULL; + if (!p->alloc_done) { + /* allocation was done by somebody else. + * Let them clean up after themselves. + */ + return; + } if (p->lut) { u_int i; @@ -1199,6 +1274,7 @@ netmap_reset_obj_allocator(struct netmap_obj_pool *p) p->memtotal = 0; p->numclusters = 0; p->objfree = 0; + p->alloc_done = 0; } /* @@ -1310,13 +1386,20 @@ netmap_finalize_obj_allocator(struct netmap_obj_pool *p) size_t n; if (p->lut) { - /* already finalized, nothing to do */ + /* if the lut is already there we assume that also all the + * clusters have already been allocated, possibly by somebody + * else (e.g., extmem).
In the latter case, the alloc_done flag + * will remain at zero, so that we will not attempt to + * deallocate the clusters by ourselves in + * netmap_reset_obj_allocator. + */ return 0; } /* optimistically assume we have enough memory */ p->numclusters = p->_numclusters; p->objtotal = p->_objtotal; + p->alloc_done = 1; p->lut = nm_alloc_lut(p->objtotal); if (p->lut == NULL) { @@ -1426,7 +1509,7 @@ netmap_mem_reset_all(struct netmap_mem_d *nmd) static int netmap_mem_unmap(struct netmap_obj_pool *p, struct netmap_adapter *na) { - int i, lim = p->_objtotal; + int i, lim = p->objtotal; struct netmap_lut *lut = &na->na_lut; if (na == NULL || na->pdev == NULL) @@ -1675,10 +1758,6 @@ netmap_mem2_config(struct netmap_mem_d *nmd) { int i; - if (nmd->active) - /* already in use, we cannot change the configuration */ - goto out; - if (!netmap_mem_params_changed(nmd->params)) goto out; @@ -1707,19 +1786,8 @@ out: static int netmap_mem2_finalize(struct netmap_mem_d *nmd) { - int err; - - /* update configuration if changed */ - if (netmap_mem_config(nmd)) - goto out1; - - nmd->active++; - - if (nmd->flags & NETMAP_MEM_FINALIZED) { - /* may happen if config is not changed */ - D("nothing to do"); + if (nmd->flags & NETMAP_MEM_FINALIZED) goto out; - } if (netmap_mem_finalize_all(nmd)) goto out; @@ -1727,13 +1795,7 @@ netmap_mem2_finalize(struct netmap_mem_d *nmd) nmd->lasterr = 0; out: - if (nmd->lasterr) - nmd->active--; -out1: - err = nmd->lasterr; - - return err; - + return nmd->lasterr; } static void @@ -1782,7 +1844,7 @@ netmap_free_rings(struct netmap_adapter *na) for_rx_tx(t) { u_int i; for (i = 0; i < nma_get_nrings(na, t) + 1; i++) { - struct netmap_kring *kring = &NMR(na, t)[i]; + struct netmap_kring *kring = NMR(na, t)[i]; struct netmap_ring *ring = kring->ring; if (ring == NULL || kring->users > 0 || (kring->nr_kflags & NKR_NEEDRING)) { @@ -1793,8 +1855,12 @@ netmap_free_rings(struct netmap_adapter *na) } if (netmap_verbose) D("deleting ring %s", kring->name); - if (i 
!= nma_get_nrings(na, t) || na->na_flags & NAF_HOST_RINGS) + if (!(kring->nr_kflags & NKR_FAKERING)) { + ND("freeing bufs for %s", kring->name); netmap_free_bufs(na->nm_mem, ring->slot, kring->nkr_num_slots); + } else { + ND("NOT freeing bufs for %s", kring->name); + } netmap_ring_free(na->nm_mem, ring); kring->ring = NULL; } @@ -1813,13 +1879,11 @@ netmap_mem2_rings_create(struct netmap_adapter *na) { enum txrx t; - NMA_LOCK(na->nm_mem); - for_rx_tx(t) { u_int i; for (i = 0; i <= nma_get_nrings(na, t); i++) { - struct netmap_kring *kring = &NMR(na, t)[i]; + struct netmap_kring *kring = NMR(na, t)[i]; struct netmap_ring *ring = kring->ring; u_int len, ndesc; @@ -1857,14 +1921,16 @@ netmap_mem2_rings_create(struct netmap_adapter *na) ND("%s h %d c %d t %d", kring->name, ring->head, ring->cur, ring->tail); ND("initializing slots for %s_ring", nm_txrx2str(txrx)); - if (i != nma_get_nrings(na, t) || (na->na_flags & NAF_HOST_RINGS)) { + if (!(kring->nr_kflags & NKR_FAKERING)) { /* this is a real ring */ + ND("allocating buffers for %s", kring->name); if (netmap_new_bufs(na->nm_mem, ring->slot, ndesc)) { D("Cannot allocate buffers for %s_ring", nm_txrx2str(t)); goto cleanup; } } else { /* this is a fake ring, set all indices to 0 */ + ND("NOT allocating buffers for %s", kring->name); netmap_mem_set_ring(na->nm_mem, ring->slot, ndesc, 0); } /* ring info */ @@ -1873,15 +1939,11 @@ netmap_mem2_rings_create(struct netmap_adapter *na) } } - NMA_UNLOCK(na->nm_mem); - return 0; cleanup: netmap_free_rings(na); - NMA_UNLOCK(na->nm_mem); - return ENOMEM; } @@ -1889,11 +1951,7 @@ static void netmap_mem2_rings_delete(struct netmap_adapter *na) { /* last instance, release bufs and rings */ - NMA_LOCK(na->nm_mem); - netmap_free_rings(na); - - NMA_UNLOCK(na->nm_mem); } @@ -1924,8 +1982,6 @@ netmap_mem2_if_new(struct netmap_adapter *na, struct netmap_priv_d *priv) * to the tx and rx rings in the shared memory region. 
*/ - NMA_LOCK(na->nm_mem); - len = sizeof(struct netmap_if) + (ntot * sizeof(ssize_t)); nifp = netmap_if_malloc(na->nm_mem, len); if (nifp == NULL) { @@ -1949,10 +2005,10 @@ netmap_mem2_if_new(struct netmap_adapter *na, struct netmap_priv_d *priv) * ring, like we do for buffers? */ ssize_t ofs = 0; - if (na->tx_rings[i].ring != NULL && i >= priv->np_qfirst[NR_TX] + if (na->tx_rings[i]->ring != NULL && i >= priv->np_qfirst[NR_TX] && i < priv->np_qlast[NR_TX]) { ofs = netmap_ring_offset(na->nm_mem, - na->tx_rings[i].ring) - base; + na->tx_rings[i]->ring) - base; } *(ssize_t *)(uintptr_t)&nifp->ring_ofs[i] = ofs; } @@ -1961,16 +2017,14 @@ netmap_mem2_if_new(struct netmap_adapter *na, struct netmap_priv_d *priv) * ring, like we do for buffers? */ ssize_t ofs = 0; - if (na->rx_rings[i].ring != NULL && i >= priv->np_qfirst[NR_RX] + if (na->rx_rings[i]->ring != NULL && i >= priv->np_qfirst[NR_RX] && i < priv->np_qlast[NR_RX]) { ofs = netmap_ring_offset(na->nm_mem, - na->rx_rings[i].ring) - base; + na->rx_rings[i]->ring) - base; } *(ssize_t *)(uintptr_t)&nifp->ring_ofs[i+n[NR_TX]] = ofs; } - NMA_UNLOCK(na->nm_mem); - return (nifp); } @@ -1980,21 +2034,15 @@ netmap_mem2_if_delete(struct netmap_adapter *na, struct netmap_if *nifp) if (nifp == NULL) /* nothing to do */ return; - NMA_LOCK(na->nm_mem); if (nifp->ni_bufs_head) netmap_extra_free(na, nifp->ni_bufs_head); netmap_if_free(na->nm_mem, nifp); - - NMA_UNLOCK(na->nm_mem); } static void netmap_mem2_deref(struct netmap_mem_d *nmd) { - nmd->active--; - if (!nmd->active) - nmd->nm_grp = -1; if (netmap_verbose) D("active = %d", nmd->active); @@ -2016,42 +2064,32 @@ struct netmap_mem_ops netmap_mem_global_ops = { }; int -netmap_mem_pools_info_get(struct nmreq *nmr, struct netmap_mem_d *nmd) +netmap_mem_pools_info_get(struct nmreq_pools_info *req, + struct netmap_mem_d *nmd) { - uintptr_t *pp = (uintptr_t *)&nmr->nr_arg1; - struct netmap_pools_info *upi = (struct netmap_pools_info *)(*pp); - struct netmap_pools_info pi; - 
uint64_t memsize; - uint16_t memid; int ret; - ret = netmap_mem_get_info(nmd, &memsize, NULL, &memid); + ret = netmap_mem_get_info(nmd, &req->nr_memsize, NULL, + &req->nr_mem_id); if (ret) { return ret; } - pi.memsize = memsize; - pi.memid = memid; NMA_LOCK(nmd); - pi.if_pool_offset = 0; - pi.if_pool_objtotal = nmd->pools[NETMAP_IF_POOL].objtotal; - pi.if_pool_objsize = nmd->pools[NETMAP_IF_POOL]._objsize; + req->nr_if_pool_offset = 0; + req->nr_if_pool_objtotal = nmd->pools[NETMAP_IF_POOL].objtotal; + req->nr_if_pool_objsize = nmd->pools[NETMAP_IF_POOL]._objsize; - pi.ring_pool_offset = nmd->pools[NETMAP_IF_POOL].memtotal; - pi.ring_pool_objtotal = nmd->pools[NETMAP_RING_POOL].objtotal; - pi.ring_pool_objsize = nmd->pools[NETMAP_RING_POOL]._objsize; + req->nr_ring_pool_offset = nmd->pools[NETMAP_IF_POOL].memtotal; + req->nr_ring_pool_objtotal = nmd->pools[NETMAP_RING_POOL].objtotal; + req->nr_ring_pool_objsize = nmd->pools[NETMAP_RING_POOL]._objsize; - pi.buf_pool_offset = nmd->pools[NETMAP_IF_POOL].memtotal + + req->nr_buf_pool_offset = nmd->pools[NETMAP_IF_POOL].memtotal + nmd->pools[NETMAP_RING_POOL].memtotal; - pi.buf_pool_objtotal = nmd->pools[NETMAP_BUF_POOL].objtotal; - pi.buf_pool_objsize = nmd->pools[NETMAP_BUF_POOL]._objsize; + req->nr_buf_pool_objtotal = nmd->pools[NETMAP_BUF_POOL].objtotal; + req->nr_buf_pool_objsize = nmd->pools[NETMAP_BUF_POOL]._objsize; NMA_UNLOCK(nmd); - ret = copyout(&pi, upi, sizeof(pi)); - if (ret) { - return ret; - } - return 0; } @@ -2059,8 +2097,7 @@ netmap_mem_pools_info_get(struct nmreq *nmr, struct netmap_mem_d *nmd) struct netmap_mem_ext { struct netmap_mem_d up; - struct page **pages; - int nr_pages; + struct nm_os_extmem *os; struct netmap_mem_ext *next, *prev; }; @@ -2090,29 +2127,14 @@ netmap_mem_ext_unregister(struct netmap_mem_ext *e) e->prev = e->next = NULL; } -static int -netmap_mem_ext_same_pages(struct netmap_mem_ext *e, struct page **pages, int nr_pages) -{ - int i; - - if (e->nr_pages != nr_pages) - return 0; 
- - for (i = 0; i < nr_pages; i++) - if (pages[i] != e->pages[i]) - return 0; - - return 1; -} - static struct netmap_mem_ext * -netmap_mem_ext_search(struct page **pages, int nr_pages) +netmap_mem_ext_search(struct nm_os_extmem *os) { struct netmap_mem_ext *e; NM_MTX_LOCK(nm_mem_ext_list_lock); for (e = netmap_mem_ext_list; e; e = e->next) { - if (netmap_mem_ext_same_pages(e, pages, nr_pages)) { + if (nm_os_extmem_isequal(e->os, os)) { netmap_mem_get(&e->up); break; } @@ -2122,18 +2144,6 @@ netmap_mem_ext_search(struct page **pages, int nr_pages) } -static void -netmap_mem_ext_free_pages(struct page **pages, int nr_pages) -{ - int i; - - for (i = 0; i < nr_pages; i++) { - kunmap(pages[i]); - put_page(pages[i]); - } - nm_os_vfree(pages); -} - static void netmap_mem_ext_delete(struct netmap_mem_d *d) { @@ -2151,11 +2161,8 @@ netmap_mem_ext_delete(struct netmap_mem_d *d) p->lut = NULL; } } - if (e->pages) { - netmap_mem_ext_free_pages(e->pages, e->nr_pages); - e->pages = NULL; - e->nr_pages = 0; - } + if (e->os) + nm_os_extmem_delete(e->os); netmap_mem2_delete(d); } @@ -2181,117 +2188,66 @@ struct netmap_mem_ops netmap_mem_ext_ops = { }; struct netmap_mem_d * -netmap_mem_ext_create(struct nmreq *nmr, int *perror) +netmap_mem_ext_create(uint64_t usrptr, struct nmreq_pools_info *pi, int *perror) { - uintptr_t p = *(uintptr_t *)&nmr->nr_arg1; - struct netmap_pools_info pi; int error = 0; - unsigned long end, start; - int nr_pages, res, i, j; - struct page **pages = NULL; + int i, j; struct netmap_mem_ext *nme; char *clust; size_t off; - - error = copyin((void *)p, &pi, sizeof(pi)); - if (error) - goto out; + struct nm_os_extmem *os = NULL; + int nr_pages; // XXX sanity checks - if (pi.if_pool_objtotal == 0) - pi.if_pool_objtotal = netmap_min_priv_params[NETMAP_IF_POOL].num; - if (pi.if_pool_objsize == 0) - pi.if_pool_objsize = netmap_min_priv_params[NETMAP_IF_POOL].size; - if (pi.ring_pool_objtotal == 0) - pi.ring_pool_objtotal = 
netmap_min_priv_params[NETMAP_RING_POOL].num; - if (pi.ring_pool_objsize == 0) - pi.ring_pool_objsize = netmap_min_priv_params[NETMAP_RING_POOL].size; - if (pi.buf_pool_objtotal == 0) - pi.buf_pool_objtotal = netmap_min_priv_params[NETMAP_BUF_POOL].num; - if (pi.buf_pool_objsize == 0) - pi.buf_pool_objsize = netmap_min_priv_params[NETMAP_BUF_POOL].size; + if (pi->nr_if_pool_objtotal == 0) + pi->nr_if_pool_objtotal = netmap_min_priv_params[NETMAP_IF_POOL].num; + if (pi->nr_if_pool_objsize == 0) + pi->nr_if_pool_objsize = netmap_min_priv_params[NETMAP_IF_POOL].size; + if (pi->nr_ring_pool_objtotal == 0) + pi->nr_ring_pool_objtotal = netmap_min_priv_params[NETMAP_RING_POOL].num; + if (pi->nr_ring_pool_objsize == 0) + pi->nr_ring_pool_objsize = netmap_min_priv_params[NETMAP_RING_POOL].size; + if (pi->nr_buf_pool_objtotal == 0) + pi->nr_buf_pool_objtotal = netmap_min_priv_params[NETMAP_BUF_POOL].num; + if (pi->nr_buf_pool_objsize == 0) + pi->nr_buf_pool_objsize = netmap_min_priv_params[NETMAP_BUF_POOL].size; D("if %d %d ring %d %d buf %d %d", - pi.if_pool_objtotal, pi.if_pool_objsize, - pi.ring_pool_objtotal, pi.ring_pool_objsize, - pi.buf_pool_objtotal, pi.buf_pool_objsize); + pi->nr_if_pool_objtotal, pi->nr_if_pool_objsize, + pi->nr_ring_pool_objtotal, pi->nr_ring_pool_objsize, + pi->nr_buf_pool_objtotal, pi->nr_buf_pool_objsize); - end = (p + pi.memsize + PAGE_SIZE - 1) >> PAGE_SHIFT; - start = p >> PAGE_SHIFT; - nr_pages = end - start; - - pages = nm_os_vmalloc(nr_pages * sizeof(*pages)); - if (pages == NULL) { - error = ENOMEM; + os = nm_os_extmem_create(usrptr, pi, &error); + if (os == NULL) { + D("os extmem creation failed"); goto out; } -#ifdef NETMAP_LINUX_HAVE_GUP_4ARGS - res = get_user_pages_unlocked( - p, - nr_pages, - pages, - FOLL_WRITE | FOLL_GET | FOLL_SPLIT | FOLL_POPULATE); // XXX check other flags -#elif defined(NETMAP_LINUX_HAVE_GUP_5ARGS) - res = get_user_pages_unlocked( - p, - nr_pages, - 1, /* write */ - 0, /* don't force */ - pages); -#elif 
defined(NETMAP_LINUX_HAVE_GUP_7ARGS) - res = get_user_pages_unlocked( - current, - current->mm, - p, - nr_pages, - 1, /* write */ - 0, /* don't force */ - pages); -#else - down_read(¤t->mm->mmap_sem); - res = get_user_pages( - current, - current->mm, - p, - nr_pages, - 1, /* write */ - 0, /* don't force */ - pages, - NULL); - up_read(¤t->mm->mmap_sem); -#endif /* NETMAP_LINUX_GUP */ - - if (res < nr_pages) { - error = EFAULT; - goto out_unmap; - } - - nme = netmap_mem_ext_search(pages, nr_pages); + nme = netmap_mem_ext_search(os); if (nme) { - netmap_mem_ext_free_pages(pages, nr_pages); + nm_os_extmem_delete(os); return &nme->up; } D("not found, creating new"); nme = _netmap_mem_private_new(sizeof(*nme), (struct netmap_obj_params[]){ - { pi.if_pool_objsize, pi.if_pool_objtotal }, - { pi.ring_pool_objsize, pi.ring_pool_objtotal }, - { pi.buf_pool_objsize, pi.buf_pool_objtotal }}, + { pi->nr_if_pool_objsize, pi->nr_if_pool_objtotal }, + { pi->nr_ring_pool_objsize, pi->nr_ring_pool_objtotal }, + { pi->nr_buf_pool_objsize, pi->nr_buf_pool_objtotal }}, &netmap_mem_ext_ops, &error); if (nme == NULL) goto out_unmap; + nr_pages = nm_os_extmem_nr_pages(os); + /* from now on pages will be released by nme destructor; * we let res = 0 to prevent release in out_unmap below */ - res = 0; - nme->pages = pages; - nme->nr_pages = nr_pages; - nme->up.flags |= NETMAP_MEM_EXT; + nme->os = os; + os = NULL; /* pass ownership */ - clust = kmap(*pages); + clust = nm_os_extmem_nextpage(nme->os); off = 0; for (i = 0; i < NETMAP_POOLS_NR; i++) { struct netmap_obj_pool *p = &nme->up.pools[i]; @@ -2323,9 +2279,11 @@ netmap_mem_ext_create(struct nmreq *nmr, int *perror) for (j = 0; j < o->num && nr_pages > 0; j++) { size_t noff; - size_t skip; p->lut[j].vaddr = clust + off; +#if !defined(linux) && !defined(_WIN32) + p->lut[j].paddr = vtophys(p->lut[j].vaddr); +#endif ND("%s %d at %p", p->name, j, p->lut[j].vaddr); noff = off + p->_objsize; if (noff < PAGE_SIZE) { @@ -2333,15 +2291,16 @@ 
netmap_mem_ext_create(struct nmreq *nmr, int *perror) continue; } ND("too big, recomputing offset..."); - skip = PAGE_SIZE - (off & PAGE_MASK); while (noff >= PAGE_SIZE) { - noff -= skip; - pages++; + char *old_clust = clust; + noff -= PAGE_SIZE; + clust = nm_os_extmem_nextpage(nme->os); nr_pages--; ND("noff %zu page %p nr_pages %d", noff, page_to_virt(*pages), nr_pages); if (noff > 0 && !nm_isset(p->invalid_bitmap, j) && - (nr_pages == 0 || *pages != *(pages - 1) + 1)) + (nr_pages == 0 || + old_clust + PAGE_SIZE != clust)) { /* out of space or non contiguous, * drop this object @@ -2351,11 +2310,8 @@ netmap_mem_ext_create(struct nmreq *nmr, int *perror) } if (nr_pages == 0) break; - skip = PAGE_SIZE; } off = noff; - if (nr_pages > 0) - clust = kmap(*pages); } p->objtotal = j; p->numclusters = p->objtotal; @@ -2363,12 +2319,6 @@ netmap_mem_ext_create(struct nmreq *nmr, int *perror) ND("%d memtotal %u", j, p->memtotal); } - /* skip the first netmap_if, where the pools info reside */ - { - struct netmap_obj_pool *p = &nme->up.pools[NETMAP_IF_POOL]; - p->invalid_bitmap[0] |= 1U; - } - netmap_mem_ext_register(nme); return &nme->up; @@ -2376,10 +2326,8 @@ netmap_mem_ext_create(struct nmreq *nmr, int *perror) out_delete: netmap_mem_put(&nme->up); out_unmap: - for (i = 0; i < res; i++) - put_page(pages[i]); - if (res) - nm_os_free(pages); + if (os) + nm_os_extmem_delete(os); out: if (perror) *perror = error; @@ -2504,8 +2452,6 @@ netmap_mem_pt_guest_get_info(struct netmap_mem_d *nmd, uint64_t *size, { int error = 0; - NMA_LOCK(nmd); - error = nmd->ops->nmd_config(nmd); if (error) goto out; @@ -2518,7 +2464,6 @@ netmap_mem_pt_guest_get_info(struct netmap_mem_d *nmd, uint64_t *size, *id = nmd->nm_id; out: - NMA_UNLOCK(nmd); return error; } @@ -2556,21 +2501,19 @@ netmap_mem_pt_guest_finalize(struct netmap_mem_d *nmd) int i; int error = 0; - nmd->active++; - if (nmd->flags & NETMAP_MEM_FINALIZED) goto out; if (ptnmd->ptn_dev == NULL) { D("ptnetmap memdev not attached"); 
error = ENOMEM; - goto err; + goto out; } /* Map memory through ptnetmap-memdev BAR. */ error = nm_os_pt_memdev_iomap(ptnmd->ptn_dev, &ptnmd->nm_paddr, &ptnmd->nm_addr, &mem_size); if (error) - goto err; + goto out; /* Initialize the lut using the information contained in the * ptnetmap memory device. */ @@ -2605,11 +2548,16 @@ netmap_mem_pt_guest_finalize(struct netmap_mem_d *nmd) ptnmd->buf_lut.objsize = bufsize; nmd->nm_totalsize = (unsigned int)mem_size; + /* Initialize these fields as are needed by + * netmap_mem_bufsize(). + * XXX please improve this, why do we need this + * replication? maybe we nmd->pools[] should no be + * there for the guest allocator? */ + nmd->pools[NETMAP_BUF_POOL]._objsize = bufsize; + nmd->pools[NETMAP_BUF_POOL]._objtotal = nbuffers; + nmd->flags |= NETMAP_MEM_FINALIZED; out: - return 0; -err: - nmd->active--; return error; } @@ -2618,8 +2566,7 @@ netmap_mem_pt_guest_deref(struct netmap_mem_d *nmd) { struct netmap_mem_ptg *ptnmd = (struct netmap_mem_ptg *)nmd; - nmd->active--; - if (nmd->active <= 0 && + if (nmd->active == 1 && (nmd->flags & NETMAP_MEM_FINALIZED)) { nmd->flags &= ~NETMAP_MEM_FINALIZED; /* unmap ptnetmap-memdev memory */ @@ -2661,8 +2608,6 @@ netmap_mem_pt_guest_if_new(struct netmap_adapter *na, struct netmap_priv_d *priv struct mem_pt_if *ptif; struct netmap_if *nifp = NULL; - NMA_LOCK(na->nm_mem); - ptif = netmap_mem_pt_guest_ifp_lookup(na->nm_mem, na->ifp); if (ptif == NULL) { D("Error: interface %p is not in passthrough", na->ifp); @@ -2671,7 +2616,6 @@ netmap_mem_pt_guest_if_new(struct netmap_adapter *na, struct netmap_priv_d *priv nifp = (struct netmap_if *)((char *)(ptnmd->nm_addr) + ptif->nifp_offset); - NMA_UNLOCK(na->nm_mem); out: return nifp; } @@ -2681,12 +2625,10 @@ netmap_mem_pt_guest_if_delete(struct netmap_adapter *na, struct netmap_if *nifp) { struct mem_pt_if *ptif; - NMA_LOCK(na->nm_mem); ptif = netmap_mem_pt_guest_ifp_lookup(na->nm_mem, na->ifp); if (ptif == NULL) { D("Error: interface %p is not in 
passthrough", na->ifp); } - NMA_UNLOCK(na->nm_mem); } static int @@ -2697,8 +2639,6 @@ netmap_mem_pt_guest_rings_create(struct netmap_adapter *na) struct netmap_if *nifp; int i, error = -1; - NMA_LOCK(na->nm_mem); - ptif = netmap_mem_pt_guest_ifp_lookup(na->nm_mem, na->ifp); if (ptif == NULL) { D("Error: interface %p is not in passthrough", na->ifp); @@ -2709,14 +2649,14 @@ netmap_mem_pt_guest_rings_create(struct netmap_adapter *na) /* point each kring to the corresponding backend ring */ nifp = (struct netmap_if *)((char *)ptnmd->nm_addr + ptif->nifp_offset); for (i = 0; i <= na->num_tx_rings; i++) { - struct netmap_kring *kring = na->tx_rings + i; + struct netmap_kring *kring = na->tx_rings[i]; if (kring->ring) continue; kring->ring = (struct netmap_ring *) ((char *)nifp + nifp->ring_ofs[i]); } for (i = 0; i <= na->num_rx_rings; i++) { - struct netmap_kring *kring = na->rx_rings + i; + struct netmap_kring *kring = na->rx_rings[i]; if (kring->ring) continue; kring->ring = (struct netmap_ring *) @@ -2726,8 +2666,6 @@ netmap_mem_pt_guest_rings_create(struct netmap_adapter *na) error = 0; out: - NMA_UNLOCK(na->nm_mem); - return error; } diff --git a/sys/dev/netmap/netmap_mem2.h b/sys/dev/netmap/netmap_mem2.h index f0bee7a33fd53..977bf622862a0 100644 --- a/sys/dev/netmap/netmap_mem2.h +++ b/sys/dev/netmap/netmap_mem2.h @@ -137,12 +137,12 @@ void netmap_mem_if_delete(struct netmap_adapter *, struct netmap_if *); int netmap_mem_rings_create(struct netmap_adapter *); void netmap_mem_rings_delete(struct netmap_adapter *); int netmap_mem_deref(struct netmap_mem_d *, struct netmap_adapter *); -int netmap_mem2_get_pool_info(struct netmap_mem_d *, u_int, u_int *, u_int *); -int netmap_mem_get_info(struct netmap_mem_d *, uint64_t *size, u_int *memflags, uint16_t *id); +int netmap_mem2_get_pool_info(struct netmap_mem_d *, u_int, u_int *, u_int *); +int netmap_mem_get_info(struct netmap_mem_d *, uint64_t *size, + u_int *memflags, nm_memid_t *id); ssize_t 
netmap_mem_if_offset(struct netmap_mem_d *, const void *vaddr); struct netmap_mem_d* netmap_mem_private_new( u_int txr, u_int txd, u_int rxr, u_int rxd, u_int extra_bufs, u_int npipes, int* error); -void netmap_mem_delete(struct netmap_mem_d *); #define netmap_mem_get(d) __netmap_mem_get(d, __FUNCTION__, __LINE__) #define netmap_mem_put(d) __netmap_mem_put(d, __FUNCTION__, __LINE__) @@ -152,7 +152,7 @@ struct netmap_mem_d* netmap_mem_find(nm_memid_t); unsigned netmap_mem_bufsize(struct netmap_mem_d *nmd); #ifdef WITH_EXTMEM -struct netmap_mem_d* netmap_mem_ext_create(struct nmreq *, int *); +struct netmap_mem_d* netmap_mem_ext_create(uint64_t, struct nmreq_pools_info *, int *); #else /* !WITH_EXTMEM */ #define netmap_mem_ext_create(nmr, _perr) \ ({ int *perr = _perr; if (perr) *(perr) = EOPNOTSUPP; NULL; }) @@ -167,7 +167,8 @@ struct netmap_mem_d* netmap_mem_pt_guest_attach(struct ptnetmap_memdev *, uint16 int netmap_mem_pt_guest_ifp_del(struct netmap_mem_d *, struct ifnet *); #endif /* WITH_PTNETMAP_GUEST */ -int netmap_mem_pools_info_get(struct nmreq *, struct netmap_mem_d *); +int netmap_mem_pools_info_get(struct nmreq_pools_info *, + struct netmap_mem_d *); #define NETMAP_MEM_PRIVATE 0x2 /* allocator uses private address space */ #define NETMAP_MEM_IO 0x4 /* the underlying memory is mmapped I/O */ @@ -175,4 +176,14 @@ int netmap_mem_pools_info_get(struct nmreq *, struct netmap_mem_d *); uint32_t netmap_extra_alloc(struct netmap_adapter *, uint32_t *, uint32_t n); +#ifdef WITH_EXTMEM +#include +struct nm_os_extmem; /* opaque */ +struct nm_os_extmem *nm_os_extmem_create(unsigned long, struct nmreq_pools_info *, int *perror); +char *nm_os_extmem_nextpage(struct nm_os_extmem *); +int nm_os_extmem_nr_pages(struct nm_os_extmem *); +int nm_os_extmem_isequal(struct nm_os_extmem *, struct nm_os_extmem *); +void nm_os_extmem_delete(struct nm_os_extmem *); +#endif /* WITH_EXTMEM */ + #endif diff --git a/sys/dev/netmap/netmap_monitor.c b/sys/dev/netmap/netmap_monitor.c 
index e7cc05f5ab0f0..f6b7c93adc768 100644 --- a/sys/dev/netmap/netmap_monitor.c +++ b/sys/dev/netmap/netmap_monitor.c @@ -167,8 +167,8 @@ netmap_monitor_krings_create(struct netmap_adapter *na) if (error) return error; /* override the host rings callbacks */ - na->tx_rings[na->num_tx_rings].nm_sync = netmap_monitor_txsync; - na->rx_rings[na->num_rx_rings].nm_sync = netmap_monitor_rxsync; + na->tx_rings[na->num_tx_rings]->nm_sync = netmap_monitor_txsync; + na->rx_rings[na->num_rx_rings]->nm_sync = netmap_monitor_rxsync; return 0; } @@ -390,7 +390,7 @@ netmap_monitor_stop(struct netmap_adapter *na) u_int i; for (i = 0; i < nma_get_nrings(na, t) + 1; i++) { - struct netmap_kring *kring = &NMR(na, t)[i]; + struct netmap_kring *kring = NMR(na, t)[i]; struct netmap_kring *zkring; u_int j; @@ -456,7 +456,7 @@ netmap_monitor_reg_common(struct netmap_adapter *na, int onoff, int zmon) } for_rx_tx(t) { for (i = 0; i < nma_get_nrings(na, t) + 1; i++) { - mkring = &NMR(na, t)[i]; + mkring = NMR(na, t)[i]; if (!nm_kring_pending_on(mkring)) continue; mkring->nr_mode = NKR_NETMAP_ON; @@ -466,7 +466,7 @@ netmap_monitor_reg_common(struct netmap_adapter *na, int onoff, int zmon) if (i > nma_get_nrings(pna, s)) continue; if (mna->flags & nm_txrx2flag(s)) { - kring = &NMR(pna, s)[i]; + kring = NMR(pna, s)[i]; netmap_monitor_add(mkring, kring, zmon); } } @@ -478,7 +478,7 @@ netmap_monitor_reg_common(struct netmap_adapter *na, int onoff, int zmon) na->na_flags &= ~NAF_NETMAP_ON; for_rx_tx(t) { for (i = 0; i < nma_get_nrings(na, t) + 1; i++) { - mkring = &NMR(na, t)[i]; + mkring = NMR(na, t)[i]; if (!nm_kring_pending_off(mkring)) continue; mkring->nr_mode = NKR_NETMAP_OFF; @@ -494,7 +494,7 @@ netmap_monitor_reg_common(struct netmap_adapter *na, int onoff, int zmon) if (i > nma_get_nrings(pna, s)) continue; if (mna->flags & nm_txrx2flag(s)) { - kring = &NMR(pna, s)[i]; + kring = NMR(pna, s)[i]; netmap_monitor_del(mkring, kring); } } @@ -824,38 +824,41 @@ netmap_monitor_dtor(struct 
netmap_adapter *na) } -/* check if nmr is a request for a monitor adapter that we can satisfy */ +/* check if req is a request for a monitor adapter that we can satisfy */ int -netmap_get_monitor_na(struct nmreq *nmr, struct netmap_adapter **na, - struct netmap_mem_d *nmd, int create) +netmap_get_monitor_na(struct nmreq_header *hdr, struct netmap_adapter **na, + struct netmap_mem_d *nmd, int create) { - struct nmreq pnmr; + struct nmreq_register *req = (struct nmreq_register *)hdr->nr_body; + struct nmreq_register preq; struct netmap_adapter *pna; /* parent adapter */ struct netmap_monitor_adapter *mna; struct ifnet *ifp = NULL; int error; - int zcopy = (nmr->nr_flags & NR_ZCOPY_MON); + int zcopy = (req->nr_flags & NR_ZCOPY_MON); char monsuff[10] = ""; if (zcopy) { - nmr->nr_flags |= (NR_MONITOR_TX | NR_MONITOR_RX); + req->nr_flags |= (NR_MONITOR_TX | NR_MONITOR_RX); } - if ((nmr->nr_flags & (NR_MONITOR_TX | NR_MONITOR_RX)) == 0) { + if ((req->nr_flags & (NR_MONITOR_TX | NR_MONITOR_RX)) == 0) { ND("not a monitor"); return 0; } /* this is a request for a monitor adapter */ - ND("flags %x", nmr->nr_flags); + ND("flags %lx", req->nr_flags); - /* first, try to find the adapter that we want to monitor - * We use the same nmr, after we have turned off the monitor flags. + /* First, try to find the adapter that we want to monitor. + * We use the same req, after we have turned off the monitor flags. * In this way we can potentially monitor everything netmap understands, * except other monitors. 
*/ - memcpy(&pnmr, nmr, sizeof(pnmr)); - pnmr.nr_flags &= ~(NR_MONITOR_TX | NR_MONITOR_RX | NR_ZCOPY_MON); - error = netmap_get_na(&pnmr, &pna, &ifp, nmd, create); + memcpy(&preq, req, sizeof(preq)); + preq.nr_flags &= ~(NR_MONITOR_TX | NR_MONITOR_RX | NR_ZCOPY_MON); + hdr->nr_body = (uint64_t)&preq; + error = netmap_get_na(hdr, &pna, &ifp, nmd, create); + hdr->nr_body = (uint64_t)req; if (error) { D("parent lookup failed: %d", error); return error; @@ -881,7 +884,8 @@ netmap_get_monitor_na(struct nmreq *nmr, struct netmap_adapter **na, mna->priv.np_na = pna; /* grab all the rings we need in the parent */ - error = netmap_interp_ringid(&mna->priv, nmr->nr_ringid, nmr->nr_flags); + error = netmap_interp_ringid(&mna->priv, req->nr_mode, req->nr_ringid, + req->nr_flags); if (error) { D("ringid error"); goto free_out; @@ -892,8 +896,8 @@ netmap_get_monitor_na(struct nmreq *nmr, struct netmap_adapter **na, snprintf(mna->up.name, sizeof(mna->up.name), "%s%s/%s%s%s", pna->name, monsuff, zcopy ? "z" : "", - (nmr->nr_flags & NR_MONITOR_RX) ? "r" : "", - (nmr->nr_flags & NR_MONITOR_TX) ? "t" : ""); + (req->nr_flags & NR_MONITOR_RX) ? "r" : "", + (req->nr_flags & NR_MONITOR_TX) ? 
"t" : ""); /* the monitor supports the host rings iff the parent does */ mna->up.na_flags |= (pna->na_flags & NAF_HOST_RINGS); @@ -913,10 +917,10 @@ netmap_get_monitor_na(struct nmreq *nmr, struct netmap_adapter **na, * the parent rings, but the user may ask for a different * number */ - mna->up.num_tx_desc = nmr->nr_tx_slots; + mna->up.num_tx_desc = req->nr_tx_slots; nm_bound_var(&mna->up.num_tx_desc, pna->num_tx_desc, 1, NM_MONITOR_MAXSLOTS, NULL); - mna->up.num_rx_desc = nmr->nr_rx_slots; + mna->up.num_rx_desc = req->nr_rx_slots; nm_bound_var(&mna->up.num_rx_desc, pna->num_rx_desc, 1, NM_MONITOR_MAXSLOTS, NULL); if (zcopy) { @@ -950,7 +954,7 @@ netmap_get_monitor_na(struct nmreq *nmr, struct netmap_adapter **na, } /* remember the traffic directions we have to monitor */ - mna->flags = (nmr->nr_flags & (NR_MONITOR_TX | NR_MONITOR_RX | NR_ZCOPY_MON)); + mna->flags = (req->nr_flags & (NR_MONITOR_TX | NR_MONITOR_RX | NR_ZCOPY_MON)); *na = &mna->up; netmap_adapter_get(*na); diff --git a/sys/dev/netmap/netmap_pipe.c b/sys/dev/netmap/netmap_pipe.c index 48dde5382f77a..3b0fb869231b2 100644 --- a/sys/dev/netmap/netmap_pipe.c +++ b/sys/dev/netmap/netmap_pipe.c @@ -77,6 +77,7 @@ #ifdef WITH_PIPES #define NM_PIPE_MAXSLOTS 4096 +#define NM_PIPE_MAXRINGS 256 static int netmap_default_pipes = 0; /* ignored, kept for compatibility */ SYSBEGIN(vars_pipes); @@ -129,14 +130,19 @@ netmap_pipe_dealloc(struct netmap_adapter *na) /* find a pipe endpoint with the given id among the parent's pipes */ static struct netmap_pipe_adapter * -netmap_pipe_find(struct netmap_adapter *parent, u_int pipe_id) +netmap_pipe_find(struct netmap_adapter *parent, const char *pipe_id) { int i; struct netmap_pipe_adapter *na; for (i = 0; i < parent->na_next_pipe; i++) { + const char *na_pipe_id; na = parent->na_pipes[i]; - if (na->id == pipe_id) { + na_pipe_id = strrchr(na->up.name, + na->role == NM_PIPE_ROLE_MASTER ? 
'{' : '}'); + KASSERT(na_pipe_id != NULL, ("Invalid pipe name")); + ++na_pipe_id; + if (!strcmp(na_pipe_id, pipe_id)) { return na; } } @@ -179,63 +185,46 @@ int netmap_pipe_txsync(struct netmap_kring *txkring, int flags) { struct netmap_kring *rxkring = txkring->pipe; - u_int limit; /* slots to transfer */ - u_int j, k, lim_tx = txkring->nkr_num_slots - 1, - lim_rx = rxkring->nkr_num_slots - 1; - int m, busy; + u_int k, lim = txkring->nkr_num_slots - 1; + int m; /* slots to transfer */ struct netmap_ring *txring = txkring->ring, *rxring = rxkring->ring; ND("%p: %s %x -> %s", txkring, txkring->name, flags, rxkring->name); - ND(2, "before: hwcur %d hwtail %d cur %d head %d tail %d", txkring->nr_hwcur, txkring->nr_hwtail, + ND(20, "TX before: hwcur %d hwtail %d cur %d head %d tail %d", + txkring->nr_hwcur, txkring->nr_hwtail, txkring->rcur, txkring->rhead, txkring->rtail); - j = rxkring->nr_hwtail; /* RX */ - k = txkring->nr_hwcur; /* TX */ m = txkring->rhead - txkring->nr_hwcur; /* new slots */ if (m < 0) m += txkring->nkr_num_slots; - limit = m; - m = lim_rx; /* max avail space on destination */ - busy = j - rxkring->nr_hwcur; /* busy slots */ - if (busy < 0) - busy += rxkring->nkr_num_slots; - m -= busy; /* subtract busy slots */ - ND(2, "m %d limit %d", m, limit); - if (m < limit) - limit = m; - - if (limit == 0) { - /* either the rxring is full, or nothing to send */ + + if (m == 0) { + /* nothing to send */ return 0; } - while (limit-- > 0) { - struct netmap_slot *rs = &rxring->slot[j]; + for (k = txkring->nr_hwcur; m; m--, k = nm_next(k, lim)) { + struct netmap_slot *rs = &rxring->slot[k]; struct netmap_slot *ts = &txring->slot[k]; - struct netmap_slot tmp; - - __builtin_prefetch(ts + 1); - /* swap the slots and report the buffer change */ - tmp = *rs; - tmp.flags |= NS_BUF_CHANGED; - *rs = *ts; - rs->flags |= NS_BUF_CHANGED; - *ts = tmp; + rs->len = ts->len; + rs->ptr = ts->ptr; - j = nm_next(j, lim_rx); - k = nm_next(k, lim_tx); + if (ts->flags & 
NS_BUF_CHANGED) { + rs->buf_idx = ts->buf_idx; + rs->flags |= NS_BUF_CHANGED; + ts->flags &= ~NS_BUF_CHANGED; + } } mb(); /* make sure the slots are updated before publishing them */ - rxkring->nr_hwtail = j; + rxkring->nr_hwtail = k; txkring->nr_hwcur = k; - txkring->nr_hwtail = nm_prev(k, lim_tx); - ND(2, "after: hwcur %d hwtail %d cur %d head %d tail %d j %d", txkring->nr_hwcur, txkring->nr_hwtail, - txkring->rcur, txkring->rhead, txkring->rtail, j); + ND(20, "TX after : hwcur %d hwtail %d cur %d head %d tail %d k %d", + txkring->nr_hwcur, txkring->nr_hwtail, + txkring->rcur, txkring->rhead, txkring->rtail, k); - mb(); /* make sure rxkring->nr_hwtail is updated before notifying */ rxkring->nm_notify(rxkring, 0); return 0; @@ -245,20 +234,46 @@ int netmap_pipe_rxsync(struct netmap_kring *rxkring, int flags) { struct netmap_kring *txkring = rxkring->pipe; - uint32_t oldhwcur = rxkring->nr_hwcur; + u_int k, lim = rxkring->nkr_num_slots - 1; + int m; /* slots to release */ + struct netmap_ring *txring = txkring->ring, *rxring = rxkring->ring; - ND("%s %x <- %s", rxkring->name, flags, txkring->name); - rxkring->nr_hwcur = rxkring->rhead; /* recover user-relased slots */ - ND(5, "hwcur %d hwtail %d cur %d head %d tail %d", rxkring->nr_hwcur, rxkring->nr_hwtail, + ND("%p: %s %x -> %s", txkring, txkring->name, flags, rxkring->name); + ND(20, "RX before: hwcur %d hwtail %d cur %d head %d tail %d", + rxkring->nr_hwcur, rxkring->nr_hwtail, rxkring->rcur, rxkring->rhead, rxkring->rtail); - mb(); /* paired with the first mb() in txsync */ - if (oldhwcur != rxkring->nr_hwcur) { - /* we have released some slots, notify the other end */ - mb(); /* make sure nr_hwcur is updated before notifying */ - txkring->nm_notify(txkring, 0); + m = rxkring->rhead - rxkring->nr_hwcur; /* released slots */ + if (m < 0) + m += rxkring->nkr_num_slots; + + if (m == 0) { + /* nothing to release */ + return 0; } - return 0; + + for (k = rxkring->nr_hwcur; m; m--, k = nm_next(k, lim)) { + struct 
netmap_slot *rs = &rxring->slot[k]; + struct netmap_slot *ts = &txring->slot[k]; + + if (rs->flags & NS_BUF_CHANGED) { + /* copy the slot and report the buffer change */ + *ts = *rs; + rs->flags &= ~NS_BUF_CHANGED; + } + } + + mb(); /* make sure the slots are updated before publishing them */ + txkring->nr_hwtail = nm_prev(k, lim); + rxkring->nr_hwcur = k; + + ND(20, "RX after : hwcur %d hwtail %d cur %d head %d tail %d k %d", + rxkring->nr_hwcur, rxkring->nr_hwtail, + rxkring->rcur, rxkring->rhead, rxkring->rtail, k); + + txkring->nm_notify(txkring, 0); + + return 0; } /* Pipe endpoints are created and destroyed together, so that endopoints do not @@ -335,8 +350,10 @@ netmap_pipe_krings_create(struct netmap_adapter *na) for_rx_tx(t) { enum txrx r = nm_txrx_swap(t); /* swap NR_TX <-> NR_RX */ for (i = 0; i < nma_get_nrings(na, t); i++) { - NMR(na, t)[i].pipe = NMR(ona, r) + i; - NMR(ona, r)[i].pipe = NMR(na, t) + i; + NMR(na, t)[i]->pipe = NMR(ona, r)[i]; + NMR(ona, r)[i]->pipe = NMR(na, t)[i]; + /* mark all peer-adapter rings as fake */ + NMR(ona, r)[i]->nr_kflags |= NKR_FAKERING; } } @@ -380,7 +397,7 @@ err: * usr1 --> e1 e2 <-- usr2 * * and we are either e1 or e2. Add a ref from the - * other end and hide our rings. + * other end. */ static int netmap_pipe_reg(struct netmap_adapter *na, int onoff) @@ -395,7 +412,7 @@ netmap_pipe_reg(struct netmap_adapter *na, int onoff) if (onoff) { for_rx_tx(t) { for (i = 0; i < nma_get_nrings(na, t); i++) { - struct netmap_kring *kring = &NMR(na, t)[i]; + struct netmap_kring *kring = NMR(na, t)[i]; if (nm_kring_pending_on(kring)) { /* mark the peer ring as needed */ @@ -404,7 +421,10 @@ netmap_pipe_reg(struct netmap_adapter *na, int onoff) } } - /* create all missing needed rings on the other end */ + /* create all missing needed rings on the other end. + * Either our end, or the other, has been marked as + * fake, so the allocation will not be done twice. 
+ */ error = netmap_mem_rings_create(ona); if (error) return error; @@ -412,9 +432,32 @@ netmap_pipe_reg(struct netmap_adapter *na, int onoff) /* In case of no error we put our rings in netmap mode */ for_rx_tx(t) { for (i = 0; i < nma_get_nrings(na, t) + 1; i++) { - struct netmap_kring *kring = &NMR(na, t)[i]; - + struct netmap_kring *kring = NMR(na, t)[i]; if (nm_kring_pending_on(kring)) { + struct netmap_kring *sring, *dring; + + /* copy the buffers from the non-fake ring */ + if (kring->nr_kflags & NKR_FAKERING) { + sring = kring->pipe; + dring = kring; + } else { + sring = kring; + dring = kring->pipe; + } + memcpy(dring->ring->slot, + sring->ring->slot, + sizeof(struct netmap_slot) * + sring->nkr_num_slots); + /* mark both rings as fake and needed, + * so that buffers will not be + * deleted by the standard machinery + * (we will delete them by ourselves in + * netmap_pipe_krings_delete) + */ + sring->nr_kflags |= + (NKR_FAKERING | NKR_NEEDRING); + dring->nr_kflags |= + (NKR_FAKERING | NKR_NEEDRING); kring->nr_mode = NKR_NETMAP_ON; } } @@ -426,21 +469,13 @@ netmap_pipe_reg(struct netmap_adapter *na, int onoff) na->na_flags &= ~NAF_NETMAP_ON; for_rx_tx(t) { for (i = 0; i < nma_get_nrings(na, t) + 1; i++) { - struct netmap_kring *kring = &NMR(na, t)[i]; + struct netmap_kring *kring = NMR(na, t)[i]; if (nm_kring_pending_off(kring)) { kring->nr_mode = NKR_NETMAP_OFF; - /* mark the peer ring as no longer needed by us - * (it may still be kept if sombody else is using it) - */ - if (kring->pipe) { - kring->pipe->nr_kflags &= ~NKR_NEEDRING; - } } } } - /* delete all the peer rings that are no longer needed */ - netmap_mem_rings_delete(ona); } if (na->active_fds) { @@ -482,29 +517,73 @@ netmap_pipe_reg(struct netmap_adapter *na, int onoff) * and we are either e1 or e2. * * In the former case we have to also delete the krings of e2; - * in the latter case we do nothing (note that our krings - * have already been hidden in the unregister callback). 
+ * in the latter case we do nothing. */ static void netmap_pipe_krings_delete(struct netmap_adapter *na) { struct netmap_pipe_adapter *pna = (struct netmap_pipe_adapter *)na; - struct netmap_adapter *ona; /* na of the other end */ + struct netmap_adapter *sna, *ona; /* na of the other end */ + enum txrx t; + int i; if (!pna->peer_ref) { ND("%p: case 2, kept alive by peer", na); return; } + ona = &pna->peer->up; /* case 1) above */ ND("%p: case 1, deleting everything", na); + /* To avoid double-frees we zero-out all the buffers in the kernel part + * of each ring. The reason is this: If the user is behaving correctly, + * all buffers are found in exactly one slot in the userspace part of + * some ring. If the user is not behaving correctly, we cannot release + * buffers cleanly anyway. In the latter case, the allocator will + * return to a clean state only when all its users will close. + */ + sna = na; +cleanup: + for_rx_tx(t) { + for (i = 0; i < nma_get_nrings(sna, t) + 1; i++) { + struct netmap_kring *kring = NMR(sna, t)[i]; + struct netmap_ring *ring = kring->ring; + uint32_t j, lim = kring->nkr_num_slots - 1; + + ND("%s ring %p hwtail %u hwcur %u", + kring->name, ring, kring->nr_hwtail, kring->nr_hwcur); + + if (ring == NULL) + continue; + + if (kring->nr_hwtail == kring->nr_hwcur) + ring->slot[kring->nr_hwtail].buf_idx = 0; + + for (j = nm_next(kring->nr_hwtail, lim); + j != kring->nr_hwcur; + j = nm_next(j, lim)) + { + ND("%s[%d] %u", kring->name, j, ring->slot[j].buf_idx); + ring->slot[j].buf_idx = 0; + } + kring->nr_kflags &= ~(NKR_FAKERING | NKR_NEEDRING); + } + + } + if (sna != ona && ona->tx_rings) { + sna = ona; + goto cleanup; + } + + netmap_mem_rings_delete(na); netmap_krings_delete(na); /* also zeroes tx_rings etc. 
*/ - ona = &pna->peer->up; + if (ona->tx_rings == NULL) { /* already deleted, we must be on an * cleanup-after-error path */ return; } + netmap_mem_rings_delete(ona); netmap_krings_delete(ona); } @@ -520,7 +599,7 @@ netmap_pipe_dtor(struct netmap_adapter *na) pna->peer_ref = 0; netmap_adapter_put(&pna->peer->up); } - if (pna->role == NR_REG_PIPE_MASTER) + if (pna->role == NM_PIPE_ROLE_MASTER) netmap_pipe_remove(pna->parent, pna); if (pna->parent_ifp) if_rele(pna->parent_ifp); @@ -529,34 +608,55 @@ netmap_pipe_dtor(struct netmap_adapter *na) } int -netmap_get_pipe_na(struct nmreq *nmr, struct netmap_adapter **na, +netmap_get_pipe_na(struct nmreq_header *hdr, struct netmap_adapter **na, struct netmap_mem_d *nmd, int create) { - struct nmreq pnmr; + struct nmreq_register *req = (struct nmreq_register *)hdr->nr_body; struct netmap_adapter *pna; /* parent adapter */ - struct netmap_pipe_adapter *mna, *sna, *req; + struct netmap_pipe_adapter *mna, *sna, *reqna; struct ifnet *ifp = NULL; - u_int pipe_id; - int role = nmr->nr_flags & NR_REG_MASK; + const char *pipe_id = NULL; + int role = 0; int error, retries = 0; + char *cbra; - ND("flags %x", nmr->nr_flags); + /* Try to parse the pipe syntax 'xx{yy' or 'xx}yy'. */ + cbra = strrchr(hdr->nr_name, '{'); + if (cbra != NULL) { + role = NM_PIPE_ROLE_MASTER; + } else { + cbra = strrchr(hdr->nr_name, '}'); + if (cbra != NULL) { + role = NM_PIPE_ROLE_SLAVE; + } else { + ND("not a pipe"); + return 0; + } + } + pipe_id = cbra + 1; + if (*pipe_id == '\0' || cbra == hdr->nr_name) { + /* Bracket is the last character, so pipe name is missing; + * or bracket is the first character, so base port name + * is missing. */ + return EINVAL; + } - if (role != NR_REG_PIPE_MASTER && role != NR_REG_PIPE_SLAVE) { - ND("not a pipe"); - return 0; + if (req->nr_mode != NR_REG_ALL_NIC && req->nr_mode != NR_REG_ONE_NIC) { + /* We only accept modes involving hardware rings. 
*/ + return EINVAL; } - role = nmr->nr_flags & NR_REG_MASK; /* first, try to find the parent adapter */ - bzero(&pnmr, sizeof(pnmr)); - memcpy(&pnmr.nr_name, nmr->nr_name, IFNAMSIZ); - /* pass to parent the requested number of pipes */ - pnmr.nr_arg1 = nmr->nr_arg1; for (;;) { + char nr_name_orig[NETMAP_REQ_IFNAMSIZ]; int create_error; - error = netmap_get_na(&pnmr, &pna, &ifp, nmd, create); + /* Temporarily remove the pipe suffix. */ + strncpy(nr_name_orig, hdr->nr_name, sizeof(nr_name_orig)); + *cbra = '\0'; + error = netmap_get_na(hdr, &pna, &ifp, nmd, create); + /* Restore the pipe suffix. */ + strncpy(hdr->nr_name, nr_name_orig, sizeof(hdr->nr_name)); if (!error) break; if (error != ENXIO || retries++) { @@ -565,9 +665,11 @@ netmap_get_pipe_na(struct nmreq *nmr, struct netmap_adapter **na, } ND("try to create a persistent vale port"); /* create a persistent vale port and try again */ + *cbra = '\0'; NMG_UNLOCK(); - create_error = netmap_vi_create(&pnmr, 1 /* autodelete */); + create_error = netmap_vi_create(hdr, 1 /* autodelete */); NMG_LOCK(); + strncpy(hdr->nr_name, nr_name_orig, sizeof(hdr->nr_name)); if (create_error && create_error != EEXIST) { if (create_error != EOPNOTSUPP) { D("failed to create a persistent vale port: %d", create_error); @@ -583,16 +685,15 @@ netmap_get_pipe_na(struct nmreq *nmr, struct netmap_adapter **na, } /* next, lookup the pipe id in the parent list */ - req = NULL; - pipe_id = nmr->nr_ringid & NETMAP_RING_MASK; + reqna = NULL; mna = netmap_pipe_find(pna, pipe_id); if (mna) { if (mna->role == role) { - ND("found %d directly at %d", pipe_id, mna->parent_slot); - req = mna; + ND("found %s directly at %d", pipe_id, mna->parent_slot); + reqna = mna; } else { - ND("found %d indirectly at %d", pipe_id, mna->parent_slot); - req = mna->peer; + ND("found %s indirectly at %d", pipe_id, mna->parent_slot); + reqna = mna->peer; } /* the pipe we have found already holds a ref to the parent, * so we need to drop the one we got from 
netmap_get_na() @@ -600,7 +701,7 @@ netmap_get_pipe_na(struct nmreq *nmr, struct netmap_adapter **na, netmap_unget_na(pna, ifp); goto found; } - ND("pipe %d not found, create %d", pipe_id, create); + ND("pipe %s not found, create %d", pipe_id, create); if (!create) { error = ENODEV; goto put_out; @@ -614,10 +715,9 @@ netmap_get_pipe_na(struct nmreq *nmr, struct netmap_adapter **na, error = ENOMEM; goto put_out; } - snprintf(mna->up.name, sizeof(mna->up.name), "%s{%d", pna->name, pipe_id); + snprintf(mna->up.name, sizeof(mna->up.name), "%s{%s", pna->name, pipe_id); - mna->id = pipe_id; - mna->role = NR_REG_PIPE_MASTER; + mna->role = NM_PIPE_ROLE_MASTER; mna->parent = pna; mna->parent_ifp = ifp; @@ -631,12 +731,16 @@ netmap_get_pipe_na(struct nmreq *nmr, struct netmap_adapter **na, mna->up.na_flags |= NAF_MEM_OWNER; mna->up.na_lut = pna->na_lut; - mna->up.num_tx_rings = 1; - mna->up.num_rx_rings = 1; - mna->up.num_tx_desc = nmr->nr_tx_slots; + mna->up.num_tx_rings = req->nr_tx_rings; + nm_bound_var(&mna->up.num_tx_rings, 1, + 1, NM_PIPE_MAXRINGS, NULL); + mna->up.num_rx_rings = req->nr_rx_rings; + nm_bound_var(&mna->up.num_rx_rings, 1, + 1, NM_PIPE_MAXRINGS, NULL); + mna->up.num_tx_desc = req->nr_tx_slots; nm_bound_var(&mna->up.num_tx_desc, pna->num_tx_desc, 1, NM_PIPE_MAXSLOTS, NULL); - mna->up.num_rx_desc = nmr->nr_rx_slots; + mna->up.num_rx_desc = req->nr_rx_slots; nm_bound_var(&mna->up.num_rx_desc, pna->num_rx_desc, 1, NM_PIPE_MAXSLOTS, NULL); error = netmap_attach_common(&mna->up); @@ -656,8 +760,11 @@ netmap_get_pipe_na(struct nmreq *nmr, struct netmap_adapter **na, /* most fields are the same, copy from master and then fix */ *sna = *mna; sna->up.nm_mem = netmap_mem_get(mna->up.nm_mem); - snprintf(sna->up.name, sizeof(sna->up.name), "%s}%d", pna->name, pipe_id); - sna->role = NR_REG_PIPE_SLAVE; + /* swap the number of tx/rx rings */ + sna->up.num_tx_rings = mna->up.num_rx_rings; + sna->up.num_rx_rings = mna->up.num_tx_rings; + snprintf(sna->up.name, 
sizeof(sna->up.name), "%s}%s", pna->name, pipe_id); + sna->role = NM_PIPE_ROLE_SLAVE; error = netmap_attach_common(&sna->up); if (error) goto free_sna; @@ -674,21 +781,21 @@ netmap_get_pipe_na(struct nmreq *nmr, struct netmap_adapter **na, if (ifp) if_ref(ifp); - if (role == NR_REG_PIPE_MASTER) { - req = mna; + if (role == NM_PIPE_ROLE_MASTER) { + reqna = mna; mna->peer_ref = 1; netmap_adapter_get(&sna->up); } else { - req = sna; + reqna = sna; sna->peer_ref = 1; netmap_adapter_get(&mna->up); } ND("created master %p and slave %p", mna, sna); found: - ND("pipe %d %s at %p", pipe_id, - (req->role == NR_REG_PIPE_MASTER ? "master" : "slave"), req); - *na = &req->up; + ND("pipe %s %s at %p", pipe_id, + (reqna->role == NM_PIPE_ROLE_MASTER ? "master" : "slave"), reqna); + *na = &reqna->up; netmap_adapter_get(*na); /* keep the reference to the parent. diff --git a/sys/dev/netmap/netmap_pt.c b/sys/dev/netmap/netmap_pt.c index edb49dc504acd..cfa32b0bcf5f4 100644 --- a/sys/dev/netmap/netmap_pt.c +++ b/sys/dev/netmap/netmap_pt.c @@ -639,9 +639,9 @@ static struct netmap_kring * ptnetmap_kring(struct netmap_pt_host_adapter *pth_na, int k) { if (k < pth_na->up.num_tx_rings) { - return pth_na->up.tx_rings + k; + return pth_na->up.tx_rings[k]; } - return pth_na->up.rx_rings + k - pth_na->up.num_tx_rings; + return pth_na->up.rx_rings[k - pth_na->up.num_tx_rings]; } static int @@ -676,8 +676,19 @@ ptnetmap_create_kctxs(struct netmap_pt_host_adapter *pth_na, struct nm_kctx_cfg nmk_cfg; unsigned int num_rings; uint8_t *cfg_entries = (uint8_t *)(cfg + 1); + unsigned int expected_cfgtype = 0; int k; +#if defined(__FreeBSD__) + expected_cfgtype = PTNETMAP_CFGTYPE_BHYVE; +#elif defined(linux) + expected_cfgtype = PTNETMAP_CFGTYPE_QEMU; +#endif + if (cfg->cfgtype != expected_cfgtype) { + D("Unsupported cfgtype %u", cfg->cfgtype); + return EINVAL; + } + num_rings = pth_na->up.num_tx_rings + pth_na->up.num_rx_rings; @@ -695,7 +706,7 @@ ptnetmap_create_kctxs(struct netmap_pt_host_adapter 
*pth_na, } ptns->kctxs[k] = nm_os_kctx_create(&nmk_cfg, - cfg->cfgtype, cfg_entries + k * cfg->entry_size); + cfg_entries + k * cfg->entry_size); if (ptns->kctxs[k] == NULL) { goto err; } @@ -761,34 +772,6 @@ ptnetmap_stop_kctx_workers(struct netmap_pt_host_adapter *pth_na) } } -static struct ptnetmap_cfg * -ptnetmap_read_cfg(struct nmreq *nmr) -{ - uintptr_t *nmr_ptncfg = (uintptr_t *)&nmr->nr_arg1; - struct ptnetmap_cfg *cfg; - struct ptnetmap_cfg tmp; - size_t cfglen; - - if (copyin((const void *)*nmr_ptncfg, &tmp, sizeof(tmp))) { - D("Partial copyin() failed"); - return NULL; - } - - cfglen = sizeof(tmp) + tmp.num_rings * tmp.entry_size; - cfg = nm_os_malloc(cfglen); - if (!cfg) { - return NULL; - } - - if (copyin((const void *)*nmr_ptncfg, cfg, cfglen)) { - D("Full copyin() failed"); - nm_os_free(cfg); - return NULL; - } - - return cfg; -} - static int nm_unused_notify(struct netmap_kring *, int); static int nm_pt_host_notify(struct netmap_kring *, int); @@ -864,14 +847,14 @@ ptnetmap_create(struct netmap_pt_host_adapter *pth_na, } for (i = 0; i < pth_na->parent->num_rx_rings; i++) { - pth_na->up.rx_rings[i].save_notify = - pth_na->up.rx_rings[i].nm_notify; - pth_na->up.rx_rings[i].nm_notify = nm_pt_host_notify; + pth_na->up.rx_rings[i]->save_notify = + pth_na->up.rx_rings[i]->nm_notify; + pth_na->up.rx_rings[i]->nm_notify = nm_pt_host_notify; } for (i = 0; i < pth_na->parent->num_tx_rings; i++) { - pth_na->up.tx_rings[i].save_notify = - pth_na->up.tx_rings[i].nm_notify; - pth_na->up.tx_rings[i].nm_notify = nm_pt_host_notify; + pth_na->up.tx_rings[i]->save_notify = + pth_na->up.tx_rings[i]->nm_notify; + pth_na->up.tx_rings[i]->nm_notify = nm_pt_host_notify; } #ifdef RATE @@ -912,14 +895,14 @@ ptnetmap_delete(struct netmap_pt_host_adapter *pth_na) pth_na->parent->na_flags = pth_na->parent_na_flags; for (i = 0; i < pth_na->parent->num_rx_rings; i++) { - pth_na->up.rx_rings[i].nm_notify = - pth_na->up.rx_rings[i].save_notify; - pth_na->up.rx_rings[i].save_notify 
= NULL; + pth_na->up.rx_rings[i]->nm_notify = + pth_na->up.rx_rings[i]->save_notify; + pth_na->up.rx_rings[i]->save_notify = NULL; } for (i = 0; i < pth_na->parent->num_tx_rings; i++) { - pth_na->up.tx_rings[i].nm_notify = - pth_na->up.tx_rings[i].save_notify; - pth_na->up.tx_rings[i].save_notify = NULL; + pth_na->up.tx_rings[i]->nm_notify = + pth_na->up.tx_rings[i]->save_notify; + pth_na->up.tx_rings[i]->save_notify = NULL; } /* Destroy kernel contexts. */ @@ -941,66 +924,55 @@ ptnetmap_delete(struct netmap_pt_host_adapter *pth_na) /* * Called by netmap_ioctl(). - * Operation is indicated in nmr->nr_cmd. + * Operation is indicated in nr_name. * * Called without NMG_LOCK. */ int -ptnetmap_ctl(struct nmreq *nmr, struct netmap_adapter *na) +ptnetmap_ctl(const char *nr_name, int create, struct netmap_adapter *na) { - struct netmap_pt_host_adapter *pth_na; - struct ptnetmap_cfg *cfg; - char *name; - int cmd, error = 0; - - name = nmr->nr_name; - cmd = nmr->nr_cmd; - - DBG(D("name: %s", name)); + struct netmap_pt_host_adapter *pth_na; + struct ptnetmap_cfg *cfg = NULL; + int error = 0; - if (!nm_ptnetmap_host_on(na)) { - D("ERROR Netmap adapter %p is not a ptnetmap host adapter", na); - error = ENXIO; - goto done; - } - pth_na = (struct netmap_pt_host_adapter *)na; - - NMG_LOCK(); - switch (cmd) { - case NETMAP_PT_HOST_CREATE: - /* Read hypervisor configuration from userspace. */ - cfg = ptnetmap_read_cfg(nmr); - if (!cfg) - break; - /* Create ptnetmap state (kctxs, ...) and switch parent - * adapter to ptnetmap mode. */ - error = ptnetmap_create(pth_na, cfg); - nm_os_free(cfg); - if (error) - break; - /* Start kthreads. */ - error = ptnetmap_start_kctx_workers(pth_na); - if (error) - ptnetmap_delete(pth_na); - break; - - case NETMAP_PT_HOST_DELETE: - /* Stop kthreads. */ - ptnetmap_stop_kctx_workers(pth_na); - /* Switch parent adapter back to normal mode and destroy - * ptnetmap state (kthreads, ...). 
*/ - ptnetmap_delete(pth_na); - break; + DBG(D("name: %s", nr_name)); - default: - D("ERROR invalid cmd (nmr->nr_cmd) (0x%x)", cmd); - error = EINVAL; - break; - } - NMG_UNLOCK(); + if (!nm_ptnetmap_host_on(na)) { + D("ERROR Netmap adapter %p is not a ptnetmap host adapter", + na); + return ENXIO; + } + pth_na = (struct netmap_pt_host_adapter *)na; + + NMG_LOCK(); + if (create) { + /* Read hypervisor configuration from userspace. */ + /* TODO */ + if (!cfg) { + goto out; + } + /* Create ptnetmap state (kctxs, ...) and switch parent + * adapter to ptnetmap mode. */ + error = ptnetmap_create(pth_na, cfg); + nm_os_free(cfg); + if (error) { + goto out; + } + /* Start kthreads. */ + error = ptnetmap_start_kctx_workers(pth_na); + if (error) + ptnetmap_delete(pth_na); + } else { + /* Stop kthreads. */ + ptnetmap_stop_kctx_workers(pth_na); + /* Switch parent adapter back to normal mode and destroy + * ptnetmap state (kthreads, ...). */ + ptnetmap_delete(pth_na); + } +out: + NMG_UNLOCK(); -done: - return error; + return error; } /* nm_notify callbacks for ptnetmap */ @@ -1048,8 +1020,7 @@ nm_unused_notify(struct netmap_kring *kring, int flags) /* nm_config callback for bwrap */ static int -nm_pt_host_config(struct netmap_adapter *na, u_int *txr, u_int *txd, - u_int *rxr, u_int *rxd) +nm_pt_host_config(struct netmap_adapter *na, struct nm_config_info *info) { struct netmap_pt_host_adapter *pth_na = (struct netmap_pt_host_adapter *)na; @@ -1061,12 +1032,11 @@ nm_pt_host_config(struct netmap_adapter *na, u_int *txr, u_int *txd, /* forward the request */ error = netmap_update_config(parent); - *rxr = na->num_rx_rings = parent->num_rx_rings; - *txr = na->num_tx_rings = parent->num_tx_rings; - *txd = na->num_tx_desc = parent->num_tx_desc; - *rxd = na->num_rx_desc = parent->num_rx_desc; - - DBG(D("rxr: %d txr: %d txd: %d rxd: %d", *rxr, *txr, *txd, *rxd)); + info->num_rx_rings = na->num_rx_rings = parent->num_rx_rings; + info->num_tx_rings = na->num_tx_rings = 
parent->num_tx_rings; + info->num_tx_descs = na->num_tx_desc = parent->num_tx_desc; + info->num_rx_descs = na->num_rx_desc = parent->num_rx_desc; + info->rx_buf_maxsize = na->rx_buf_maxsize = parent->rx_buf_maxsize; return error; } @@ -1107,7 +1077,7 @@ nm_pt_host_krings_create(struct netmap_adapter *na) * host rings independently on what the regif asked for: * these rings are needed by the guest ptnetmap adapter * anyway. */ - kring = &NMR(na, t)[nma_get_nrings(na, t)]; + kring = NMR(na, t)[nma_get_nrings(na, t)]; kring->nr_kflags |= NKR_NEEDRING; } @@ -1187,17 +1157,18 @@ nm_pt_host_dtor(struct netmap_adapter *na) /* check if nmr is a request for a ptnetmap adapter that we can satisfy */ int -netmap_get_pt_host_na(struct nmreq *nmr, struct netmap_adapter **na, +netmap_get_pt_host_na(struct nmreq_header *hdr, struct netmap_adapter **na, struct netmap_mem_d *nmd, int create) { - struct nmreq parent_nmr; + struct nmreq_register *req = (struct nmreq_register *)hdr->nr_body; + struct nmreq_register preq; struct netmap_adapter *parent; /* target adapter */ struct netmap_pt_host_adapter *pth_na; struct ifnet *ifp = NULL; int error; /* Check if it is a request for a ptnetmap adapter */ - if ((nmr->nr_flags & (NR_PTNETMAP_HOST)) == 0) { + if ((req->nr_flags & (NR_PTNETMAP_HOST)) == 0) { return 0; } @@ -1210,12 +1181,14 @@ netmap_get_pt_host_na(struct nmreq *nmr, struct netmap_adapter **na, } /* first, try to find the adapter that we want to passthrough - * We use the same nmr, after we have turned off the ptnetmap flag. + * We use the same req, after we have turned off the ptnetmap flag. * In this way we can potentially passthrough everything netmap understands. 
*/ - memcpy(&parent_nmr, nmr, sizeof(parent_nmr)); - parent_nmr.nr_flags &= ~(NR_PTNETMAP_HOST); - error = netmap_get_na(&parent_nmr, &parent, &ifp, nmd, create); + memcpy(&preq, req, sizeof(preq)); + preq.nr_flags &= ~(NR_PTNETMAP_HOST); + hdr->nr_body = (uint64_t)&preq; + error = netmap_get_na(hdr, &parent, &ifp, nmd, create); + hdr->nr_body = (uint64_t)req; if (error) { D("parent lookup failed: %d", error); goto put_out_noputparent; diff --git a/sys/dev/netmap/netmap_vale.c b/sys/dev/netmap/netmap_vale.c index d364699bce269..6e0748acd5300 100644 --- a/sys/dev/netmap/netmap_vale.c +++ b/sys/dev/netmap/netmap_vale.c @@ -166,7 +166,7 @@ SYSCTL_INT(_dev_netmap, OID_AUTO, bridge_batch, CTLFLAG_RW, &bridge_batch, 0, "Max batch size to be used in the bridge"); SYSEND; -static int netmap_vp_create(struct nmreq *, struct ifnet *, +static int netmap_vp_create(struct nmreq_header *hdr, struct ifnet *, struct netmap_mem_d *nmd, struct netmap_vp_adapter **); static int netmap_vp_reg(struct netmap_adapter *na, int onoff); static int netmap_bwrap_reg(struct netmap_adapter *, int onoff); @@ -188,6 +188,9 @@ struct nm_hash_ent { uint64_t ports; }; +/* Holds the default callbacks */ +static struct netmap_bdg_ops default_bdg_ops = {netmap_bdg_learning, NULL, NULL}; + /* * nm_bridge is a descriptor for a VALE switch. * Interfaces for a bridge are all in bdg_ports[]. @@ -201,37 +204,50 @@ struct nm_hash_ent { * bdg_lock protects accesses to the bdg_ports array. * This is a rw lock (or equivalent). */ +#define NM_BDG_IFNAMSIZ IFNAMSIZ struct nm_bridge { /* XXX what is the proper alignment/layout ? */ BDG_RWLOCK_T bdg_lock; /* protects bdg_ports */ int bdg_namelen; - uint32_t bdg_active_ports; /* 0 means free */ - char bdg_basename[IFNAMSIZ]; + uint32_t bdg_active_ports; + char bdg_basename[NM_BDG_IFNAMSIZ]; /* Indexes of active ports (up to active_ports) * and all other remaining ports. 
*/ - uint8_t bdg_port_index[NM_BDG_MAXPORTS]; + uint32_t bdg_port_index[NM_BDG_MAXPORTS]; + /* used by netmap_bdg_detach_common() */ + uint32_t tmp_bdg_port_index[NM_BDG_MAXPORTS]; struct netmap_vp_adapter *bdg_ports[NM_BDG_MAXPORTS]; - /* - * The function to decide the destination port. + * Programmable lookup functions to figure out the destination port. * It returns either of an index of the destination port, * NM_BDG_BROADCAST to broadcast this packet, or NM_BDG_NOPORT not to * forward this packet. ring_nr is the source ring index, and the * function may overwrite this value to forward this packet to a * different ring index. - * This function must be set by netmap_bdg_ctl(). + * The function is set by netmap_bdg_regops(). + */ + struct netmap_bdg_ops *bdg_ops; + + /* + * Contains the data structure used by the bdg_ops.lookup function. + * By default points to *ht which is allocated on attach and used by the default lookup + * otherwise will point to the data structure received by netmap_bdg_regops(). */ - struct netmap_bdg_ops bdg_ops; + void *private_data; + struct nm_hash_ent *ht; - /* the forwarding table, MAC+ports. - * XXX should be changed to an argument to be passed to - * the lookup function + /* Currently used to specify if the bridge is still in use while empty and + * if it has been put in exclusive mode by an external module, see netmap_bdg_regops() + * and netmap_bdg_create(). 
*/ - struct nm_hash_ent *ht; // allocated on attach +#define NM_BDG_ACTIVE 1 +#define NM_BDG_EXCLUSIVE 2 + uint8_t bdg_flags; + #ifdef CONFIG_NET_NS struct net *ns; @@ -309,18 +325,17 @@ nm_vale_name_validate(const char *name) return -1; } - for (i = 0; name[i]; i++) { + for (i = 0; i < NM_BDG_IFNAMSIZ && name[i]; i++) { if (name[i] == ':') { - if (colon_pos != -1) { - return -1; - } colon_pos = i; + break; } else if (!nm_is_id_char(name[i])) { return -1; } } - if (i >= IFNAMSIZ) { + if (strlen(name) - colon_pos > IFNAMSIZ) { + /* interface name too long */ return -1; } @@ -355,7 +370,7 @@ nm_find_bridge(const char *name, int create) for (i = 0; i < num_bridges; i++) { struct nm_bridge *x = bridges + i; - if (x->bdg_active_ports == 0) { + if ((x->bdg_flags & NM_BDG_ACTIVE) + x->bdg_active_ports == 0) { if (create && b == NULL) b = x; /* record empty slot */ } else if (x->bdg_namelen != namelen) { @@ -381,7 +396,9 @@ nm_find_bridge(const char *name, int create) for (i = 0; i < NM_BDG_MAXPORTS; i++) b->bdg_port_index[i] = i; /* set the default function */ - b->bdg_ops.lookup = netmap_bdg_learning; + b->bdg_ops = &default_bdg_ops; + b->private_data = b->ht; + b->bdg_flags = 0; NM_BNS_GET(b); } return b; @@ -395,15 +412,15 @@ static void nm_free_bdgfwd(struct netmap_adapter *na) { int nrings, i; - struct netmap_kring *kring; + struct netmap_kring **kring; NMG_LOCK_ASSERT(); nrings = na->num_tx_rings; kring = na->tx_rings; for (i = 0; i < nrings; i++) { - if (kring[i].nkr_ft) { - nm_os_free(kring[i].nkr_ft); - kring[i].nkr_ft = NULL; /* protect from freeing twice */ + if (kring[i]->nkr_ft) { + nm_os_free(kring[i]->nkr_ft); + kring[i]->nkr_ft = NULL; /* protect from freeing twice */ } } } @@ -416,7 +433,7 @@ static int nm_alloc_bdgfwd(struct netmap_adapter *na) { int nrings, l, i, num_dstq; - struct netmap_kring *kring; + struct netmap_kring **kring; NMG_LOCK_ASSERT(); /* all port:rings + broadcast */ @@ -442,11 +459,26 @@ nm_alloc_bdgfwd(struct netmap_adapter *na) 
dstq[j].bq_head = dstq[j].bq_tail = NM_FT_NULL; dstq[j].bq_len = 0; } - kring[i].nkr_ft = ft; + kring[i]->nkr_ft = ft; } return 0; } +static int +netmap_bdg_free(struct nm_bridge *b) +{ + if ((b->bdg_flags & NM_BDG_ACTIVE) + b->bdg_active_ports != 0) { + return EBUSY; + } + + ND("marking bridge %s as free", b->bdg_basename); + nm_os_free(b->ht); + b->bdg_ops = NULL; + b->bdg_flags = 0; + NM_BNS_PUT(b); + return 0; +} + /* remove from bridge b the ports in slots hw and sw * (sw can be -1 if not needed) @@ -456,7 +488,7 @@ netmap_bdg_detach_common(struct nm_bridge *b, int hw, int sw) { int s_hw = hw, s_sw = sw; int i, lim =b->bdg_active_ports; - uint8_t tmp[NM_BDG_MAXPORTS]; + uint32_t *tmp = b->tmp_bdg_port_index; /* New algorithm: @@ -473,7 +505,7 @@ netmap_bdg_detach_common(struct nm_bridge *b, int hw, int sw) /* make a copy of the list of active ports, update it, * and then copy back within BDG_WLOCK(). */ - memcpy(tmp, b->bdg_port_index, sizeof(tmp)); + memcpy(b->tmp_bdg_port_index, b->bdg_port_index, sizeof(b->tmp_bdg_port_index)); for (i = 0; (hw >= 0 || sw >= 0) && i < lim; ) { if (hw >= 0 && tmp[i] == hw) { ND("detach hw %d at %d", hw, i); @@ -496,35 +528,117 @@ netmap_bdg_detach_common(struct nm_bridge *b, int hw, int sw) } BDG_WLOCK(b); - if (b->bdg_ops.dtor) - b->bdg_ops.dtor(b->bdg_ports[s_hw]); + if (b->bdg_ops->dtor) + b->bdg_ops->dtor(b->bdg_ports[s_hw]); b->bdg_ports[s_hw] = NULL; if (s_sw >= 0) { b->bdg_ports[s_sw] = NULL; } - memcpy(b->bdg_port_index, tmp, sizeof(tmp)); + memcpy(b->bdg_port_index, b->tmp_bdg_port_index, sizeof(b->tmp_bdg_port_index)); b->bdg_active_ports = lim; BDG_WUNLOCK(b); ND("now %d active ports", lim); - if (lim == 0) { - ND("marking bridge %s as free", b->bdg_basename); - nm_os_free(b->ht); - bzero(&b->bdg_ops, sizeof(b->bdg_ops)); - NM_BNS_PUT(b); + netmap_bdg_free(b); +} + +static inline void * +nm_bdg_get_auth_token(struct nm_bridge *b) +{ + return b->ht; +} + +/* bridge not in exclusive mode ==> always valid + * bridge 
in exclusive mode (created through netmap_bdg_create()) ==> check authentication token + */ +static inline int +nm_bdg_valid_auth_token(struct nm_bridge *b, void *auth_token) +{ + return !(b->bdg_flags & NM_BDG_EXCLUSIVE) || b->ht == auth_token; +} + +/* Allows external modules to create bridges in exclusive mode, + * returns an authentication token that the external module will need + * to provide during nm_bdg_ctl_{attach, detach}(), netmap_bdg_regops(), + * and nm_bdg_update_private_data() operations. + * Successfully executed if ret != NULL and *return_status == 0. + */ +void * +netmap_bdg_create(const char *bdg_name, int *return_status) +{ + struct nm_bridge *b = NULL; + void *ret = NULL; + + NMG_LOCK(); + b = nm_find_bridge(bdg_name, 0 /* don't create */); + if (b) { + *return_status = EEXIST; + goto unlock_bdg_create; + } + + b = nm_find_bridge(bdg_name, 1 /* create */); + if (!b) { + *return_status = ENOMEM; + goto unlock_bdg_create; + } + + b->bdg_flags |= NM_BDG_ACTIVE | NM_BDG_EXCLUSIVE; + ret = nm_bdg_get_auth_token(b); + *return_status = 0; + +unlock_bdg_create: + NMG_UNLOCK(); + return ret; +} + +/* Allows external modules to destroy a bridge created through + * netmap_bdg_create(), the bridge must be empty. 
+ */ +int +netmap_bdg_destroy(const char *bdg_name, void *auth_token) +{ + struct nm_bridge *b = NULL; + int ret = 0; + + NMG_LOCK(); + b = nm_find_bridge(bdg_name, 0 /* don't create */); + if (!b) { + ret = ENXIO; + goto unlock_bdg_free; + } + + if (!nm_bdg_valid_auth_token(b, auth_token)) { + ret = EACCES; + goto unlock_bdg_free; + } + if (!(b->bdg_flags & NM_BDG_EXCLUSIVE)) { + ret = EINVAL; + goto unlock_bdg_free; + } + + b->bdg_flags &= ~(NM_BDG_EXCLUSIVE | NM_BDG_ACTIVE); + ret = netmap_bdg_free(b); + if (ret) { + b->bdg_flags |= NM_BDG_EXCLUSIVE | NM_BDG_ACTIVE; } + +unlock_bdg_free: + NMG_UNLOCK(); + return ret; } + + /* nm_bdg_ctl callback for VALE ports */ static int -netmap_vp_bdg_ctl(struct netmap_adapter *na, struct nmreq *nmr, int attach) +netmap_vp_bdg_ctl(struct nmreq_header *hdr, struct netmap_adapter *na) { struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter *)na; struct nm_bridge *b = vpna->na_bdg; - (void)nmr; // XXX merge ? - if (attach) + if (hdr->nr_reqtype == NETMAP_REQ_VALE_ATTACH) { return 0; /* nothing to do */ + } if (b) { netmap_set_all_rings(na, 0 /* disable */); netmap_bdg_detach_common(b, vpna->bdg_port, -1); @@ -560,8 +674,38 @@ netmap_vp_dtor(struct netmap_adapter *na) } } +/* creates a persistent VALE port */ +int +nm_vi_create(struct nmreq_header *hdr) +{ + struct nmreq_vale_newif *req = + (struct nmreq_vale_newif *)hdr->nr_body; + int error = 0; + /* Build a nmreq_register out of the nmreq_vale_newif, + * so that we can call netmap_get_bdg_na(). 
*/ + struct nmreq_register regreq; + bzero(&regreq, sizeof(regreq)); + regreq.nr_tx_slots = req->nr_tx_slots; + regreq.nr_rx_slots = req->nr_rx_slots; + regreq.nr_tx_rings = req->nr_tx_rings; + regreq.nr_rx_rings = req->nr_rx_rings; + regreq.nr_mem_id = req->nr_mem_id; + hdr->nr_reqtype = NETMAP_REQ_REGISTER; + hdr->nr_body = (uint64_t)&regreq; + error = netmap_vi_create(hdr, 0 /* no autodelete */); + hdr->nr_reqtype = NETMAP_REQ_VALE_NEWIF; + hdr->nr_body = (uint64_t)req; + /* Write back to the original struct. */ + req->nr_tx_slots = regreq.nr_tx_slots; + req->nr_rx_slots = regreq.nr_rx_slots; + req->nr_tx_rings = regreq.nr_tx_rings; + req->nr_rx_rings = regreq.nr_rx_rings; + req->nr_mem_id = regreq.nr_mem_id; + return error; +} + /* remove a persistent VALE port from the system */ -static int +int nm_vi_destroy(const char *name) { struct ifnet *ifp; @@ -611,17 +755,14 @@ err: } static int -nm_update_info(struct nmreq *nmr, struct netmap_adapter *na) +nm_update_info(struct nmreq_register *req, struct netmap_adapter *na) { - uint64_t memsize; - int ret; - nmr->nr_rx_rings = na->num_rx_rings; - nmr->nr_tx_rings = na->num_tx_rings; - nmr->nr_rx_slots = na->num_rx_desc; - nmr->nr_tx_slots = na->num_tx_desc; - ret = netmap_mem_get_info(na->nm_mem, &memsize, NULL, &nmr->nr_arg2); - nmr->nr_memsize = (uint32_t)memsize; - return ret; + req->nr_rx_rings = na->num_rx_rings; + req->nr_tx_rings = na->num_tx_rings; + req->nr_rx_slots = na->num_rx_desc; + req->nr_tx_slots = na->num_tx_desc; + return netmap_mem_get_info(na->nm_mem, &req->nr_memsize, NULL, + &req->nr_mem_id); } /* @@ -629,22 +770,30 @@ nm_update_info(struct nmreq *nmr, struct netmap_adapter *na) * The interface will be attached to a bridge later.
*/ int -netmap_vi_create(struct nmreq *nmr, int autodelete) +netmap_vi_create(struct nmreq_header *hdr, int autodelete) { + struct nmreq_register *req = (struct nmreq_register *)hdr->nr_body; struct ifnet *ifp; struct netmap_vp_adapter *vpna; struct netmap_mem_d *nmd = NULL; int error; + if (hdr->nr_reqtype != NETMAP_REQ_REGISTER) { + return EINVAL; + } + /* don't include VALE prefix */ - if (!strncmp(nmr->nr_name, NM_BDG_NAME, strlen(NM_BDG_NAME))) + if (!strncmp(hdr->nr_name, NM_BDG_NAME, strlen(NM_BDG_NAME))) + return EINVAL; + if (strlen(hdr->nr_name) >= IFNAMSIZ) { return EINVAL; - ifp = ifunit_ref(nmr->nr_name); + } + ifp = ifunit_ref(hdr->nr_name); if (ifp) { /* already exist, cannot create new one */ error = EEXIST; NMG_LOCK(); if (NM_NA_VALID(ifp)) { - int update_err = nm_update_info(nmr, NA(ifp)); + int update_err = nm_update_info(req, NA(ifp)); if (update_err) error = update_err; } @@ -652,20 +801,20 @@ netmap_vi_create(struct nmreq *nmr, int autodelete) if_rele(ifp); return error; } - error = nm_os_vi_persist(nmr->nr_name, &ifp); + error = nm_os_vi_persist(hdr->nr_name, &ifp); if (error) return error; NMG_LOCK(); - if (nmr->nr_arg2) { - nmd = netmap_mem_find(nmr->nr_arg2); + if (req->nr_mem_id) { + nmd = netmap_mem_find(req->nr_mem_id); if (nmd == NULL) { error = EINVAL; goto err_1; } } /* netmap_vp_create creates a struct netmap_vp_adapter */ - error = netmap_vp_create(nmr, ifp, nmd, &vpna); + error = netmap_vp_create(hdr, ifp, nmd, &vpna); if (error) { D("error %d", error); goto err_1; @@ -679,15 +828,15 @@ netmap_vi_create(struct nmreq *nmr, int autodelete) } NM_ATTACH_NA(ifp, &vpna->up); /* return the updated info */ - error = nm_update_info(nmr, &vpna->up); + error = nm_update_info(req, &vpna->up); if (error) { goto err_2; } - D("returning nr_arg2 %d", nmr->nr_arg2); + ND("returning nr_mem_id %d", req->nr_mem_id); if (nmd) netmap_mem_put(nmd); NMG_UNLOCK(); - D("created %s", ifp->if_xname); + ND("created %s", ifp->if_xname); return 0; err_2: @@ 
-711,16 +860,17 @@ err_1: * (*na != NULL && return == 0). */ int -netmap_get_bdg_na(struct nmreq *nmr, struct netmap_adapter **na, +netmap_get_bdg_na(struct nmreq_header *hdr, struct netmap_adapter **na, struct netmap_mem_d *nmd, int create) { - char *nr_name = nmr->nr_name; + char *nr_name = hdr->nr_name; const char *ifname; struct ifnet *ifp = NULL; int error = 0; struct netmap_vp_adapter *vpna, *hostna = NULL; struct nm_bridge *b; - int i, j, cand = -1, cand2 = -1; + uint32_t i, j; + uint32_t cand = NM_BDG_NOPORT, cand2 = NM_BDG_NOPORT; int needed; *na = NULL; /* default return value */ @@ -780,17 +930,17 @@ netmap_get_bdg_na(struct nmreq *nmr, struct netmap_adapter **na, ifname = nr_name + b->bdg_namelen + 1; ifp = ifunit_ref(ifname); if (!ifp) { - /* Create an ephemeral virtual port - * This block contains all the ephemeral-specific logics + /* Create an ephemeral virtual port. + * This block contains all the ephemeral-specific logic. */ - if (nmr->nr_cmd) { - /* nr_cmd must be 0 for a virtual port */ + + if (hdr->nr_reqtype != NETMAP_REQ_REGISTER) { error = EINVAL; goto out; } /* bdg_netmap_attach creates a struct netmap_adapter */ - error = netmap_vp_create(nmr, NULL, nmd, &vpna); + error = netmap_vp_create(hdr, NULL, nmd, &vpna); if (error) { D("error %d", error); goto out; @@ -798,15 +948,16 @@ netmap_get_bdg_na(struct nmreq *nmr, struct netmap_adapter **na, /* shortcut - we can skip get_hw_na(), * ownership check and nm_bdg_attach() */ + } else { struct netmap_adapter *hw; /* the vale:nic syntax is only valid for some commands */ - switch (nmr->nr_cmd) { - case NETMAP_BDG_ATTACH: - case NETMAP_BDG_DETACH: - case NETMAP_BDG_POLLING_ON: - case NETMAP_BDG_POLLING_OFF: + switch (hdr->nr_reqtype) { + case NETMAP_REQ_VALE_ATTACH: + case NETMAP_REQ_VALE_DETACH: + case NETMAP_REQ_VALE_POLLING_ENABLE: + case NETMAP_REQ_VALE_POLLING_DISABLE: break; /* ok */ default: error = EINVAL; @@ -823,8 +974,14 @@ netmap_get_bdg_na(struct nmreq *nmr, struct netmap_adapter 
**na, goto out; vpna = hw->na_vp; hostna = hw->na_hostvp; - if (nmr->nr_arg1 != NETMAP_BDG_HOST) - hostna = NULL; + if (hdr->nr_reqtype == NETMAP_REQ_VALE_ATTACH) { + /* Check if we need to skip the host rings. */ + struct nmreq_vale_attach *areq = + (struct nmreq_vale_attach *)hdr->nr_body; + if (areq->reg.nr_mode != NR_REG_NIC_SW) { + hostna = NULL; + } + } } BDG_WLOCK(b); @@ -854,34 +1011,46 @@ out: return error; } - -/* Process NETMAP_BDG_ATTACH */ -static int -nm_bdg_ctl_attach(struct nmreq *nmr) +/* Process NETMAP_REQ_VALE_ATTACH. + */ +int +nm_bdg_ctl_attach(struct nmreq_header *hdr, void *auth_token) { + struct nmreq_vale_attach *req = + (struct nmreq_vale_attach *)hdr->nr_body; + struct netmap_vp_adapter * vpna; struct netmap_adapter *na; struct netmap_mem_d *nmd = NULL; + struct nm_bridge *b = NULL; int error; NMG_LOCK(); + /* permission check for modified bridges */ + b = nm_find_bridge(hdr->nr_name, 0 /* don't create */); + if (b && !nm_bdg_valid_auth_token(b, auth_token)) { + error = EACCES; + goto unlock_exit; + } - if (nmr->nr_arg2) { - nmd = netmap_mem_find(nmr->nr_arg2); + if (req->reg.nr_mem_id) { + nmd = netmap_mem_find(req->reg.nr_mem_id); if (nmd == NULL) { error = EINVAL; goto unlock_exit; } } - /* XXX check existing one */ - error = netmap_get_bdg_na(nmr, &na, nmd, 0); + /* check for existing one */ + error = netmap_get_bdg_na(hdr, &na, nmd, 0); if (!error) { error = EBUSY; goto unref_exit; } - error = netmap_get_bdg_na(nmr, &na, nmd, 1 /* create if not exists */); - if (error) /* no device */ + error = netmap_get_bdg_na(hdr, &na, + nmd, 1 /* create if not exists */); + if (error) { /* no device */ goto unlock_exit; + } if (na == NULL) { /* VALE prefix missing */ error = EINVAL; @@ -897,11 +1066,13 @@ nm_bdg_ctl_attach(struct nmreq *nmr) /* nop for VALE ports. 
The bwrap needs to put the hwna * in netmap mode (see netmap_bwrap_bdg_ctl) */ - error = na->nm_bdg_ctl(na, nmr, 1); + error = na->nm_bdg_ctl(hdr, na); if (error) goto unref_exit; ND("registered %s to netmap-mode", na->name); } + vpna = (struct netmap_vp_adapter *)na; + req->port_index = vpna->bdg_port; NMG_UNLOCK(); return 0; @@ -918,15 +1089,26 @@ nm_is_bwrap(struct netmap_adapter *na) return na->nm_register == netmap_bwrap_reg; } -/* process NETMAP_BDG_DETACH */ -static int -nm_bdg_ctl_detach(struct nmreq *nmr) +/* Process NETMAP_REQ_VALE_DETACH. + */ +int +nm_bdg_ctl_detach(struct nmreq_header *hdr, void *auth_token) { + struct nmreq_vale_detach *nmreq_det = (void *)hdr->nr_body; + struct netmap_vp_adapter *vpna; struct netmap_adapter *na; + struct nm_bridge *b = NULL; int error; NMG_LOCK(); - error = netmap_get_bdg_na(nmr, &na, NULL, 0 /* don't create */); + /* permission check for modified bridges */ + b = nm_find_bridge(hdr->nr_name, 0 /* don't create */); + if (b && !nm_bdg_valid_auth_token(b, auth_token)) { + error = EACCES; + goto unlock_exit; + } + + error = netmap_get_bdg_na(hdr, &na, NULL, 0 /* don't create */); if (error) { /* no device, or another bridge or user owns the device */ goto unlock_exit; } @@ -938,16 +1120,27 @@ nm_bdg_ctl_detach(struct nmreq *nmr) ((struct netmap_bwrap_adapter *)na)->na_polling_state) { /* Don't detach a NIC with polling */ error = EBUSY; - netmap_adapter_put(na); - goto unlock_exit; + goto unref_exit; } + + vpna = (struct netmap_vp_adapter *)na; + if (na->na_vp != vpna) { + /* trying to detach first attach of VALE persistent port attached + * to 2 bridges + */ + error = EBUSY; + goto unref_exit; + } + nmreq_det->port_index = vpna->bdg_port; + if (na->nm_bdg_ctl) { /* remove the port from bridge. 
The bwrap * also needs to put the hwna in normal mode */ - error = na->nm_bdg_ctl(na, nmr, 0); + error = na->nm_bdg_ctl(hdr, na); } +unref_exit: netmap_adapter_put(na); unlock_exit: NMG_UNLOCK(); @@ -968,7 +1161,7 @@ struct nm_bdg_polling_state { bool configured; bool stopped; struct netmap_bwrap_adapter *bna; - u_int reg; + uint32_t mode; u_int qfirst; u_int qlast; u_int cpu_from; @@ -982,7 +1175,7 @@ netmap_bwrap_polling(void *data, int is_kthread) struct nm_bdg_kthread *nbk = data; struct netmap_bwrap_adapter *bna; u_int qfirst, qlast, i; - struct netmap_kring *kring0, *kring; + struct netmap_kring **kring0, *kring; if (!nbk) return; @@ -992,7 +1185,7 @@ netmap_bwrap_polling(void *data, int is_kthread) kring0 = NMR(bna->hwna, NR_RX); for (i = qfirst; i < qlast; i++) { - kring = kring0 + i; + kring = kring0[i]; kring->nm_notify(kring, 0); } } @@ -1012,7 +1205,8 @@ nm_bdg_create_kthreads(struct nm_bdg_polling_state *bps) kcfg.use_kthread = 1; for (i = 0; i < bps->ncpus; i++) { struct nm_bdg_kthread *t = bps->kthreads + i; - int all = (bps->ncpus == 1 && bps->reg == NR_REG_ALL_NIC); + int all = (bps->ncpus == 1 && + bps->mode == NETMAP_POLLING_MODE_SINGLE_CPU); int affinity = bps->cpu_from + i; t->bps = bps; @@ -1023,7 +1217,7 @@ nm_bdg_create_kthreads(struct nm_bdg_polling_state *bps) kcfg.type = i; kcfg.worker_private = t; - t->nmk = nm_os_kctx_create(&kcfg, 0, NULL); + t->nmk = nm_os_kctx_create(&kcfg, NULL); if (t->nmk == NULL) { goto cleanup; } @@ -1088,67 +1282,68 @@ nm_bdg_polling_stop_delete_kthreads(struct nm_bdg_polling_state *bps) } static int -get_polling_cfg(struct nmreq *nmr, struct netmap_adapter *na, - struct nm_bdg_polling_state *bps) +get_polling_cfg(struct nmreq_vale_polling *req, struct netmap_adapter *na, + struct nm_bdg_polling_state *bps) { - int req_cpus, avail_cpus, core_from; - u_int reg, i, qfirst, qlast; + unsigned int avail_cpus, core_from; + unsigned int qfirst, qlast; + uint32_t i = req->nr_first_cpu_id; + uint32_t req_cpus = 
req->nr_num_polling_cpus; avail_cpus = nm_os_ncpus(); - req_cpus = nmr->nr_arg1; if (req_cpus == 0) { D("req_cpus must be > 0"); return EINVAL; } else if (req_cpus >= avail_cpus) { - D("for safety, we need at least one core left in the system"); + D("Cannot use all the CPUs in the system"); return EINVAL; } - reg = nmr->nr_flags & NR_REG_MASK; - i = nmr->nr_ringid & NETMAP_RING_MASK; - /* - * ONE_NIC: dedicate one core to one ring. If multiple cores - * are specified, consecutive rings are also polled. - * For example, if ringid=2 and 2 cores are given, - * ring 2 and 3 are polled by core 2 and 3, respectively. - * ALL_NIC: poll all the rings using a core specified by ringid. - * the number of cores must be 1. - */ - if (reg == NR_REG_ONE_NIC) { + + if (req->nr_mode == NETMAP_POLLING_MODE_MULTI_CPU) { + /* Use a separate core for each ring. If nr_num_polling_cpus>1 + * more consecutive rings are polled. + * For example, if nr_first_cpu_id=2 and nr_num_polling_cpus=2, + * ring 2 and 3 are polled by core 2 and 3, respectively. */ if (i + req_cpus > nma_get_nrings(na, NR_RX)) { - D("only %d rings exist (ring %u-%u is given)", - nma_get_nrings(na, NR_RX), i, i+req_cpus); + D("Rings %u-%u not in range (have %d rings)", + i, i + req_cpus, nma_get_nrings(na, NR_RX)); return EINVAL; } qfirst = i; qlast = qfirst + req_cpus; core_from = qfirst; - } else if (reg == NR_REG_ALL_NIC) { + + } else if (req->nr_mode == NETMAP_POLLING_MODE_SINGLE_CPU) { + /* Poll all the rings using a core specified by nr_first_cpu_id. + * the number of cores must be 1. 
*/ if (req_cpus != 1) { - D("ncpus must be 1 not %d for REG_ALL_NIC", req_cpus); + D("ncpus must be 1 for NETMAP_POLLING_MODE_SINGLE_CPU " + "(was %d)", req_cpus); return EINVAL; } qfirst = 0; qlast = nma_get_nrings(na, NR_RX); core_from = i; } else { - D("reg must be ALL_NIC or ONE_NIC"); + D("Invalid polling mode"); return EINVAL; } - bps->reg = reg; + bps->mode = req->nr_mode; bps->qfirst = qfirst; bps->qlast = qlast; bps->cpu_from = core_from; bps->ncpus = req_cpus; D("%s qfirst %u qlast %u cpu_from %u ncpus %u", - reg == NR_REG_ALL_NIC ? "REG_ALL_NIC" : "REG_ONE_NIC", + req->nr_mode == NETMAP_POLLING_MODE_MULTI_CPU ? + "MULTI" : "SINGLE", qfirst, qlast, core_from, req_cpus); return 0; } static int -nm_bdg_ctl_polling_start(struct nmreq *nmr, struct netmap_adapter *na) +nm_bdg_ctl_polling_start(struct nmreq_vale_polling *req, struct netmap_adapter *na) { struct nm_bdg_polling_state *bps; struct netmap_bwrap_adapter *bna; @@ -1166,7 +1361,7 @@ nm_bdg_ctl_polling_start(struct nmreq *nmr, struct netmap_adapter *na) bps->configured = false; bps->stopped = true; - if (get_polling_cfg(nmr, na, bps)) { + if (get_polling_cfg(req, na, bps)) { nm_os_free(bps); return EINVAL; } @@ -1195,7 +1390,7 @@ nm_bdg_ctl_polling_start(struct nmreq *nmr, struct netmap_adapter *na) } static int -nm_bdg_ctl_polling_stop(struct nmreq *nmr, struct netmap_adapter *na) +nm_bdg_ctl_polling_stop(struct netmap_adapter *na) { struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter *)na; struct nm_bdg_polling_state *bps; @@ -1214,190 +1409,203 @@ nm_bdg_ctl_polling_stop(struct nmreq *nmr, struct netmap_adapter *na) return 0; } -/* Called by either user's context (netmap_ioctl()) - * or external kernel modules (e.g., Openvswitch). - * Operation is indicated in nmr->nr_cmd. - * NETMAP_BDG_OPS that sets configure/lookup/dtor functions to the bridge - * requires bdg_ops argument; the other commands ignore this argument. - * - * Called without NMG_LOCK. 
- */ int -netmap_bdg_ctl(struct nmreq *nmr, struct netmap_bdg_ops *bdg_ops) +nm_bdg_polling(struct nmreq_header *hdr) { + struct nmreq_vale_polling *req = + (struct nmreq_vale_polling *)hdr->nr_body; + struct netmap_adapter *na = NULL; + int error = 0; + + NMG_LOCK(); + error = netmap_get_bdg_na(hdr, &na, NULL, /*create=*/0); + if (na && !error) { + if (!nm_is_bwrap(na)) { + error = EOPNOTSUPP; + } else if (hdr->nr_reqtype == NETMAP_BDG_POLLING_ON) { + error = nm_bdg_ctl_polling_start(req, na); + if (!error) + netmap_adapter_get(na); + } else { + error = nm_bdg_ctl_polling_stop(na); + if (!error) + netmap_adapter_put(na); + } + netmap_adapter_put(na); + } else if (!na && !error) { + /* Not VALE port. */ + error = EINVAL; + } + NMG_UNLOCK(); + + return error; +} + +/* Process NETMAP_REQ_VALE_LIST. */ +int +netmap_bdg_list(struct nmreq_header *hdr) +{ + struct nmreq_vale_list *req = + (struct nmreq_vale_list *)hdr->nr_body; + int namelen = strlen(hdr->nr_name); struct nm_bridge *b, *bridges; - struct netmap_adapter *na; struct netmap_vp_adapter *vpna; - char *name = nmr->nr_name; - int cmd = nmr->nr_cmd, namelen = strlen(name); int error = 0, i, j; u_int num_bridges; netmap_bns_getbridges(&bridges, &num_bridges); - switch (cmd) { - case NETMAP_BDG_NEWIF: - error = netmap_vi_create(nmr, 0 /* no autodelete */); - break; - - case NETMAP_BDG_DELIF: - error = nm_vi_destroy(nmr->nr_name); - break; - - case NETMAP_BDG_ATTACH: - error = nm_bdg_ctl_attach(nmr); - break; - - case NETMAP_BDG_DETACH: - error = nm_bdg_ctl_detach(nmr); - break; - - case NETMAP_BDG_LIST: - /* this is used to enumerate bridges and ports */ - if (namelen) { /* look up indexes of bridge and port */ - if (strncmp(name, NM_BDG_NAME, strlen(NM_BDG_NAME))) { - error = EINVAL; - break; - } - NMG_LOCK(); - b = nm_find_bridge(name, 0 /* don't create */); - if (!b) { - error = ENOENT; - NMG_UNLOCK(); - break; - } - - error = 0; - nmr->nr_arg1 = b - bridges; /* bridge index */ - nmr->nr_arg2 = NM_BDG_NOPORT; - 
for (j = 0; j < b->bdg_active_ports; j++) { - i = b->bdg_port_index[j]; - vpna = b->bdg_ports[i]; - if (vpna == NULL) { - D("---AAAAAAAAARGH-------"); - continue; - } - /* the former and the latter identify a - * virtual port and a NIC, respectively - */ - if (!strcmp(vpna->up.name, name)) { - nmr->nr_arg2 = i; /* port index */ - break; - } - } - NMG_UNLOCK(); - } else { - /* return the first non-empty entry starting from - * bridge nr_arg1 and port nr_arg2. - * - * Users can detect the end of the same bridge by - * seeing the new and old value of nr_arg1, and can - * detect the end of all the bridge by error != 0 - */ - i = nmr->nr_arg1; - j = nmr->nr_arg2; - - NMG_LOCK(); - for (error = ENOENT; i < NM_BRIDGES; i++) { - b = bridges + i; - for ( ; j < NM_BDG_MAXPORTS; j++) { - if (b->bdg_ports[j] == NULL) - continue; - vpna = b->bdg_ports[j]; - strncpy(name, vpna->up.name, (size_t)IFNAMSIZ); - error = 0; - goto out; - } - j = 0; /* following bridges scan from 0 */ - } - out: - nmr->nr_arg1 = i; - nmr->nr_arg2 = j; - NMG_UNLOCK(); - } - break; - - case NETMAP_BDG_REGOPS: /* XXX this should not be available from userspace */ - /* register callbacks to the given bridge. - * nmr->nr_name may be just bridge's name (including ':' - * if it is not just NM_NAME). - */ - if (!bdg_ops) { - error = EINVAL; - break; + /* this is used to enumerate bridges and ports */ + if (namelen) { /* look up indexes of bridge and port */ + if (strncmp(hdr->nr_name, NM_BDG_NAME, + strlen(NM_BDG_NAME))) { + return EINVAL; } NMG_LOCK(); - b = nm_find_bridge(name, 0 /* don't create */); + b = nm_find_bridge(hdr->nr_name, 0 /* don't create */); if (!b) { - error = EINVAL; - } else { - b->bdg_ops = *bdg_ops; - } - NMG_UNLOCK(); - break; - - case NETMAP_BDG_VNET_HDR: - /* Valid lengths for the virtio-net header are 0 (no header), - 10 and 12. 
*/ - if (nmr->nr_arg1 != 0 && - nmr->nr_arg1 != sizeof(struct nm_vnet_hdr) && - nmr->nr_arg1 != 12) { - error = EINVAL; - break; + NMG_UNLOCK(); + return ENOENT; } - NMG_LOCK(); - error = netmap_get_bdg_na(nmr, &na, NULL, 0); - if (na && !error) { - vpna = (struct netmap_vp_adapter *)na; - na->virt_hdr_len = nmr->nr_arg1; - if (na->virt_hdr_len) { - vpna->mfs = NETMAP_BUF_SIZE(na); + + req->nr_bridge_idx = b - bridges; /* bridge index */ + req->nr_port_idx = NM_BDG_NOPORT; + for (j = 0; j < b->bdg_active_ports; j++) { + i = b->bdg_port_index[j]; + vpna = b->bdg_ports[i]; + if (vpna == NULL) { + D("This should not happen"); + continue; + } + /* the former and the latter identify a + * virtual port and a NIC, respectively + */ + if (!strcmp(vpna->up.name, hdr->nr_name)) { + req->nr_port_idx = i; /* port index */ + break; } - D("Using vnet_hdr_len %d for %p", na->virt_hdr_len, na); - netmap_adapter_put(na); - } else if (!na) { - error = ENXIO; } NMG_UNLOCK(); - break; + } else { + /* return the first non-empty entry starting from + * bridge nr_arg1 and port nr_arg2. 
+ * + * Users can detect the end of the same bridge by + * seeing the new and old value of nr_arg1, and can + * detect the end of all the bridge by error != 0 + */ + i = req->nr_bridge_idx; + j = req->nr_port_idx; - case NETMAP_BDG_POLLING_ON: - case NETMAP_BDG_POLLING_OFF: NMG_LOCK(); - error = netmap_get_bdg_na(nmr, &na, NULL, 0); - if (na && !error) { - if (!nm_is_bwrap(na)) { - error = EOPNOTSUPP; - } else if (cmd == NETMAP_BDG_POLLING_ON) { - error = nm_bdg_ctl_polling_start(nmr, na); - if (!error) - netmap_adapter_get(na); - } else { - error = nm_bdg_ctl_polling_stop(nmr, na); - if (!error) - netmap_adapter_put(na); + for (error = ENOENT; i < NM_BRIDGES; i++) { + b = bridges + i; + for ( ; j < NM_BDG_MAXPORTS; j++) { + if (b->bdg_ports[j] == NULL) + continue; + vpna = b->bdg_ports[j]; + /* write back the VALE switch name */ + strncpy(hdr->nr_name, vpna->up.name, + (size_t)IFNAMSIZ); + error = 0; + goto out; } - netmap_adapter_put(na); + j = 0; /* following bridges scan from 0 */ } + out: + req->nr_bridge_idx = i; + req->nr_port_idx = j; NMG_UNLOCK(); - break; + } + + return error; +} + +/* Called by external kernel modules (e.g., Openvswitch). + * to set configure/lookup/dtor functions of a VALE instance. + * Register callbacks to the given bridge. 'name' may be just + * bridge's name (including ':' if it is not just NM_BDG_NAME). + * + * Called without NMG_LOCK. 
+ */ + +int +netmap_bdg_regops(const char *name, struct netmap_bdg_ops *bdg_ops, void *private_data, void *auth_token) +{ + struct nm_bridge *b; + int error = 0; + + NMG_LOCK(); + b = nm_find_bridge(name, 0 /* don't create */); + if (!b) { + error = ENXIO; + goto unlock_regops; + } + if (!nm_bdg_valid_auth_token(b, auth_token)) { + error = EACCES; + goto unlock_regops; + } + + BDG_WLOCK(b); + if (!bdg_ops) { + /* resetting the bridge */ + bzero(b->ht, sizeof(struct nm_hash_ent) * NM_BDG_HASH); + b->bdg_ops = &default_bdg_ops; + b->private_data = b->ht; + } else { + /* modifying the bridge */ + b->private_data = private_data; + b->bdg_ops = bdg_ops; + } + BDG_WUNLOCK(b); + +unlock_regops: + NMG_UNLOCK(); + return error; +} + +/* Called by external kernel modules (e.g., Openvswitch). + * to modify the private data previously given to regops(). + * 'name' may be just bridge's name (including ':' if it + * is not just NM_BDG_NAME). + * Called without NMG_LOCK. + */ +int +nm_bdg_update_private_data(const char *name, bdg_update_private_data_fn_t callback, + void *callback_data, void *auth_token) +{ + void *private_data = NULL; + struct nm_bridge *b; + int error = 0; - default: - D("invalid cmd (nmr->nr_cmd) (0x%x)", cmd); + NMG_LOCK(); + b = nm_find_bridge(name, 0 /* don't create */); + if (!b) { error = EINVAL; - break; + goto unlock_update_priv; + } + if (!nm_bdg_valid_auth_token(b, auth_token)) { + error = EACCES; + goto unlock_update_priv; } + BDG_WLOCK(b); + private_data = callback(b->private_data, callback_data, &error); + b->private_data = private_data; + BDG_WUNLOCK(b); + +unlock_update_priv: + NMG_UNLOCK(); return error; } int -netmap_bdg_config(struct nmreq *nmr) +netmap_bdg_config(struct nm_ifreq *nr) { struct nm_bridge *b; int error = EINVAL; NMG_LOCK(); - b = nm_find_bridge(nmr->nr_name, 0); + b = nm_find_bridge(nr->nifr_name, 0); if (!b) { NMG_UNLOCK(); return error; @@ -1405,8 +1613,8 @@ netmap_bdg_config(struct nmreq *nmr) NMG_UNLOCK(); /* Don't call 
config() with NMG_LOCK() held */ BDG_RLOCK(b); - if (b->bdg_ops.config != NULL) - error = b->bdg_ops.config((struct nm_ifreq *)nmr); + if (b->bdg_ops->config != NULL) + error = b->bdg_ops->config(nr); BDG_RUNLOCK(b); return error; } @@ -1436,7 +1644,7 @@ netmap_vp_krings_create(struct netmap_adapter *na) leases = na->tailroom; for (i = 0; i < nrx; i++) { /* Receive rings */ - na->rx_rings[i].nkr_leases = leases; + na->rx_rings[i]->nkr_leases = leases; leases += na->num_rx_desc; } @@ -1502,6 +1710,7 @@ nm_bdg_preflush(struct netmap_kring *kring, u_int end) ft[ft_i].ft_len = slot->len; ft[ft_i].ft_flags = slot->flags; + ft[ft_i].ft_offset = 0; ND("flags is 0x%x", slot->flags); /* we do not use the buf changed flag, but we still need to reset it */ @@ -1606,7 +1815,7 @@ netmap_vp_reg(struct netmap_adapter *na, int onoff) if (onoff) { for_rx_tx(t) { for (i = 0; i < netmap_real_rings(na, t); i++) { - struct netmap_kring *kring = &NMR(na, t)[i]; + struct netmap_kring *kring = NMR(na, t)[i]; if (nm_kring_pending_on(kring)) kring->nr_mode = NKR_NETMAP_ON; @@ -1622,7 +1831,7 @@ netmap_vp_reg(struct netmap_adapter *na, int onoff) na->na_flags &= ~NAF_NETMAP_ON; for_rx_tx(t) { for (i = 0; i < netmap_real_rings(na, t); i++) { - struct netmap_kring *kring = &NMR(na, t)[i]; + struct netmap_kring *kring = NMR(na, t)[i]; if (nm_kring_pending_off(kring)) kring->nr_mode = NKR_NETMAP_OFF; @@ -1641,30 +1850,19 @@ netmap_vp_reg(struct netmap_adapter *na, int onoff) * and then returns the destination port index, and the * ring in *dst_ring (at the moment, always use ring 0) */ -u_int +uint32_t netmap_bdg_learning(struct nm_bdg_fwd *ft, uint8_t *dst_ring, - struct netmap_vp_adapter *na) + struct netmap_vp_adapter *na, void *private_data) { - uint8_t *buf = ft->ft_buf; - u_int buf_len = ft->ft_len; - struct nm_hash_ent *ht = na->na_bdg->ht; + uint8_t *buf = ((uint8_t *)ft->ft_buf) + ft->ft_offset; + u_int buf_len = ft->ft_len - ft->ft_offset; + struct nm_hash_ent *ht = private_data; 
uint32_t sh, dh; u_int dst, mysrc = na->bdg_port; uint64_t smac, dmac; uint8_t indbuf[12]; - /* safety check, unfortunately we have many cases */ - if (buf_len >= 14 + na->up.virt_hdr_len) { - /* virthdr + mac_hdr in the same slot */ - buf += na->up.virt_hdr_len; - buf_len -= na->up.virt_hdr_len; - } else if (buf_len == na->up.virt_hdr_len && ft->ft_flags & NS_MOREFRAG) { - /* only header in first fragment */ - ft++; - buf = ft->ft_buf; - buf_len = ft->ft_len; - } else { - RD(5, "invalid buf format, length %d", buf_len); + if (buf_len < 14) { return NM_BDG_NOPORT; } @@ -1803,13 +2001,23 @@ nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n, struct netmap_vp_adapter *na, uint8_t dst_ring = ring_nr; /* default, same ring as origin */ uint16_t dst_port, d_i; struct nm_bdg_q *d; + struct nm_bdg_fwd *start_ft = NULL; ND("slot %d frags %d", i, ft[i].ft_frags); - /* Drop the packet if the virtio-net header is not into the first - fragment nor at the very beginning of the second. */ - if (unlikely(na->up.virt_hdr_len > ft[i].ft_len)) + + if (na->up.virt_hdr_len < ft[i].ft_len) { + ft[i].ft_offset = na->up.virt_hdr_len; + start_ft = &ft[i]; + } else if (na->up.virt_hdr_len == ft[i].ft_len && ft[i].ft_flags & NS_MOREFRAG) { + ft[i].ft_offset = ft[i].ft_len; + start_ft = &ft[i+1]; + } else { + /* Drop the packet if the virtio-net header is not into the first + * fragment nor at the very beginning of the second. 
+ */ continue; - dst_port = b->bdg_ops.lookup(&ft[i], &dst_ring, na); + } + dst_port = b->bdg_ops->lookup(start_ft, &dst_ring, na, b->private_data); if (netmap_verbose > 255) RD(5, "slot %d port %d -> %d", i, me, dst_port); if (dst_port >= NM_BDG_NOPORT) @@ -1940,7 +2148,7 @@ nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n, struct netmap_vp_adapter *na, nrings = dst_na->up.num_rx_rings; if (dst_nr >= nrings) dst_nr = dst_nr % nrings; - kring = &dst_na->up.rx_rings[dst_nr]; + kring = dst_na->up.rx_rings[dst_nr]; ring = kring->ring; /* the destination ring may have not been opened for RX */ if (unlikely(ring == NULL || kring->nr_mode != NKR_NETMAP_ON)) @@ -2224,8 +2432,9 @@ netmap_vp_bdg_attach(const char *name, struct netmap_adapter *na) { struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter *)na; - if (vpna->na_bdg) + if (vpna->na_bdg) { return netmap_bwrap_attach(name, na); + } na->na_vp = vpna; strncpy(na->name, name, sizeof(na->name)); na->na_hostvp = NULL; @@ -2236,14 +2445,19 @@ netmap_vp_bdg_attach(const char *name, struct netmap_adapter *na) * Only persistent VALE ports have a non-null ifp. 
*/ static int -netmap_vp_create(struct nmreq *nmr, struct ifnet *ifp, - struct netmap_mem_d *nmd, - struct netmap_vp_adapter **ret) +netmap_vp_create(struct nmreq_header *hdr, struct ifnet *ifp, + struct netmap_mem_d *nmd, struct netmap_vp_adapter **ret) { + struct nmreq_register *req = (struct nmreq_register *)hdr->nr_body; struct netmap_vp_adapter *vpna; struct netmap_adapter *na; int error = 0; u_int npipes = 0; + u_int extrabufs = 0; + + if (hdr->nr_reqtype != NETMAP_REQ_REGISTER) { + return EINVAL; + } vpna = nm_os_malloc(sizeof(*vpna)); if (vpna == NULL) @@ -2252,31 +2466,30 @@ netmap_vp_create(struct nmreq *nmr, struct ifnet *ifp, na = &vpna->up; na->ifp = ifp; - strncpy(na->name, nmr->nr_name, sizeof(na->name)); + strncpy(na->name, hdr->nr_name, sizeof(na->name)); /* bound checking */ - na->num_tx_rings = nmr->nr_tx_rings; + na->num_tx_rings = req->nr_tx_rings; nm_bound_var(&na->num_tx_rings, 1, 1, NM_BDG_MAXRINGS, NULL); - nmr->nr_tx_rings = na->num_tx_rings; // write back - na->num_rx_rings = nmr->nr_rx_rings; + req->nr_tx_rings = na->num_tx_rings; /* write back */ + na->num_rx_rings = req->nr_rx_rings; nm_bound_var(&na->num_rx_rings, 1, 1, NM_BDG_MAXRINGS, NULL); - nmr->nr_rx_rings = na->num_rx_rings; // write back - nm_bound_var(&nmr->nr_tx_slots, NM_BRIDGE_RINGSIZE, + req->nr_rx_rings = na->num_rx_rings; /* write back */ + nm_bound_var(&req->nr_tx_slots, NM_BRIDGE_RINGSIZE, 1, NM_BDG_MAXSLOTS, NULL); - na->num_tx_desc = nmr->nr_tx_slots; - nm_bound_var(&nmr->nr_rx_slots, NM_BRIDGE_RINGSIZE, + na->num_tx_desc = req->nr_tx_slots; + nm_bound_var(&req->nr_rx_slots, NM_BRIDGE_RINGSIZE, 1, NM_BDG_MAXSLOTS, NULL); /* validate number of pipes. We want at least 1, * but probably can do with some more. 
* So let's use 2 as default (when 0 is supplied) */ - npipes = nmr->nr_arg1; nm_bound_var(&npipes, 2, 1, NM_MAXPIPES, NULL); - nmr->nr_arg1 = npipes; /* write back */ /* validate extra bufs */ - nm_bound_var(&nmr->nr_arg3, 0, 0, + nm_bound_var(&extrabufs, 0, 0, 128*NM_BDG_MAXSLOTS, NULL); - na->num_rx_desc = nmr->nr_rx_slots; + req->nr_extra_bufs = extrabufs; /* write back */ + na->num_rx_desc = req->nr_rx_slots; /* Set the mfs to a default value, as it is needed on the VALE * mismatch datapath. XXX We should set it according to the MTU * known to the kernel. */ @@ -2299,13 +2512,13 @@ netmap_vp_create(struct nmreq *nmr, struct ifnet *ifp, na->nm_krings_create = netmap_vp_krings_create; na->nm_krings_delete = netmap_vp_krings_delete; na->nm_dtor = netmap_vp_dtor; - D("nr_arg2 %d", nmr->nr_arg2); + ND("nr_mem_id %d", req->nr_mem_id); na->nm_mem = nmd ? netmap_mem_get(nmd): netmap_mem_private_new( na->num_tx_rings, na->num_tx_desc, na->num_rx_rings, na->num_rx_desc, - nmr->nr_arg3, npipes, &error); + req->nr_extra_bufs, npipes, &error); if (na->nm_mem == NULL) goto err; na->nm_bdg_attach = netmap_vp_bdg_attach; @@ -2373,8 +2586,9 @@ netmap_bwrap_dtor(struct netmap_adapter *na) ND("na %p", na); na->ifp = NULL; bna->host.up.ifp = NULL; + hwna->na_vp = bna->saved_na_vp; + hwna->na_hostvp = NULL; hwna->na_private = NULL; - hwna->na_vp = hwna->na_hostvp = NULL; hwna->na_flags &= ~NAF_BUSY; netmap_adapter_put(hwna); @@ -2414,7 +2628,7 @@ netmap_bwrap_intr_notify(struct netmap_kring *kring, int flags) if (netmap_verbose) D("%s %s 0x%x", na->name, kring->name, flags); - bkring = &vpna->up.tx_rings[ring_nr]; + bkring = vpna->up.tx_rings[ring_nr]; /* make sure the ring is not disabled */ if (nm_kr_tryget(kring, 0 /* can't sleep */, NULL)) { @@ -2497,8 +2711,8 @@ netmap_bwrap_reg(struct netmap_adapter *na, int onoff) /* pass down the pending ring state information */ for_rx_tx(t) { for (i = 0; i < nma_get_nrings(na, t) + 1; i++) - NMR(hwna, t)[i].nr_pending_mode = - NMR(na, 
t)[i].nr_pending_mode; + NMR(hwna, t)[i]->nr_pending_mode = + NMR(na, t)[i]->nr_pending_mode; } /* forward the request to the hwna */ @@ -2509,8 +2723,8 @@ netmap_bwrap_reg(struct netmap_adapter *na, int onoff) /* copy up the current ring state information */ for_rx_tx(t) { for (i = 0; i < nma_get_nrings(na, t) + 1; i++) { - struct netmap_kring *kring = &NMR(hwna, t)[i]; - NMR(na, t)[i].nr_mode = kring->nr_mode; + struct netmap_kring *kring = NMR(hwna, t)[i]; + NMR(na, t)[i]->nr_mode = kring->nr_mode; } } @@ -2523,15 +2737,15 @@ netmap_bwrap_reg(struct netmap_adapter *na, int onoff) u_int i; /* intercept the hwna nm_nofify callback on the hw rings */ for (i = 0; i < hwna->num_rx_rings; i++) { - hwna->rx_rings[i].save_notify = hwna->rx_rings[i].nm_notify; - hwna->rx_rings[i].nm_notify = netmap_bwrap_intr_notify; + hwna->rx_rings[i]->save_notify = hwna->rx_rings[i]->nm_notify; + hwna->rx_rings[i]->nm_notify = netmap_bwrap_intr_notify; } i = hwna->num_rx_rings; /* for safety */ /* save the host ring notify unconditionally */ - hwna->rx_rings[i].save_notify = hwna->rx_rings[i].nm_notify; + hwna->rx_rings[i]->save_notify = hwna->rx_rings[i]->nm_notify; if (hostna->na_bdg) { /* also intercept the host ring notify */ - hwna->rx_rings[i].nm_notify = netmap_bwrap_intr_notify; + hwna->rx_rings[i]->nm_notify = netmap_bwrap_intr_notify; } if (na->active_fds == 0) na->na_flags |= NAF_NETMAP_ON; @@ -2543,17 +2757,18 @@ netmap_bwrap_reg(struct netmap_adapter *na, int onoff) /* reset all notify callbacks (including host ring) */ for (i = 0; i <= hwna->num_rx_rings; i++) { - hwna->rx_rings[i].nm_notify = hwna->rx_rings[i].save_notify; - hwna->rx_rings[i].save_notify = NULL; + hwna->rx_rings[i]->nm_notify = hwna->rx_rings[i]->save_notify; + hwna->rx_rings[i]->save_notify = NULL; } hwna->na_lut.lut = NULL; + hwna->na_lut.plut = NULL; hwna->na_lut.objtotal = 0; hwna->na_lut.objsize = 0; /* pass ownership of the netmap rings to the hwna */ for_rx_tx(t) { for (i = 0; i < 
nma_get_nrings(na, t) + 1; i++) { - NMR(na, t)[i].ring = NULL; + NMR(na, t)[i]->ring = NULL; } } @@ -2564,8 +2779,7 @@ netmap_bwrap_reg(struct netmap_adapter *na, int onoff) /* nm_config callback for bwrap */ static int -netmap_bwrap_config(struct netmap_adapter *na, u_int *txr, u_int *txd, - u_int *rxr, u_int *rxd) +netmap_bwrap_config(struct netmap_adapter *na, struct nm_config_info *info) { struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter *)na; @@ -2573,11 +2787,12 @@ netmap_bwrap_config(struct netmap_adapter *na, u_int *txr, u_int *txd, /* forward the request */ netmap_update_config(hwna); - /* swap the results */ - *txr = hwna->num_rx_rings; - *txd = hwna->num_rx_desc; - *rxr = hwna->num_tx_rings; - *rxd = hwna->num_rx_desc; + /* swap the results and propagate */ + info->num_tx_rings = hwna->num_rx_rings; + info->num_tx_descs = hwna->num_rx_desc; + info->num_rx_rings = hwna->num_tx_rings; + info->num_rx_descs = hwna->num_tx_desc; + info->rx_buf_maxsize = hwna->rx_buf_maxsize; return 0; } @@ -2610,7 +2825,7 @@ netmap_bwrap_krings_create(struct netmap_adapter *na) /* increment the usage counter for all the hwna krings */ for_rx_tx(t) { for (i = 0; i < nma_get_nrings(hwna, t) + 1; i++) { - NMR(hwna, t)[i].users++; + NMR(hwna, t)[i]->users++; } } @@ -2627,8 +2842,8 @@ netmap_bwrap_krings_create(struct netmap_adapter *na) for_rx_tx(t) { enum txrx r = nm_txrx_swap(t); /* swap NR_TX <-> NR_RX */ for (i = 0; i < nma_get_nrings(hwna, r) + 1; i++) { - NMR(na, t)[i].nkr_num_slots = NMR(hwna, r)[i].nkr_num_slots; - NMR(na, t)[i].ring = NMR(hwna, r)[i].ring; + NMR(na, t)[i]->nkr_num_slots = NMR(hwna, r)[i]->nkr_num_slots; + NMR(na, t)[i]->ring = NMR(hwna, r)[i]->ring; } } @@ -2638,16 +2853,16 @@ netmap_bwrap_krings_create(struct netmap_adapter *na) * hostna */ hostna->tx_rings = &na->tx_rings[na->num_tx_rings]; - hostna->tx_rings[0].na = hostna; + hostna->tx_rings[0]->na = hostna; hostna->rx_rings = &na->rx_rings[na->num_rx_rings]; - hostna->rx_rings[0].na = 
hostna; + hostna->rx_rings[0]->na = hostna; } return 0; err_dec_users: for_rx_tx(t) { - NMR(hwna, t)[i].users--; + NMR(hwna, t)[i]->users--; } hwna->nm_krings_delete(hwna); err_del_vp_rings: @@ -2671,7 +2886,7 @@ netmap_bwrap_krings_delete(struct netmap_adapter *na) /* decrement the usage counter for all the hwna krings */ for_rx_tx(t) { for (i = 0; i < nma_get_nrings(hwna, t) + 1; i++) { - NMR(hwna, t)[i].users--; + NMR(hwna, t)[i]->users--; } } @@ -2698,7 +2913,7 @@ netmap_bwrap_notify(struct netmap_kring *kring, int flags) (kring ? kring->name : "NULL!"), (na ? na->name : "NULL!"), (hwna ? hwna->name : "NULL!")); - hw_kring = &hwna->tx_rings[ring_n]; + hw_kring = hwna->tx_rings[ring_n]; if (nm_kr_tryget(hw_kring, 0, NULL)) { return ENXIO; @@ -2746,13 +2961,22 @@ put_out: * directed to hwna. */ static int -netmap_bwrap_bdg_ctl(struct netmap_adapter *na, struct nmreq *nmr, int attach) +netmap_bwrap_bdg_ctl(struct nmreq_header *hdr, struct netmap_adapter *na) { struct netmap_priv_d *npriv; struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter*)na; int error = 0; - if (attach) { + if (hdr->nr_reqtype == NETMAP_REQ_VALE_ATTACH) { + struct nmreq_vale_attach *req = + (struct nmreq_vale_attach *)hdr->nr_body; + if (req->reg.nr_ringid != 0 || + (req->reg.nr_mode != NR_REG_ALL_NIC && + req->reg.nr_mode != NR_REG_NIC_SW)) { + /* We only support attaching all the NIC rings + * and/or the host stack. 
*/ + return EINVAL; + } if (NETMAP_OWNED_BY_ANY(na)) { return EBUSY; } @@ -2764,7 +2988,8 @@ netmap_bwrap_bdg_ctl(struct netmap_adapter *na, struct nmreq *nmr, int attach) if (npriv == NULL) return ENOMEM; npriv->np_ifp = na->ifp; /* let the priv destructor release the ref */ - error = netmap_do_regif(npriv, na, nmr->nr_ringid, nmr->nr_flags); + error = netmap_do_regif(npriv, na, req->reg.nr_mode, + req->reg.nr_ringid, req->reg.nr_flags); if (error) { netmap_priv_delete(npriv); return error; @@ -2778,8 +3003,8 @@ netmap_bwrap_bdg_ctl(struct netmap_adapter *na, struct nmreq *nmr, int attach) bna->na_kpriv = NULL; na->na_flags &= ~NAF_BUSY; } - return error; + return error; } /* attach a bridge wrapper to the 'real' device */ @@ -2837,7 +3062,9 @@ netmap_bwrap_attach(const char *nr_name, struct netmap_adapter *hwna) bna->hwna = hwna; netmap_adapter_get(hwna); hwna->na_private = bna; /* weak reference */ + bna->saved_na_vp = hwna->na_vp; hwna->na_vp = &bna->up; + bna->up.up.na_vp = &(bna->up); if (hwna->na_flags & NAF_HOST_RINGS) { if (hwna->na_flags & NAF_SW_ONLY) -- cgit v1.3