diff options
| author | Vincenzo Maffione <vmaffione@FreeBSD.org> | 2018-04-12 07:20:50 +0000 |
|---|---|---|
| committer | Vincenzo Maffione <vmaffione@FreeBSD.org> | 2018-04-12 07:20:50 +0000 |
| commit | 2ff91c175eca50b7d0d9da6b31eae4109c034137 (patch) | |
| tree | 15a4f8847a8cabd782f67326125c48fed4fdd27b /sys/dev/netmap | |
| parent | 66def52613043a86172a2ebe6feab214258fa2fa (diff) | |
Notes
Diffstat (limited to 'sys/dev/netmap')
| -rw-r--r-- | sys/dev/netmap/if_ptnet.c | 34 | ||||
| -rw-r--r-- | sys/dev/netmap/if_re_netmap.h | 6 | ||||
| -rw-r--r-- | sys/dev/netmap/if_vtnet_netmap.h | 26 | ||||
| -rw-r--r-- | sys/dev/netmap/netmap.c | 1073 | ||||
| -rw-r--r-- | sys/dev/netmap/netmap_freebsd.c | 120 | ||||
| -rw-r--r-- | sys/dev/netmap/netmap_generic.c | 6 | ||||
| -rw-r--r-- | sys/dev/netmap/netmap_kern.h | 155 | ||||
| -rw-r--r-- | sys/dev/netmap/netmap_legacy.c | 428 | ||||
| -rw-r--r-- | sys/dev/netmap/netmap_mem2.c | 564 | ||||
| -rw-r--r-- | sys/dev/netmap/netmap_mem2.h | 21 | ||||
| -rw-r--r-- | sys/dev/netmap/netmap_monitor.c | 56 | ||||
| -rw-r--r-- | sys/dev/netmap/netmap_pipe.c | 321 | ||||
| -rw-r--r-- | sys/dev/netmap/netmap_pt.c | 193 | ||||
| -rw-r--r-- | sys/dev/netmap/netmap_vale.c | 931 |
14 files changed, 2570 insertions, 1364 deletions
diff --git a/sys/dev/netmap/if_ptnet.c b/sys/dev/netmap/if_ptnet.c index 1805a7f31e48d..b6059dc55cfad 100644 --- a/sys/dev/netmap/if_ptnet.c +++ b/sys/dev/netmap/if_ptnet.c @@ -210,8 +210,8 @@ static int ptnet_irqs_init(struct ptnet_softc *sc); static void ptnet_irqs_fini(struct ptnet_softc *sc); static uint32_t ptnet_nm_ptctl(if_t ifp, uint32_t cmd); -static int ptnet_nm_config(struct netmap_adapter *na, unsigned *txr, - unsigned *txd, unsigned *rxr, unsigned *rxd); +static int ptnet_nm_config(struct netmap_adapter *na, + struct nm_config_info *info); static void ptnet_update_vnet_hdr(struct ptnet_softc *sc); static int ptnet_nm_register(struct netmap_adapter *na, int onoff); static int ptnet_nm_txsync(struct netmap_kring *kring, int flags); @@ -1104,18 +1104,20 @@ ptnet_nm_ptctl(if_t ifp, uint32_t cmd) } static int -ptnet_nm_config(struct netmap_adapter *na, unsigned *txr, unsigned *txd, - unsigned *rxr, unsigned *rxd) +ptnet_nm_config(struct netmap_adapter *na, struct nm_config_info *info) { struct ptnet_softc *sc = if_getsoftc(na->ifp); - *txr = bus_read_4(sc->iomem, PTNET_IO_NUM_TX_RINGS); - *rxr = bus_read_4(sc->iomem, PTNET_IO_NUM_RX_RINGS); - *txd = bus_read_4(sc->iomem, PTNET_IO_NUM_TX_SLOTS); - *rxd = bus_read_4(sc->iomem, PTNET_IO_NUM_RX_SLOTS); + info->num_tx_rings = bus_read_4(sc->iomem, PTNET_IO_NUM_TX_RINGS); + info->num_rx_rings = bus_read_4(sc->iomem, PTNET_IO_NUM_RX_RINGS); + info->num_tx_descs = bus_read_4(sc->iomem, PTNET_IO_NUM_TX_SLOTS); + info->num_rx_descs = bus_read_4(sc->iomem, PTNET_IO_NUM_RX_SLOTS); + info->rx_buf_maxsize = NETMAP_BUF_SIZE(na); - device_printf(sc->dev, "txr %u, rxr %u, txd %u, rxd %u\n", - *txr, *rxr, *txd, *rxd); + device_printf(sc->dev, "txr %u, rxr %u, txd %u, rxd %u, rxbufsz %u\n", + info->num_tx_rings, info->num_rx_rings, + info->num_tx_descs, info->num_rx_descs, + info->rx_buf_maxsize); return 0; } @@ -1133,9 +1135,9 @@ ptnet_sync_from_csb(struct ptnet_softc *sc, struct netmap_adapter *na) struct netmap_kring 
*kring; if (i < na->num_tx_rings) { - kring = na->tx_rings + i; + kring = na->tx_rings[i]; } else { - kring = na->rx_rings + i - na->num_tx_rings; + kring = na->rx_rings[i - na->num_tx_rings]; } kring->rhead = kring->ring->head = ptgh->head; kring->rcur = kring->ring->cur = ptgh->cur; @@ -1228,7 +1230,7 @@ ptnet_nm_register(struct netmap_adapter *na, int onoff) if (native) { for_rx_tx(t) { for (i = 0; i <= nma_get_nrings(na, t); i++) { - struct netmap_kring *kring = &NMR(na, t)[i]; + struct netmap_kring *kring = NMR(na, t)[i]; if (nm_kring_pending_on(kring)) { kring->nr_mode = NKR_NETMAP_ON; @@ -1243,7 +1245,7 @@ ptnet_nm_register(struct netmap_adapter *na, int onoff) nm_clear_native_flags(na); for_rx_tx(t) { for (i = 0; i <= nma_get_nrings(na, t); i++) { - struct netmap_kring *kring = &NMR(na, t)[i]; + struct netmap_kring *kring = NMR(na, t)[i]; if (nm_kring_pending_off(kring)) { kring->nr_mode = NKR_NETMAP_OFF; @@ -1758,7 +1760,7 @@ ptnet_drain_transmit_queue(struct ptnet_queue *pq, unsigned int budget, ptgh = pq->ptgh; pthg = pq->pthg; - kring = na->tx_rings + pq->kring_id; + kring = na->tx_rings[pq->kring_id]; ring = kring->ring; lim = kring->nkr_num_slots - 1; head = ring->head; @@ -2021,7 +2023,7 @@ ptnet_rx_eof(struct ptnet_queue *pq, unsigned int budget, bool may_resched) struct ptnet_csb_gh *ptgh = pq->ptgh; struct ptnet_csb_hg *pthg = pq->pthg; struct netmap_adapter *na = &sc->ptna->dr.up; - struct netmap_kring *kring = na->rx_rings + pq->kring_id; + struct netmap_kring *kring = na->rx_rings[pq->kring_id]; struct netmap_ring *ring = kring->ring; unsigned int const lim = kring->nkr_num_slots - 1; unsigned int batch_count = 0; diff --git a/sys/dev/netmap/if_re_netmap.h b/sys/dev/netmap/if_re_netmap.h index e7dd087acc676..0e56a731ac6aa 100644 --- a/sys/dev/netmap/if_re_netmap.h +++ b/sys/dev/netmap/if_re_netmap.h @@ -304,7 +304,7 @@ re_netmap_tx_init(struct rl_softc *sc) /* l points in the netmap ring, i points in the NIC ring */ for (i = 0; i < n; i++) { 
uint64_t paddr; - int l = netmap_idx_n2k(&na->tx_rings[0], i); + int l = netmap_idx_n2k(na->tx_rings[0], i); void *addr = PNMB(na, slot + l, &paddr); desc[i].rl_bufaddr_lo = htole32(RL_ADDR_LO(paddr)); @@ -330,11 +330,11 @@ re_netmap_rx_init(struct rl_softc *sc) * Do not release the slots owned by userspace, * and also keep one empty. */ - max_avail = n - 1 - nm_kr_rxspace(&na->rx_rings[0]); + max_avail = n - 1 - nm_kr_rxspace(na->rx_rings[0]); for (nic_i = 0; nic_i < n; nic_i++) { void *addr; uint64_t paddr; - uint32_t nm_i = netmap_idx_n2k(&na->rx_rings[0], nic_i); + uint32_t nm_i = netmap_idx_n2k(na->rx_rings[0], nic_i); addr = PNMB(na, slot + nm_i, &paddr); diff --git a/sys/dev/netmap/if_vtnet_netmap.h b/sys/dev/netmap/if_vtnet_netmap.h index 10789c53d1f06..e4ab64d2ed976 100644 --- a/sys/dev/netmap/if_vtnet_netmap.h +++ b/sys/dev/netmap/if_vtnet_netmap.h @@ -383,7 +383,7 @@ vtnet_netmap_init_rx_buffers(struct SOFTC_T *sc) if (!nm_native_on(na)) return 0; for (r = 0; r < na->num_rx_rings; r++) { - struct netmap_kring *kring = &na->rx_rings[r]; + struct netmap_kring *kring = na->rx_rings[r]; struct vtnet_rxq *rxq = &sc->vtnet_rxqs[r]; struct virtqueue *vq = rxq->vtnrx_vq; struct netmap_slot* slot; @@ -407,29 +407,6 @@ vtnet_netmap_init_rx_buffers(struct SOFTC_T *sc) return 1; } -/* Update the virtio-net device configurations. Number of queues can - * change dinamically, by 'ethtool --set-channels $IFNAME combined $N'. - * This is actually the only way virtio-net can currently enable - * the multiqueue mode. 
- * XXX note that we seem to lose packets if the netmap ring has more - * slots than the queue - */ -static int -vtnet_netmap_config(struct netmap_adapter *na, u_int *txr, u_int *txd, - u_int *rxr, u_int *rxd) -{ - struct ifnet *ifp = na->ifp; - struct SOFTC_T *sc = ifp->if_softc; - - *txr = *rxr = sc->vtnet_max_vq_pairs; - *rxd = 512; // sc->vtnet_rx_nmbufs; - *txd = *rxd; // XXX - D("vtnet config txq=%d, txd=%d rxq=%d, rxd=%d", - *txr, *txd, *rxr, *rxd); - - return 0; -} - static void vtnet_netmap_attach(struct SOFTC_T *sc) { @@ -443,7 +420,6 @@ vtnet_netmap_attach(struct SOFTC_T *sc) na.nm_register = vtnet_netmap_reg; na.nm_txsync = vtnet_netmap_txsync; na.nm_rxsync = vtnet_netmap_rxsync; - na.nm_config = vtnet_netmap_config; na.nm_intr = vtnet_netmap_intr; na.num_tx_rings = na.num_rx_rings = sc->vtnet_max_vq_pairs; D("max rings %d", sc->vtnet_max_vq_pairs); diff --git a/sys/dev/netmap/netmap.c b/sys/dev/netmap/netmap.c index 3c5551bad1569..d6230dfb8ebe1 100644 --- a/sys/dev/netmap/netmap.c +++ b/sys/dev/netmap/netmap.c @@ -262,7 +262,7 @@ ports attached to the switch) * * Any network interface known to the system (including a persistent VALE * port) can be attached to a VALE switch by issuing the - * NETMAP_BDG_ATTACH subcommand. After the attachment, persistent VALE ports + * NETMAP_REQ_VALE_ATTACH command. After the attachment, persistent VALE ports * look exactly like ephemeral VALE ports (as created in step 2 above). The * attachment of other interfaces, instead, requires the creation of a * netmap_bwrap_adapter. 
Moreover, the attached interface must be put in @@ -591,9 +591,9 @@ void netmap_set_ring(struct netmap_adapter *na, u_int ring_id, enum txrx t, int stopped) { if (stopped) - netmap_disable_ring(NMR(na, t) + ring_id, stopped); + netmap_disable_ring(NMR(na, t)[ring_id], stopped); else - NMR(na, t)[ring_id].nkr_stopped = 0; + NMR(na, t)[ring_id]->nkr_stopped = 0; } @@ -745,39 +745,42 @@ nm_dump_buf(char *p, int len, int lim, char *dst) int netmap_update_config(struct netmap_adapter *na) { - u_int txr, txd, rxr, rxd; + struct nm_config_info info; - txr = txd = rxr = rxd = 0; + bzero(&info, sizeof(info)); if (na->nm_config == NULL || - na->nm_config(na, &txr, &txd, &rxr, &rxd)) - { + na->nm_config(na, &info)) { /* take whatever we had at init time */ - txr = na->num_tx_rings; - txd = na->num_tx_desc; - rxr = na->num_rx_rings; - rxd = na->num_rx_desc; + info.num_tx_rings = na->num_tx_rings; + info.num_tx_descs = na->num_tx_desc; + info.num_rx_rings = na->num_rx_rings; + info.num_rx_descs = na->num_rx_desc; + info.rx_buf_maxsize = na->rx_buf_maxsize; } - if (na->num_tx_rings == txr && na->num_tx_desc == txd && - na->num_rx_rings == rxr && na->num_rx_desc == rxd) + if (na->num_tx_rings == info.num_tx_rings && + na->num_tx_desc == info.num_tx_descs && + na->num_rx_rings == info.num_rx_rings && + na->num_rx_desc == info.num_rx_descs && + na->rx_buf_maxsize == info.rx_buf_maxsize) return 0; /* nothing changed */ - if (netmap_verbose || na->active_fds > 0) { - D("stored config %s: txring %d x %d, rxring %d x %d", - na->name, - na->num_tx_rings, na->num_tx_desc, - na->num_rx_rings, na->num_rx_desc); - D("new config %s: txring %d x %d, rxring %d x %d", - na->name, txr, txd, rxr, rxd); - } if (na->active_fds == 0) { - D("configuration changed (but fine)"); - na->num_tx_rings = txr; - na->num_tx_desc = txd; - na->num_rx_rings = rxr; - na->num_rx_desc = rxd; + D("configuration changed for %s: txring %d x %d, " + "rxring %d x %d, rxbufsz %d", + na->name, na->num_tx_rings, 
na->num_tx_desc, + na->num_rx_rings, na->num_rx_desc, na->rx_buf_maxsize); + na->num_tx_rings = info.num_tx_rings; + na->num_tx_desc = info.num_tx_descs; + na->num_rx_rings = info.num_rx_rings; + na->num_rx_desc = info.num_rx_descs; + na->rx_buf_maxsize = info.rx_buf_maxsize; return 0; } - D("configuration changed while active, this is bad..."); + D("WARNING: configuration changed for %s while active: " + "txring %d x %d, rxring %d x %d, rxbufsz %d", + na->name, info.num_tx_rings, info.num_tx_descs, + info.num_rx_rings, info.num_rx_descs, + info.rx_buf_maxsize); return 1; } @@ -827,7 +830,9 @@ netmap_krings_create(struct netmap_adapter *na, u_int tailroom) n[NR_TX] = na->num_tx_rings + 1; n[NR_RX] = na->num_rx_rings + 1; - len = (n[NR_TX] + n[NR_RX]) * sizeof(struct netmap_kring) + tailroom; + len = (n[NR_TX] + n[NR_RX]) * + (sizeof(struct netmap_kring) + sizeof(struct netmap_kring *)) + + tailroom; na->tx_rings = nm_os_malloc((size_t)len); if (na->tx_rings == NULL) { @@ -835,6 +840,14 @@ netmap_krings_create(struct netmap_adapter *na, u_int tailroom) return ENOMEM; } na->rx_rings = na->tx_rings + n[NR_TX]; + na->tailroom = na->rx_rings + n[NR_RX]; + + /* link the krings in the krings array */ + kring = (struct netmap_kring *)((char *)na->tailroom + tailroom); + for (i = 0; i < n[NR_TX] + n[NR_RX]; i++) { + na->tx_rings[i] = kring; + kring++; + } /* * All fields in krings are 0 except the one initialized below. @@ -843,9 +856,10 @@ netmap_krings_create(struct netmap_adapter *na, u_int tailroom) for_rx_tx(t) { ndesc = nma_get_ndesc(na, t); for (i = 0; i < n[t]; i++) { - kring = &NMR(na, t)[i]; + kring = NMR(na, t)[i]; bzero(kring, sizeof(*kring)); kring->na = na; + kring->notify_na = na; kring->ring_id = i; kring->tx = t; kring->nkr_num_slots = ndesc; @@ -854,6 +868,8 @@ netmap_krings_create(struct netmap_adapter *na, u_int tailroom) if (i < nma_get_nrings(na, t)) { kring->nm_sync = (t == NR_TX ? 
na->nm_txsync : na->nm_rxsync); } else { + if (!(na->na_flags & NAF_HOST_RINGS)) + kring->nr_kflags |= NKR_FAKERING; kring->nm_sync = (t == NR_TX ? netmap_txsync_to_host: netmap_rxsync_from_host); @@ -874,7 +890,6 @@ netmap_krings_create(struct netmap_adapter *na, u_int tailroom) nm_os_selinfo_init(&na->si[t]); } - na->tailroom = na->rx_rings + n[NR_RX]; return 0; } @@ -885,7 +900,7 @@ netmap_krings_create(struct netmap_adapter *na, u_int tailroom) void netmap_krings_delete(struct netmap_adapter *na) { - struct netmap_kring *kring = na->tx_rings; + struct netmap_kring **kring = na->tx_rings; enum txrx t; if (na->tx_rings == NULL) { @@ -898,8 +913,8 @@ netmap_krings_delete(struct netmap_adapter *na) /* we rely on the krings layout described above */ for ( ; kring != na->tailroom; kring++) { - mtx_destroy(&kring->q_lock); - nm_os_selinfo_uninit(&kring->si); + mtx_destroy(&(*kring)->q_lock); + nm_os_selinfo_uninit(&(*kring)->si); } nm_os_free(na->tx_rings); na->tx_rings = na->rx_rings = na->tailroom = NULL; @@ -915,7 +930,7 @@ netmap_krings_delete(struct netmap_adapter *na) void netmap_hw_krings_delete(struct netmap_adapter *na) { - struct mbq *q = &na->rx_rings[na->num_rx_rings].rx_queue; + struct mbq *q = &na->rx_rings[na->num_rx_rings]->rx_queue; ND("destroy sw mbq with len %d", mbq_len(q)); mbq_purge(q); @@ -1196,7 +1211,7 @@ nm_may_forward_down(struct netmap_kring *kring, int sync_flags) static u_int netmap_sw_to_nic(struct netmap_adapter *na) { - struct netmap_kring *kring = &na->rx_rings[na->num_rx_rings]; + struct netmap_kring *kring = na->rx_rings[na->num_rx_rings]; struct netmap_slot *rxslot = kring->ring->slot; u_int i, rxcur = kring->nr_hwcur; u_int const head = kring->rhead; @@ -1205,7 +1220,7 @@ netmap_sw_to_nic(struct netmap_adapter *na) /* scan rings to find space, then fill as much as possible */ for (i = 0; i < na->num_tx_rings; i++) { - struct netmap_kring *kdst = &na->tx_rings[i]; + struct netmap_kring *kdst = na->tx_rings[i]; struct netmap_ring 
*rdst = kdst->ring; u_int const dst_lim = kdst->nkr_num_slots - 1; @@ -1443,7 +1458,7 @@ assign_mem: * MUST BE CALLED UNDER NMG_LOCK() * * Get a refcounted reference to a netmap adapter attached - * to the interface specified by nmr. + * to the interface specified by req. * This is always called in the execution of an ioctl(). * * Return ENXIO if the interface specified by the request does @@ -1453,13 +1468,15 @@ assign_mem: * could not be allocated. * If successful, hold a reference to the netmap adapter. * - * If the interface specified by nmr is a system one, also keep + * If the interface specified by req is a system one, also keep * a reference to it and return a valid *ifp. */ int -netmap_get_na(struct nmreq *nmr, struct netmap_adapter **na, - struct ifnet **ifp, struct netmap_mem_d *nmd, int create) +netmap_get_na(struct nmreq_header *hdr, + struct netmap_adapter **na, struct ifnet **ifp, + struct netmap_mem_d *nmd, int create) { + struct nmreq_register *req = (struct nmreq_register *)hdr->nr_body; int error = 0; struct netmap_adapter *ret = NULL; int nmd_ref = 0; @@ -1467,13 +1484,24 @@ netmap_get_na(struct nmreq *nmr, struct netmap_adapter **na, *na = NULL; /* default return value */ *ifp = NULL; + if (hdr->nr_reqtype != NETMAP_REQ_REGISTER) { + return EINVAL; + } + + if (req->nr_mode == NR_REG_PIPE_MASTER || + req->nr_mode == NR_REG_PIPE_SLAVE) { + /* Do not accept deprecated pipe modes. 
*/ + D("Deprecated pipe nr_mode, use xx{yy or xx}yy syntax"); + return EINVAL; + } + NMG_LOCK_ASSERT(); /* if the request contain a memid, try to find the * corresponding memory region */ - if (nmd == NULL && nmr->nr_arg2) { - nmd = netmap_mem_find(nmr->nr_arg2); + if (nmd == NULL && req->nr_mem_id) { + nmd = netmap_mem_find(req->nr_mem_id); if (nmd == NULL) return EINVAL; /* keep the rereference */ @@ -1492,22 +1520,22 @@ netmap_get_na(struct nmreq *nmr, struct netmap_adapter **na, */ /* try to see if this is a ptnetmap port */ - error = netmap_get_pt_host_na(nmr, na, nmd, create); + error = netmap_get_pt_host_na(hdr, na, nmd, create); if (error || *na != NULL) goto out; /* try to see if this is a monitor port */ - error = netmap_get_monitor_na(nmr, na, nmd, create); + error = netmap_get_monitor_na(hdr, na, nmd, create); if (error || *na != NULL) goto out; /* try to see if this is a pipe port */ - error = netmap_get_pipe_na(nmr, na, nmd, create); + error = netmap_get_pipe_na(hdr, na, nmd, create); if (error || *na != NULL) goto out; /* try to see if this is a bridge port */ - error = netmap_get_bdg_na(nmr, na, nmd, create); + error = netmap_get_bdg_na(hdr, na, nmd, create); if (error) goto out; @@ -1520,7 +1548,7 @@ netmap_get_na(struct nmreq *nmr, struct netmap_adapter **na, * This may still be a tap, a veth/epair, or even a * persistent VALE port. 
*/ - *ifp = ifunit_ref(nmr->nr_name); + *ifp = ifunit_ref(hdr->nr_name); if (*ifp == NULL) { error = ENXIO; goto out; @@ -1765,42 +1793,27 @@ netmap_ring_reinit(struct netmap_kring *kring) * */ int -netmap_interp_ringid(struct netmap_priv_d *priv, uint16_t ringid, uint32_t flags) +netmap_interp_ringid(struct netmap_priv_d *priv, uint32_t nr_mode, + uint16_t nr_ringid, uint64_t nr_flags) { struct netmap_adapter *na = priv->np_na; - u_int j, i = ringid & NETMAP_RING_MASK; - u_int reg = flags & NR_REG_MASK; int excluded_direction[] = { NR_TX_RINGS_ONLY, NR_RX_RINGS_ONLY }; enum txrx t; + u_int j; - if (reg == NR_REG_DEFAULT) { - /* convert from old ringid to flags */ - if (ringid & NETMAP_SW_RING) { - reg = NR_REG_SW; - } else if (ringid & NETMAP_HW_RING) { - reg = NR_REG_ONE_NIC; - } else { - reg = NR_REG_ALL_NIC; - } - D("deprecated API, old ringid 0x%x -> ringid %x reg %d", ringid, i, reg); - } - - if ((flags & NR_PTNETMAP_HOST) && ((reg != NR_REG_ALL_NIC && - reg != NR_REG_PIPE_MASTER && reg != NR_REG_PIPE_SLAVE) || - flags & (NR_RX_RINGS_ONLY|NR_TX_RINGS_ONLY))) { + if ((nr_flags & NR_PTNETMAP_HOST) && ((nr_mode != NR_REG_ALL_NIC) || + nr_flags & (NR_RX_RINGS_ONLY|NR_TX_RINGS_ONLY))) { D("Error: only NR_REG_ALL_NIC supported with netmap passthrough"); return EINVAL; } for_rx_tx(t) { - if (flags & excluded_direction[t]) { + if (nr_flags & excluded_direction[t]) { priv->np_qfirst[t] = priv->np_qlast[t] = 0; continue; } - switch (reg) { + switch (nr_mode) { case NR_REG_ALL_NIC: - case NR_REG_PIPE_MASTER: - case NR_REG_PIPE_SLAVE: priv->np_qfirst[t] = 0; priv->np_qlast[t] = nma_get_nrings(na, t); ND("ALL/PIPE: %s %d %d", nm_txrx2str(t), @@ -1812,20 +1825,21 @@ netmap_interp_ringid(struct netmap_priv_d *priv, uint16_t ringid, uint32_t flags D("host rings not supported"); return EINVAL; } - priv->np_qfirst[t] = (reg == NR_REG_SW ? + priv->np_qfirst[t] = (nr_mode == NR_REG_SW ? 
nma_get_nrings(na, t) : 0); priv->np_qlast[t] = nma_get_nrings(na, t) + 1; - ND("%s: %s %d %d", reg == NR_REG_SW ? "SW" : "NIC+SW", + ND("%s: %s %d %d", nr_mode == NR_REG_SW ? "SW" : "NIC+SW", nm_txrx2str(t), priv->np_qfirst[t], priv->np_qlast[t]); break; case NR_REG_ONE_NIC: - if (i >= na->num_tx_rings && i >= na->num_rx_rings) { - D("invalid ring id %d", i); + if (nr_ringid >= na->num_tx_rings && + nr_ringid >= na->num_rx_rings) { + D("invalid ring id %d", nr_ringid); return EINVAL; } /* if not enough rings, use the first one */ - j = i; + j = nr_ringid; if (j >= nma_get_nrings(na, t)) j = 0; priv->np_qfirst[t] = j; @@ -1834,11 +1848,11 @@ netmap_interp_ringid(struct netmap_priv_d *priv, uint16_t ringid, uint32_t flags priv->np_qfirst[t], priv->np_qlast[t]); break; default: - D("invalid regif type %d", reg); + D("invalid regif type %d", nr_mode); return EINVAL; } } - priv->np_flags = (flags & ~NR_REG_MASK) | reg; + priv->np_flags = nr_flags | nr_mode; // TODO /* Allow transparent forwarding mode in the host --> nic * direction only if all the TX hw rings have been opened. */ @@ -1854,7 +1868,7 @@ netmap_interp_ringid(struct netmap_priv_d *priv, uint16_t ringid, uint32_t flags priv->np_qlast[NR_TX], priv->np_qfirst[NR_RX], priv->np_qlast[NR_RX], - i); + nr_ringid); } return 0; } @@ -1865,18 +1879,19 @@ netmap_interp_ringid(struct netmap_priv_d *priv, uint16_t ringid, uint32_t flags * for all rings is the same as a single ring. */ static int -netmap_set_ringid(struct netmap_priv_d *priv, uint16_t ringid, uint32_t flags) +netmap_set_ringid(struct netmap_priv_d *priv, uint32_t nr_mode, + uint16_t nr_ringid, uint64_t nr_flags) { struct netmap_adapter *na = priv->np_na; int error; enum txrx t; - error = netmap_interp_ringid(priv, ringid, flags); + error = netmap_interp_ringid(priv, nr_mode, nr_ringid, nr_flags); if (error) { return error; } - priv->np_txpoll = (ringid & NETMAP_NO_TX_POLL) ? 0 : 1; + priv->np_txpoll = (nr_flags & NR_NO_TX_POLL) ? 
0 : 1; /* optimization: count the users registered for more than * one ring, which are the ones sleeping on the global queue. @@ -1933,7 +1948,7 @@ netmap_krings_get(struct netmap_priv_d *priv) */ for_rx_tx(t) { for (i = priv->np_qfirst[t]; i < priv->np_qlast[t]; i++) { - kring = &NMR(na, t)[i]; + kring = NMR(na, t)[i]; if ((kring->nr_kflags & NKR_EXCLUSIVE) || (kring->users && excl)) { @@ -1948,7 +1963,7 @@ netmap_krings_get(struct netmap_priv_d *priv) */ for_rx_tx(t) { for (i = priv->np_qfirst[t]; i < priv->np_qlast[t]; i++) { - kring = &NMR(na, t)[i]; + kring = NMR(na, t)[i]; kring->users++; if (excl) kring->nr_kflags |= NKR_EXCLUSIVE; @@ -1979,10 +1994,9 @@ netmap_krings_put(struct netmap_priv_d *priv) priv->np_qfirst[NR_RX], priv->np_qlast[MR_RX]); - for_rx_tx(t) { for (i = priv->np_qfirst[t]; i < priv->np_qlast[t]; i++) { - kring = &NMR(na, t)[i]; + kring = NMR(na, t)[i]; if (excl) kring->nr_kflags &= ~NKR_EXCLUSIVE; kring->users--; @@ -1992,6 +2006,12 @@ netmap_krings_put(struct netmap_priv_d *priv) } } +static int +nm_priv_rx_enabled(struct netmap_priv_d *priv) +{ + return (priv->np_qfirst[NR_RX] != priv->np_qlast[NR_RX]); +} + /* * possibly move the interface to netmap-mode. * If success it returns a pointer to netmap_if, otherwise NULL. 
@@ -2064,16 +2084,14 @@ netmap_krings_put(struct netmap_priv_d *priv) */ int netmap_do_regif(struct netmap_priv_d *priv, struct netmap_adapter *na, - uint16_t ringid, uint32_t flags) + uint32_t nr_mode, uint16_t nr_ringid, uint64_t nr_flags) { struct netmap_if *nifp = NULL; int error; NMG_LOCK_ASSERT(); - /* ring configuration may have changed, fetch from the card */ - netmap_update_config(na); priv->np_na = na; /* store the reference */ - error = netmap_set_ringid(priv, ringid, flags); + error = netmap_set_ringid(priv, nr_mode, nr_ringid, nr_flags); if (error) goto err; error = netmap_mem_finalize(na->nm_mem, na); @@ -2081,27 +2099,38 @@ netmap_do_regif(struct netmap_priv_d *priv, struct netmap_adapter *na, goto err; if (na->active_fds == 0) { + + /* cache the allocator info in the na */ + error = netmap_mem_get_lut(na->nm_mem, &na->na_lut); + if (error) + goto err_drop_mem; + ND("lut %p bufs %u size %u", na->na_lut.lut, na->na_lut.objtotal, + na->na_lut.objsize); + + /* ring configuration may have changed, fetch from the card */ + netmap_update_config(na); + /* * If this is the first registration of the adapter, * perform sanity checks and create the in-kernel view * of the netmap rings (the netmap krings). */ - if (na->ifp) { + if (na->ifp && nm_priv_rx_enabled(priv)) { /* This netmap adapter is attached to an ifnet. */ unsigned nbs = netmap_mem_bufsize(na->nm_mem); unsigned mtu = nm_os_ifnet_mtu(na->ifp); - /* The maximum amount of bytes that a single - * receive or transmit NIC descriptor can hold. */ - unsigned hw_max_slot_len = 4096; - if (mtu <= hw_max_slot_len) { + ND("mtu %d rx_buf_maxsize %d netmap_buf_size %d", + mtu, na->rx_buf_maxsize, nbs); + + if (mtu <= na->rx_buf_maxsize) { /* The MTU fits a single NIC slot. We only * Need to check that netmap buffers are * large enough to hold an MTU. NS_MOREFRAG * cannot be used in this case. 
*/ if (nbs < mtu) { nm_prerr("error: netmap buf size (%u) " - "< device MTU (%u)", nbs, mtu); + "< device MTU (%u)\n", nbs, mtu); error = EINVAL; goto err_drop_mem; } @@ -2114,22 +2143,22 @@ netmap_do_regif(struct netmap_priv_d *priv, struct netmap_adapter *na, if (!(na->na_flags & NAF_MOREFRAG)) { nm_prerr("error: large MTU (%d) needed " "but %s does not support " - "NS_MOREFRAG", mtu, + "NS_MOREFRAG\n", mtu, na->ifp->if_xname); error = EINVAL; goto err_drop_mem; - } else if (nbs < hw_max_slot_len) { + } else if (nbs < na->rx_buf_maxsize) { nm_prerr("error: using NS_MOREFRAG on " "%s requires netmap buf size " - ">= %u", na->ifp->if_xname, - hw_max_slot_len); + ">= %u\n", na->ifp->if_xname, + na->rx_buf_maxsize); error = EINVAL; goto err_drop_mem; } else { nm_prinf("info: netmap application on " "%s needs to support " "NS_MOREFRAG " - "(MTU=%u,netmap_buf_size=%u)", + "(MTU=%u,netmap_buf_size=%u)\n", na->ifp->if_xname, mtu, nbs); } } @@ -2141,7 +2170,7 @@ netmap_do_regif(struct netmap_priv_d *priv, struct netmap_adapter *na, */ error = na->nm_krings_create(na); if (error) - goto err_drop_mem; + goto err_put_lut; } @@ -2165,21 +2194,12 @@ netmap_do_regif(struct netmap_priv_d *priv, struct netmap_adapter *na, goto err_del_rings; } - if (na->active_fds == 0) { - /* cache the allocator info in the na */ - error = netmap_mem_get_lut(na->nm_mem, &na->na_lut); - if (error) - goto err_del_if; - ND("lut %p bufs %u size %u", na->na_lut.lut, na->na_lut.objtotal, - na->na_lut.objsize); - } - if (nm_kring_pending(priv)) { /* Some kring is switching mode, tell the adapter to * react on this. */ error = na->nm_register(na, 1); if (error) - goto err_put_lut; + goto err_del_if; } /* Commit the reference. 
*/ @@ -2195,9 +2215,6 @@ netmap_do_regif(struct netmap_priv_d *priv, struct netmap_adapter *na, return 0; -err_put_lut: - if (na->active_fds == 0) - memset(&na->na_lut, 0, sizeof(na->na_lut)); err_del_if: netmap_mem_if_delete(na, nifp); err_del_rings: @@ -2207,6 +2224,9 @@ err_rel_excl: err_del_krings: if (na->active_fds == 0) na->nm_krings_delete(na); +err_put_lut: + if (na->active_fds == 0) + memset(&na->na_lut, 0, sizeof(na->na_lut)); err_drop_mem: netmap_mem_drop(na); err: @@ -2242,246 +2262,367 @@ ring_timestamp_set(struct netmap_ring *ring) } } +static int nmreq_copyin(struct nmreq_header *, int); +static int nmreq_copyout(struct nmreq_header *, int); +static int nmreq_checkoptions(struct nmreq_header *); /* * ioctl(2) support for the "netmap" device. * * Following a list of accepted commands: - * - NIOCGINFO + * - NIOCCTRL device control API + * - NIOCTXSYNC sync TX rings + * - NIOCRXSYNC sync RX rings * - SIOCGIFADDR just for convenience - * - NIOCREGIF - * - NIOCTXSYNC - * - NIOCRXSYNC + * - NIOCGINFO deprecated (legacy API) + * - NIOCREGIF deprecated (legacy API) * * Return 0 on success, errno otherwise. 
*/ int -netmap_ioctl(struct netmap_priv_d *priv, u_long cmd, caddr_t data, struct thread *td) +netmap_ioctl(struct netmap_priv_d *priv, u_long cmd, caddr_t data, + struct thread *td, int nr_body_is_user) { struct mbq q; /* packets from RX hw queues to host stack */ - struct nmreq *nmr = (struct nmreq *) data; struct netmap_adapter *na = NULL; struct netmap_mem_d *nmd = NULL; struct ifnet *ifp = NULL; int error = 0; u_int i, qfirst, qlast; struct netmap_if *nifp; - struct netmap_kring *krings; + struct netmap_kring **krings; int sync_flags; enum txrx t; - if (cmd == NIOCGINFO || cmd == NIOCREGIF) { - /* truncate name */ - nmr->nr_name[sizeof(nmr->nr_name) - 1] = '\0'; - if (nmr->nr_version != NETMAP_API) { - D("API mismatch for %s got %d need %d", - nmr->nr_name, - nmr->nr_version, NETMAP_API); - nmr->nr_version = NETMAP_API; + switch (cmd) { + case NIOCCTRL: { + struct nmreq_header *hdr = (struct nmreq_header *)data; + + if (hdr->nr_version != NETMAP_API) { + D("API mismatch for reqtype %d: got %d need %d", + hdr->nr_version, + hdr->nr_version, NETMAP_API); + hdr->nr_version = NETMAP_API; } - if (nmr->nr_version < NETMAP_MIN_API || - nmr->nr_version > NETMAP_MAX_API) { + if (hdr->nr_version < NETMAP_MIN_API || + hdr->nr_version > NETMAP_MAX_API) { return EINVAL; } - } - switch (cmd) { - case NIOCGINFO: /* return capabilities etc */ - if (nmr->nr_cmd == NETMAP_BDG_LIST) { - error = netmap_bdg_ctl(nmr, NULL); - break; + /* Make a kernel-space copy of the user-space nr_body. + * For convenince, the nr_body pointer and the pointers + * in the options list will be replaced with their + * kernel-space counterparts. The original pointers are + * saved internally and later restored by nmreq_copyout + */ + error = nmreq_copyin(hdr, nr_body_is_user); + if (error) { + return error; } - NMG_LOCK(); - do { - /* memsize is always valid */ - u_int memflags; - uint64_t memsize; + /* Sanitize hdr->nr_name. 
*/ + hdr->nr_name[sizeof(hdr->nr_name) - 1] = '\0'; + + switch (hdr->nr_reqtype) { + case NETMAP_REQ_REGISTER: { + struct nmreq_register *req = + (struct nmreq_register *)hdr->nr_body; + /* Protect access to priv from concurrent requests. */ + NMG_LOCK(); + do { + u_int memflags; +#ifdef WITH_EXTMEM + struct nmreq_option *opt; +#endif /* WITH_EXTMEM */ + + if (priv->np_nifp != NULL) { /* thread already registered */ + error = EBUSY; + break; + } + +#ifdef WITH_EXTMEM + opt = nmreq_findoption((struct nmreq_option *)hdr->nr_options, + NETMAP_REQ_OPT_EXTMEM); + if (opt != NULL) { + struct nmreq_opt_extmem *e = + (struct nmreq_opt_extmem *)opt; + + error = nmreq_checkduplicate(opt); + if (error) { + opt->nro_status = error; + break; + } + nmd = netmap_mem_ext_create(e->nro_usrptr, + &e->nro_info, &error); + opt->nro_status = error; + if (nmd == NULL) + break; + } +#endif /* WITH_EXTMEM */ + + if (nmd == NULL && req->nr_mem_id) { + /* find the allocator and get a reference */ + nmd = netmap_mem_find(req->nr_mem_id); + if (nmd == NULL) { + error = EINVAL; + break; + } + } + /* find the interface and a reference */ + error = netmap_get_na(hdr, &na, &ifp, nmd, + 1 /* create */); /* keep reference */ + if (error) + break; + if (NETMAP_OWNED_BY_KERN(na)) { + error = EBUSY; + break; + } + + if (na->virt_hdr_len && !(req->nr_flags & NR_ACCEPT_VNET_HDR)) { + error = EIO; + break; + } - if (nmr->nr_name[0] != '\0') { + error = netmap_do_regif(priv, na, req->nr_mode, + req->nr_ringid, req->nr_flags); + if (error) { /* reg. 
failed, release priv and ref */ + break; + } + nifp = priv->np_nifp; + priv->np_td = td; /* for debugging purposes */ - /* get a refcount */ - error = netmap_get_na(nmr, &na, &ifp, NULL, 1 /* create */); + /* return the offset of the netmap_if object */ + req->nr_rx_rings = na->num_rx_rings; + req->nr_tx_rings = na->num_tx_rings; + req->nr_rx_slots = na->num_rx_desc; + req->nr_tx_slots = na->num_tx_desc; + error = netmap_mem_get_info(na->nm_mem, &req->nr_memsize, &memflags, + &req->nr_mem_id); if (error) { - na = NULL; - ifp = NULL; + netmap_do_unregif(priv); break; } - nmd = na->nm_mem; /* get memory allocator */ - } else { - nmd = netmap_mem_find(nmr->nr_arg2 ? nmr->nr_arg2 : 1); - if (nmd == NULL) { - error = EINVAL; + if (memflags & NETMAP_MEM_PRIVATE) { + *(uint32_t *)(uintptr_t)&nifp->ni_flags |= NI_PRIV_MEM; + } + for_rx_tx(t) { + priv->np_si[t] = nm_si_user(priv, t) ? + &na->si[t] : &NMR(na, t)[priv->np_qfirst[t]]->si; + } + + if (req->nr_extra_bufs) { + if (netmap_verbose) + D("requested %d extra buffers", + req->nr_extra_bufs); + req->nr_extra_bufs = netmap_extra_alloc(na, + &nifp->ni_bufs_head, req->nr_extra_bufs); + if (netmap_verbose) + D("got %d extra buffers", req->nr_extra_bufs); + } + req->nr_offset = netmap_mem_if_offset(na->nm_mem, nifp); + + error = nmreq_checkoptions(hdr); + if (error) { + netmap_do_unregif(priv); break; } + + /* store ifp reference so that priv destructor may release it */ + priv->np_ifp = ifp; + } while (0); + if (error) { + netmap_unget_na(na, ifp); } + /* release the reference from netmap_mem_find() or + * netmap_mem_ext_create() + */ + if (nmd) + netmap_mem_put(nmd); + NMG_UNLOCK(); + break; + } - error = netmap_mem_get_info(nmd, &memsize, &memflags, - &nmr->nr_arg2); - if (error) - break; - nmr->nr_memsize = (uint32_t)memsize; - if (na == NULL) /* only memory info */ - break; - nmr->nr_offset = 0; - nmr->nr_rx_slots = nmr->nr_tx_slots = 0; - netmap_update_config(na); - nmr->nr_rx_rings = na->num_rx_rings; - 
nmr->nr_tx_rings = na->num_tx_rings; - nmr->nr_rx_slots = na->num_rx_desc; - nmr->nr_tx_slots = na->num_tx_desc; - } while (0); - netmap_unget_na(na, ifp); - NMG_UNLOCK(); - break; + case NETMAP_REQ_PORT_INFO_GET: { + struct nmreq_port_info_get *req = + (struct nmreq_port_info_get *)hdr->nr_body; - case NIOCREGIF: - /* - * If nmr->nr_cmd is not zero, this NIOCREGIF is not really - * a regif operation, but a different one, specified by the - * value of nmr->nr_cmd. - */ - i = nmr->nr_cmd; - if (i == NETMAP_BDG_ATTACH || i == NETMAP_BDG_DETACH - || i == NETMAP_BDG_VNET_HDR - || i == NETMAP_BDG_NEWIF - || i == NETMAP_BDG_DELIF - || i == NETMAP_BDG_POLLING_ON - || i == NETMAP_BDG_POLLING_OFF) { - /* possibly attach/detach NIC and VALE switch */ - error = netmap_bdg_ctl(nmr, NULL); + NMG_LOCK(); + do { + u_int memflags; + + if (hdr->nr_name[0] != '\0') { + /* Build a nmreq_register out of the nmreq_port_info_get, + * so that we can call netmap_get_na(). */ + struct nmreq_register regreq; + bzero(&regreq, sizeof(regreq)); + regreq.nr_tx_slots = req->nr_tx_slots; + regreq.nr_rx_slots = req->nr_rx_slots; + regreq.nr_tx_rings = req->nr_tx_rings; + regreq.nr_rx_rings = req->nr_rx_rings; + regreq.nr_mem_id = req->nr_mem_id; + + /* get a refcount */ + hdr->nr_reqtype = NETMAP_REQ_REGISTER; + hdr->nr_body = (uint64_t)&regreq; + error = netmap_get_na(hdr, &na, &ifp, NULL, 1 /* create */); + hdr->nr_reqtype = NETMAP_REQ_PORT_INFO_GET; /* reset type */ + hdr->nr_body = (uint64_t)req; /* reset nr_body */ + if (error) { + na = NULL; + ifp = NULL; + break; + } + nmd = na->nm_mem; /* get memory allocator */ + } else { + nmd = netmap_mem_find(req->nr_mem_id ? 
req->nr_mem_id : 1); + if (nmd == NULL) { + error = EINVAL; + break; + } + } + + error = netmap_mem_get_info(nmd, &req->nr_memsize, &memflags, + &req->nr_mem_id); + if (error) + break; + if (na == NULL) /* only memory info */ + break; + req->nr_offset = 0; + req->nr_rx_slots = req->nr_tx_slots = 0; + netmap_update_config(na); + req->nr_rx_rings = na->num_rx_rings; + req->nr_tx_rings = na->num_tx_rings; + req->nr_rx_slots = na->num_rx_desc; + req->nr_tx_slots = na->num_tx_desc; + } while (0); + netmap_unget_na(na, ifp); + NMG_UNLOCK(); break; - } else if (i == NETMAP_PT_HOST_CREATE || i == NETMAP_PT_HOST_DELETE) { - /* forward the command to the ptnetmap subsystem */ - error = ptnetmap_ctl(nmr, priv->np_na); + } +#ifdef WITH_VALE + case NETMAP_REQ_VALE_ATTACH: { + error = nm_bdg_ctl_attach(hdr, NULL /* userspace request */); break; - } else if (i == NETMAP_VNET_HDR_GET) { - /* get vnet-header length for this netmap port */ + } + + case NETMAP_REQ_VALE_DETACH: { + error = nm_bdg_ctl_detach(hdr, NULL /* userspace request */); + break; + } + + case NETMAP_REQ_VALE_LIST: { + error = netmap_bdg_list(hdr); + break; + } + + case NETMAP_REQ_PORT_HDR_SET: { + struct nmreq_port_hdr *req = + (struct nmreq_port_hdr *)hdr->nr_body; + /* Build a nmreq_register out of the nmreq_port_hdr, + * so that we can call netmap_get_bdg_na(). */ + struct nmreq_register regreq; + bzero(&regreq, sizeof(regreq)); + /* For now we only support virtio-net headers, and only for + * VALE ports, but this may change in future. Valid lengths + * for the virtio-net header are 0 (no header), 10 and 12. 
*/ + if (req->nr_hdr_len != 0 && + req->nr_hdr_len != sizeof(struct nm_vnet_hdr) && + req->nr_hdr_len != 12) { + error = EINVAL; + break; + } + NMG_LOCK(); + hdr->nr_reqtype = NETMAP_REQ_REGISTER; + hdr->nr_body = (uint64_t)&regreq; + error = netmap_get_bdg_na(hdr, &na, NULL, 0); + hdr->nr_reqtype = NETMAP_REQ_PORT_HDR_SET; + hdr->nr_body = (uint64_t)req; + if (na && !error) { + struct netmap_vp_adapter *vpna = + (struct netmap_vp_adapter *)na; + na->virt_hdr_len = req->nr_hdr_len; + if (na->virt_hdr_len) { + vpna->mfs = NETMAP_BUF_SIZE(na); + } + D("Using vnet_hdr_len %d for %p", na->virt_hdr_len, na); + netmap_adapter_put(na); + } else if (!na) { + error = ENXIO; + } + NMG_UNLOCK(); + break; + } + + case NETMAP_REQ_PORT_HDR_GET: { + /* Get vnet-header length for this netmap port */ + struct nmreq_port_hdr *req = + (struct nmreq_port_hdr *)hdr->nr_body; + /* Build a nmreq_register out of the nmreq_port_hdr, + * so that we can call netmap_get_bdg_na(). */ + struct nmreq_register regreq; struct ifnet *ifp; + bzero(&regreq, sizeof(regreq)); NMG_LOCK(); - error = netmap_get_na(nmr, &na, &ifp, NULL, 0); + hdr->nr_reqtype = NETMAP_REQ_REGISTER; + hdr->nr_body = (uint64_t)&regreq; + error = netmap_get_na(hdr, &na, &ifp, NULL, 0); + hdr->nr_reqtype = NETMAP_REQ_PORT_HDR_GET; + hdr->nr_body = (uint64_t)req; if (na && !error) { - nmr->nr_arg1 = na->virt_hdr_len; + req->nr_hdr_len = na->virt_hdr_len; } netmap_unget_na(na, ifp); NMG_UNLOCK(); break; - } else if (i == NETMAP_POOLS_INFO_GET) { - /* get information from the memory allocator */ + } + + case NETMAP_REQ_VALE_NEWIF: { + error = nm_vi_create(hdr); + break; + } + + case NETMAP_REQ_VALE_DELIF: { + error = nm_vi_destroy(hdr->nr_name); + break; + } + + case NETMAP_REQ_VALE_POLLING_ENABLE: + case NETMAP_REQ_VALE_POLLING_DISABLE: { + error = nm_bdg_polling(hdr); + break; + } +#endif /* WITH_VALE */ + case NETMAP_REQ_POOLS_INFO_GET: { + struct nmreq_pools_info *req = + (struct nmreq_pools_info *)hdr->nr_body; + /* Get information 
from the memory allocator. This + * netmap device must already be bound to a port. + * Note that hdr->nr_name is ignored. */ NMG_LOCK(); if (priv->np_na && priv->np_na->nm_mem) { struct netmap_mem_d *nmd = priv->np_na->nm_mem; - error = netmap_mem_pools_info_get(nmr, nmd); + error = netmap_mem_pools_info_get(req, nmd); } else { error = EINVAL; } NMG_UNLOCK(); break; - } else if (i == NETMAP_POOLS_CREATE) { - nmd = netmap_mem_ext_create(nmr, &error); - if (nmd == NULL) - break; - /* reset the fields used by POOLS_CREATE to - * avoid confusing the rest of the code - */ - nmr->nr_cmd = 0; - nmr->nr_arg1 = 0; - nmr->nr_arg2 = 0; - nmr->nr_arg3 = 0; - } else if (i != 0) { - D("nr_cmd must be 0 not %d", i); + } + + default: { error = EINVAL; break; } - - /* protect access to priv from concurrent NIOCREGIF */ - NMG_LOCK(); - do { - u_int memflags; - uint64_t memsize; - - if (priv->np_nifp != NULL) { /* thread already registered */ - error = EBUSY; - break; - } - - if (nmr->nr_arg2) { - /* find the allocator and get a reference */ - nmd = netmap_mem_find(nmr->nr_arg2); - if (nmd == NULL) { - error = EINVAL; - break; - } - } - /* find the interface and a reference */ - error = netmap_get_na(nmr, &na, &ifp, nmd, - 1 /* create */); /* keep reference */ - if (error) - break; - if (NETMAP_OWNED_BY_KERN(na)) { - error = EBUSY; - break; - } - - if (na->virt_hdr_len && !(nmr->nr_flags & NR_ACCEPT_VNET_HDR)) { - error = EIO; - break; - } - - error = netmap_do_regif(priv, na, nmr->nr_ringid, nmr->nr_flags); - if (error) { /* reg. 
failed, release priv and ref */ - break; - } - nifp = priv->np_nifp; - priv->np_td = td; // XXX kqueue, debugging only - - /* return the offset of the netmap_if object */ - nmr->nr_rx_rings = na->num_rx_rings; - nmr->nr_tx_rings = na->num_tx_rings; - nmr->nr_rx_slots = na->num_rx_desc; - nmr->nr_tx_slots = na->num_tx_desc; - error = netmap_mem_get_info(na->nm_mem, &memsize, &memflags, - &nmr->nr_arg2); - if (error) { - netmap_do_unregif(priv); - break; - } - nmr->nr_memsize = (uint32_t)memsize; - if (memflags & NETMAP_MEM_PRIVATE) { - *(uint32_t *)(uintptr_t)&nifp->ni_flags |= NI_PRIV_MEM; - } - for_rx_tx(t) { - priv->np_si[t] = nm_si_user(priv, t) ? - &na->si[t] : &NMR(na, t)[priv->np_qfirst[t]].si; - } - - if (nmr->nr_arg3) { - if (netmap_verbose) - D("requested %d extra buffers", nmr->nr_arg3); - nmr->nr_arg3 = netmap_extra_alloc(na, - &nifp->ni_bufs_head, nmr->nr_arg3); - if (netmap_verbose) - D("got %d extra buffers", nmr->nr_arg3); - } - nmr->nr_offset = netmap_mem_if_offset(na->nm_mem, nifp); - - /* store ifp reference so that priv destructor may release it */ - priv->np_ifp = ifp; - } while (0); - if (error) { - netmap_unget_na(na, ifp); } - /* release the reference from netmap_mem_find() or - * netmap_mem_ext_create() - */ - if (nmd) - netmap_mem_put(nmd); - NMG_UNLOCK(); + /* Write back request body to userspace and reset the + * user-space pointer. 
*/ + error = nmreq_copyout(hdr, error); break; + } case NIOCTXSYNC: - case NIOCRXSYNC: + case NIOCRXSYNC: { nifp = priv->np_nifp; if (nifp == NULL) { @@ -2506,7 +2647,7 @@ netmap_ioctl(struct netmap_priv_d *priv, u_long cmd, caddr_t data, struct thread sync_flags = priv->np_sync_flags; for (i = qfirst; i < qlast; i++) { - struct netmap_kring *kring = krings + i; + struct netmap_kring *kring = krings[i]; struct netmap_ring *ring = kring->ring; if (unlikely(nm_kr_tryget(kring, 1, &error))) { @@ -2549,51 +2690,292 @@ netmap_ioctl(struct netmap_priv_d *priv, u_long cmd, caddr_t data, struct thread } break; + } -#ifdef WITH_VALE - case NIOCCONFIG: - error = netmap_bdg_config(nmr); - break; -#endif -#ifdef __FreeBSD__ - case FIONBIO: - case FIOASYNC: - ND("FIONBIO/FIOASYNC are no-ops"); + default: { + return netmap_ioctl_legacy(priv, cmd, data, td); break; + } + } + + return (error); +} - case BIOCIMMEDIATE: - case BIOCGHDRCMPLT: - case BIOCSHDRCMPLT: - case BIOCSSEESENT: - D("ignore BIOCIMMEDIATE/BIOCSHDRCMPLT/BIOCSHDRCMPLT/BIOCSSEESENT"); +size_t +nmreq_size_by_type(uint16_t nr_reqtype) +{ + switch (nr_reqtype) { + case NETMAP_REQ_REGISTER: + return sizeof(struct nmreq_register); + case NETMAP_REQ_PORT_INFO_GET: + return sizeof(struct nmreq_port_info_get); + case NETMAP_REQ_VALE_ATTACH: + return sizeof(struct nmreq_vale_attach); + case NETMAP_REQ_VALE_DETACH: + return sizeof(struct nmreq_vale_detach); + case NETMAP_REQ_VALE_LIST: + return sizeof(struct nmreq_vale_list); + case NETMAP_REQ_PORT_HDR_SET: + case NETMAP_REQ_PORT_HDR_GET: + return sizeof(struct nmreq_port_hdr); + case NETMAP_REQ_VALE_NEWIF: + return sizeof(struct nmreq_vale_newif); + case NETMAP_REQ_VALE_DELIF: + return 0; + case NETMAP_REQ_VALE_POLLING_ENABLE: + case NETMAP_REQ_VALE_POLLING_DISABLE: + return sizeof(struct nmreq_vale_polling); + case NETMAP_REQ_POOLS_INFO_GET: + return sizeof(struct nmreq_pools_info); + } + return 0; +} + +static size_t +nmreq_opt_size_by_type(uint16_t nro_reqtype) +{ + 
size_t rv = sizeof(struct nmreq_option); +#ifdef NETMAP_REQ_OPT_DEBUG + if (nro_reqtype & NETMAP_REQ_OPT_DEBUG) + return (nro_reqtype & ~NETMAP_REQ_OPT_DEBUG); +#endif /* NETMAP_REQ_OPT_DEBUG */ + switch (nro_reqtype) { +#ifdef WITH_EXTMEM + case NETMAP_REQ_OPT_EXTMEM: + rv = sizeof(struct nmreq_opt_extmem); break; +#endif /* WITH_EXTMEM */ + } + /* subtract the common header */ + return rv - sizeof(struct nmreq_option); +} - default: /* allow device-specific ioctls */ - { - struct ifnet *ifp = ifunit_ref(nmr->nr_name); - if (ifp == NULL) { - error = ENXIO; - } else { - struct socket so; +int +nmreq_copyin(struct nmreq_header *hdr, int nr_body_is_user) +{ + size_t rqsz, optsz, bufsz; + int error; + char *ker = NULL, *p; + struct nmreq_option **next, *src; + struct nmreq_option buf; + uint64_t *ptrs; + + if (hdr->nr_reserved) + return EINVAL; + + if (!nr_body_is_user) + return 0; + + hdr->nr_reserved = nr_body_is_user; + + /* compute the total size of the buffer */ + rqsz = nmreq_size_by_type(hdr->nr_reqtype); + if (rqsz > NETMAP_REQ_MAXSIZE) { + error = EMSGSIZE; + goto out_err; + } + if ((rqsz && hdr->nr_body == (uint64_t)NULL) || + (!rqsz && hdr->nr_body != (uint64_t)NULL)) { + /* Request body expected, but not found; or + * request body found but unexpected. */ + error = EINVAL; + goto out_err; + } - bzero(&so, sizeof(so)); - so.so_vnet = ifp->if_vnet; - // so->so_proto not null. 
- error = ifioctl(&so, cmd, data, td); - if_rele(ifp); + bufsz = 2 * sizeof(void *) + rqsz; + optsz = 0; + for (src = (struct nmreq_option *)hdr->nr_options; src; + src = (struct nmreq_option *)buf.nro_next) + { + error = copyin(src, &buf, sizeof(*src)); + if (error) + goto out_err; + optsz += sizeof(*src); + optsz += nmreq_opt_size_by_type(buf.nro_reqtype); + if (rqsz + optsz > NETMAP_REQ_MAXSIZE) { + error = EMSGSIZE; + goto out_err; } - break; - } + bufsz += optsz + sizeof(void *); + } -#else /* linux */ - default: - error = EOPNOTSUPP; -#endif /* linux */ + ker = nm_os_malloc(bufsz); + if (ker == NULL) { + error = ENOMEM; + goto out_err; } + p = ker; - return (error); + /* make a copy of the user pointers */ + ptrs = (uint64_t*)p; + *ptrs++ = hdr->nr_body; + *ptrs++ = hdr->nr_options; + p = (char *)ptrs; + + /* copy the body */ + error = copyin((void *)hdr->nr_body, p, rqsz); + if (error) + goto out_restore; + /* overwrite the user pointer with the in-kernel one */ + hdr->nr_body = (uint64_t)p; + p += rqsz; + + /* copy the options */ + next = (struct nmreq_option **)&hdr->nr_options; + src = *next; + while (src) { + struct nmreq_option *opt; + + /* copy the option header */ + ptrs = (uint64_t *)p; + opt = (struct nmreq_option *)(ptrs + 1); + error = copyin(src, opt, sizeof(*src)); + if (error) + goto out_restore; + /* make a copy of the user next pointer */ + *ptrs = opt->nro_next; + /* overwrite the user pointer with the in-kernel one */ + *next = opt; + + /* initialize the option as not supported. + * Recognized options will update this field. 
+ */ + opt->nro_status = EOPNOTSUPP; + + p = (char *)(opt + 1); + + /* copy the option body */ + optsz = nmreq_opt_size_by_type(opt->nro_reqtype); + if (optsz) { + /* the option body follows the option header */ + error = copyin(src + 1, p, optsz); + if (error) + goto out_restore; + p += optsz; + } + + /* move to next option */ + next = (struct nmreq_option **)&opt->nro_next; + src = *next; + } + return 0; + +out_restore: + ptrs = (uint64_t *)ker; + hdr->nr_body = *ptrs++; + hdr->nr_options = *ptrs++; + hdr->nr_reserved = 0; + nm_os_free(ker); +out_err: + return error; } +static int +nmreq_copyout(struct nmreq_header *hdr, int rerror) +{ + struct nmreq_option *src, *dst; + void *ker = (void *)hdr->nr_body, *bufstart; + uint64_t *ptrs; + size_t bodysz; + int error; + + if (!hdr->nr_reserved) + return rerror; + + /* restore the user pointers in the header */ + ptrs = (uint64_t *)ker - 2; + bufstart = ptrs; + hdr->nr_body = *ptrs++; + src = (struct nmreq_option *)hdr->nr_options; + hdr->nr_options = *ptrs; + + if (!rerror) { + /* copy the body */ + bodysz = nmreq_size_by_type(hdr->nr_reqtype); + error = copyout(ker, (void *)hdr->nr_body, bodysz); + if (error) { + rerror = error; + goto out; + } + } + + /* copy the options */ + dst = (struct nmreq_option *)hdr->nr_options; + while (src) { + size_t optsz; + uint64_t next; + + /* restore the user pointer */ + next = src->nro_next; + ptrs = (uint64_t *)src - 1; + src->nro_next = *ptrs; + + /* always copy the option header */ + error = copyout(src, dst, sizeof(*src)); + if (error) { + rerror = error; + goto out; + } + + /* copy the option body only if there was no error */ + if (!rerror && !src->nro_status) { + optsz = nmreq_opt_size_by_type(src->nro_reqtype); + if (optsz) { + error = copyout(src + 1, dst + 1, optsz); + if (error) { + rerror = error; + goto out; + } + } + } + src = (struct nmreq_option *)next; + dst = (struct nmreq_option *)*ptrs; + } + + +out: + hdr->nr_reserved = 0; + nm_os_free(bufstart); + return 
rerror; +} + +struct nmreq_option * +nmreq_findoption(struct nmreq_option *opt, uint16_t reqtype) +{ + for ( ; opt; opt = (struct nmreq_option *)opt->nro_next) + if (opt->nro_reqtype == reqtype) + return opt; + return NULL; +} + +int +nmreq_checkduplicate(struct nmreq_option *opt) { + uint16_t type = opt->nro_reqtype; + int dup = 0; + + while ((opt = nmreq_findoption((struct nmreq_option *)opt->nro_next, + type))) { + dup++; + opt->nro_status = EINVAL; + } + return (dup ? EINVAL : 0); +} + +static int +nmreq_checkoptions(struct nmreq_header *hdr) +{ + struct nmreq_option *opt; + /* return error if there is still any option + * marked as not supported + */ + + for (opt = (struct nmreq_option *)hdr->nr_options; opt; + opt = (struct nmreq_option *)opt->nro_next) + if (opt->nro_status == EOPNOTSUPP) + return EOPNOTSUPP; + + return 0; +} /* * select(2) and poll(2) handlers for the "netmap" device. @@ -2680,7 +3062,7 @@ netmap_poll(struct netmap_priv_d *priv, int events, NM_SELRECORD_T *sr) if (want_tx) { enum txrx t = NR_TX; for (i = priv->np_qfirst[t]; want[t] && i < priv->np_qlast[t]; i++) { - kring = &NMR(na, t)[i]; + kring = NMR(na, t)[i]; /* XXX compare ring->cur and kring->tail */ if (!nm_ring_empty(kring->ring)) { revents |= want[t]; @@ -2692,7 +3074,7 @@ netmap_poll(struct netmap_priv_d *priv, int events, NM_SELRECORD_T *sr) enum txrx t = NR_RX; want_rx = 0; /* look for a reason to run the handlers */ for (i = priv->np_qfirst[t]; i < priv->np_qlast[t]; i++) { - kring = &NMR(na, t)[i]; + kring = NMR(na, t)[i]; if (kring->ring->cur == kring->ring->tail /* try fetch new buffers */ || kring->rhead != kring->ring->head /* release buffers */) { want_rx = 1; @@ -2706,9 +3088,9 @@ netmap_poll(struct netmap_priv_d *priv, int events, NM_SELRECORD_T *sr) #ifdef linux /* The selrecord must be unconditional on linux. */ nm_os_selrecord(sr, check_all_tx ? 
- &na->si[NR_TX] : &na->tx_rings[priv->np_qfirst[NR_TX]].si); + &na->si[NR_TX] : &na->tx_rings[priv->np_qfirst[NR_TX]]->si); nm_os_selrecord(sr, check_all_rx ? - &na->si[NR_RX] : &na->rx_rings[priv->np_qfirst[NR_RX]].si); + &na->si[NR_RX] : &na->rx_rings[priv->np_qfirst[NR_RX]]->si); #endif /* linux */ /* @@ -2728,16 +3110,16 @@ flush_tx: for (i = priv->np_qfirst[NR_TX]; i < priv->np_qlast[NR_TX]; i++) { int found = 0; - kring = &na->tx_rings[i]; + kring = na->tx_rings[i]; ring = kring->ring; /* * Don't try to txsync this TX ring if we already found some * space in some of the TX rings (want_tx == 0) and there are no * TX slots in this ring that need to be flushed to the NIC - * (cur == hwcur). + * (head == hwcur). */ - if (!send_down && !want_tx && ring->cur == kring->nr_hwcur) + if (!send_down && !want_tx && ring->head == kring->nr_hwcur) continue; if (nm_kr_tryget(kring, 1, &revents)) @@ -2774,7 +3156,7 @@ flush_tx: if (want_tx && retry_tx && sr) { #ifndef linux nm_os_selrecord(sr, check_all_tx ? - &na->si[NR_TX] : &na->tx_rings[priv->np_qfirst[NR_TX]].si); + &na->si[NR_TX] : &na->tx_rings[priv->np_qfirst[NR_TX]]->si); #endif /* !linux */ retry_tx = 0; goto flush_tx; @@ -2791,7 +3173,7 @@ do_retry_rx: for (i = priv->np_qfirst[NR_RX]; i < priv->np_qlast[NR_RX]; i++) { int found = 0; - kring = &na->rx_rings[i]; + kring = na->rx_rings[i]; ring = kring->ring; if (unlikely(nm_kr_tryget(kring, 1, &revents))) @@ -2835,7 +3217,7 @@ do_retry_rx: #ifndef linux if (retry_rx && sr) { nm_os_selrecord(sr, check_all_rx ? 
- &na->si[NR_RX] : &na->rx_rings[priv->np_qfirst[NR_RX]].si); + &na->si[NR_RX] : &na->rx_rings[priv->np_qfirst[NR_RX]]->si); } #endif /* !linux */ if (send_down || retry_rx) { @@ -2871,7 +3253,7 @@ nma_intr_enable(struct netmap_adapter *na, int onoff) for_rx_tx(t) { for (i = 0; i < nma_get_nrings(na, t); i++) { - struct netmap_kring *kring = &NMR(na, t)[i]; + struct netmap_kring *kring = NMR(na, t)[i]; int on = !(kring->nr_kflags & NKR_NOINTR); if (!!onoff != !!on) { @@ -2907,7 +3289,7 @@ nma_intr_enable(struct netmap_adapter *na, int onoff) static int netmap_notify(struct netmap_kring *kring, int flags) { - struct netmap_adapter *na = kring->na; + struct netmap_adapter *na = kring->notify_na; enum txrx t = kring->tx; nm_os_selwakeup(&kring->si); @@ -2934,6 +3316,11 @@ netmap_attach_common(struct netmap_adapter *na) return EINVAL; } + if (!na->rx_buf_maxsize) { + /* Set a conservative default (larger is safer). */ + na->rx_buf_maxsize = PAGE_SIZE; + } + #ifdef __FreeBSD__ if (na->na_flags & NAF_HOST_RINGS && na->ifp) { na->if_input = na->ifp->if_input; /* for netmap_send_up */ @@ -3149,7 +3536,7 @@ netmap_hw_krings_create(struct netmap_adapter *na) int ret = netmap_krings_create(na, 0); if (ret == 0) { /* initialize the mbq for the sw rx ring */ - mbq_safe_init(&na->rx_rings[na->num_rx_rings].rx_queue); + mbq_safe_init(&na->rx_rings[na->num_rx_rings]->rx_queue); ND("initialized sw rx queue %d", na->num_rx_rings); } return ret; @@ -3213,7 +3600,7 @@ netmap_transmit(struct ifnet *ifp, struct mbuf *m) struct mbq *q; int busy; - kring = &na->rx_rings[na->num_rx_rings]; + kring = na->rx_rings[na->num_rx_rings]; // XXX [Linux] we do not need this lock // if we follow the down/configure/up protocol -gl // mtx_lock(&na->core_lock); @@ -3228,7 +3615,7 @@ netmap_transmit(struct ifnet *ifp, struct mbuf *m) if (txr >= na->num_tx_rings) { txr %= na->num_tx_rings; } - tx_kring = &NMR(na, NR_TX)[txr]; + tx_kring = NMR(na, NR_TX)[txr]; if (tx_kring->nr_mode == NKR_NETMAP_OFF) { 
return MBUF_TRANSMIT(na, ifp, m); @@ -3316,7 +3703,7 @@ netmap_reset(struct netmap_adapter *na, enum txrx tx, u_int n, if (n >= na->num_tx_rings) return NULL; - kring = na->tx_rings + n; + kring = na->tx_rings[n]; if (kring->nr_pending_mode == NKR_NETMAP_OFF) { kring->nr_mode = NKR_NETMAP_OFF; @@ -3328,7 +3715,7 @@ netmap_reset(struct netmap_adapter *na, enum txrx tx, u_int n, } else { if (n >= na->num_rx_rings) return NULL; - kring = na->rx_rings + n; + kring = na->rx_rings[n]; if (kring->nr_pending_mode == NKR_NETMAP_OFF) { kring->nr_mode = NKR_NETMAP_OFF; @@ -3396,7 +3783,7 @@ netmap_common_irq(struct netmap_adapter *na, u_int q, u_int *work_done) if (q >= nma_get_nrings(na, t)) return NM_IRQ_PASS; // not a physical queue - kring = NMR(na, t) + q; + kring = NMR(na, t)[q]; if (kring->nr_mode == NKR_NETMAP_OFF) { return NM_IRQ_PASS; diff --git a/sys/dev/netmap/netmap_freebsd.c b/sys/dev/netmap/netmap_freebsd.c index c122dc64bed26..cc63b4b478617 100644 --- a/sys/dev/netmap/netmap_freebsd.c +++ b/sys/dev/netmap/netmap_freebsd.c @@ -619,6 +619,116 @@ nm_os_vi_detach(struct ifnet *ifp) if_free(ifp); } +#ifdef WITH_EXTMEM +#include <vm/vm_map.h> +#include <vm/vm_kern.h> +struct nm_os_extmem { + vm_object_t obj; + vm_offset_t kva; + vm_offset_t size; + vm_pindex_t scan; +}; + +void +nm_os_extmem_delete(struct nm_os_extmem *e) +{ + D("freeing %lx bytes", e->size); + vm_map_remove(kernel_map, e->kva, e->kva + e->size); + nm_os_free(e); +} + +char * +nm_os_extmem_nextpage(struct nm_os_extmem *e) +{ + char *rv = NULL; + if (e->scan < e->kva + e->size) { + rv = (char *)e->scan; + e->scan += PAGE_SIZE; + } + return rv; +} + +int +nm_os_extmem_isequal(struct nm_os_extmem *e1, struct nm_os_extmem *e2) +{ + return (e1->obj == e1->obj); +} + +int +nm_os_extmem_nr_pages(struct nm_os_extmem *e) +{ + return e->size >> PAGE_SHIFT; +} + +struct nm_os_extmem * +nm_os_extmem_create(unsigned long p, struct nmreq_pools_info *pi, int *perror) +{ + vm_map_t map; + vm_map_entry_t entry; + 
vm_object_t obj; + vm_prot_t prot; + vm_pindex_t index; + boolean_t wired; + struct nm_os_extmem *e = NULL; + int rv, error = 0; + + e = nm_os_malloc(sizeof(*e)); + if (e == NULL) { + error = ENOMEM; + goto out; + } + + map = &curthread->td_proc->p_vmspace->vm_map; + rv = vm_map_lookup(&map, p, VM_PROT_RW, &entry, + &obj, &index, &prot, &wired); + if (rv != KERN_SUCCESS) { + D("address %lx not found", p); + goto out_free; + } + /* check that we are given the whole vm_object ? */ + vm_map_lookup_done(map, entry); + + // XXX can we really use obj after releasing the map lock? + e->obj = obj; + vm_object_reference(obj); + /* wire the memory and add the vm_object to the kernel map, + * to make sure that it is not fred even if the processes that + * are mmap()ing it all exit + */ + e->kva = vm_map_min(kernel_map); + e->size = obj->size << PAGE_SHIFT; + rv = vm_map_find(kernel_map, obj, 0, &e->kva, e->size, 0, + VMFS_OPTIMAL_SPACE, VM_PROT_READ | VM_PROT_WRITE, + VM_PROT_READ | VM_PROT_WRITE, 0); + if (rv != KERN_SUCCESS) { + D("vm_map_find(%lx) failed", e->size); + goto out_rel; + } + rv = vm_map_wire(kernel_map, e->kva, e->kva + e->size, + VM_MAP_WIRE_SYSTEM | VM_MAP_WIRE_NOHOLES); + if (rv != KERN_SUCCESS) { + D("vm_map_wire failed"); + goto out_rem; + } + + e->scan = e->kva; + + return e; + +out_rem: + vm_map_remove(kernel_map, e->kva, e->kva + e->size); + e->obj = NULL; +out_rel: + vm_object_deallocate(e->obj); +out_free: + nm_os_free(e); +out: + if (perror) + *perror = error; + return NULL; +} +#endif /* WITH_EXTMEM */ + /* ======================== PTNETMAP SUPPORT ========================== */ #ifdef WITH_PTNETMAP_GUEST @@ -1151,16 +1261,10 @@ nm_os_kctx_worker_setaff(struct nm_kctx *nmk, int affinity) } struct nm_kctx * -nm_os_kctx_create(struct nm_kctx_cfg *cfg, unsigned int cfgtype, - void *opaque) +nm_os_kctx_create(struct nm_kctx_cfg *cfg, void *opaque) { struct nm_kctx *nmk = NULL; - if (cfgtype != PTNETMAP_CFGTYPE_BHYVE) { - D("Unsupported cfgtype %u", 
cfgtype); - return NULL; - } - nmk = malloc(sizeof(*nmk), M_DEVBUF, M_NOWAIT | M_ZERO); if (!nmk) return NULL; @@ -1429,7 +1533,7 @@ freebsd_netmap_ioctl(struct cdev *dev __unused, u_long cmd, caddr_t data, error = ENXIO; goto out; } - error = netmap_ioctl(priv, cmd, data, td); + error = netmap_ioctl(priv, cmd, data, td, /*nr_body_is_user=*/1); out: CURVNET_RESTORE(); diff --git a/sys/dev/netmap/netmap_generic.c b/sys/dev/netmap/netmap_generic.c index 2ed251a557756..24d36d5db1b30 100644 --- a/sys/dev/netmap/netmap_generic.c +++ b/sys/dev/netmap/netmap_generic.c @@ -232,7 +232,7 @@ nm_os_get_mbuf(struct ifnet *ifp, int len) #define for_each_kring_n(_i, _k, _karr, _n) \ - for (_k=_karr, _i = 0; _i < _n; (_k)++, (_i)++) + for ((_k)=*(_karr), (_i) = 0; (_i) < (_n); (_i)++, (_k) = (_karr)[(_i)]) #define for_each_tx_kring(_i, _k, _na) \ for_each_kring_n(_i, _k, (_na)->tx_rings, (_na)->num_tx_rings) @@ -589,7 +589,7 @@ generic_mbuf_destructor(struct mbuf *m) for (;;) { bool match = false; - kring = &na->tx_rings[r]; + kring = na->tx_rings[r]; mtx_lock_spin(&kring->tx_event_lock); if (kring->tx_event == m) { kring->tx_event = NULL; @@ -953,7 +953,7 @@ generic_rx_handler(struct ifnet *ifp, struct mbuf *m) r = r % na->num_rx_rings; } - kring = &na->rx_rings[r]; + kring = na->rx_rings[r]; if (kring->nr_mode == NKR_NETMAP_OFF) { /* We must not intercept this mbuf. */ diff --git a/sys/dev/netmap/netmap_kern.h b/sys/dev/netmap/netmap_kern.h index 3e64510913242..8fc71b8e820ef 100644 --- a/sys/dev/netmap/netmap_kern.h +++ b/sys/dev/netmap/netmap_kern.h @@ -77,7 +77,7 @@ #define WITH_GENERIC #define WITH_PTNETMAP_HOST /* ptnetmap host support */ #define WITH_PTNETMAP_GUEST /* ptnetmap guest support */ - +#define WITH_EXTMEM #endif #if defined(__FreeBSD__) @@ -367,9 +367,6 @@ struct netmap_zmon_list { * the next empty buffer as known by the hardware (next_to_check or so). 
* TX rings: hwcur + hwofs coincides with next_to_send * - * For received packets, slot->flags is set to nkr_slot_flags - * so we can provide a proper initial value. - * * The following fields are used to implement lock-free copy of packets * from input to output ports in VALE switch: * nkr_hwlease buffer after the last one being copied. @@ -401,7 +398,7 @@ struct netmap_zmon_list { struct netmap_kring { struct netmap_ring *ring; - uint32_t nr_hwcur; + uint32_t nr_hwcur; /* should be nr_hwhead */ uint32_t nr_hwtail; /* @@ -424,6 +421,7 @@ struct netmap_kring { * by ptnetmap host ports) */ #define NKR_NOINTR 0x10 /* don't use interrupts on this ring */ +#define NKR_FAKERING 0x20 /* don't allocate/free buffers */ uint32_t nr_mode; uint32_t nr_pending_mode; @@ -450,7 +448,14 @@ struct netmap_kring { NM_LOCK_T q_lock; /* protects kring and ring. */ NM_ATOMIC_T nr_busy; /* prevent concurrent syscalls */ + /* the adapter the owns this kring */ struct netmap_adapter *na; + + /* the adapter that wants to be notified when this kring has + * new slots avaialable. This is usually the same as the above, + * but wrappers may let it point to themselves + */ + struct netmap_adapter *notify_na; /* The following fields are for VALE switch support */ struct nm_bdg_fwd *nkr_ft; @@ -630,6 +635,15 @@ struct netmap_lut { struct netmap_vp_adapter; // forward +/* Struct to be filled by nm_config callbacks. */ +struct nm_config_info { + unsigned num_tx_rings; + unsigned num_rx_rings; + unsigned num_tx_descs; + unsigned num_rx_descs; + unsigned rx_buf_maxsize; +}; + /* * The "struct netmap_adapter" extends the "struct adapter" * (or equivalent) device descriptor. @@ -690,8 +704,8 @@ struct netmap_adapter { * as a contiguous chunk of memory. Each array has * N+1 entries, for the adapter queues and for the host queue. */ - struct netmap_kring *tx_rings; /* array of TX rings. */ - struct netmap_kring *rx_rings; /* array of RX rings. */ + struct netmap_kring **tx_rings; /* array of TX rings. 
*/ + struct netmap_kring **rx_rings; /* array of RX rings. */ void *tailroom; /* space below the rings array */ /* (used for leases) */ @@ -766,8 +780,7 @@ struct netmap_adapter { #define NAF_FORCE_RECLAIM 2 #define NAF_CAN_FORWARD_DOWN 4 /* return configuration information */ - int (*nm_config)(struct netmap_adapter *, - u_int *txr, u_int *txd, u_int *rxr, u_int *rxd); + int (*nm_config)(struct netmap_adapter *, struct nm_config_info *info); int (*nm_krings_create)(struct netmap_adapter *); void (*nm_krings_delete)(struct netmap_adapter *); #ifdef WITH_VALE @@ -787,7 +800,7 @@ struct netmap_adapter { * Called with NMG_LOCK held. */ int (*nm_bdg_attach)(const char *bdg_name, struct netmap_adapter *); - int (*nm_bdg_ctl)(struct netmap_adapter *, struct nmreq *, int); + int (*nm_bdg_ctl)(struct nmreq_header *, struct netmap_adapter *); /* adapter used to attach this adapter to a VALE switch (if any) */ struct netmap_vp_adapter *na_vp; @@ -823,7 +836,13 @@ struct netmap_adapter { /* Offset of ethernet header for each packet. */ u_int virt_hdr_len; - char name[64]; + /* Max number of bytes that the NIC can store in the buffer + * referenced by each RX descriptor. This translates to the maximum + * bytes that a single netmap slot can reference. Larger packets + * require NS_MOREFRAG support. */ + unsigned rx_buf_maxsize; + + char name[NETMAP_REQ_IFNAMSIZ]; /* used at least by pipes */ }; static __inline u_int @@ -856,7 +875,7 @@ nma_set_nrings(struct netmap_adapter *na, enum txrx t, u_int v) na->num_rx_rings = v; } -static __inline struct netmap_kring* +static __inline struct netmap_kring** NMR(struct netmap_adapter *na, enum txrx t) { return (t == NR_TX ? 
na->tx_rings : na->rx_rings); @@ -1011,12 +1030,22 @@ struct netmap_bwrap_adapter { */ struct netmap_priv_d *na_kpriv; struct nm_bdg_polling_state *na_polling_state; + /* we overwrite the hwna->na_vp pointer, so we save + * here its original value, to be restored at detach + */ + struct netmap_vp_adapter *saved_na_vp; }; +int nm_bdg_ctl_attach(struct nmreq_header *hdr, void *auth_token); +int nm_bdg_ctl_detach(struct nmreq_header *hdr, void *auth_token); +int nm_bdg_polling(struct nmreq_header *hdr); int netmap_bwrap_attach(const char *name, struct netmap_adapter *); -int netmap_vi_create(struct nmreq *, int); +int netmap_vi_create(struct nmreq_header *hdr, int); +int nm_vi_create(struct nmreq_header *); +int nm_vi_destroy(const char *name); +int netmap_bdg_list(struct nmreq_header *hdr); #else /* !WITH_VALE */ -#define netmap_vi_create(nmr, a) (EOPNOTSUPP) +#define netmap_vi_create(hdr, a) (EOPNOTSUPP) #endif /* WITH_VALE */ #ifdef WITH_PIPES @@ -1024,10 +1053,12 @@ int netmap_vi_create(struct nmreq *, int); #define NM_MAXPIPES 64 /* max number of pipes per adapter */ struct netmap_pipe_adapter { + /* pipe identifier is up.name */ struct netmap_adapter up; - u_int id; /* pipe identifier */ - int role; /* either NR_REG_PIPE_MASTER or NR_REG_PIPE_SLAVE */ +#define NM_PIPE_ROLE_MASTER 0x1 +#define NM_PIPE_ROLE_SLAVE 0x2 + int role; /* either NM_PIPE_ROLE_MASTER or NM_PIPE_ROLE_SLAVE */ struct netmap_adapter *parent; /* adapter that owns the memory */ struct netmap_pipe_adapter *peer; /* the other end of the pipe */ @@ -1195,6 +1226,7 @@ int netmap_transmit(struct ifnet *, struct mbuf *); struct netmap_slot *netmap_reset(struct netmap_adapter *na, enum txrx tx, u_int n, u_int new_cur); int netmap_ring_reinit(struct netmap_kring *); +int netmap_rings_config_get(struct netmap_adapter *, struct nm_config_info *); /* Return codes for netmap_*x_irq. 
*/ enum { @@ -1255,10 +1287,10 @@ static inline void nm_update_hostrings_mode(struct netmap_adapter *na) { /* Process nr_mode and nr_pending_mode for host rings. */ - na->tx_rings[na->num_tx_rings].nr_mode = - na->tx_rings[na->num_tx_rings].nr_pending_mode; - na->rx_rings[na->num_rx_rings].nr_mode = - na->rx_rings[na->num_rx_rings].nr_pending_mode; + na->tx_rings[na->num_tx_rings]->nr_mode = + na->tx_rings[na->num_tx_rings]->nr_pending_mode; + na->rx_rings[na->num_rx_rings]->nr_mode = + na->rx_rings[na->num_rx_rings]->nr_pending_mode; } /* set/clear native flags and if_transmit/netdev_ops */ @@ -1318,6 +1350,11 @@ nm_clear_native_flags(struct netmap_adapter *na) #endif } +#ifdef linux +int netmap_linux_config(struct netmap_adapter *na, + struct nm_config_info *info); +#endif /* linux */ + /* * nm_*sync_prologue() functions are used in ioctl/poll and ptnetmap * kthreads. @@ -1373,9 +1410,10 @@ uint32_t nm_rxsync_prologue(struct netmap_kring *, struct netmap_ring *); */ int netmap_attach_common(struct netmap_adapter *); /* fill priv->np_[tr]xq{first,last} using the ringid and flags information - * coming from a struct nmreq + * coming from a struct nmreq_register */ -int netmap_interp_ringid(struct netmap_priv_d *priv, uint16_t ringid, uint32_t flags); +int netmap_interp_ringid(struct netmap_priv_d *priv, uint32_t nr_mode, + uint16_t nr_ringid, uint64_t nr_flags); /* update the ring parameters (number and size of tx and rx rings). * It calls the nm_config callback, if available. 
*/ @@ -1409,12 +1447,12 @@ void netmap_disable_all_rings(struct ifnet *); void netmap_enable_all_rings(struct ifnet *); int netmap_do_regif(struct netmap_priv_d *priv, struct netmap_adapter *na, - uint16_t ringid, uint32_t flags); + uint32_t nr_mode, uint16_t nr_ringid, uint64_t nr_flags); void netmap_do_unregif(struct netmap_priv_d *priv); u_int nm_bound_var(u_int *v, u_int dflt, u_int lo, u_int hi, const char *msg); -int netmap_get_na(struct nmreq *nmr, struct netmap_adapter **na, - struct ifnet **ifp, struct netmap_mem_d *nmd, int create); +int netmap_get_na(struct nmreq_header *hdr, struct netmap_adapter **na, + struct ifnet **ifp, struct netmap_mem_d *nmd, int create); void netmap_unget_na(struct netmap_adapter *na, struct ifnet *ifp); int netmap_get_hw_na(struct ifnet *ifp, struct netmap_mem_d *nmd, struct netmap_adapter **na); @@ -1430,18 +1468,19 @@ int netmap_get_hw_na(struct ifnet *ifp, * NM_BDG_MAXPORTS for broadcast, NM_BDG_MAXPORTS+1 to indicate * drop. */ -typedef u_int (*bdg_lookup_fn_t)(struct nm_bdg_fwd *ft, uint8_t *ring_nr, - struct netmap_vp_adapter *); +typedef uint32_t (*bdg_lookup_fn_t)(struct nm_bdg_fwd *ft, uint8_t *ring_nr, + struct netmap_vp_adapter *, void *private_data); typedef int (*bdg_config_fn_t)(struct nm_ifreq *); typedef void (*bdg_dtor_fn_t)(const struct netmap_vp_adapter *); +typedef void *(*bdg_update_private_data_fn_t)(void *private_data, void *callback_data, int *error); struct netmap_bdg_ops { bdg_lookup_fn_t lookup; bdg_config_fn_t config; bdg_dtor_fn_t dtor; }; -u_int netmap_bdg_learning(struct nm_bdg_fwd *ft, uint8_t *dst_ring, - struct netmap_vp_adapter *); +uint32_t netmap_bdg_learning(struct nm_bdg_fwd *ft, uint8_t *dst_ring, + struct netmap_vp_adapter *, void *private_data); #define NM_BRIDGES 8 /* number of bridges */ #define NM_BDG_MAXPORTS 254 /* up to 254 */ @@ -1449,45 +1488,47 @@ u_int netmap_bdg_learning(struct nm_bdg_fwd *ft, uint8_t *dst_ring, #define NM_BDG_NOPORT (NM_BDG_MAXPORTS+1) /* these are redefined 
in case of no VALE support */ -int netmap_get_bdg_na(struct nmreq *nmr, struct netmap_adapter **na, +int netmap_get_bdg_na(struct nmreq_header *hdr, struct netmap_adapter **na, struct netmap_mem_d *nmd, int create); struct nm_bridge *netmap_init_bridges2(u_int); void netmap_uninit_bridges2(struct nm_bridge *, u_int); int netmap_init_bridges(void); void netmap_uninit_bridges(void); -int netmap_bdg_ctl(struct nmreq *nmr, struct netmap_bdg_ops *bdg_ops); -int netmap_bdg_config(struct nmreq *nmr); +int netmap_bdg_regops(const char *name, struct netmap_bdg_ops *bdg_ops, void *private_data, void *auth_token); +int nm_bdg_update_private_data(const char *name, bdg_update_private_data_fn_t callback, + void *callback_data, void *auth_token); +int netmap_bdg_config(struct nm_ifreq *nifr); +void *netmap_bdg_create(const char *bdg_name, int *return_status); +int netmap_bdg_destroy(const char *bdg_name, void *auth_token); #else /* !WITH_VALE */ #define netmap_get_bdg_na(_1, _2, _3, _4) 0 #define netmap_init_bridges(_1) 0 #define netmap_uninit_bridges() -#define netmap_bdg_ctl(_1, _2) EINVAL +#define netmap_bdg_regops(_1, _2) EINVAL #endif /* !WITH_VALE */ #ifdef WITH_PIPES /* max number of pipes per device */ #define NM_MAXPIPES 64 /* XXX this should probably be a sysctl */ void netmap_pipe_dealloc(struct netmap_adapter *); -int netmap_get_pipe_na(struct nmreq *nmr, struct netmap_adapter **na, - struct netmap_mem_d *nmd, int create); +int netmap_get_pipe_na(struct nmreq_header *hdr, struct netmap_adapter **na, + struct netmap_mem_d *nmd, int create); #else /* !WITH_PIPES */ #define NM_MAXPIPES 0 #define netmap_pipe_alloc(_1, _2) 0 #define netmap_pipe_dealloc(_1) -#define netmap_get_pipe_na(nmr, _2, _3, _4) \ - ({ int role__ = (nmr)->nr_flags & NR_REG_MASK; \ - (role__ == NR_REG_PIPE_MASTER || \ - role__ == NR_REG_PIPE_SLAVE) ? EOPNOTSUPP : 0; }) +#define netmap_get_pipe_na(hdr, _2, _3, _4) \ + ((strchr(hdr->nr_name, '{') != NULL || strchr(hdr->nr_name, '}') != NULL) ? 
EOPNOTSUPP : 0) #endif #ifdef WITH_MONITOR -int netmap_get_monitor_na(struct nmreq *nmr, struct netmap_adapter **na, +int netmap_get_monitor_na(struct nmreq_header *hdr, struct netmap_adapter **na, struct netmap_mem_d *nmd, int create); void netmap_monitor_stop(struct netmap_adapter *na); #else -#define netmap_get_monitor_na(nmr, _2, _3, _4) \ - ((nmr)->nr_flags & (NR_MONITOR_TX | NR_MONITOR_RX) ? EOPNOTSUPP : 0) +#define netmap_get_monitor_na(hdr, _2, _3, _4) \ + (((struct nmreq_register *)hdr->nr_body)->nr_flags & (NR_MONITOR_TX | NR_MONITOR_RX) ? EOPNOTSUPP : 0) #endif #ifdef CONFIG_NET_NS @@ -1508,7 +1549,11 @@ void netmap_fini(void); int netmap_get_memory(struct netmap_priv_d* p); void netmap_dtor(void *data); -int netmap_ioctl(struct netmap_priv_d *priv, u_long cmd, caddr_t data, struct thread *); +int netmap_ioctl(struct netmap_priv_d *priv, u_long cmd, caddr_t data, + struct thread *, int nr_body_is_user); +int netmap_ioctl_legacy(struct netmap_priv_d *priv, u_long cmd, caddr_t data, + struct thread *td); +size_t nmreq_size_by_type(uint16_t nr_reqtype); /* netmap_adapter creation/destruction */ @@ -1871,7 +1916,7 @@ static inline int nm_kring_pending(struct netmap_priv_d *np) for_rx_tx(t) { for (i = np->np_qfirst[t]; i < np->np_qlast[t]; i++) { - struct netmap_kring *kring = &NMR(na, t)[i]; + struct netmap_kring *kring = NMR(na, t)[i]; if (kring->nr_mode != kring->nr_pending_mode) { return 1; } @@ -1980,7 +2025,7 @@ void nm_os_mitigation_cleanup(struct nm_generic_mit *mit); struct nm_bdg_fwd { /* forwarding entry for a bridge */ void *ft_buf; /* netmap or indirect buffer */ uint8_t ft_frags; /* how many fragments (only on 1st frag) */ - uint8_t _ft_port; /* dst port (unused) */ + uint16_t ft_offset; /* dst port (unused) */ uint16_t ft_flags; /* flags, e.g. 
indirect */ uint16_t ft_len; /* src fragment len */ uint16_t ft_next; /* next packet to same destination */ @@ -2094,7 +2139,6 @@ struct nm_kctx_cfg { }; /* kthread configuration */ struct nm_kctx *nm_os_kctx_create(struct nm_kctx_cfg *cfg, - unsigned int cfgtype, void *opaque); int nm_os_kctx_worker_start(struct nm_kctx *); void nm_os_kctx_worker_stop(struct nm_kctx *); @@ -2120,19 +2164,21 @@ struct netmap_pt_host_adapter { int (*parent_nm_notify)(struct netmap_kring *kring, int flags); void *ptns; }; -/* ptnetmap HOST routines */ -int netmap_get_pt_host_na(struct nmreq *nmr, struct netmap_adapter **na, - struct netmap_mem_d * nmd, int create); -int ptnetmap_ctl(struct nmreq *nmr, struct netmap_adapter *na); + +/* ptnetmap host-side routines */ +int netmap_get_pt_host_na(struct nmreq_header *hdr, struct netmap_adapter **na, + struct netmap_mem_d * nmd, int create); +int ptnetmap_ctl(const char *nr_name, int create, struct netmap_adapter *na); + static inline int nm_ptnetmap_host_on(struct netmap_adapter *na) { return na && na->na_flags & NAF_PTNETMAP_HOST; } #else /* !WITH_PTNETMAP_HOST */ -#define netmap_get_pt_host_na(nmr, _2, _3, _4) \ - ((nmr)->nr_flags & (NR_PTNETMAP_HOST) ? EOPNOTSUPP : 0) -#define ptnetmap_ctl(_1, _2) EINVAL +#define netmap_get_pt_host_na(hdr, _2, _3, _4) \ + (((struct nmreq_register *)hdr->nr_body)->nr_flags & (NR_PTNETMAP_HOST) ? 
EOPNOTSUPP : 0) +#define ptnetmap_ctl(_1, _2, _3) EINVAL #define nm_ptnetmap_host_on(_1) EINVAL #endif /* !WITH_PTNETMAP_HOST */ @@ -2175,4 +2221,7 @@ void ptnet_nm_krings_delete(struct netmap_adapter *na); void ptnet_nm_dtor(struct netmap_adapter *na); #endif /* WITH_PTNETMAP_GUEST */ +struct nmreq_option * nmreq_findoption(struct nmreq_option *, uint16_t); +int nmreq_checkduplicate(struct nmreq_option *); + #endif /* _NET_NETMAP_KERN_H_ */ diff --git a/sys/dev/netmap/netmap_legacy.c b/sys/dev/netmap/netmap_legacy.c new file mode 100644 index 0000000000000..da0d622958d9b --- /dev/null +++ b/sys/dev/netmap/netmap_legacy.c @@ -0,0 +1,428 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (C) 2018 Vincenzo Maffione + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* $FreeBSD$ */ + +#if defined(__FreeBSD__) +#include <sys/cdefs.h> /* prerequisite */ +#include <sys/types.h> +#include <sys/param.h> /* defines used in kernel.h */ +#include <sys/filio.h> /* FIONBIO */ +#include <sys/malloc.h> +#include <sys/socketvar.h> /* struct socket */ +#include <sys/socket.h> /* sockaddrs */ +#include <sys/sysctl.h> +#include <net/if.h> +#include <net/if_var.h> +#include <net/bpf.h> /* BIOCIMMEDIATE */ +#include <machine/bus.h> /* bus_dmamap_* */ +#include <sys/endian.h> +#elif defined(linux) +#include "bsd_glue.h" +#elif defined(__APPLE__) +#warning OSX support is only partial +#include "osx_glue.h" +#elif defined (_WIN32) +#include "win_glue.h" +#endif + +/* + * common headers + */ +#include <net/netmap.h> +#include <dev/netmap/netmap_kern.h> + +static int +nmreq_register_from_legacy(struct nmreq *nmr, struct nmreq_header *hdr, + struct nmreq_register *req) +{ + req->nr_offset = nmr->nr_offset; + req->nr_memsize = nmr->nr_memsize; + req->nr_tx_slots = nmr->nr_tx_slots; + req->nr_rx_slots = nmr->nr_rx_slots; + req->nr_tx_rings = nmr->nr_tx_rings; + req->nr_rx_rings = nmr->nr_rx_rings; + req->nr_mem_id = nmr->nr_arg2; + req->nr_ringid = nmr->nr_ringid & NETMAP_RING_MASK; + if ((nmr->nr_flags & NR_REG_MASK) == NR_REG_DEFAULT) { + /* Convert the older nmr->nr_ringid (original + * netmap control API) to nmr->nr_flags. 
*/ + u_int regmode = NR_REG_DEFAULT; + if (req->nr_ringid & NETMAP_SW_RING) { + regmode = NR_REG_SW; + } else if (req->nr_ringid & NETMAP_HW_RING) { + regmode = NR_REG_ONE_NIC; + } else { + regmode = NR_REG_ALL_NIC; + } + nmr->nr_flags = regmode | + (nmr->nr_flags & (~NR_REG_MASK)); + } + req->nr_mode = nmr->nr_flags & NR_REG_MASK; + /* Fix nr_name, nr_mode and nr_ringid to handle pipe requests. */ + if (req->nr_mode == NR_REG_PIPE_MASTER || + req->nr_mode == NR_REG_PIPE_SLAVE) { + char suffix[10]; + snprintf(suffix, sizeof(suffix), "%c%d", + (req->nr_mode == NR_REG_PIPE_MASTER ? '{' : '}'), + req->nr_ringid); + if (strlen(hdr->nr_name) + strlen(suffix) + >= sizeof(hdr->nr_name)) { + /* No space for the pipe suffix. */ + return ENOBUFS; + } + strncat(hdr->nr_name, suffix, strlen(suffix)); + req->nr_mode = NR_REG_ALL_NIC; + req->nr_ringid = 0; + } + req->nr_flags = nmr->nr_flags & (~NR_REG_MASK); + if (nmr->nr_ringid & NETMAP_NO_TX_POLL) { + req->nr_flags |= NR_NO_TX_POLL; + } + if (nmr->nr_ringid & NETMAP_DO_RX_POLL) { + req->nr_flags |= NR_DO_RX_POLL; + } + /* nmr->nr_arg1 (nr_pipes) ignored */ + req->nr_extra_bufs = nmr->nr_arg3; + + return 0; +} + +/* Convert the legacy 'nmr' struct into one of the nmreq_xyz structs + * (new API). The new struct is dynamically allocated. */ +static struct nmreq_header * +nmreq_from_legacy(struct nmreq *nmr, u_long ioctl_cmd) +{ + struct nmreq_header *hdr = nm_os_malloc(sizeof(*hdr)); + + if (hdr == NULL) { + goto oom; + } + + /* Sanitize nmr->nr_name by adding the string terminator. */ + if (ioctl_cmd == NIOCGINFO || ioctl_cmd == NIOCREGIF) { + nmr->nr_name[sizeof(nmr->nr_name) - 1] = '\0'; + } + + /* First prepare the request header. */ + hdr->nr_version = NETMAP_API; /* new API */ + strncpy(hdr->nr_name, nmr->nr_name, sizeof(nmr->nr_name)); + hdr->nr_options = (uint64_t)NULL; + hdr->nr_body = (uint64_t)NULL; + + switch (ioctl_cmd) { + case NIOCREGIF: { + switch (nmr->nr_cmd) { + case 0: { + /* Regular NIOCREGIF operation. 
*/ + struct nmreq_register *req = nm_os_malloc(sizeof(*req)); + if (!req) { goto oom; } + hdr->nr_body = (uint64_t)req; + hdr->nr_reqtype = NETMAP_REQ_REGISTER; + if (nmreq_register_from_legacy(nmr, hdr, req)) { + goto oom; + } + break; + } + case NETMAP_BDG_ATTACH: { + struct nmreq_vale_attach *req = nm_os_malloc(sizeof(*req)); + if (!req) { goto oom; } + hdr->nr_body = (uint64_t)req; + hdr->nr_reqtype = NETMAP_REQ_VALE_ATTACH; + if (nmreq_register_from_legacy(nmr, hdr, &req->reg)) { + goto oom; + } + /* Fix nr_mode, starting from nr_arg1. */ + if (nmr->nr_arg1 & NETMAP_BDG_HOST) { + req->reg.nr_mode = NR_REG_NIC_SW; + } else { + req->reg.nr_mode = NR_REG_ALL_NIC; + } + break; + } + case NETMAP_BDG_DETACH: { + hdr->nr_reqtype = NETMAP_REQ_VALE_DETACH; + hdr->nr_body = (uint64_t)nm_os_malloc(sizeof(struct nmreq_vale_detach)); + break; + } + case NETMAP_BDG_VNET_HDR: + case NETMAP_VNET_HDR_GET: { + struct nmreq_port_hdr *req = nm_os_malloc(sizeof(*req)); + if (!req) { goto oom; } + hdr->nr_body = (uint64_t)req; + hdr->nr_reqtype = (nmr->nr_cmd == NETMAP_BDG_VNET_HDR) ? + NETMAP_REQ_PORT_HDR_SET : NETMAP_REQ_PORT_HDR_GET; + req->nr_hdr_len = nmr->nr_arg1; + break; + } + case NETMAP_BDG_NEWIF : { + struct nmreq_vale_newif *req = nm_os_malloc(sizeof(*req)); + if (!req) { goto oom; } + hdr->nr_body = (uint64_t)req; + hdr->nr_reqtype = NETMAP_REQ_VALE_NEWIF; + req->nr_tx_slots = nmr->nr_tx_slots; + req->nr_rx_slots = nmr->nr_rx_slots; + req->nr_tx_rings = nmr->nr_tx_rings; + req->nr_rx_rings = nmr->nr_rx_rings; + req->nr_mem_id = nmr->nr_arg2; + break; + } + case NETMAP_BDG_DELIF: { + hdr->nr_reqtype = NETMAP_REQ_VALE_DELIF; + break; + } + case NETMAP_BDG_POLLING_ON: + case NETMAP_BDG_POLLING_OFF: { + struct nmreq_vale_polling *req = nm_os_malloc(sizeof(*req)); + if (!req) { goto oom; } + hdr->nr_body = (uint64_t)req; + hdr->nr_reqtype = (nmr->nr_cmd == NETMAP_BDG_POLLING_ON) ? 
+ NETMAP_REQ_VALE_POLLING_ENABLE : + NETMAP_REQ_VALE_POLLING_DISABLE; + switch (nmr->nr_flags & NR_REG_MASK) { + default: + req->nr_mode = 0; /* invalid */ + break; + case NR_REG_ONE_NIC: + req->nr_mode = NETMAP_POLLING_MODE_MULTI_CPU; + break; + case NR_REG_ALL_NIC: + req->nr_mode = NETMAP_POLLING_MODE_SINGLE_CPU; + break; + } + req->nr_first_cpu_id = nmr->nr_ringid & NETMAP_RING_MASK; + req->nr_num_polling_cpus = nmr->nr_arg1; + break; + } + case NETMAP_PT_HOST_CREATE: + case NETMAP_PT_HOST_DELETE: { + D("Netmap passthrough not supported yet"); + return NULL; + break; + } + } + break; + } + case NIOCGINFO: { + if (nmr->nr_cmd == NETMAP_BDG_LIST) { + struct nmreq_vale_list *req = nm_os_malloc(sizeof(*req)); + if (!req) { goto oom; } + hdr->nr_body = (uint64_t)req; + hdr->nr_reqtype = NETMAP_REQ_VALE_LIST; + req->nr_bridge_idx = nmr->nr_arg1; + req->nr_port_idx = nmr->nr_arg2; + } else { + /* Regular NIOCGINFO. */ + struct nmreq_port_info_get *req = nm_os_malloc(sizeof(*req)); + if (!req) { goto oom; } + hdr->nr_body = (uint64_t)req; + hdr->nr_reqtype = NETMAP_REQ_PORT_INFO_GET; + req->nr_offset = nmr->nr_offset; + req->nr_memsize = nmr->nr_memsize; + req->nr_tx_slots = nmr->nr_tx_slots; + req->nr_rx_slots = nmr->nr_rx_slots; + req->nr_tx_rings = nmr->nr_tx_rings; + req->nr_rx_rings = nmr->nr_rx_rings; + req->nr_mem_id = nmr->nr_arg2; + } + break; + } + } + + return hdr; +oom: + if (hdr) { + if (hdr->nr_body) { + nm_os_free((void *)hdr->nr_body); + } + nm_os_free(hdr); + } + D("Failed to allocate memory for nmreq_xyz struct"); + + return NULL; +} + +static void +nmreq_register_to_legacy(const struct nmreq_register *req, struct nmreq *nmr) +{ + nmr->nr_offset = req->nr_offset; + nmr->nr_memsize = req->nr_memsize; + nmr->nr_tx_slots = req->nr_tx_slots; + nmr->nr_rx_slots = req->nr_rx_slots; + nmr->nr_tx_rings = req->nr_tx_rings; + nmr->nr_rx_rings = req->nr_rx_rings; + nmr->nr_arg2 = req->nr_mem_id; + nmr->nr_arg3 = req->nr_extra_bufs; +} + +/* Convert a nmreq_xyz 
struct (new API) to the legacy 'nmr' struct. + * It also frees the nmreq_xyz struct, as it was allocated by + * nmreq_from_legacy(). */ +static int +nmreq_to_legacy(struct nmreq_header *hdr, struct nmreq *nmr) +{ + int ret = 0; + + /* We only write-back the fields that the user expects to be + * written back. */ + switch (hdr->nr_reqtype) { + case NETMAP_REQ_REGISTER: { + struct nmreq_register *req = + (struct nmreq_register *)hdr->nr_body; + nmreq_register_to_legacy(req, nmr); + break; + } + case NETMAP_REQ_PORT_INFO_GET: { + struct nmreq_port_info_get *req = + (struct nmreq_port_info_get *)hdr->nr_body; + nmr->nr_offset = req->nr_offset; + nmr->nr_memsize = req->nr_memsize; + nmr->nr_tx_slots = req->nr_tx_slots; + nmr->nr_rx_slots = req->nr_rx_slots; + nmr->nr_tx_rings = req->nr_tx_rings; + nmr->nr_rx_rings = req->nr_rx_rings; + nmr->nr_arg2 = req->nr_mem_id; + break; + } + case NETMAP_REQ_VALE_ATTACH: { + struct nmreq_vale_attach *req = + (struct nmreq_vale_attach *)hdr->nr_body; + nmreq_register_to_legacy(&req->reg, nmr); + break; + } + case NETMAP_REQ_VALE_DETACH: { + break; + } + case NETMAP_REQ_VALE_LIST: { + struct nmreq_vale_list *req = + (struct nmreq_vale_list *)hdr->nr_body; + strncpy(nmr->nr_name, hdr->nr_name, sizeof(nmr->nr_name)); + nmr->nr_arg1 = req->nr_bridge_idx; + nmr->nr_arg2 = req->nr_port_idx; + break; + } + case NETMAP_REQ_PORT_HDR_SET: + case NETMAP_REQ_PORT_HDR_GET: { + struct nmreq_port_hdr *req = + (struct nmreq_port_hdr *)hdr->nr_body; + nmr->nr_arg1 = req->nr_hdr_len; + break; + } + case NETMAP_REQ_VALE_NEWIF: { + struct nmreq_vale_newif *req = + (struct nmreq_vale_newif *)hdr->nr_body; + nmr->nr_tx_slots = req->nr_tx_slots; + nmr->nr_rx_slots = req->nr_rx_slots; + nmr->nr_tx_rings = req->nr_tx_rings; + nmr->nr_rx_rings = req->nr_rx_rings; + nmr->nr_arg2 = req->nr_mem_id; + break; + } + case NETMAP_REQ_VALE_DELIF: + case NETMAP_REQ_VALE_POLLING_ENABLE: + case NETMAP_REQ_VALE_POLLING_DISABLE: { + break; + } + } + + return ret; +} + 
+int +netmap_ioctl_legacy(struct netmap_priv_d *priv, u_long cmd, caddr_t data, + struct thread *td) +{ + int error = 0; + + switch (cmd) { + case NIOCGINFO: + case NIOCREGIF: { + /* Request for the legacy control API. Convert it to a + * NIOCCTRL request. */ + struct nmreq *nmr = (struct nmreq *) data; + struct nmreq_header *hdr = nmreq_from_legacy(nmr, cmd); + if (hdr == NULL) { /* out of memory */ + return ENOMEM; + } + error = netmap_ioctl(priv, NIOCCTRL, (caddr_t)hdr, td, + /*nr_body_is_user=*/0); + if (error == 0) { + nmreq_to_legacy(hdr, nmr); + } + if (hdr->nr_body) { + nm_os_free((void *)hdr->nr_body); + } + nm_os_free(hdr); + break; + } +#ifdef WITH_VALE + case NIOCCONFIG: { + struct nm_ifreq *nr = (struct nm_ifreq *)data; + error = netmap_bdg_config(nr); + break; + } +#endif +#ifdef __FreeBSD__ + case FIONBIO: + case FIOASYNC: + ND("FIONBIO/FIOASYNC are no-ops"); + break; + + case BIOCIMMEDIATE: + case BIOCGHDRCMPLT: + case BIOCSHDRCMPLT: + case BIOCSSEESENT: + D("ignore BIOCIMMEDIATE/BIOCSHDRCMPLT/BIOCSHDRCMPLT/BIOCSSEESENT"); + break; + + default: /* allow device-specific ioctls */ + { + struct nmreq *nmr = (struct nmreq *)data; + struct ifnet *ifp = ifunit_ref(nmr->nr_name); + if (ifp == NULL) { + error = ENXIO; + } else { + struct socket so; + + bzero(&so, sizeof(so)); + so.so_vnet = ifp->if_vnet; + // so->so_proto not null. 
+ error = ifioctl(&so, cmd, data, td); + if_rele(ifp); + } + break; + } + +#else /* linux */ + default: + error = EOPNOTSUPP; +#endif /* linux */ + } + + return error; +} diff --git a/sys/dev/netmap/netmap_mem2.c b/sys/dev/netmap/netmap_mem2.c index 1f206a1b02927..b6d2d7e817d61 100644 --- a/sys/dev/netmap/netmap_mem2.c +++ b/sys/dev/netmap/netmap_mem2.c @@ -110,6 +110,7 @@ struct netmap_obj_pool { uint32_t *bitmap; /* one bit per buffer, 1 means free */ uint32_t *invalid_bitmap;/* one bit per buffer, 1 means invalid */ uint32_t bitmap_slots; /* number of uint32 entries in bitmap */ + int alloc_done; /* we have allocated the memory */ /* ---------------------------------------------------*/ /* limits */ @@ -131,7 +132,11 @@ struct netmap_obj_pool { }; #define NMA_LOCK_T NM_MTX_T - +#define NMA_LOCK_INIT(n) NM_MTX_INIT((n)->nm_mtx) +#define NMA_LOCK_DESTROY(n) NM_MTX_DESTROY((n)->nm_mtx) +#define NMA_LOCK(n) NM_MTX_LOCK((n)->nm_mtx) +#define NMA_SPINLOCK(n) NM_MTX_SPINLOCK((n)->nm_mtx) +#define NMA_UNLOCK(n) NM_MTX_UNLOCK((n)->nm_mtx) struct netmap_mem_ops { int (*nmd_get_lut)(struct netmap_mem_d *, struct netmap_lut*); @@ -179,56 +184,126 @@ struct netmap_mem_d { char name[NM_MEM_NAMESZ]; }; -/* - * XXX need to fix the case of t0 == void - */ -#define NMD_DEFCB(t0, name) \ -t0 \ -netmap_mem_##name(struct netmap_mem_d *nmd) \ -{ \ - return nmd->ops->nmd_##name(nmd); \ +int +netmap_mem_get_lut(struct netmap_mem_d *nmd, struct netmap_lut *lut) +{ + int rv; + + NMA_LOCK(nmd); + rv = nmd->ops->nmd_get_lut(nmd, lut); + NMA_UNLOCK(nmd); + + return rv; +} + +int +netmap_mem_get_info(struct netmap_mem_d *nmd, uint64_t *size, + u_int *memflags, nm_memid_t *memid) +{ + int rv; + + NMA_LOCK(nmd); + rv = nmd->ops->nmd_get_info(nmd, size, memflags, memid); + NMA_UNLOCK(nmd); + + return rv; +} + +vm_paddr_t +netmap_mem_ofstophys(struct netmap_mem_d *nmd, vm_ooffset_t off) +{ + vm_paddr_t pa; + +#if defined(__FreeBSD__) + /* This function is called by netmap_dev_pager_fault(), 
which holds a + * non-sleepable lock since FreeBSD 12. Since we cannot sleep, we + * spin on the trylock. */ + NMA_SPINLOCK(nmd); +#else + NMA_LOCK(nmd); +#endif + pa = nmd->ops->nmd_ofstophys(nmd, off); + NMA_UNLOCK(nmd); + + return pa; +} + +static int +netmap_mem_config(struct netmap_mem_d *nmd) +{ + if (nmd->active) { + /* already in use. Not fatal, but we + * cannot change the configuration + */ + return 0; + } + + return nmd->ops->nmd_config(nmd); +} + +ssize_t +netmap_mem_if_offset(struct netmap_mem_d *nmd, const void *off) +{ + ssize_t rv; + + NMA_LOCK(nmd); + rv = nmd->ops->nmd_if_offset(nmd, off); + NMA_UNLOCK(nmd); + + return rv; } -#define NMD_DEFCB1(t0, name, t1) \ -t0 \ -netmap_mem_##name(struct netmap_mem_d *nmd, t1 a1) \ -{ \ - return nmd->ops->nmd_##name(nmd, a1); \ +static void +netmap_mem_delete(struct netmap_mem_d *nmd) +{ + nmd->ops->nmd_delete(nmd); } -#define NMD_DEFCB3(t0, name, t1, t2, t3) \ -t0 \ -netmap_mem_##name(struct netmap_mem_d *nmd, t1 a1, t2 a2, t3 a3) \ -{ \ - return nmd->ops->nmd_##name(nmd, a1, a2, a3); \ +struct netmap_if * +netmap_mem_if_new(struct netmap_adapter *na, struct netmap_priv_d *priv) +{ + struct netmap_if *nifp; + struct netmap_mem_d *nmd = na->nm_mem; + + NMA_LOCK(nmd); + nifp = nmd->ops->nmd_if_new(na, priv); + NMA_UNLOCK(nmd); + + return nifp; } -#define NMD_DEFNACB(t0, name) \ -t0 \ -netmap_mem_##name(struct netmap_adapter *na) \ -{ \ - return na->nm_mem->ops->nmd_##name(na); \ +void +netmap_mem_if_delete(struct netmap_adapter *na, struct netmap_if *nif) +{ + struct netmap_mem_d *nmd = na->nm_mem; + + NMA_LOCK(nmd); + nmd->ops->nmd_if_delete(na, nif); + NMA_UNLOCK(nmd); } -#define NMD_DEFNACB1(t0, name, t1) \ -t0 \ -netmap_mem_##name(struct netmap_adapter *na, t1 a1) \ -{ \ - return na->nm_mem->ops->nmd_##name(na, a1); \ +int +netmap_mem_rings_create(struct netmap_adapter *na) +{ + int rv; + struct netmap_mem_d *nmd = na->nm_mem; + + NMA_LOCK(nmd); + rv = nmd->ops->nmd_rings_create(na); + NMA_UNLOCK(nmd); + + 
return rv; } -NMD_DEFCB1(int, get_lut, struct netmap_lut *); -NMD_DEFCB3(int, get_info, uint64_t *, u_int *, uint16_t *); -NMD_DEFCB1(vm_paddr_t, ofstophys, vm_ooffset_t); -static int netmap_mem_config(struct netmap_mem_d *); -NMD_DEFCB(int, config); -NMD_DEFCB1(ssize_t, if_offset, const void *); -NMD_DEFCB(void, delete); +void +netmap_mem_rings_delete(struct netmap_adapter *na) +{ + struct netmap_mem_d *nmd = na->nm_mem; -NMD_DEFNACB1(struct netmap_if *, if_new, struct netmap_priv_d *); -NMD_DEFNACB1(void, if_delete, struct netmap_if *); -NMD_DEFNACB(int, rings_create); -NMD_DEFNACB(void, rings_delete); + NMA_LOCK(nmd); + nmd->ops->nmd_rings_delete(na); + NMA_UNLOCK(nmd); +} static int netmap_mem_map(struct netmap_obj_pool *, struct netmap_adapter *); static int netmap_mem_unmap(struct netmap_obj_pool *, struct netmap_adapter *); @@ -241,12 +316,6 @@ netmap_mem_get_id(struct netmap_mem_d *nmd) return nmd->nm_id; } -#define NMA_LOCK_INIT(n) NM_MTX_INIT((n)->nm_mtx) -#define NMA_LOCK_DESTROY(n) NM_MTX_DESTROY((n)->nm_mtx) -#define NMA_LOCK(n) NM_MTX_LOCK((n)->nm_mtx) -#define NMA_SPINLOCK(n) NM_MTX_SPINLOCK((n)->nm_mtx) -#define NMA_UNLOCK(n) NM_MTX_UNLOCK((n)->nm_mtx) - #ifdef NM_DEBUG_MEM_PUTGET #define NM_DBG_REFC(nmd, func, line) \ nm_prinf("%s:%d mem[%d] -> %d\n", func, line, (nmd)->nm_id, (nmd)->refcount); @@ -285,22 +354,32 @@ __netmap_mem_put(struct netmap_mem_d *nmd, const char *func, int line) int netmap_mem_finalize(struct netmap_mem_d *nmd, struct netmap_adapter *na) { + int lasterr = 0; if (nm_mem_assign_group(nmd, na->pdev) < 0) { return ENOMEM; - } else { - NMA_LOCK(nmd); - nmd->lasterr = nmd->ops->nmd_finalize(nmd); - NMA_UNLOCK(nmd); } + NMA_LOCK(nmd); + + if (netmap_mem_config(nmd)) + goto out; + + nmd->active++; + + nmd->lasterr = nmd->ops->nmd_finalize(nmd); + if (!nmd->lasterr && na->pdev) { nmd->lasterr = netmap_mem_map(&nmd->pools[NETMAP_BUF_POOL], na); - if (nmd->lasterr) { - netmap_mem_deref(nmd, na); - } } - return nmd->lasterr; +out: + 
lasterr = nmd->lasterr; + NMA_UNLOCK(nmd); + + if (lasterr) + netmap_mem_deref(nmd, na); + + return lasterr; } static int @@ -400,6 +479,10 @@ netmap_mem_deref(struct netmap_mem_d *nmd, struct netmap_adapter *na) } nmd->ops->nmd_deref(nmd); + nmd->active--; + if (!nmd->active) + nmd->nm_grp = -1; + NMA_UNLOCK(nmd); return last_user; } @@ -706,14 +789,6 @@ netmap_mem2_ofstophys(struct netmap_mem_d* nmd, vm_ooffset_t offset) vm_paddr_t pa; struct netmap_obj_pool *p; -#if defined(__FreeBSD__) - /* This function is called by netmap_dev_pager_fault(), which holds a - * non-sleepable lock since FreeBSD 12. Since we cannot sleep, we - * spin on the trylock. */ - NMA_SPINLOCK(nmd); -#else - NMA_LOCK(nmd); -#endif p = nmd->pools; for (i = 0; i < NETMAP_POOLS_NR; offset -= p[i].memtotal, i++) { @@ -727,7 +802,6 @@ netmap_mem2_ofstophys(struct netmap_mem_d* nmd, vm_ooffset_t offset) pa = vtophys(p[i].lut[offset / p[i]._objsize].vaddr); pa.QuadPart += offset % p[i]._objsize; #endif - NMA_UNLOCK(nmd); return pa; } /* this is only in case of errors */ @@ -738,7 +812,6 @@ netmap_mem2_ofstophys(struct netmap_mem_d* nmd, vm_ooffset_t offset) p[NETMAP_IF_POOL].memtotal + p[NETMAP_RING_POOL].memtotal + p[NETMAP_BUF_POOL].memtotal); - NMA_UNLOCK(nmd); #ifndef _WIN32 return 0; /* bad address */ #else @@ -775,10 +848,10 @@ netmap_mem2_ofstophys(struct netmap_mem_d* nmd, vm_ooffset_t offset) PMDL win32_build_user_vm_map(struct netmap_mem_d* nmd) { - int i, j; - size_t memsize; u_int memflags, ofs = 0; PMDL mainMdl, tempMdl; + uint64_t memsize; + int i, j; if (netmap_mem_get_info(nmd, &memsize, &memflags, NULL)) { D("memory not finalised yet"); @@ -847,11 +920,10 @@ netmap_mem2_get_pool_info(struct netmap_mem_d* nmd, u_int pool, u_int *clustsize } static int -netmap_mem2_get_info(struct netmap_mem_d* nmd, uint64_t* size, u_int *memflags, - nm_memid_t *id) +netmap_mem2_get_info(struct netmap_mem_d* nmd, uint64_t* size, + u_int *memflags, nm_memid_t *id) { int error = 0; - NMA_LOCK(nmd); 
error = netmap_mem_config(nmd); if (error) goto out; @@ -872,7 +944,6 @@ netmap_mem2_get_info(struct netmap_mem_d* nmd, uint64_t* size, u_int *memflags, if (id) *id = nmd->nm_id; out: - NMA_UNLOCK(nmd); return error; } @@ -916,11 +987,7 @@ netmap_obj_offset(struct netmap_obj_pool *p, const void *vaddr) static ssize_t netmap_mem2_if_offset(struct netmap_mem_d *nmd, const void *addr) { - ssize_t v; - NMA_LOCK(nmd); - v = netmap_if_offset(nmd, addr); - NMA_UNLOCK(nmd); - return v; + return netmap_if_offset(nmd, addr); } /* @@ -1118,7 +1185,7 @@ netmap_new_bufs(struct netmap_mem_d *nmd, struct netmap_slot *slot, u_int n) slot[i].ptr = 0; } - ND("allocated %d buffers, %d available, first at %d", n, p->objfree, pos); + ND("%s: allocated %d buffers, %d available, first at %d", p->name, n, p->objfree, pos); return (0); cleanup: @@ -1163,9 +1230,11 @@ netmap_free_bufs(struct netmap_mem_d *nmd, struct netmap_slot *slot, u_int n) u_int i; for (i = 0; i < n; i++) { - if (slot[i].buf_idx > 2) + if (slot[i].buf_idx > 1) netmap_free_buf(nmd, slot[i].buf_idx); } + ND("%s: released some buffers, available: %u", + p->name, p->objfree); } static void @@ -1180,6 +1249,12 @@ netmap_reset_obj_allocator(struct netmap_obj_pool *p) if (p->invalid_bitmap) nm_os_free(p->invalid_bitmap); p->invalid_bitmap = NULL; + if (!p->alloc_done) { + /* allocation was done by somebody else. + * Let them clean up after themselves. + */ + return; + } if (p->lut) { u_int i; @@ -1199,6 +1274,7 @@ netmap_reset_obj_allocator(struct netmap_obj_pool *p) p->memtotal = 0; p->numclusters = 0; p->objfree = 0; + p->alloc_done = 0; } /* @@ -1310,13 +1386,20 @@ netmap_finalize_obj_allocator(struct netmap_obj_pool *p) size_t n; if (p->lut) { - /* already finalized, nothing to do */ + /* if the lut is already there we assume that also all the + * clusters have already been allocated, possibily by somebody + * else (e.g., extmem). 
In the latter case, the alloc_done flag + * will remain at zero, so that we will not attempt to + * deallocate the clusters by ourselves in + * netmap_reset_obj_allocator. + */ return 0; } /* optimistically assume we have enough memory */ p->numclusters = p->_numclusters; p->objtotal = p->_objtotal; + p->alloc_done = 1; p->lut = nm_alloc_lut(p->objtotal); if (p->lut == NULL) { @@ -1426,7 +1509,7 @@ netmap_mem_reset_all(struct netmap_mem_d *nmd) static int netmap_mem_unmap(struct netmap_obj_pool *p, struct netmap_adapter *na) { - int i, lim = p->_objtotal; + int i, lim = p->objtotal; struct netmap_lut *lut = &na->na_lut; if (na == NULL || na->pdev == NULL) @@ -1675,10 +1758,6 @@ netmap_mem2_config(struct netmap_mem_d *nmd) { int i; - if (nmd->active) - /* already in use, we cannot change the configuration */ - goto out; - if (!netmap_mem_params_changed(nmd->params)) goto out; @@ -1707,19 +1786,8 @@ out: static int netmap_mem2_finalize(struct netmap_mem_d *nmd) { - int err; - - /* update configuration if changed */ - if (netmap_mem_config(nmd)) - goto out1; - - nmd->active++; - - if (nmd->flags & NETMAP_MEM_FINALIZED) { - /* may happen if config is not changed */ - D("nothing to do"); + if (nmd->flags & NETMAP_MEM_FINALIZED) goto out; - } if (netmap_mem_finalize_all(nmd)) goto out; @@ -1727,13 +1795,7 @@ netmap_mem2_finalize(struct netmap_mem_d *nmd) nmd->lasterr = 0; out: - if (nmd->lasterr) - nmd->active--; -out1: - err = nmd->lasterr; - - return err; - + return nmd->lasterr; } static void @@ -1782,7 +1844,7 @@ netmap_free_rings(struct netmap_adapter *na) for_rx_tx(t) { u_int i; for (i = 0; i < nma_get_nrings(na, t) + 1; i++) { - struct netmap_kring *kring = &NMR(na, t)[i]; + struct netmap_kring *kring = NMR(na, t)[i]; struct netmap_ring *ring = kring->ring; if (ring == NULL || kring->users > 0 || (kring->nr_kflags & NKR_NEEDRING)) { @@ -1793,8 +1855,12 @@ netmap_free_rings(struct netmap_adapter *na) } if (netmap_verbose) D("deleting ring %s", kring->name); - if (i 
!= nma_get_nrings(na, t) || na->na_flags & NAF_HOST_RINGS) + if (!(kring->nr_kflags & NKR_FAKERING)) { + ND("freeing bufs for %s", kring->name); netmap_free_bufs(na->nm_mem, ring->slot, kring->nkr_num_slots); + } else { + ND("NOT freeing bufs for %s", kring->name); + } netmap_ring_free(na->nm_mem, ring); kring->ring = NULL; } @@ -1813,13 +1879,11 @@ netmap_mem2_rings_create(struct netmap_adapter *na) { enum txrx t; - NMA_LOCK(na->nm_mem); - for_rx_tx(t) { u_int i; for (i = 0; i <= nma_get_nrings(na, t); i++) { - struct netmap_kring *kring = &NMR(na, t)[i]; + struct netmap_kring *kring = NMR(na, t)[i]; struct netmap_ring *ring = kring->ring; u_int len, ndesc; @@ -1857,14 +1921,16 @@ netmap_mem2_rings_create(struct netmap_adapter *na) ND("%s h %d c %d t %d", kring->name, ring->head, ring->cur, ring->tail); ND("initializing slots for %s_ring", nm_txrx2str(txrx)); - if (i != nma_get_nrings(na, t) || (na->na_flags & NAF_HOST_RINGS)) { + if (!(kring->nr_kflags & NKR_FAKERING)) { /* this is a real ring */ + ND("allocating buffers for %s", kring->name); if (netmap_new_bufs(na->nm_mem, ring->slot, ndesc)) { D("Cannot allocate buffers for %s_ring", nm_txrx2str(t)); goto cleanup; } } else { /* this is a fake ring, set all indices to 0 */ + ND("NOT allocating buffers for %s", kring->name); netmap_mem_set_ring(na->nm_mem, ring->slot, ndesc, 0); } /* ring info */ @@ -1873,15 +1939,11 @@ netmap_mem2_rings_create(struct netmap_adapter *na) } } - NMA_UNLOCK(na->nm_mem); - return 0; cleanup: netmap_free_rings(na); - NMA_UNLOCK(na->nm_mem); - return ENOMEM; } @@ -1889,11 +1951,7 @@ static void netmap_mem2_rings_delete(struct netmap_adapter *na) { /* last instance, release bufs and rings */ - NMA_LOCK(na->nm_mem); - netmap_free_rings(na); - - NMA_UNLOCK(na->nm_mem); } @@ -1924,8 +1982,6 @@ netmap_mem2_if_new(struct netmap_adapter *na, struct netmap_priv_d *priv) * to the tx and rx rings in the shared memory region. 
*/ - NMA_LOCK(na->nm_mem); - len = sizeof(struct netmap_if) + (ntot * sizeof(ssize_t)); nifp = netmap_if_malloc(na->nm_mem, len); if (nifp == NULL) { @@ -1949,10 +2005,10 @@ netmap_mem2_if_new(struct netmap_adapter *na, struct netmap_priv_d *priv) * ring, like we do for buffers? */ ssize_t ofs = 0; - if (na->tx_rings[i].ring != NULL && i >= priv->np_qfirst[NR_TX] + if (na->tx_rings[i]->ring != NULL && i >= priv->np_qfirst[NR_TX] && i < priv->np_qlast[NR_TX]) { ofs = netmap_ring_offset(na->nm_mem, - na->tx_rings[i].ring) - base; + na->tx_rings[i]->ring) - base; } *(ssize_t *)(uintptr_t)&nifp->ring_ofs[i] = ofs; } @@ -1961,16 +2017,14 @@ netmap_mem2_if_new(struct netmap_adapter *na, struct netmap_priv_d *priv) * ring, like we do for buffers? */ ssize_t ofs = 0; - if (na->rx_rings[i].ring != NULL && i >= priv->np_qfirst[NR_RX] + if (na->rx_rings[i]->ring != NULL && i >= priv->np_qfirst[NR_RX] && i < priv->np_qlast[NR_RX]) { ofs = netmap_ring_offset(na->nm_mem, - na->rx_rings[i].ring) - base; + na->rx_rings[i]->ring) - base; } *(ssize_t *)(uintptr_t)&nifp->ring_ofs[i+n[NR_TX]] = ofs; } - NMA_UNLOCK(na->nm_mem); - return (nifp); } @@ -1980,21 +2034,15 @@ netmap_mem2_if_delete(struct netmap_adapter *na, struct netmap_if *nifp) if (nifp == NULL) /* nothing to do */ return; - NMA_LOCK(na->nm_mem); if (nifp->ni_bufs_head) netmap_extra_free(na, nifp->ni_bufs_head); netmap_if_free(na->nm_mem, nifp); - - NMA_UNLOCK(na->nm_mem); } static void netmap_mem2_deref(struct netmap_mem_d *nmd) { - nmd->active--; - if (!nmd->active) - nmd->nm_grp = -1; if (netmap_verbose) D("active = %d", nmd->active); @@ -2016,42 +2064,32 @@ struct netmap_mem_ops netmap_mem_global_ops = { }; int -netmap_mem_pools_info_get(struct nmreq *nmr, struct netmap_mem_d *nmd) +netmap_mem_pools_info_get(struct nmreq_pools_info *req, + struct netmap_mem_d *nmd) { - uintptr_t *pp = (uintptr_t *)&nmr->nr_arg1; - struct netmap_pools_info *upi = (struct netmap_pools_info *)(*pp); - struct netmap_pools_info pi; - 
uint64_t memsize; - uint16_t memid; int ret; - ret = netmap_mem_get_info(nmd, &memsize, NULL, &memid); + ret = netmap_mem_get_info(nmd, &req->nr_memsize, NULL, + &req->nr_mem_id); if (ret) { return ret; } - pi.memsize = memsize; - pi.memid = memid; NMA_LOCK(nmd); - pi.if_pool_offset = 0; - pi.if_pool_objtotal = nmd->pools[NETMAP_IF_POOL].objtotal; - pi.if_pool_objsize = nmd->pools[NETMAP_IF_POOL]._objsize; + req->nr_if_pool_offset = 0; + req->nr_if_pool_objtotal = nmd->pools[NETMAP_IF_POOL].objtotal; + req->nr_if_pool_objsize = nmd->pools[NETMAP_IF_POOL]._objsize; - pi.ring_pool_offset = nmd->pools[NETMAP_IF_POOL].memtotal; - pi.ring_pool_objtotal = nmd->pools[NETMAP_RING_POOL].objtotal; - pi.ring_pool_objsize = nmd->pools[NETMAP_RING_POOL]._objsize; + req->nr_ring_pool_offset = nmd->pools[NETMAP_IF_POOL].memtotal; + req->nr_ring_pool_objtotal = nmd->pools[NETMAP_RING_POOL].objtotal; + req->nr_ring_pool_objsize = nmd->pools[NETMAP_RING_POOL]._objsize; - pi.buf_pool_offset = nmd->pools[NETMAP_IF_POOL].memtotal + + req->nr_buf_pool_offset = nmd->pools[NETMAP_IF_POOL].memtotal + nmd->pools[NETMAP_RING_POOL].memtotal; - pi.buf_pool_objtotal = nmd->pools[NETMAP_BUF_POOL].objtotal; - pi.buf_pool_objsize = nmd->pools[NETMAP_BUF_POOL]._objsize; + req->nr_buf_pool_objtotal = nmd->pools[NETMAP_BUF_POOL].objtotal; + req->nr_buf_pool_objsize = nmd->pools[NETMAP_BUF_POOL]._objsize; NMA_UNLOCK(nmd); - ret = copyout(&pi, upi, sizeof(pi)); - if (ret) { - return ret; - } - return 0; } @@ -2059,8 +2097,7 @@ netmap_mem_pools_info_get(struct nmreq *nmr, struct netmap_mem_d *nmd) struct netmap_mem_ext { struct netmap_mem_d up; - struct page **pages; - int nr_pages; + struct nm_os_extmem *os; struct netmap_mem_ext *next, *prev; }; @@ -2090,29 +2127,14 @@ netmap_mem_ext_unregister(struct netmap_mem_ext *e) e->prev = e->next = NULL; } -static int -netmap_mem_ext_same_pages(struct netmap_mem_ext *e, struct page **pages, int nr_pages) -{ - int i; - - if (e->nr_pages != nr_pages) - return 0; 
- - for (i = 0; i < nr_pages; i++) - if (pages[i] != e->pages[i]) - return 0; - - return 1; -} - static struct netmap_mem_ext * -netmap_mem_ext_search(struct page **pages, int nr_pages) +netmap_mem_ext_search(struct nm_os_extmem *os) { struct netmap_mem_ext *e; NM_MTX_LOCK(nm_mem_ext_list_lock); for (e = netmap_mem_ext_list; e; e = e->next) { - if (netmap_mem_ext_same_pages(e, pages, nr_pages)) { + if (nm_os_extmem_isequal(e->os, os)) { netmap_mem_get(&e->up); break; } @@ -2123,18 +2145,6 @@ netmap_mem_ext_search(struct page **pages, int nr_pages) static void -netmap_mem_ext_free_pages(struct page **pages, int nr_pages) -{ - int i; - - for (i = 0; i < nr_pages; i++) { - kunmap(pages[i]); - put_page(pages[i]); - } - nm_os_vfree(pages); -} - -static void netmap_mem_ext_delete(struct netmap_mem_d *d) { int i; @@ -2151,11 +2161,8 @@ netmap_mem_ext_delete(struct netmap_mem_d *d) p->lut = NULL; } } - if (e->pages) { - netmap_mem_ext_free_pages(e->pages, e->nr_pages); - e->pages = NULL; - e->nr_pages = 0; - } + if (e->os) + nm_os_extmem_delete(e->os); netmap_mem2_delete(d); } @@ -2181,117 +2188,66 @@ struct netmap_mem_ops netmap_mem_ext_ops = { }; struct netmap_mem_d * -netmap_mem_ext_create(struct nmreq *nmr, int *perror) +netmap_mem_ext_create(uint64_t usrptr, struct nmreq_pools_info *pi, int *perror) { - uintptr_t p = *(uintptr_t *)&nmr->nr_arg1; - struct netmap_pools_info pi; int error = 0; - unsigned long end, start; - int nr_pages, res, i, j; - struct page **pages = NULL; + int i, j; struct netmap_mem_ext *nme; char *clust; size_t off; - - error = copyin((void *)p, &pi, sizeof(pi)); - if (error) - goto out; + struct nm_os_extmem *os = NULL; + int nr_pages; // XXX sanity checks - if (pi.if_pool_objtotal == 0) - pi.if_pool_objtotal = netmap_min_priv_params[NETMAP_IF_POOL].num; - if (pi.if_pool_objsize == 0) - pi.if_pool_objsize = netmap_min_priv_params[NETMAP_IF_POOL].size; - if (pi.ring_pool_objtotal == 0) - pi.ring_pool_objtotal = 
netmap_min_priv_params[NETMAP_RING_POOL].num; - if (pi.ring_pool_objsize == 0) - pi.ring_pool_objsize = netmap_min_priv_params[NETMAP_RING_POOL].size; - if (pi.buf_pool_objtotal == 0) - pi.buf_pool_objtotal = netmap_min_priv_params[NETMAP_BUF_POOL].num; - if (pi.buf_pool_objsize == 0) - pi.buf_pool_objsize = netmap_min_priv_params[NETMAP_BUF_POOL].size; + if (pi->nr_if_pool_objtotal == 0) + pi->nr_if_pool_objtotal = netmap_min_priv_params[NETMAP_IF_POOL].num; + if (pi->nr_if_pool_objsize == 0) + pi->nr_if_pool_objsize = netmap_min_priv_params[NETMAP_IF_POOL].size; + if (pi->nr_ring_pool_objtotal == 0) + pi->nr_ring_pool_objtotal = netmap_min_priv_params[NETMAP_RING_POOL].num; + if (pi->nr_ring_pool_objsize == 0) + pi->nr_ring_pool_objsize = netmap_min_priv_params[NETMAP_RING_POOL].size; + if (pi->nr_buf_pool_objtotal == 0) + pi->nr_buf_pool_objtotal = netmap_min_priv_params[NETMAP_BUF_POOL].num; + if (pi->nr_buf_pool_objsize == 0) + pi->nr_buf_pool_objsize = netmap_min_priv_params[NETMAP_BUF_POOL].size; D("if %d %d ring %d %d buf %d %d", - pi.if_pool_objtotal, pi.if_pool_objsize, - pi.ring_pool_objtotal, pi.ring_pool_objsize, - pi.buf_pool_objtotal, pi.buf_pool_objsize); + pi->nr_if_pool_objtotal, pi->nr_if_pool_objsize, + pi->nr_ring_pool_objtotal, pi->nr_ring_pool_objsize, + pi->nr_buf_pool_objtotal, pi->nr_buf_pool_objsize); - end = (p + pi.memsize + PAGE_SIZE - 1) >> PAGE_SHIFT; - start = p >> PAGE_SHIFT; - nr_pages = end - start; - - pages = nm_os_vmalloc(nr_pages * sizeof(*pages)); - if (pages == NULL) { - error = ENOMEM; + os = nm_os_extmem_create(usrptr, pi, &error); + if (os == NULL) { + D("os extmem creation failed"); goto out; } -#ifdef NETMAP_LINUX_HAVE_GUP_4ARGS - res = get_user_pages_unlocked( - p, - nr_pages, - pages, - FOLL_WRITE | FOLL_GET | FOLL_SPLIT | FOLL_POPULATE); // XXX check other flags -#elif defined(NETMAP_LINUX_HAVE_GUP_5ARGS) - res = get_user_pages_unlocked( - p, - nr_pages, - 1, /* write */ - 0, /* don't force */ - pages); -#elif 
defined(NETMAP_LINUX_HAVE_GUP_7ARGS) - res = get_user_pages_unlocked( - current, - current->mm, - p, - nr_pages, - 1, /* write */ - 0, /* don't force */ - pages); -#else - down_read(&current->mm->mmap_sem); - res = get_user_pages( - current, - current->mm, - p, - nr_pages, - 1, /* write */ - 0, /* don't force */ - pages, - NULL); - up_read(&current->mm->mmap_sem); -#endif /* NETMAP_LINUX_GUP */ - - if (res < nr_pages) { - error = EFAULT; - goto out_unmap; - } - - nme = netmap_mem_ext_search(pages, nr_pages); + nme = netmap_mem_ext_search(os); if (nme) { - netmap_mem_ext_free_pages(pages, nr_pages); + nm_os_extmem_delete(os); return &nme->up; } D("not found, creating new"); nme = _netmap_mem_private_new(sizeof(*nme), (struct netmap_obj_params[]){ - { pi.if_pool_objsize, pi.if_pool_objtotal }, - { pi.ring_pool_objsize, pi.ring_pool_objtotal }, - { pi.buf_pool_objsize, pi.buf_pool_objtotal }}, + { pi->nr_if_pool_objsize, pi->nr_if_pool_objtotal }, + { pi->nr_ring_pool_objsize, pi->nr_ring_pool_objtotal }, + { pi->nr_buf_pool_objsize, pi->nr_buf_pool_objtotal }}, &netmap_mem_ext_ops, &error); if (nme == NULL) goto out_unmap; + nr_pages = nm_os_extmem_nr_pages(os); + /* from now on pages will be released by nme destructor; * we let res = 0 to prevent release in out_unmap below */ - res = 0; - nme->pages = pages; - nme->nr_pages = nr_pages; - nme->up.flags |= NETMAP_MEM_EXT; + nme->os = os; + os = NULL; /* pass ownership */ - clust = kmap(*pages); + clust = nm_os_extmem_nextpage(nme->os); off = 0; for (i = 0; i < NETMAP_POOLS_NR; i++) { struct netmap_obj_pool *p = &nme->up.pools[i]; @@ -2323,9 +2279,11 @@ netmap_mem_ext_create(struct nmreq *nmr, int *perror) for (j = 0; j < o->num && nr_pages > 0; j++) { size_t noff; - size_t skip; p->lut[j].vaddr = clust + off; +#if !defined(linux) && !defined(_WIN32) + p->lut[j].paddr = vtophys(p->lut[j].vaddr); +#endif ND("%s %d at %p", p->name, j, p->lut[j].vaddr); noff = off + p->_objsize; if (noff < PAGE_SIZE) { @@ -2333,15 +2291,16 @@
netmap_mem_ext_create(struct nmreq *nmr, int *perror) continue; } ND("too big, recomputing offset..."); - skip = PAGE_SIZE - (off & PAGE_MASK); while (noff >= PAGE_SIZE) { - noff -= skip; - pages++; + char *old_clust = clust; + noff -= PAGE_SIZE; + clust = nm_os_extmem_nextpage(nme->os); nr_pages--; ND("noff %zu page %p nr_pages %d", noff, page_to_virt(*pages), nr_pages); if (noff > 0 && !nm_isset(p->invalid_bitmap, j) && - (nr_pages == 0 || *pages != *(pages - 1) + 1)) + (nr_pages == 0 || + old_clust + PAGE_SIZE != clust)) { /* out of space or non contiguous, * drop this object @@ -2351,11 +2310,8 @@ netmap_mem_ext_create(struct nmreq *nmr, int *perror) } if (nr_pages == 0) break; - skip = PAGE_SIZE; } off = noff; - if (nr_pages > 0) - clust = kmap(*pages); } p->objtotal = j; p->numclusters = p->objtotal; @@ -2363,12 +2319,6 @@ netmap_mem_ext_create(struct nmreq *nmr, int *perror) ND("%d memtotal %u", j, p->memtotal); } - /* skip the first netmap_if, where the pools info reside */ - { - struct netmap_obj_pool *p = &nme->up.pools[NETMAP_IF_POOL]; - p->invalid_bitmap[0] |= 1U; - } - netmap_mem_ext_register(nme); return &nme->up; @@ -2376,10 +2326,8 @@ netmap_mem_ext_create(struct nmreq *nmr, int *perror) out_delete: netmap_mem_put(&nme->up); out_unmap: - for (i = 0; i < res; i++) - put_page(pages[i]); - if (res) - nm_os_free(pages); + if (os) + nm_os_extmem_delete(os); out: if (perror) *perror = error; @@ -2504,8 +2452,6 @@ netmap_mem_pt_guest_get_info(struct netmap_mem_d *nmd, uint64_t *size, { int error = 0; - NMA_LOCK(nmd); - error = nmd->ops->nmd_config(nmd); if (error) goto out; @@ -2518,7 +2464,6 @@ netmap_mem_pt_guest_get_info(struct netmap_mem_d *nmd, uint64_t *size, *id = nmd->nm_id; out: - NMA_UNLOCK(nmd); return error; } @@ -2556,21 +2501,19 @@ netmap_mem_pt_guest_finalize(struct netmap_mem_d *nmd) int i; int error = 0; - nmd->active++; - if (nmd->flags & NETMAP_MEM_FINALIZED) goto out; if (ptnmd->ptn_dev == NULL) { D("ptnetmap memdev not attached"); 
error = ENOMEM; - goto err; + goto out; } /* Map memory through ptnetmap-memdev BAR. */ error = nm_os_pt_memdev_iomap(ptnmd->ptn_dev, &ptnmd->nm_paddr, &ptnmd->nm_addr, &mem_size); if (error) - goto err; + goto out; /* Initialize the lut using the information contained in the * ptnetmap memory device. */ @@ -2605,11 +2548,16 @@ netmap_mem_pt_guest_finalize(struct netmap_mem_d *nmd) ptnmd->buf_lut.objsize = bufsize; nmd->nm_totalsize = (unsigned int)mem_size; + /* Initialize these fields as are needed by + * netmap_mem_bufsize(). + * XXX please improve this, why do we need this + * replication? maybe we nmd->pools[] should no be + * there for the guest allocator? */ + nmd->pools[NETMAP_BUF_POOL]._objsize = bufsize; + nmd->pools[NETMAP_BUF_POOL]._objtotal = nbuffers; + nmd->flags |= NETMAP_MEM_FINALIZED; out: - return 0; -err: - nmd->active--; return error; } @@ -2618,8 +2566,7 @@ netmap_mem_pt_guest_deref(struct netmap_mem_d *nmd) { struct netmap_mem_ptg *ptnmd = (struct netmap_mem_ptg *)nmd; - nmd->active--; - if (nmd->active <= 0 && + if (nmd->active == 1 && (nmd->flags & NETMAP_MEM_FINALIZED)) { nmd->flags &= ~NETMAP_MEM_FINALIZED; /* unmap ptnetmap-memdev memory */ @@ -2661,8 +2608,6 @@ netmap_mem_pt_guest_if_new(struct netmap_adapter *na, struct netmap_priv_d *priv struct mem_pt_if *ptif; struct netmap_if *nifp = NULL; - NMA_LOCK(na->nm_mem); - ptif = netmap_mem_pt_guest_ifp_lookup(na->nm_mem, na->ifp); if (ptif == NULL) { D("Error: interface %p is not in passthrough", na->ifp); @@ -2671,7 +2616,6 @@ netmap_mem_pt_guest_if_new(struct netmap_adapter *na, struct netmap_priv_d *priv nifp = (struct netmap_if *)((char *)(ptnmd->nm_addr) + ptif->nifp_offset); - NMA_UNLOCK(na->nm_mem); out: return nifp; } @@ -2681,12 +2625,10 @@ netmap_mem_pt_guest_if_delete(struct netmap_adapter *na, struct netmap_if *nifp) { struct mem_pt_if *ptif; - NMA_LOCK(na->nm_mem); ptif = netmap_mem_pt_guest_ifp_lookup(na->nm_mem, na->ifp); if (ptif == NULL) { D("Error: interface %p is not in 
passthrough", na->ifp); } - NMA_UNLOCK(na->nm_mem); } static int @@ -2697,8 +2639,6 @@ netmap_mem_pt_guest_rings_create(struct netmap_adapter *na) struct netmap_if *nifp; int i, error = -1; - NMA_LOCK(na->nm_mem); - ptif = netmap_mem_pt_guest_ifp_lookup(na->nm_mem, na->ifp); if (ptif == NULL) { D("Error: interface %p is not in passthrough", na->ifp); @@ -2709,14 +2649,14 @@ netmap_mem_pt_guest_rings_create(struct netmap_adapter *na) /* point each kring to the corresponding backend ring */ nifp = (struct netmap_if *)((char *)ptnmd->nm_addr + ptif->nifp_offset); for (i = 0; i <= na->num_tx_rings; i++) { - struct netmap_kring *kring = na->tx_rings + i; + struct netmap_kring *kring = na->tx_rings[i]; if (kring->ring) continue; kring->ring = (struct netmap_ring *) ((char *)nifp + nifp->ring_ofs[i]); } for (i = 0; i <= na->num_rx_rings; i++) { - struct netmap_kring *kring = na->rx_rings + i; + struct netmap_kring *kring = na->rx_rings[i]; if (kring->ring) continue; kring->ring = (struct netmap_ring *) @@ -2726,8 +2666,6 @@ netmap_mem_pt_guest_rings_create(struct netmap_adapter *na) error = 0; out: - NMA_UNLOCK(na->nm_mem); - return error; } diff --git a/sys/dev/netmap/netmap_mem2.h b/sys/dev/netmap/netmap_mem2.h index f0bee7a33fd53..977bf622862a0 100644 --- a/sys/dev/netmap/netmap_mem2.h +++ b/sys/dev/netmap/netmap_mem2.h @@ -137,12 +137,12 @@ void netmap_mem_if_delete(struct netmap_adapter *, struct netmap_if *); int netmap_mem_rings_create(struct netmap_adapter *); void netmap_mem_rings_delete(struct netmap_adapter *); int netmap_mem_deref(struct netmap_mem_d *, struct netmap_adapter *); -int netmap_mem2_get_pool_info(struct netmap_mem_d *, u_int, u_int *, u_int *); -int netmap_mem_get_info(struct netmap_mem_d *, uint64_t *size, u_int *memflags, uint16_t *id); +int netmap_mem2_get_pool_info(struct netmap_mem_d *, u_int, u_int *, u_int *); +int netmap_mem_get_info(struct netmap_mem_d *, uint64_t *size, + u_int *memflags, nm_memid_t *id); ssize_t 
netmap_mem_if_offset(struct netmap_mem_d *, const void *vaddr); struct netmap_mem_d* netmap_mem_private_new( u_int txr, u_int txd, u_int rxr, u_int rxd, u_int extra_bufs, u_int npipes, int* error); -void netmap_mem_delete(struct netmap_mem_d *); #define netmap_mem_get(d) __netmap_mem_get(d, __FUNCTION__, __LINE__) #define netmap_mem_put(d) __netmap_mem_put(d, __FUNCTION__, __LINE__) @@ -152,7 +152,7 @@ struct netmap_mem_d* netmap_mem_find(nm_memid_t); unsigned netmap_mem_bufsize(struct netmap_mem_d *nmd); #ifdef WITH_EXTMEM -struct netmap_mem_d* netmap_mem_ext_create(struct nmreq *, int *); +struct netmap_mem_d* netmap_mem_ext_create(uint64_t, struct nmreq_pools_info *, int *); #else /* !WITH_EXTMEM */ #define netmap_mem_ext_create(nmr, _perr) \ ({ int *perr = _perr; if (perr) *(perr) = EOPNOTSUPP; NULL; }) @@ -167,7 +167,8 @@ struct netmap_mem_d* netmap_mem_pt_guest_attach(struct ptnetmap_memdev *, uint16 int netmap_mem_pt_guest_ifp_del(struct netmap_mem_d *, struct ifnet *); #endif /* WITH_PTNETMAP_GUEST */ -int netmap_mem_pools_info_get(struct nmreq *, struct netmap_mem_d *); +int netmap_mem_pools_info_get(struct nmreq_pools_info *, + struct netmap_mem_d *); #define NETMAP_MEM_PRIVATE 0x2 /* allocator uses private address space */ #define NETMAP_MEM_IO 0x4 /* the underlying memory is mmapped I/O */ @@ -175,4 +176,14 @@ int netmap_mem_pools_info_get(struct nmreq *, struct netmap_mem_d *); uint32_t netmap_extra_alloc(struct netmap_adapter *, uint32_t *, uint32_t n); +#ifdef WITH_EXTMEM +#include <net/netmap_virt.h> +struct nm_os_extmem; /* opaque */ +struct nm_os_extmem *nm_os_extmem_create(unsigned long, struct nmreq_pools_info *, int *perror); +char *nm_os_extmem_nextpage(struct nm_os_extmem *); +int nm_os_extmem_nr_pages(struct nm_os_extmem *); +int nm_os_extmem_isequal(struct nm_os_extmem *, struct nm_os_extmem *); +void nm_os_extmem_delete(struct nm_os_extmem *); +#endif /* WITH_EXTMEM */ + #endif diff --git a/sys/dev/netmap/netmap_monitor.c 
b/sys/dev/netmap/netmap_monitor.c index e7cc05f5ab0f0..f6b7c93adc768 100644 --- a/sys/dev/netmap/netmap_monitor.c +++ b/sys/dev/netmap/netmap_monitor.c @@ -167,8 +167,8 @@ netmap_monitor_krings_create(struct netmap_adapter *na) if (error) return error; /* override the host rings callbacks */ - na->tx_rings[na->num_tx_rings].nm_sync = netmap_monitor_txsync; - na->rx_rings[na->num_rx_rings].nm_sync = netmap_monitor_rxsync; + na->tx_rings[na->num_tx_rings]->nm_sync = netmap_monitor_txsync; + na->rx_rings[na->num_rx_rings]->nm_sync = netmap_monitor_rxsync; return 0; } @@ -390,7 +390,7 @@ netmap_monitor_stop(struct netmap_adapter *na) u_int i; for (i = 0; i < nma_get_nrings(na, t) + 1; i++) { - struct netmap_kring *kring = &NMR(na, t)[i]; + struct netmap_kring *kring = NMR(na, t)[i]; struct netmap_kring *zkring; u_int j; @@ -456,7 +456,7 @@ netmap_monitor_reg_common(struct netmap_adapter *na, int onoff, int zmon) } for_rx_tx(t) { for (i = 0; i < nma_get_nrings(na, t) + 1; i++) { - mkring = &NMR(na, t)[i]; + mkring = NMR(na, t)[i]; if (!nm_kring_pending_on(mkring)) continue; mkring->nr_mode = NKR_NETMAP_ON; @@ -466,7 +466,7 @@ netmap_monitor_reg_common(struct netmap_adapter *na, int onoff, int zmon) if (i > nma_get_nrings(pna, s)) continue; if (mna->flags & nm_txrx2flag(s)) { - kring = &NMR(pna, s)[i]; + kring = NMR(pna, s)[i]; netmap_monitor_add(mkring, kring, zmon); } } @@ -478,7 +478,7 @@ netmap_monitor_reg_common(struct netmap_adapter *na, int onoff, int zmon) na->na_flags &= ~NAF_NETMAP_ON; for_rx_tx(t) { for (i = 0; i < nma_get_nrings(na, t) + 1; i++) { - mkring = &NMR(na, t)[i]; + mkring = NMR(na, t)[i]; if (!nm_kring_pending_off(mkring)) continue; mkring->nr_mode = NKR_NETMAP_OFF; @@ -494,7 +494,7 @@ netmap_monitor_reg_common(struct netmap_adapter *na, int onoff, int zmon) if (i > nma_get_nrings(pna, s)) continue; if (mna->flags & nm_txrx2flag(s)) { - kring = &NMR(pna, s)[i]; + kring = NMR(pna, s)[i]; netmap_monitor_del(mkring, kring); } } @@ -824,38 +824,41 @@ 
netmap_monitor_dtor(struct netmap_adapter *na) } -/* check if nmr is a request for a monitor adapter that we can satisfy */ +/* check if req is a request for a monitor adapter that we can satisfy */ int -netmap_get_monitor_na(struct nmreq *nmr, struct netmap_adapter **na, - struct netmap_mem_d *nmd, int create) +netmap_get_monitor_na(struct nmreq_header *hdr, struct netmap_adapter **na, + struct netmap_mem_d *nmd, int create) { - struct nmreq pnmr; + struct nmreq_register *req = (struct nmreq_register *)hdr->nr_body; + struct nmreq_register preq; struct netmap_adapter *pna; /* parent adapter */ struct netmap_monitor_adapter *mna; struct ifnet *ifp = NULL; int error; - int zcopy = (nmr->nr_flags & NR_ZCOPY_MON); + int zcopy = (req->nr_flags & NR_ZCOPY_MON); char monsuff[10] = ""; if (zcopy) { - nmr->nr_flags |= (NR_MONITOR_TX | NR_MONITOR_RX); + req->nr_flags |= (NR_MONITOR_TX | NR_MONITOR_RX); } - if ((nmr->nr_flags & (NR_MONITOR_TX | NR_MONITOR_RX)) == 0) { + if ((req->nr_flags & (NR_MONITOR_TX | NR_MONITOR_RX)) == 0) { ND("not a monitor"); return 0; } /* this is a request for a monitor adapter */ - ND("flags %x", nmr->nr_flags); + ND("flags %lx", req->nr_flags); - /* first, try to find the adapter that we want to monitor - * We use the same nmr, after we have turned off the monitor flags. + /* First, try to find the adapter that we want to monitor. + * We use the same req, after we have turned off the monitor flags. * In this way we can potentially monitor everything netmap understands, * except other monitors. 
*/ - memcpy(&pnmr, nmr, sizeof(pnmr)); - pnmr.nr_flags &= ~(NR_MONITOR_TX | NR_MONITOR_RX | NR_ZCOPY_MON); - error = netmap_get_na(&pnmr, &pna, &ifp, nmd, create); + memcpy(&preq, req, sizeof(preq)); + preq.nr_flags &= ~(NR_MONITOR_TX | NR_MONITOR_RX | NR_ZCOPY_MON); + hdr->nr_body = (uint64_t)&preq; + error = netmap_get_na(hdr, &pna, &ifp, nmd, create); + hdr->nr_body = (uint64_t)req; if (error) { D("parent lookup failed: %d", error); return error; @@ -881,7 +884,8 @@ netmap_get_monitor_na(struct nmreq *nmr, struct netmap_adapter **na, mna->priv.np_na = pna; /* grab all the rings we need in the parent */ - error = netmap_interp_ringid(&mna->priv, nmr->nr_ringid, nmr->nr_flags); + error = netmap_interp_ringid(&mna->priv, req->nr_mode, req->nr_ringid, + req->nr_flags); if (error) { D("ringid error"); goto free_out; @@ -892,8 +896,8 @@ netmap_get_monitor_na(struct nmreq *nmr, struct netmap_adapter **na, snprintf(mna->up.name, sizeof(mna->up.name), "%s%s/%s%s%s", pna->name, monsuff, zcopy ? "z" : "", - (nmr->nr_flags & NR_MONITOR_RX) ? "r" : "", - (nmr->nr_flags & NR_MONITOR_TX) ? "t" : ""); + (req->nr_flags & NR_MONITOR_RX) ? "r" : "", + (req->nr_flags & NR_MONITOR_TX) ? 
"t" : ""); /* the monitor supports the host rings iff the parent does */ mna->up.na_flags |= (pna->na_flags & NAF_HOST_RINGS); @@ -913,10 +917,10 @@ netmap_get_monitor_na(struct nmreq *nmr, struct netmap_adapter **na, * the parent rings, but the user may ask for a different * number */ - mna->up.num_tx_desc = nmr->nr_tx_slots; + mna->up.num_tx_desc = req->nr_tx_slots; nm_bound_var(&mna->up.num_tx_desc, pna->num_tx_desc, 1, NM_MONITOR_MAXSLOTS, NULL); - mna->up.num_rx_desc = nmr->nr_rx_slots; + mna->up.num_rx_desc = req->nr_rx_slots; nm_bound_var(&mna->up.num_rx_desc, pna->num_rx_desc, 1, NM_MONITOR_MAXSLOTS, NULL); if (zcopy) { @@ -950,7 +954,7 @@ netmap_get_monitor_na(struct nmreq *nmr, struct netmap_adapter **na, } /* remember the traffic directions we have to monitor */ - mna->flags = (nmr->nr_flags & (NR_MONITOR_TX | NR_MONITOR_RX | NR_ZCOPY_MON)); + mna->flags = (req->nr_flags & (NR_MONITOR_TX | NR_MONITOR_RX | NR_ZCOPY_MON)); *na = &mna->up; netmap_adapter_get(*na); diff --git a/sys/dev/netmap/netmap_pipe.c b/sys/dev/netmap/netmap_pipe.c index 48dde5382f77a..3b0fb869231b2 100644 --- a/sys/dev/netmap/netmap_pipe.c +++ b/sys/dev/netmap/netmap_pipe.c @@ -77,6 +77,7 @@ #ifdef WITH_PIPES #define NM_PIPE_MAXSLOTS 4096 +#define NM_PIPE_MAXRINGS 256 static int netmap_default_pipes = 0; /* ignored, kept for compatibility */ SYSBEGIN(vars_pipes); @@ -129,14 +130,19 @@ netmap_pipe_dealloc(struct netmap_adapter *na) /* find a pipe endpoint with the given id among the parent's pipes */ static struct netmap_pipe_adapter * -netmap_pipe_find(struct netmap_adapter *parent, u_int pipe_id) +netmap_pipe_find(struct netmap_adapter *parent, const char *pipe_id) { int i; struct netmap_pipe_adapter *na; for (i = 0; i < parent->na_next_pipe; i++) { + const char *na_pipe_id; na = parent->na_pipes[i]; - if (na->id == pipe_id) { + na_pipe_id = strrchr(na->up.name, + na->role == NM_PIPE_ROLE_MASTER ? 
'{' : '}'); + KASSERT(na_pipe_id != NULL, ("Invalid pipe name")); + ++na_pipe_id; + if (!strcmp(na_pipe_id, pipe_id)) { return na; } } @@ -179,63 +185,46 @@ int netmap_pipe_txsync(struct netmap_kring *txkring, int flags) { struct netmap_kring *rxkring = txkring->pipe; - u_int limit; /* slots to transfer */ - u_int j, k, lim_tx = txkring->nkr_num_slots - 1, - lim_rx = rxkring->nkr_num_slots - 1; - int m, busy; + u_int k, lim = txkring->nkr_num_slots - 1; + int m; /* slots to transfer */ struct netmap_ring *txring = txkring->ring, *rxring = rxkring->ring; ND("%p: %s %x -> %s", txkring, txkring->name, flags, rxkring->name); - ND(2, "before: hwcur %d hwtail %d cur %d head %d tail %d", txkring->nr_hwcur, txkring->nr_hwtail, + ND(20, "TX before: hwcur %d hwtail %d cur %d head %d tail %d", + txkring->nr_hwcur, txkring->nr_hwtail, txkring->rcur, txkring->rhead, txkring->rtail); - j = rxkring->nr_hwtail; /* RX */ - k = txkring->nr_hwcur; /* TX */ m = txkring->rhead - txkring->nr_hwcur; /* new slots */ if (m < 0) m += txkring->nkr_num_slots; - limit = m; - m = lim_rx; /* max avail space on destination */ - busy = j - rxkring->nr_hwcur; /* busy slots */ - if (busy < 0) - busy += rxkring->nkr_num_slots; - m -= busy; /* subtract busy slots */ - ND(2, "m %d limit %d", m, limit); - if (m < limit) - limit = m; - if (limit == 0) { - /* either the rxring is full, or nothing to send */ + if (m == 0) { + /* nothing to send */ return 0; } - while (limit-- > 0) { - struct netmap_slot *rs = &rxring->slot[j]; + for (k = txkring->nr_hwcur; m; m--, k = nm_next(k, lim)) { + struct netmap_slot *rs = &rxring->slot[k]; struct netmap_slot *ts = &txring->slot[k]; - struct netmap_slot tmp; - __builtin_prefetch(ts + 1); + rs->len = ts->len; + rs->ptr = ts->ptr; - /* swap the slots and report the buffer change */ - tmp = *rs; - tmp.flags |= NS_BUF_CHANGED; - *rs = *ts; - rs->flags |= NS_BUF_CHANGED; - *ts = tmp; - - j = nm_next(j, lim_rx); - k = nm_next(k, lim_tx); + if (ts->flags & NS_BUF_CHANGED) 
{ + rs->buf_idx = ts->buf_idx; + rs->flags |= NS_BUF_CHANGED; + ts->flags &= ~NS_BUF_CHANGED; + } } mb(); /* make sure the slots are updated before publishing them */ - rxkring->nr_hwtail = j; + rxkring->nr_hwtail = k; txkring->nr_hwcur = k; - txkring->nr_hwtail = nm_prev(k, lim_tx); - ND(2, "after: hwcur %d hwtail %d cur %d head %d tail %d j %d", txkring->nr_hwcur, txkring->nr_hwtail, - txkring->rcur, txkring->rhead, txkring->rtail, j); + ND(20, "TX after : hwcur %d hwtail %d cur %d head %d tail %d k %d", + txkring->nr_hwcur, txkring->nr_hwtail, + txkring->rcur, txkring->rhead, txkring->rtail, k); - mb(); /* make sure rxkring->nr_hwtail is updated before notifying */ rxkring->nm_notify(rxkring, 0); return 0; @@ -245,20 +234,46 @@ int netmap_pipe_rxsync(struct netmap_kring *rxkring, int flags) { struct netmap_kring *txkring = rxkring->pipe; - uint32_t oldhwcur = rxkring->nr_hwcur; + u_int k, lim = rxkring->nkr_num_slots - 1; + int m; /* slots to release */ + struct netmap_ring *txring = txkring->ring, *rxring = rxkring->ring; - ND("%s %x <- %s", rxkring->name, flags, txkring->name); - rxkring->nr_hwcur = rxkring->rhead; /* recover user-relased slots */ - ND(5, "hwcur %d hwtail %d cur %d head %d tail %d", rxkring->nr_hwcur, rxkring->nr_hwtail, + ND("%p: %s %x -> %s", txkring, txkring->name, flags, rxkring->name); + ND(20, "RX before: hwcur %d hwtail %d cur %d head %d tail %d", + rxkring->nr_hwcur, rxkring->nr_hwtail, rxkring->rcur, rxkring->rhead, rxkring->rtail); - mb(); /* paired with the first mb() in txsync */ - if (oldhwcur != rxkring->nr_hwcur) { - /* we have released some slots, notify the other end */ - mb(); /* make sure nr_hwcur is updated before notifying */ - txkring->nm_notify(txkring, 0); + m = rxkring->rhead - rxkring->nr_hwcur; /* released slots */ + if (m < 0) + m += rxkring->nkr_num_slots; + + if (m == 0) { + /* nothing to release */ + return 0; } - return 0; + + for (k = rxkring->nr_hwcur; m; m--, k = nm_next(k, lim)) { + struct netmap_slot *rs = 
&rxring->slot[k]; + struct netmap_slot *ts = &txring->slot[k]; + + if (rs->flags & NS_BUF_CHANGED) { + /* copy the slot and report the buffer change */ + *ts = *rs; + rs->flags &= ~NS_BUF_CHANGED; + } + } + + mb(); /* make sure the slots are updated before publishing them */ + txkring->nr_hwtail = nm_prev(k, lim); + rxkring->nr_hwcur = k; + + ND(20, "RX after : hwcur %d hwtail %d cur %d head %d tail %d k %d", + rxkring->nr_hwcur, rxkring->nr_hwtail, + rxkring->rcur, rxkring->rhead, rxkring->rtail, k); + + txkring->nm_notify(txkring, 0); + + return 0; } /* Pipe endpoints are created and destroyed together, so that endopoints do not @@ -335,8 +350,10 @@ netmap_pipe_krings_create(struct netmap_adapter *na) for_rx_tx(t) { enum txrx r = nm_txrx_swap(t); /* swap NR_TX <-> NR_RX */ for (i = 0; i < nma_get_nrings(na, t); i++) { - NMR(na, t)[i].pipe = NMR(ona, r) + i; - NMR(ona, r)[i].pipe = NMR(na, t) + i; + NMR(na, t)[i]->pipe = NMR(ona, r)[i]; + NMR(ona, r)[i]->pipe = NMR(na, t)[i]; + /* mark all peer-adapter rings as fake */ + NMR(ona, r)[i]->nr_kflags |= NKR_FAKERING; } } @@ -380,7 +397,7 @@ err: * usr1 --> e1 e2 <-- usr2 * * and we are either e1 or e2. Add a ref from the - * other end and hide our rings. + * other end. */ static int netmap_pipe_reg(struct netmap_adapter *na, int onoff) @@ -395,7 +412,7 @@ netmap_pipe_reg(struct netmap_adapter *na, int onoff) if (onoff) { for_rx_tx(t) { for (i = 0; i < nma_get_nrings(na, t); i++) { - struct netmap_kring *kring = &NMR(na, t)[i]; + struct netmap_kring *kring = NMR(na, t)[i]; if (nm_kring_pending_on(kring)) { /* mark the peer ring as needed */ @@ -404,7 +421,10 @@ netmap_pipe_reg(struct netmap_adapter *na, int onoff) } } - /* create all missing needed rings on the other end */ + /* create all missing needed rings on the other end. + * Either our end, or the other, has been marked as + * fake, so the allocation will not be done twice. 
+ */ error = netmap_mem_rings_create(ona); if (error) return error; @@ -412,9 +432,32 @@ netmap_pipe_reg(struct netmap_adapter *na, int onoff) /* In case of no error we put our rings in netmap mode */ for_rx_tx(t) { for (i = 0; i < nma_get_nrings(na, t) + 1; i++) { - struct netmap_kring *kring = &NMR(na, t)[i]; - + struct netmap_kring *kring = NMR(na, t)[i]; if (nm_kring_pending_on(kring)) { + struct netmap_kring *sring, *dring; + + /* copy the buffers from the non-fake ring */ + if (kring->nr_kflags & NKR_FAKERING) { + sring = kring->pipe; + dring = kring; + } else { + sring = kring; + dring = kring->pipe; + } + memcpy(dring->ring->slot, + sring->ring->slot, + sizeof(struct netmap_slot) * + sring->nkr_num_slots); + /* mark both rings as fake and needed, + * so that buffers will not be + * deleted by the standard machinery + * (we will delete them by ourselves in + * netmap_pipe_krings_delete) + */ + sring->nr_kflags |= + (NKR_FAKERING | NKR_NEEDRING); + dring->nr_kflags |= + (NKR_FAKERING | NKR_NEEDRING); kring->nr_mode = NKR_NETMAP_ON; } } @@ -426,21 +469,13 @@ netmap_pipe_reg(struct netmap_adapter *na, int onoff) na->na_flags &= ~NAF_NETMAP_ON; for_rx_tx(t) { for (i = 0; i < nma_get_nrings(na, t) + 1; i++) { - struct netmap_kring *kring = &NMR(na, t)[i]; + struct netmap_kring *kring = NMR(na, t)[i]; if (nm_kring_pending_off(kring)) { kring->nr_mode = NKR_NETMAP_OFF; - /* mark the peer ring as no longer needed by us - * (it may still be kept if sombody else is using it) - */ - if (kring->pipe) { - kring->pipe->nr_kflags &= ~NKR_NEEDRING; - } } } } - /* delete all the peer rings that are no longer needed */ - netmap_mem_rings_delete(ona); } if (na->active_fds) { @@ -482,29 +517,73 @@ netmap_pipe_reg(struct netmap_adapter *na, int onoff) * and we are either e1 or e2. * * In the former case we have to also delete the krings of e2; - * in the latter case we do nothing (note that our krings - * have already been hidden in the unregister callback). 
+ * in the latter case we do nothing. */ static void netmap_pipe_krings_delete(struct netmap_adapter *na) { struct netmap_pipe_adapter *pna = (struct netmap_pipe_adapter *)na; - struct netmap_adapter *ona; /* na of the other end */ + struct netmap_adapter *sna, *ona; /* na of the other end */ + enum txrx t; + int i; if (!pna->peer_ref) { ND("%p: case 2, kept alive by peer", na); return; } + ona = &pna->peer->up; /* case 1) above */ ND("%p: case 1, deleting everything", na); + /* To avoid double-frees we zero-out all the buffers in the kernel part + * of each ring. The reason is this: If the user is behaving correctly, + * all buffers are found in exactly one slot in the userspace part of + * some ring. If the user is not behaving correctly, we cannot release + * buffers cleanly anyway. In the latter case, the allocator will + * return to a clean state only when all its users will close. + */ + sna = na; +cleanup: + for_rx_tx(t) { + for (i = 0; i < nma_get_nrings(sna, t) + 1; i++) { + struct netmap_kring *kring = NMR(sna, t)[i]; + struct netmap_ring *ring = kring->ring; + uint32_t j, lim = kring->nkr_num_slots - 1; + + ND("%s ring %p hwtail %u hwcur %u", + kring->name, ring, kring->nr_hwtail, kring->nr_hwcur); + + if (ring == NULL) + continue; + + if (kring->nr_hwtail == kring->nr_hwcur) + ring->slot[kring->nr_hwtail].buf_idx = 0; + + for (j = nm_next(kring->nr_hwtail, lim); + j != kring->nr_hwcur; + j = nm_next(j, lim)) + { + ND("%s[%d] %u", kring->name, j, ring->slot[j].buf_idx); + ring->slot[j].buf_idx = 0; + } + kring->nr_kflags &= ~(NKR_FAKERING | NKR_NEEDRING); + } + + } + if (sna != ona && ona->tx_rings) { + sna = ona; + goto cleanup; + } + + netmap_mem_rings_delete(na); netmap_krings_delete(na); /* also zeroes tx_rings etc. 
*/ - ona = &pna->peer->up; + if (ona->tx_rings == NULL) { /* already deleted, we must be on an * cleanup-after-error path */ return; } + netmap_mem_rings_delete(ona); netmap_krings_delete(ona); } @@ -520,7 +599,7 @@ netmap_pipe_dtor(struct netmap_adapter *na) pna->peer_ref = 0; netmap_adapter_put(&pna->peer->up); } - if (pna->role == NR_REG_PIPE_MASTER) + if (pna->role == NM_PIPE_ROLE_MASTER) netmap_pipe_remove(pna->parent, pna); if (pna->parent_ifp) if_rele(pna->parent_ifp); @@ -529,34 +608,55 @@ netmap_pipe_dtor(struct netmap_adapter *na) } int -netmap_get_pipe_na(struct nmreq *nmr, struct netmap_adapter **na, +netmap_get_pipe_na(struct nmreq_header *hdr, struct netmap_adapter **na, struct netmap_mem_d *nmd, int create) { - struct nmreq pnmr; + struct nmreq_register *req = (struct nmreq_register *)hdr->nr_body; struct netmap_adapter *pna; /* parent adapter */ - struct netmap_pipe_adapter *mna, *sna, *req; + struct netmap_pipe_adapter *mna, *sna, *reqna; struct ifnet *ifp = NULL; - u_int pipe_id; - int role = nmr->nr_flags & NR_REG_MASK; + const char *pipe_id = NULL; + int role = 0; int error, retries = 0; + char *cbra; - ND("flags %x", nmr->nr_flags); + /* Try to parse the pipe syntax 'xx{yy' or 'xx}yy'. */ + cbra = strrchr(hdr->nr_name, '{'); + if (cbra != NULL) { + role = NM_PIPE_ROLE_MASTER; + } else { + cbra = strrchr(hdr->nr_name, '}'); + if (cbra != NULL) { + role = NM_PIPE_ROLE_SLAVE; + } else { + ND("not a pipe"); + return 0; + } + } + pipe_id = cbra + 1; + if (*pipe_id == '\0' || cbra == hdr->nr_name) { + /* Bracket is the last character, so pipe name is missing; + * or bracket is the first character, so base port name + * is missing. */ + return EINVAL; + } - if (role != NR_REG_PIPE_MASTER && role != NR_REG_PIPE_SLAVE) { - ND("not a pipe"); - return 0; + if (req->nr_mode != NR_REG_ALL_NIC && req->nr_mode != NR_REG_ONE_NIC) { + /* We only accept modes involving hardware rings. 
*/ + return EINVAL; } - role = nmr->nr_flags & NR_REG_MASK; /* first, try to find the parent adapter */ - bzero(&pnmr, sizeof(pnmr)); - memcpy(&pnmr.nr_name, nmr->nr_name, IFNAMSIZ); - /* pass to parent the requested number of pipes */ - pnmr.nr_arg1 = nmr->nr_arg1; for (;;) { + char nr_name_orig[NETMAP_REQ_IFNAMSIZ]; int create_error; - error = netmap_get_na(&pnmr, &pna, &ifp, nmd, create); + /* Temporarily remove the pipe suffix. */ + strncpy(nr_name_orig, hdr->nr_name, sizeof(nr_name_orig)); + *cbra = '\0'; + error = netmap_get_na(hdr, &pna, &ifp, nmd, create); + /* Restore the pipe suffix. */ + strncpy(hdr->nr_name, nr_name_orig, sizeof(hdr->nr_name)); if (!error) break; if (error != ENXIO || retries++) { @@ -565,9 +665,11 @@ netmap_get_pipe_na(struct nmreq *nmr, struct netmap_adapter **na, } ND("try to create a persistent vale port"); /* create a persistent vale port and try again */ + *cbra = '\0'; NMG_UNLOCK(); - create_error = netmap_vi_create(&pnmr, 1 /* autodelete */); + create_error = netmap_vi_create(hdr, 1 /* autodelete */); NMG_LOCK(); + strncpy(hdr->nr_name, nr_name_orig, sizeof(hdr->nr_name)); if (create_error && create_error != EEXIST) { if (create_error != EOPNOTSUPP) { D("failed to create a persistent vale port: %d", create_error); @@ -583,16 +685,15 @@ netmap_get_pipe_na(struct nmreq *nmr, struct netmap_adapter **na, } /* next, lookup the pipe id in the parent list */ - req = NULL; - pipe_id = nmr->nr_ringid & NETMAP_RING_MASK; + reqna = NULL; mna = netmap_pipe_find(pna, pipe_id); if (mna) { if (mna->role == role) { - ND("found %d directly at %d", pipe_id, mna->parent_slot); - req = mna; + ND("found %s directly at %d", pipe_id, mna->parent_slot); + reqna = mna; } else { - ND("found %d indirectly at %d", pipe_id, mna->parent_slot); - req = mna->peer; + ND("found %s indirectly at %d", pipe_id, mna->parent_slot); + reqna = mna->peer; } /* the pipe we have found already holds a ref to the parent, * so we need to drop the one we got from 
netmap_get_na() @@ -600,7 +701,7 @@ netmap_get_pipe_na(struct nmreq *nmr, struct netmap_adapter **na, netmap_unget_na(pna, ifp); goto found; } - ND("pipe %d not found, create %d", pipe_id, create); + ND("pipe %s not found, create %d", pipe_id, create); if (!create) { error = ENODEV; goto put_out; @@ -614,10 +715,9 @@ netmap_get_pipe_na(struct nmreq *nmr, struct netmap_adapter **na, error = ENOMEM; goto put_out; } - snprintf(mna->up.name, sizeof(mna->up.name), "%s{%d", pna->name, pipe_id); + snprintf(mna->up.name, sizeof(mna->up.name), "%s{%s", pna->name, pipe_id); - mna->id = pipe_id; - mna->role = NR_REG_PIPE_MASTER; + mna->role = NM_PIPE_ROLE_MASTER; mna->parent = pna; mna->parent_ifp = ifp; @@ -631,12 +731,16 @@ netmap_get_pipe_na(struct nmreq *nmr, struct netmap_adapter **na, mna->up.na_flags |= NAF_MEM_OWNER; mna->up.na_lut = pna->na_lut; - mna->up.num_tx_rings = 1; - mna->up.num_rx_rings = 1; - mna->up.num_tx_desc = nmr->nr_tx_slots; + mna->up.num_tx_rings = req->nr_tx_rings; + nm_bound_var(&mna->up.num_tx_rings, 1, + 1, NM_PIPE_MAXRINGS, NULL); + mna->up.num_rx_rings = req->nr_rx_rings; + nm_bound_var(&mna->up.num_rx_rings, 1, + 1, NM_PIPE_MAXRINGS, NULL); + mna->up.num_tx_desc = req->nr_tx_slots; nm_bound_var(&mna->up.num_tx_desc, pna->num_tx_desc, 1, NM_PIPE_MAXSLOTS, NULL); - mna->up.num_rx_desc = nmr->nr_rx_slots; + mna->up.num_rx_desc = req->nr_rx_slots; nm_bound_var(&mna->up.num_rx_desc, pna->num_rx_desc, 1, NM_PIPE_MAXSLOTS, NULL); error = netmap_attach_common(&mna->up); @@ -656,8 +760,11 @@ netmap_get_pipe_na(struct nmreq *nmr, struct netmap_adapter **na, /* most fields are the same, copy from master and then fix */ *sna = *mna; sna->up.nm_mem = netmap_mem_get(mna->up.nm_mem); - snprintf(sna->up.name, sizeof(sna->up.name), "%s}%d", pna->name, pipe_id); - sna->role = NR_REG_PIPE_SLAVE; + /* swap the number of tx/rx rings */ + sna->up.num_tx_rings = mna->up.num_rx_rings; + sna->up.num_rx_rings = mna->up.num_tx_rings; + snprintf(sna->up.name, 
sizeof(sna->up.name), "%s}%s", pna->name, pipe_id); + sna->role = NM_PIPE_ROLE_SLAVE; error = netmap_attach_common(&sna->up); if (error) goto free_sna; @@ -674,21 +781,21 @@ netmap_get_pipe_na(struct nmreq *nmr, struct netmap_adapter **na, if (ifp) if_ref(ifp); - if (role == NR_REG_PIPE_MASTER) { - req = mna; + if (role == NM_PIPE_ROLE_MASTER) { + reqna = mna; mna->peer_ref = 1; netmap_adapter_get(&sna->up); } else { - req = sna; + reqna = sna; sna->peer_ref = 1; netmap_adapter_get(&mna->up); } ND("created master %p and slave %p", mna, sna); found: - ND("pipe %d %s at %p", pipe_id, - (req->role == NR_REG_PIPE_MASTER ? "master" : "slave"), req); - *na = &req->up; + ND("pipe %s %s at %p", pipe_id, + (reqna->role == NM_PIPE_ROLE_MASTER ? "master" : "slave"), reqna); + *na = &reqna->up; netmap_adapter_get(*na); /* keep the reference to the parent. diff --git a/sys/dev/netmap/netmap_pt.c b/sys/dev/netmap/netmap_pt.c index edb49dc504acd..cfa32b0bcf5f4 100644 --- a/sys/dev/netmap/netmap_pt.c +++ b/sys/dev/netmap/netmap_pt.c @@ -639,9 +639,9 @@ static struct netmap_kring * ptnetmap_kring(struct netmap_pt_host_adapter *pth_na, int k) { if (k < pth_na->up.num_tx_rings) { - return pth_na->up.tx_rings + k; + return pth_na->up.tx_rings[k]; } - return pth_na->up.rx_rings + k - pth_na->up.num_tx_rings; + return pth_na->up.rx_rings[k - pth_na->up.num_tx_rings]; } static int @@ -676,8 +676,19 @@ ptnetmap_create_kctxs(struct netmap_pt_host_adapter *pth_na, struct nm_kctx_cfg nmk_cfg; unsigned int num_rings; uint8_t *cfg_entries = (uint8_t *)(cfg + 1); + unsigned int expected_cfgtype = 0; int k; +#if defined(__FreeBSD__) + expected_cfgtype = PTNETMAP_CFGTYPE_BHYVE; +#elif defined(linux) + expected_cfgtype = PTNETMAP_CFGTYPE_QEMU; +#endif + if (cfg->cfgtype != expected_cfgtype) { + D("Unsupported cfgtype %u", cfg->cfgtype); + return EINVAL; + } + num_rings = pth_na->up.num_tx_rings + pth_na->up.num_rx_rings; @@ -695,7 +706,7 @@ ptnetmap_create_kctxs(struct netmap_pt_host_adapter 
*pth_na, } ptns->kctxs[k] = nm_os_kctx_create(&nmk_cfg, - cfg->cfgtype, cfg_entries + k * cfg->entry_size); + cfg_entries + k * cfg->entry_size); if (ptns->kctxs[k] == NULL) { goto err; } @@ -761,34 +772,6 @@ ptnetmap_stop_kctx_workers(struct netmap_pt_host_adapter *pth_na) } } -static struct ptnetmap_cfg * -ptnetmap_read_cfg(struct nmreq *nmr) -{ - uintptr_t *nmr_ptncfg = (uintptr_t *)&nmr->nr_arg1; - struct ptnetmap_cfg *cfg; - struct ptnetmap_cfg tmp; - size_t cfglen; - - if (copyin((const void *)*nmr_ptncfg, &tmp, sizeof(tmp))) { - D("Partial copyin() failed"); - return NULL; - } - - cfglen = sizeof(tmp) + tmp.num_rings * tmp.entry_size; - cfg = nm_os_malloc(cfglen); - if (!cfg) { - return NULL; - } - - if (copyin((const void *)*nmr_ptncfg, cfg, cfglen)) { - D("Full copyin() failed"); - nm_os_free(cfg); - return NULL; - } - - return cfg; -} - static int nm_unused_notify(struct netmap_kring *, int); static int nm_pt_host_notify(struct netmap_kring *, int); @@ -864,14 +847,14 @@ ptnetmap_create(struct netmap_pt_host_adapter *pth_na, } for (i = 0; i < pth_na->parent->num_rx_rings; i++) { - pth_na->up.rx_rings[i].save_notify = - pth_na->up.rx_rings[i].nm_notify; - pth_na->up.rx_rings[i].nm_notify = nm_pt_host_notify; + pth_na->up.rx_rings[i]->save_notify = + pth_na->up.rx_rings[i]->nm_notify; + pth_na->up.rx_rings[i]->nm_notify = nm_pt_host_notify; } for (i = 0; i < pth_na->parent->num_tx_rings; i++) { - pth_na->up.tx_rings[i].save_notify = - pth_na->up.tx_rings[i].nm_notify; - pth_na->up.tx_rings[i].nm_notify = nm_pt_host_notify; + pth_na->up.tx_rings[i]->save_notify = + pth_na->up.tx_rings[i]->nm_notify; + pth_na->up.tx_rings[i]->nm_notify = nm_pt_host_notify; } #ifdef RATE @@ -912,14 +895,14 @@ ptnetmap_delete(struct netmap_pt_host_adapter *pth_na) pth_na->parent->na_flags = pth_na->parent_na_flags; for (i = 0; i < pth_na->parent->num_rx_rings; i++) { - pth_na->up.rx_rings[i].nm_notify = - pth_na->up.rx_rings[i].save_notify; - pth_na->up.rx_rings[i].save_notify 
= NULL; + pth_na->up.rx_rings[i]->nm_notify = + pth_na->up.rx_rings[i]->save_notify; + pth_na->up.rx_rings[i]->save_notify = NULL; } for (i = 0; i < pth_na->parent->num_tx_rings; i++) { - pth_na->up.tx_rings[i].nm_notify = - pth_na->up.tx_rings[i].save_notify; - pth_na->up.tx_rings[i].save_notify = NULL; + pth_na->up.tx_rings[i]->nm_notify = + pth_na->up.tx_rings[i]->save_notify; + pth_na->up.tx_rings[i]->save_notify = NULL; } /* Destroy kernel contexts. */ @@ -941,66 +924,55 @@ ptnetmap_delete(struct netmap_pt_host_adapter *pth_na) /* * Called by netmap_ioctl(). - * Operation is indicated in nmr->nr_cmd. + * Operation is indicated in nr_name. * * Called without NMG_LOCK. */ int -ptnetmap_ctl(struct nmreq *nmr, struct netmap_adapter *na) +ptnetmap_ctl(const char *nr_name, int create, struct netmap_adapter *na) { - struct netmap_pt_host_adapter *pth_na; - struct ptnetmap_cfg *cfg; - char *name; - int cmd, error = 0; - - name = nmr->nr_name; - cmd = nmr->nr_cmd; - - DBG(D("name: %s", name)); + struct netmap_pt_host_adapter *pth_na; + struct ptnetmap_cfg *cfg = NULL; + int error = 0; - if (!nm_ptnetmap_host_on(na)) { - D("ERROR Netmap adapter %p is not a ptnetmap host adapter", na); - error = ENXIO; - goto done; - } - pth_na = (struct netmap_pt_host_adapter *)na; + DBG(D("name: %s", nr_name)); - NMG_LOCK(); - switch (cmd) { - case NETMAP_PT_HOST_CREATE: - /* Read hypervisor configuration from userspace. */ - cfg = ptnetmap_read_cfg(nmr); - if (!cfg) - break; - /* Create ptnetmap state (kctxs, ...) and switch parent - * adapter to ptnetmap mode. */ - error = ptnetmap_create(pth_na, cfg); - nm_os_free(cfg); - if (error) - break; - /* Start kthreads. */ - error = ptnetmap_start_kctx_workers(pth_na); - if (error) - ptnetmap_delete(pth_na); - break; - - case NETMAP_PT_HOST_DELETE: - /* Stop kthreads. */ - ptnetmap_stop_kctx_workers(pth_na); - /* Switch parent adapter back to normal mode and destroy - * ptnetmap state (kthreads, ...). 
*/ - ptnetmap_delete(pth_na); - break; + if (!nm_ptnetmap_host_on(na)) { + D("ERROR Netmap adapter %p is not a ptnetmap host adapter", + na); + return ENXIO; + } + pth_na = (struct netmap_pt_host_adapter *)na; - default: - D("ERROR invalid cmd (nmr->nr_cmd) (0x%x)", cmd); - error = EINVAL; - break; - } - NMG_UNLOCK(); + NMG_LOCK(); + if (create) { + /* Read hypervisor configuration from userspace. */ + /* TODO */ + if (!cfg) { + goto out; + } + /* Create ptnetmap state (kctxs, ...) and switch parent + * adapter to ptnetmap mode. */ + error = ptnetmap_create(pth_na, cfg); + nm_os_free(cfg); + if (error) { + goto out; + } + /* Start kthreads. */ + error = ptnetmap_start_kctx_workers(pth_na); + if (error) + ptnetmap_delete(pth_na); + } else { + /* Stop kthreads. */ + ptnetmap_stop_kctx_workers(pth_na); + /* Switch parent adapter back to normal mode and destroy + * ptnetmap state (kthreads, ...). */ + ptnetmap_delete(pth_na); + } +out: + NMG_UNLOCK(); -done: - return error; + return error; } /* nm_notify callbacks for ptnetmap */ @@ -1048,8 +1020,7 @@ nm_unused_notify(struct netmap_kring *kring, int flags) /* nm_config callback for bwrap */ static int -nm_pt_host_config(struct netmap_adapter *na, u_int *txr, u_int *txd, - u_int *rxr, u_int *rxd) +nm_pt_host_config(struct netmap_adapter *na, struct nm_config_info *info) { struct netmap_pt_host_adapter *pth_na = (struct netmap_pt_host_adapter *)na; @@ -1061,12 +1032,11 @@ nm_pt_host_config(struct netmap_adapter *na, u_int *txr, u_int *txd, /* forward the request */ error = netmap_update_config(parent); - *rxr = na->num_rx_rings = parent->num_rx_rings; - *txr = na->num_tx_rings = parent->num_tx_rings; - *txd = na->num_tx_desc = parent->num_tx_desc; - *rxd = na->num_rx_desc = parent->num_rx_desc; - - DBG(D("rxr: %d txr: %d txd: %d rxd: %d", *rxr, *txr, *txd, *rxd)); + info->num_rx_rings = na->num_rx_rings = parent->num_rx_rings; + info->num_tx_rings = na->num_tx_rings = parent->num_tx_rings; + info->num_tx_descs = 
na->num_tx_desc = parent->num_tx_desc; + info->num_rx_descs = na->num_rx_desc = parent->num_rx_desc; + info->rx_buf_maxsize = na->rx_buf_maxsize = parent->rx_buf_maxsize; return error; } @@ -1107,7 +1077,7 @@ nm_pt_host_krings_create(struct netmap_adapter *na) * host rings independently on what the regif asked for: * these rings are needed by the guest ptnetmap adapter * anyway. */ - kring = &NMR(na, t)[nma_get_nrings(na, t)]; + kring = NMR(na, t)[nma_get_nrings(na, t)]; kring->nr_kflags |= NKR_NEEDRING; } @@ -1187,17 +1157,18 @@ nm_pt_host_dtor(struct netmap_adapter *na) /* check if nmr is a request for a ptnetmap adapter that we can satisfy */ int -netmap_get_pt_host_na(struct nmreq *nmr, struct netmap_adapter **na, +netmap_get_pt_host_na(struct nmreq_header *hdr, struct netmap_adapter **na, struct netmap_mem_d *nmd, int create) { - struct nmreq parent_nmr; + struct nmreq_register *req = (struct nmreq_register *)hdr->nr_body; + struct nmreq_register preq; struct netmap_adapter *parent; /* target adapter */ struct netmap_pt_host_adapter *pth_na; struct ifnet *ifp = NULL; int error; /* Check if it is a request for a ptnetmap adapter */ - if ((nmr->nr_flags & (NR_PTNETMAP_HOST)) == 0) { + if ((req->nr_flags & (NR_PTNETMAP_HOST)) == 0) { return 0; } @@ -1210,12 +1181,14 @@ netmap_get_pt_host_na(struct nmreq *nmr, struct netmap_adapter **na, } /* first, try to find the adapter that we want to passthrough - * We use the same nmr, after we have turned off the ptnetmap flag. + * We use the same req, after we have turned off the ptnetmap flag. * In this way we can potentially passthrough everything netmap understands. 
*/ - memcpy(&parent_nmr, nmr, sizeof(parent_nmr)); - parent_nmr.nr_flags &= ~(NR_PTNETMAP_HOST); - error = netmap_get_na(&parent_nmr, &parent, &ifp, nmd, create); + memcpy(&preq, req, sizeof(preq)); + preq.nr_flags &= ~(NR_PTNETMAP_HOST); + hdr->nr_body = (uint64_t)&preq; + error = netmap_get_na(hdr, &parent, &ifp, nmd, create); + hdr->nr_body = (uint64_t)req; if (error) { D("parent lookup failed: %d", error); goto put_out_noputparent; diff --git a/sys/dev/netmap/netmap_vale.c b/sys/dev/netmap/netmap_vale.c index d364699bce269..6e0748acd5300 100644 --- a/sys/dev/netmap/netmap_vale.c +++ b/sys/dev/netmap/netmap_vale.c @@ -166,7 +166,7 @@ SYSCTL_INT(_dev_netmap, OID_AUTO, bridge_batch, CTLFLAG_RW, &bridge_batch, 0, "Max batch size to be used in the bridge"); SYSEND; -static int netmap_vp_create(struct nmreq *, struct ifnet *, +static int netmap_vp_create(struct nmreq_header *hdr, struct ifnet *, struct netmap_mem_d *nmd, struct netmap_vp_adapter **); static int netmap_vp_reg(struct netmap_adapter *na, int onoff); static int netmap_bwrap_reg(struct netmap_adapter *, int onoff); @@ -188,6 +188,9 @@ struct nm_hash_ent { uint64_t ports; }; +/* Holds the default callbacks */ +static struct netmap_bdg_ops default_bdg_ops = {netmap_bdg_learning, NULL, NULL}; + /* * nm_bridge is a descriptor for a VALE switch. * Interfaces for a bridge are all in bdg_ports[]. @@ -201,37 +204,50 @@ struct nm_hash_ent { * bdg_lock protects accesses to the bdg_ports array. * This is a rw lock (or equivalent). */ +#define NM_BDG_IFNAMSIZ IFNAMSIZ struct nm_bridge { /* XXX what is the proper alignment/layout ? */ BDG_RWLOCK_T bdg_lock; /* protects bdg_ports */ int bdg_namelen; - uint32_t bdg_active_ports; /* 0 means free */ - char bdg_basename[IFNAMSIZ]; + uint32_t bdg_active_ports; + char bdg_basename[NM_BDG_IFNAMSIZ]; /* Indexes of active ports (up to active_ports) * and all other remaining ports. 
*/ - uint8_t bdg_port_index[NM_BDG_MAXPORTS]; + uint32_t bdg_port_index[NM_BDG_MAXPORTS]; + /* used by netmap_bdg_detach_common() */ + uint32_t tmp_bdg_port_index[NM_BDG_MAXPORTS]; struct netmap_vp_adapter *bdg_ports[NM_BDG_MAXPORTS]; - /* - * The function to decide the destination port. + * Programmable lookup functions to figure out the destination port. * It returns either of an index of the destination port, * NM_BDG_BROADCAST to broadcast this packet, or NM_BDG_NOPORT not to * forward this packet. ring_nr is the source ring index, and the * function may overwrite this value to forward this packet to a * different ring index. - * This function must be set by netmap_bdg_ctl(). + * The function is set by netmap_bdg_regops(). + */ + struct netmap_bdg_ops *bdg_ops; + + /* + * Contains the data structure used by the bdg_ops.lookup function. + * By default points to *ht which is allocated on attach and used by the default lookup + * otherwise will point to the data structure received by netmap_bdg_regops(). */ - struct netmap_bdg_ops bdg_ops; + void *private_data; + struct nm_hash_ent *ht; - /* the forwarding table, MAC+ports. - * XXX should be changed to an argument to be passed to - * the lookup function + /* Currently used to specify if the bridge is still in use while empty and + * if it has been put in exclusive mode by an external module, see netmap_bdg_regops() + * and netmap_bdg_create(). 
*/ - struct nm_hash_ent *ht; // allocated on attach +#define NM_BDG_ACTIVE 1 +#define NM_BDG_EXCLUSIVE 2 + uint8_t bdg_flags; + #ifdef CONFIG_NET_NS struct net *ns; @@ -309,18 +325,17 @@ nm_vale_name_validate(const char *name) return -1; } - for (i = 0; name[i]; i++) { + for (i = 0; i < NM_BDG_IFNAMSIZ && name[i]; i++) { if (name[i] == ':') { - if (colon_pos != -1) { - return -1; - } colon_pos = i; + break; } else if (!nm_is_id_char(name[i])) { return -1; } } - if (i >= IFNAMSIZ) { + if (strlen(name) - colon_pos > IFNAMSIZ) { + /* interface name too long */ return -1; } @@ -355,7 +370,7 @@ nm_find_bridge(const char *name, int create) for (i = 0; i < num_bridges; i++) { struct nm_bridge *x = bridges + i; - if (x->bdg_active_ports == 0) { + if ((x->bdg_flags & NM_BDG_ACTIVE) + x->bdg_active_ports == 0) { if (create && b == NULL) b = x; /* record empty slot */ } else if (x->bdg_namelen != namelen) { @@ -381,7 +396,9 @@ nm_find_bridge(const char *name, int create) for (i = 0; i < NM_BDG_MAXPORTS; i++) b->bdg_port_index[i] = i; /* set the default function */ - b->bdg_ops.lookup = netmap_bdg_learning; + b->bdg_ops = &default_bdg_ops; + b->private_data = b->ht; + b->bdg_flags = 0; NM_BNS_GET(b); } return b; @@ -395,15 +412,15 @@ static void nm_free_bdgfwd(struct netmap_adapter *na) { int nrings, i; - struct netmap_kring *kring; + struct netmap_kring **kring; NMG_LOCK_ASSERT(); nrings = na->num_tx_rings; kring = na->tx_rings; for (i = 0; i < nrings; i++) { - if (kring[i].nkr_ft) { - nm_os_free(kring[i].nkr_ft); - kring[i].nkr_ft = NULL; /* protect from freeing twice */ + if (kring[i]->nkr_ft) { + nm_os_free(kring[i]->nkr_ft); + kring[i]->nkr_ft = NULL; /* protect from freeing twice */ } } } @@ -416,7 +433,7 @@ static int nm_alloc_bdgfwd(struct netmap_adapter *na) { int nrings, l, i, num_dstq; - struct netmap_kring *kring; + struct netmap_kring **kring; NMG_LOCK_ASSERT(); /* all port:rings + broadcast */ @@ -442,8 +459,23 @@ nm_alloc_bdgfwd(struct netmap_adapter *na) 
dstq[j].bq_head = dstq[j].bq_tail = NM_FT_NULL; dstq[j].bq_len = 0; } - kring[i].nkr_ft = ft; + kring[i]->nkr_ft = ft; + } + return 0; +} + +static int +netmap_bdg_free(struct nm_bridge *b) +{ + if ((b->bdg_flags & NM_BDG_ACTIVE) + b->bdg_active_ports != 0) { + return EBUSY; } + + ND("marking bridge %s as free", b->bdg_basename); + nm_os_free(b->ht); + b->bdg_ops = NULL; + b->bdg_flags = 0; + NM_BNS_PUT(b); return 0; } @@ -456,7 +488,7 @@ netmap_bdg_detach_common(struct nm_bridge *b, int hw, int sw) { int s_hw = hw, s_sw = sw; int i, lim =b->bdg_active_ports; - uint8_t tmp[NM_BDG_MAXPORTS]; + uint32_t *tmp = b->tmp_bdg_port_index; /* New algorithm: @@ -473,7 +505,7 @@ netmap_bdg_detach_common(struct nm_bridge *b, int hw, int sw) /* make a copy of the list of active ports, update it, * and then copy back within BDG_WLOCK(). */ - memcpy(tmp, b->bdg_port_index, sizeof(tmp)); + memcpy(b->tmp_bdg_port_index, b->bdg_port_index, sizeof(b->tmp_bdg_port_index)); for (i = 0; (hw >= 0 || sw >= 0) && i < lim; ) { if (hw >= 0 && tmp[i] == hw) { ND("detach hw %d at %d", hw, i); @@ -496,35 +528,117 @@ netmap_bdg_detach_common(struct nm_bridge *b, int hw, int sw) } BDG_WLOCK(b); - if (b->bdg_ops.dtor) - b->bdg_ops.dtor(b->bdg_ports[s_hw]); + if (b->bdg_ops->dtor) + b->bdg_ops->dtor(b->bdg_ports[s_hw]); b->bdg_ports[s_hw] = NULL; if (s_sw >= 0) { b->bdg_ports[s_sw] = NULL; } - memcpy(b->bdg_port_index, tmp, sizeof(tmp)); + memcpy(b->bdg_port_index, b->tmp_bdg_port_index, sizeof(b->tmp_bdg_port_index)); b->bdg_active_ports = lim; BDG_WUNLOCK(b); ND("now %d active ports", lim); - if (lim == 0) { - ND("marking bridge %s as free", b->bdg_basename); - nm_os_free(b->ht); - bzero(&b->bdg_ops, sizeof(b->bdg_ops)); - NM_BNS_PUT(b); + netmap_bdg_free(b); +} + +static inline void * +nm_bdg_get_auth_token(struct nm_bridge *b) +{ + return b->ht; +} + +/* bridge not in exclusive mode ==> always valid + * bridge in exclusive mode (created through netmap_bdg_create()) ==> check authentication 
token + */ +static inline int +nm_bdg_valid_auth_token(struct nm_bridge *b, void *auth_token) +{ + return !(b->bdg_flags & NM_BDG_EXCLUSIVE) || b->ht == auth_token; +} + +/* Allows external modules to create bridges in exclusive mode, + * returns an authentication token that the external module will need + * to provide during nm_bdg_ctl_{attach, detach}(), netmap_bdg_regops(), + * and nm_bdg_update_private_data() operations. + * Successfully executed if ret != NULL and *return_status == 0. + */ +void * +netmap_bdg_create(const char *bdg_name, int *return_status) +{ + struct nm_bridge *b = NULL; + void *ret = NULL; + + NMG_LOCK(); + b = nm_find_bridge(bdg_name, 0 /* don't create */); + if (b) { + *return_status = EEXIST; + goto unlock_bdg_create; + } + + b = nm_find_bridge(bdg_name, 1 /* create */); + if (!b) { + *return_status = ENOMEM; + goto unlock_bdg_create; + } + + b->bdg_flags |= NM_BDG_ACTIVE | NM_BDG_EXCLUSIVE; + ret = nm_bdg_get_auth_token(b); + *return_status = 0; + +unlock_bdg_create: + NMG_UNLOCK(); + return ret; +} + +/* Allows external modules to destroy a bridge created through + * netmap_bdg_create(), the bridge must be empty. 
+ */ +int +netmap_bdg_destroy(const char *bdg_name, void *auth_token) +{ + struct nm_bridge *b = NULL; + int ret = 0; + + NMG_LOCK(); + b = nm_find_bridge(bdg_name, 0 /* don't create */); + if (!b) { + ret = ENXIO; + goto unlock_bdg_free; + } + + if (!nm_bdg_valid_auth_token(b, auth_token)) { + ret = EACCES; + goto unlock_bdg_free; + } + if (!(b->bdg_flags & NM_BDG_EXCLUSIVE)) { + ret = EINVAL; + goto unlock_bdg_free; } + + b->bdg_flags &= ~(NM_BDG_EXCLUSIVE | NM_BDG_ACTIVE); + ret = netmap_bdg_free(b); + if (ret) { + b->bdg_flags |= NM_BDG_EXCLUSIVE | NM_BDG_ACTIVE; + } + +unlock_bdg_free: + NMG_UNLOCK(); + return ret; } + + /* nm_bdg_ctl callback for VALE ports */ static int -netmap_vp_bdg_ctl(struct netmap_adapter *na, struct nmreq *nmr, int attach) +netmap_vp_bdg_ctl(struct nmreq_header *hdr, struct netmap_adapter *na) { struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter *)na; struct nm_bridge *b = vpna->na_bdg; - (void)nmr; // XXX merge ? - if (attach) + if (hdr->nr_reqtype == NETMAP_REQ_VALE_ATTACH) { return 0; /* nothing to do */ + } if (b) { netmap_set_all_rings(na, 0 /* disable */); netmap_bdg_detach_common(b, vpna->bdg_port, -1); @@ -560,8 +674,38 @@ netmap_vp_dtor(struct netmap_adapter *na) } } +/* creates a persistent VALE port */ +int +nm_vi_create(struct nmreq_header *hdr) +{ + struct nmreq_vale_newif *req = + (struct nmreq_vale_newif *)hdr->nr_body; + int error = 0; + /* Build a nmreq_register out of the nmreq_vale_newif, + * so that we can call netmap_get_bdg_na(). 
*/ + struct nmreq_register regreq; + bzero(®req, sizeof(regreq)); + regreq.nr_tx_slots = req->nr_tx_slots; + regreq.nr_rx_slots = req->nr_rx_slots; + regreq.nr_tx_rings = req->nr_tx_rings; + regreq.nr_rx_rings = req->nr_rx_rings; + regreq.nr_mem_id = req->nr_mem_id; + hdr->nr_reqtype = NETMAP_REQ_REGISTER; + hdr->nr_body = (uint64_t)®req; + error = netmap_vi_create(hdr, 0 /* no autodelete */); + hdr->nr_reqtype = NETMAP_REQ_VALE_NEWIF; + hdr->nr_body = (uint64_t)req; + /* Write back to the original struct. */ + req->nr_tx_slots = regreq.nr_tx_slots; + req->nr_rx_slots = regreq.nr_rx_slots; + req->nr_tx_rings = regreq.nr_tx_rings; + req->nr_rx_rings = regreq.nr_rx_rings; + req->nr_mem_id = regreq.nr_mem_id; + return error; +} + /* remove a persistent VALE port from the system */ -static int +int nm_vi_destroy(const char *name) { struct ifnet *ifp; @@ -611,17 +755,14 @@ err: } static int -nm_update_info(struct nmreq *nmr, struct netmap_adapter *na) +nm_update_info(struct nmreq_register *req, struct netmap_adapter *na) { - uint64_t memsize; - int ret; - nmr->nr_rx_rings = na->num_rx_rings; - nmr->nr_tx_rings = na->num_tx_rings; - nmr->nr_rx_slots = na->num_rx_desc; - nmr->nr_tx_slots = na->num_tx_desc; - ret = netmap_mem_get_info(na->nm_mem, &memsize, NULL, &nmr->nr_arg2); - nmr->nr_memsize = (uint32_t)memsize; - return ret; + req->nr_rx_rings = na->num_rx_rings; + req->nr_tx_rings = na->num_tx_rings; + req->nr_rx_slots = na->num_rx_desc; + req->nr_tx_slots = na->num_tx_desc; + return netmap_mem_get_info(na->nm_mem, &req->nr_memsize, NULL, + &req->nr_mem_id); } /* @@ -629,22 +770,30 @@ nm_update_info(struct nmreq *nmr, struct netmap_adapter *na) * The interface will be attached to a bridge later. 
*/ int -netmap_vi_create(struct nmreq *nmr, int autodelete) +netmap_vi_create(struct nmreq_header *hdr, int autodelete) { + struct nmreq_register *req = (struct nmreq_register *)hdr->nr_body; struct ifnet *ifp; struct netmap_vp_adapter *vpna; struct netmap_mem_d *nmd = NULL; int error; + if (hdr->nr_reqtype != NETMAP_REQ_REGISTER) { + return EINVAL; + } + /* don't include VALE prefix */ - if (!strncmp(nmr->nr_name, NM_BDG_NAME, strlen(NM_BDG_NAME))) + if (!strncmp(hdr->nr_name, NM_BDG_NAME, strlen(NM_BDG_NAME))) + return EINVAL; + if (strlen(hdr->nr_name) >= IFNAMSIZ) { return EINVAL; - ifp = ifunit_ref(nmr->nr_name); + } + ifp = ifunit_ref(hdr->nr_name); if (ifp) { /* already exist, cannot create new one */ error = EEXIST; NMG_LOCK(); if (NM_NA_VALID(ifp)) { - int update_err = nm_update_info(nmr, NA(ifp)); + int update_err = nm_update_info(req, NA(ifp)); if (update_err) error = update_err; } @@ -652,20 +801,20 @@ netmap_vi_create(struct nmreq *nmr, int autodelete) if_rele(ifp); return error; } - error = nm_os_vi_persist(nmr->nr_name, &ifp); + error = nm_os_vi_persist(hdr->nr_name, &ifp); if (error) return error; NMG_LOCK(); - if (nmr->nr_arg2) { - nmd = netmap_mem_find(nmr->nr_arg2); + if (req->nr_mem_id) { + nmd = netmap_mem_find(req->nr_mem_id); if (nmd == NULL) { error = EINVAL; goto err_1; } } /* netmap_vp_create creates a struct netmap_vp_adapter */ - error = netmap_vp_create(nmr, ifp, nmd, &vpna); + error = netmap_vp_create(hdr, ifp, nmd, &vpna); if (error) { D("error %d", error); goto err_1; @@ -679,15 +828,15 @@ netmap_vi_create(struct nmreq *nmr, int autodelete) } NM_ATTACH_NA(ifp, &vpna->up); /* return the updated info */ - error = nm_update_info(nmr, &vpna->up); + error = nm_update_info(req, &vpna->up); if (error) { goto err_2; } - D("returning nr_arg2 %d", nmr->nr_arg2); + ND("returning nr_mem_id %d", req->nr_mem_id); if (nmd) netmap_mem_put(nmd); NMG_UNLOCK(); - D("created %s", ifp->if_xname); + ND("created %s", ifp->if_xname); return 0; err_2: @@ 
-711,16 +860,17 @@ err_1: * (*na != NULL && return == 0). */ int -netmap_get_bdg_na(struct nmreq *nmr, struct netmap_adapter **na, +netmap_get_bdg_na(struct nmreq_header *hdr, struct netmap_adapter **na, struct netmap_mem_d *nmd, int create) { - char *nr_name = nmr->nr_name; + char *nr_name = hdr->nr_name; const char *ifname; struct ifnet *ifp = NULL; int error = 0; struct netmap_vp_adapter *vpna, *hostna = NULL; struct nm_bridge *b; - int i, j, cand = -1, cand2 = -1; + uint32_t i, j; + uint32_t cand = NM_BDG_NOPORT, cand2 = NM_BDG_NOPORT; int needed; *na = NULL; /* default return value */ @@ -780,17 +930,17 @@ netmap_get_bdg_na(struct nmreq *nmr, struct netmap_adapter **na, ifname = nr_name + b->bdg_namelen + 1; ifp = ifunit_ref(ifname); if (!ifp) { - /* Create an ephemeral virtual port - * This block contains all the ephemeral-specific logics + /* Create an ephemeral virtual port. + * This block contains all the ephemeral-specific logic. */ - if (nmr->nr_cmd) { - /* nr_cmd must be 0 for a virtual port */ + + if (hdr->nr_reqtype != NETMAP_REQ_REGISTER) { error = EINVAL; goto out; } /* bdg_netmap_attach creates a struct netmap_adapter */ - error = netmap_vp_create(nmr, NULL, nmd, &vpna); + error = netmap_vp_create(hdr, NULL, nmd, &vpna); if (error) { D("error %d", error); goto out; @@ -798,15 +948,16 @@ netmap_get_bdg_na(struct nmreq *nmr, struct netmap_adapter **na, /* shortcut - we can skip get_hw_na(), * ownership check and nm_bdg_attach() */ + } else { struct netmap_adapter *hw; /* the vale:nic syntax is only valid for some commands */ - switch (nmr->nr_cmd) { - case NETMAP_BDG_ATTACH: - case NETMAP_BDG_DETACH: - case NETMAP_BDG_POLLING_ON: - case NETMAP_BDG_POLLING_OFF: + switch (hdr->nr_reqtype) { + case NETMAP_REQ_VALE_ATTACH: + case NETMAP_REQ_VALE_DETACH: + case NETMAP_REQ_VALE_POLLING_ENABLE: + case NETMAP_REQ_VALE_POLLING_DISABLE: break; /* ok */ default: error = EINVAL; @@ -823,8 +974,14 @@ netmap_get_bdg_na(struct nmreq *nmr, struct netmap_adapter 
**na, goto out; vpna = hw->na_vp; hostna = hw->na_hostvp; - if (nmr->nr_arg1 != NETMAP_BDG_HOST) - hostna = NULL; + if (hdr->nr_reqtype == NETMAP_REQ_VALE_ATTACH) { + /* Check if we need to skip the host rings. */ + struct nmreq_vale_attach *areq = + (struct nmreq_vale_attach *)hdr->nr_body; + if (areq->reg.nr_mode != NR_REG_NIC_SW) { + hostna = NULL; + } + } } BDG_WLOCK(b); @@ -854,34 +1011,46 @@ out: return error; } - -/* Process NETMAP_BDG_ATTACH */ -static int -nm_bdg_ctl_attach(struct nmreq *nmr) +/* Process NETMAP_REQ_VALE_ATTACH. + */ +int +nm_bdg_ctl_attach(struct nmreq_header *hdr, void *auth_token) { + struct nmreq_vale_attach *req = + (struct nmreq_vale_attach *)hdr->nr_body; + struct netmap_vp_adapter * vpna; struct netmap_adapter *na; struct netmap_mem_d *nmd = NULL; + struct nm_bridge *b = NULL; int error; NMG_LOCK(); + /* permission check for modified bridges */ + b = nm_find_bridge(hdr->nr_name, 0 /* don't create */); + if (b && !nm_bdg_valid_auth_token(b, auth_token)) { + error = EACCES; + goto unlock_exit; + } - if (nmr->nr_arg2) { - nmd = netmap_mem_find(nmr->nr_arg2); + if (req->reg.nr_mem_id) { + nmd = netmap_mem_find(req->reg.nr_mem_id); if (nmd == NULL) { error = EINVAL; goto unlock_exit; } } - /* XXX check existing one */ - error = netmap_get_bdg_na(nmr, &na, nmd, 0); + /* check for existing one */ + error = netmap_get_bdg_na(hdr, &na, nmd, 0); if (!error) { error = EBUSY; goto unref_exit; } - error = netmap_get_bdg_na(nmr, &na, nmd, 1 /* create if not exists */); - if (error) /* no device */ + error = netmap_get_bdg_na(hdr, &na, + nmd, 1 /* create if not exists */); + if (error) { /* no device */ goto unlock_exit; + } if (na == NULL) { /* VALE prefix missing */ error = EINVAL; @@ -897,11 +1066,13 @@ nm_bdg_ctl_attach(struct nmreq *nmr) /* nop for VALE ports. 
The bwrap needs to put the hwna * in netmap mode (see netmap_bwrap_bdg_ctl) */ - error = na->nm_bdg_ctl(na, nmr, 1); + error = na->nm_bdg_ctl(hdr, na); if (error) goto unref_exit; ND("registered %s to netmap-mode", na->name); } + vpna = (struct netmap_vp_adapter *)na; + req->port_index = vpna->bdg_port; NMG_UNLOCK(); return 0; @@ -918,15 +1089,26 @@ nm_is_bwrap(struct netmap_adapter *na) return na->nm_register == netmap_bwrap_reg; } -/* process NETMAP_BDG_DETACH */ -static int -nm_bdg_ctl_detach(struct nmreq *nmr) +/* Process NETMAP_REQ_VALE_DETACH. + */ +int +nm_bdg_ctl_detach(struct nmreq_header *hdr, void *auth_token) { + struct nmreq_vale_detach *nmreq_det = (void *)hdr->nr_body; + struct netmap_vp_adapter *vpna; struct netmap_adapter *na; + struct nm_bridge *b = NULL; int error; NMG_LOCK(); - error = netmap_get_bdg_na(nmr, &na, NULL, 0 /* don't create */); + /* permission check for modified bridges */ + b = nm_find_bridge(hdr->nr_name, 0 /* don't create */); + if (b && !nm_bdg_valid_auth_token(b, auth_token)) { + error = EACCES; + goto unlock_exit; + } + + error = netmap_get_bdg_na(hdr, &na, NULL, 0 /* don't create */); if (error) { /* no device, or another bridge or user owns the device */ goto unlock_exit; } @@ -938,16 +1120,27 @@ nm_bdg_ctl_detach(struct nmreq *nmr) ((struct netmap_bwrap_adapter *)na)->na_polling_state) { /* Don't detach a NIC with polling */ error = EBUSY; - netmap_adapter_put(na); - goto unlock_exit; + goto unref_exit; } + + vpna = (struct netmap_vp_adapter *)na; + if (na->na_vp != vpna) { + /* trying to detach first attach of VALE persistent port attached + * to 2 bridges + */ + error = EBUSY; + goto unref_exit; + } + nmreq_det->port_index = vpna->bdg_port; + if (na->nm_bdg_ctl) { /* remove the port from bridge. 
The bwrap * also needs to put the hwna in normal mode */ - error = na->nm_bdg_ctl(na, nmr, 0); + error = na->nm_bdg_ctl(hdr, na); } +unref_exit: netmap_adapter_put(na); unlock_exit: NMG_UNLOCK(); @@ -968,7 +1161,7 @@ struct nm_bdg_polling_state { bool configured; bool stopped; struct netmap_bwrap_adapter *bna; - u_int reg; + uint32_t mode; u_int qfirst; u_int qlast; u_int cpu_from; @@ -982,7 +1175,7 @@ netmap_bwrap_polling(void *data, int is_kthread) struct nm_bdg_kthread *nbk = data; struct netmap_bwrap_adapter *bna; u_int qfirst, qlast, i; - struct netmap_kring *kring0, *kring; + struct netmap_kring **kring0, *kring; if (!nbk) return; @@ -992,7 +1185,7 @@ netmap_bwrap_polling(void *data, int is_kthread) kring0 = NMR(bna->hwna, NR_RX); for (i = qfirst; i < qlast; i++) { - kring = kring0 + i; + kring = kring0[i]; kring->nm_notify(kring, 0); } } @@ -1012,7 +1205,8 @@ nm_bdg_create_kthreads(struct nm_bdg_polling_state *bps) kcfg.use_kthread = 1; for (i = 0; i < bps->ncpus; i++) { struct nm_bdg_kthread *t = bps->kthreads + i; - int all = (bps->ncpus == 1 && bps->reg == NR_REG_ALL_NIC); + int all = (bps->ncpus == 1 && + bps->mode == NETMAP_POLLING_MODE_SINGLE_CPU); int affinity = bps->cpu_from + i; t->bps = bps; @@ -1023,7 +1217,7 @@ nm_bdg_create_kthreads(struct nm_bdg_polling_state *bps) kcfg.type = i; kcfg.worker_private = t; - t->nmk = nm_os_kctx_create(&kcfg, 0, NULL); + t->nmk = nm_os_kctx_create(&kcfg, NULL); if (t->nmk == NULL) { goto cleanup; } @@ -1088,67 +1282,68 @@ nm_bdg_polling_stop_delete_kthreads(struct nm_bdg_polling_state *bps) } static int -get_polling_cfg(struct nmreq *nmr, struct netmap_adapter *na, - struct nm_bdg_polling_state *bps) +get_polling_cfg(struct nmreq_vale_polling *req, struct netmap_adapter *na, + struct nm_bdg_polling_state *bps) { - int req_cpus, avail_cpus, core_from; - u_int reg, i, qfirst, qlast; + unsigned int avail_cpus, core_from; + unsigned int qfirst, qlast; + uint32_t i = req->nr_first_cpu_id; + uint32_t req_cpus = 
req->nr_num_polling_cpus; avail_cpus = nm_os_ncpus(); - req_cpus = nmr->nr_arg1; if (req_cpus == 0) { D("req_cpus must be > 0"); return EINVAL; } else if (req_cpus >= avail_cpus) { - D("for safety, we need at least one core left in the system"); + D("Cannot use all the CPUs in the system"); return EINVAL; } - reg = nmr->nr_flags & NR_REG_MASK; - i = nmr->nr_ringid & NETMAP_RING_MASK; - /* - * ONE_NIC: dedicate one core to one ring. If multiple cores - * are specified, consecutive rings are also polled. - * For example, if ringid=2 and 2 cores are given, - * ring 2 and 3 are polled by core 2 and 3, respectively. - * ALL_NIC: poll all the rings using a core specified by ringid. - * the number of cores must be 1. - */ - if (reg == NR_REG_ONE_NIC) { + + if (req->nr_mode == NETMAP_POLLING_MODE_MULTI_CPU) { + /* Use a separate core for each ring. If nr_num_polling_cpus>1 + * more consecutive rings are polled. + * For example, if nr_first_cpu_id=2 and nr_num_polling_cpus=2, + * ring 2 and 3 are polled by core 2 and 3, respectively. */ if (i + req_cpus > nma_get_nrings(na, NR_RX)) { - D("only %d rings exist (ring %u-%u is given)", - nma_get_nrings(na, NR_RX), i, i+req_cpus); + D("Rings %u-%u not in range (have %d rings)", + i, i + req_cpus, nma_get_nrings(na, NR_RX)); return EINVAL; } qfirst = i; qlast = qfirst + req_cpus; core_from = qfirst; - } else if (reg == NR_REG_ALL_NIC) { + + } else if (req->nr_mode == NETMAP_POLLING_MODE_SINGLE_CPU) { + /* Poll all the rings using a core specified by nr_first_cpu_id. + * the number of cores must be 1. 
*/ if (req_cpus != 1) { - D("ncpus must be 1 not %d for REG_ALL_NIC", req_cpus); + D("ncpus must be 1 for NETMAP_POLLING_MODE_SINGLE_CPU " + "(was %d)", req_cpus); return EINVAL; } qfirst = 0; qlast = nma_get_nrings(na, NR_RX); core_from = i; } else { - D("reg must be ALL_NIC or ONE_NIC"); + D("Invalid polling mode"); return EINVAL; } - bps->reg = reg; + bps->mode = req->nr_mode; bps->qfirst = qfirst; bps->qlast = qlast; bps->cpu_from = core_from; bps->ncpus = req_cpus; D("%s qfirst %u qlast %u cpu_from %u ncpus %u", - reg == NR_REG_ALL_NIC ? "REG_ALL_NIC" : "REG_ONE_NIC", + req->nr_mode == NETMAP_POLLING_MODE_MULTI_CPU ? + "MULTI" : "SINGLE", qfirst, qlast, core_from, req_cpus); return 0; } static int -nm_bdg_ctl_polling_start(struct nmreq *nmr, struct netmap_adapter *na) +nm_bdg_ctl_polling_start(struct nmreq_vale_polling *req, struct netmap_adapter *na) { struct nm_bdg_polling_state *bps; struct netmap_bwrap_adapter *bna; @@ -1166,7 +1361,7 @@ nm_bdg_ctl_polling_start(struct nmreq *nmr, struct netmap_adapter *na) bps->configured = false; bps->stopped = true; - if (get_polling_cfg(nmr, na, bps)) { + if (get_polling_cfg(req, na, bps)) { nm_os_free(bps); return EINVAL; } @@ -1195,7 +1390,7 @@ nm_bdg_ctl_polling_start(struct nmreq *nmr, struct netmap_adapter *na) } static int -nm_bdg_ctl_polling_stop(struct nmreq *nmr, struct netmap_adapter *na) +nm_bdg_ctl_polling_stop(struct netmap_adapter *na) { struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter *)na; struct nm_bdg_polling_state *bps; @@ -1214,190 +1409,203 @@ nm_bdg_ctl_polling_stop(struct nmreq *nmr, struct netmap_adapter *na) return 0; } -/* Called by either user's context (netmap_ioctl()) - * or external kernel modules (e.g., Openvswitch). - * Operation is indicated in nmr->nr_cmd. - * NETMAP_BDG_OPS that sets configure/lookup/dtor functions to the bridge - * requires bdg_ops argument; the other commands ignore this argument. - * - * Called without NMG_LOCK. 
- */ int -netmap_bdg_ctl(struct nmreq *nmr, struct netmap_bdg_ops *bdg_ops) +nm_bdg_polling(struct nmreq_header *hdr) +{ + struct nmreq_vale_polling *req = + (struct nmreq_vale_polling *)hdr->nr_body; + struct netmap_adapter *na = NULL; + int error = 0; + + NMG_LOCK(); + error = netmap_get_bdg_na(hdr, &na, NULL, /*create=*/0); + if (na && !error) { + if (!nm_is_bwrap(na)) { + error = EOPNOTSUPP; + } else if (hdr->nr_reqtype == NETMAP_BDG_POLLING_ON) { + error = nm_bdg_ctl_polling_start(req, na); + if (!error) + netmap_adapter_get(na); + } else { + error = nm_bdg_ctl_polling_stop(na); + if (!error) + netmap_adapter_put(na); + } + netmap_adapter_put(na); + } else if (!na && !error) { + /* Not VALE port. */ + error = EINVAL; + } + NMG_UNLOCK(); + + return error; +} + +/* Process NETMAP_REQ_VALE_LIST. */ +int +netmap_bdg_list(struct nmreq_header *hdr) { + struct nmreq_vale_list *req = + (struct nmreq_vale_list *)hdr->nr_body; + int namelen = strlen(hdr->nr_name); struct nm_bridge *b, *bridges; - struct netmap_adapter *na; struct netmap_vp_adapter *vpna; - char *name = nmr->nr_name; - int cmd = nmr->nr_cmd, namelen = strlen(name); int error = 0, i, j; u_int num_bridges; netmap_bns_getbridges(&bridges, &num_bridges); - switch (cmd) { - case NETMAP_BDG_NEWIF: - error = netmap_vi_create(nmr, 0 /* no autodelete */); - break; - - case NETMAP_BDG_DELIF: - error = nm_vi_destroy(nmr->nr_name); - break; - - case NETMAP_BDG_ATTACH: - error = nm_bdg_ctl_attach(nmr); - break; - - case NETMAP_BDG_DETACH: - error = nm_bdg_ctl_detach(nmr); - break; - - case NETMAP_BDG_LIST: - /* this is used to enumerate bridges and ports */ - if (namelen) { /* look up indexes of bridge and port */ - if (strncmp(name, NM_BDG_NAME, strlen(NM_BDG_NAME))) { - error = EINVAL; - break; - } - NMG_LOCK(); - b = nm_find_bridge(name, 0 /* don't create */); - if (!b) { - error = ENOENT; - NMG_UNLOCK(); - break; - } - - error = 0; - nmr->nr_arg1 = b - bridges; /* bridge index */ - nmr->nr_arg2 = NM_BDG_NOPORT; - 
for (j = 0; j < b->bdg_active_ports; j++) { - i = b->bdg_port_index[j]; - vpna = b->bdg_ports[i]; - if (vpna == NULL) { - D("---AAAAAAAAARGH-------"); - continue; - } - /* the former and the latter identify a - * virtual port and a NIC, respectively - */ - if (!strcmp(vpna->up.name, name)) { - nmr->nr_arg2 = i; /* port index */ - break; - } - } - NMG_UNLOCK(); - } else { - /* return the first non-empty entry starting from - * bridge nr_arg1 and port nr_arg2. - * - * Users can detect the end of the same bridge by - * seeing the new and old value of nr_arg1, and can - * detect the end of all the bridge by error != 0 - */ - i = nmr->nr_arg1; - j = nmr->nr_arg2; - - NMG_LOCK(); - for (error = ENOENT; i < NM_BRIDGES; i++) { - b = bridges + i; - for ( ; j < NM_BDG_MAXPORTS; j++) { - if (b->bdg_ports[j] == NULL) - continue; - vpna = b->bdg_ports[j]; - strncpy(name, vpna->up.name, (size_t)IFNAMSIZ); - error = 0; - goto out; - } - j = 0; /* following bridges scan from 0 */ - } - out: - nmr->nr_arg1 = i; - nmr->nr_arg2 = j; - NMG_UNLOCK(); - } - break; - - case NETMAP_BDG_REGOPS: /* XXX this should not be available from userspace */ - /* register callbacks to the given bridge. - * nmr->nr_name may be just bridge's name (including ':' - * if it is not just NM_NAME). - */ - if (!bdg_ops) { - error = EINVAL; - break; + /* this is used to enumerate bridges and ports */ + if (namelen) { /* look up indexes of bridge and port */ + if (strncmp(hdr->nr_name, NM_BDG_NAME, + strlen(NM_BDG_NAME))) { + return EINVAL; } NMG_LOCK(); - b = nm_find_bridge(name, 0 /* don't create */); + b = nm_find_bridge(hdr->nr_name, 0 /* don't create */); if (!b) { - error = EINVAL; - } else { - b->bdg_ops = *bdg_ops; + NMG_UNLOCK(); + return ENOENT; } - NMG_UNLOCK(); - break; - case NETMAP_BDG_VNET_HDR: - /* Valid lengths for the virtio-net header are 0 (no header), - 10 and 12. 
*/ - if (nmr->nr_arg1 != 0 && - nmr->nr_arg1 != sizeof(struct nm_vnet_hdr) && - nmr->nr_arg1 != 12) { - error = EINVAL; - break; - } - NMG_LOCK(); - error = netmap_get_bdg_na(nmr, &na, NULL, 0); - if (na && !error) { - vpna = (struct netmap_vp_adapter *)na; - na->virt_hdr_len = nmr->nr_arg1; - if (na->virt_hdr_len) { - vpna->mfs = NETMAP_BUF_SIZE(na); + req->nr_bridge_idx = b - bridges; /* bridge index */ + req->nr_port_idx = NM_BDG_NOPORT; + for (j = 0; j < b->bdg_active_ports; j++) { + i = b->bdg_port_index[j]; + vpna = b->bdg_ports[i]; + if (vpna == NULL) { + D("This should not happen"); + continue; + } + /* the former and the latter identify a + * virtual port and a NIC, respectively + */ + if (!strcmp(vpna->up.name, hdr->nr_name)) { + req->nr_port_idx = i; /* port index */ + break; } - D("Using vnet_hdr_len %d for %p", na->virt_hdr_len, na); - netmap_adapter_put(na); - } else if (!na) { - error = ENXIO; } NMG_UNLOCK(); - break; + } else { + /* return the first non-empty entry starting from + * bridge nr_arg1 and port nr_arg2. 
+ * + * Users can detect the end of the same bridge by + * seeing the new and old value of nr_arg1, and can + * detect the end of all the bridge by error != 0 + */ + i = req->nr_bridge_idx; + j = req->nr_port_idx; - case NETMAP_BDG_POLLING_ON: - case NETMAP_BDG_POLLING_OFF: NMG_LOCK(); - error = netmap_get_bdg_na(nmr, &na, NULL, 0); - if (na && !error) { - if (!nm_is_bwrap(na)) { - error = EOPNOTSUPP; - } else if (cmd == NETMAP_BDG_POLLING_ON) { - error = nm_bdg_ctl_polling_start(nmr, na); - if (!error) - netmap_adapter_get(na); - } else { - error = nm_bdg_ctl_polling_stop(nmr, na); - if (!error) - netmap_adapter_put(na); + for (error = ENOENT; i < NM_BRIDGES; i++) { + b = bridges + i; + for ( ; j < NM_BDG_MAXPORTS; j++) { + if (b->bdg_ports[j] == NULL) + continue; + vpna = b->bdg_ports[j]; + /* write back the VALE switch name */ + strncpy(hdr->nr_name, vpna->up.name, + (size_t)IFNAMSIZ); + error = 0; + goto out; } - netmap_adapter_put(na); + j = 0; /* following bridges scan from 0 */ } + out: + req->nr_bridge_idx = i; + req->nr_port_idx = j; NMG_UNLOCK(); - break; + } + + return error; +} + +/* Called by external kernel modules (e.g., Openvswitch). + * to set configure/lookup/dtor functions of a VALE instance. + * Register callbacks to the given bridge. 'name' may be just + * bridge's name (including ':' if it is not just NM_BDG_NAME). + * + * Called without NMG_LOCK. 
+ */ + +int +netmap_bdg_regops(const char *name, struct netmap_bdg_ops *bdg_ops, void *private_data, void *auth_token) +{ + struct nm_bridge *b; + int error = 0; + + NMG_LOCK(); + b = nm_find_bridge(name, 0 /* don't create */); + if (!b) { + error = ENXIO; + goto unlock_regops; + } + if (!nm_bdg_valid_auth_token(b, auth_token)) { + error = EACCES; + goto unlock_regops; + } + + BDG_WLOCK(b); + if (!bdg_ops) { + /* resetting the bridge */ + bzero(b->ht, sizeof(struct nm_hash_ent) * NM_BDG_HASH); + b->bdg_ops = &default_bdg_ops; + b->private_data = b->ht; + } else { + /* modifying the bridge */ + b->private_data = private_data; + b->bdg_ops = bdg_ops; + } + BDG_WUNLOCK(b); + +unlock_regops: + NMG_UNLOCK(); + return error; +} + +/* Called by external kernel modules (e.g., Openvswitch). + * to modify the private data previously given to regops(). + * 'name' may be just bridge's name (including ':' if it + * is not just NM_BDG_NAME). + * Called without NMG_LOCK. + */ +int +nm_bdg_update_private_data(const char *name, bdg_update_private_data_fn_t callback, + void *callback_data, void *auth_token) +{ + void *private_data = NULL; + struct nm_bridge *b; + int error = 0; - default: - D("invalid cmd (nmr->nr_cmd) (0x%x)", cmd); + NMG_LOCK(); + b = nm_find_bridge(name, 0 /* don't create */); + if (!b) { error = EINVAL; - break; + goto unlock_update_priv; + } + if (!nm_bdg_valid_auth_token(b, auth_token)) { + error = EACCES; + goto unlock_update_priv; } + BDG_WLOCK(b); + private_data = callback(b->private_data, callback_data, &error); + b->private_data = private_data; + BDG_WUNLOCK(b); + +unlock_update_priv: + NMG_UNLOCK(); return error; } int -netmap_bdg_config(struct nmreq *nmr) +netmap_bdg_config(struct nm_ifreq *nr) { struct nm_bridge *b; int error = EINVAL; NMG_LOCK(); - b = nm_find_bridge(nmr->nr_name, 0); + b = nm_find_bridge(nr->nifr_name, 0); if (!b) { NMG_UNLOCK(); return error; @@ -1405,8 +1613,8 @@ netmap_bdg_config(struct nmreq *nmr) NMG_UNLOCK(); /* Don't call 
config() with NMG_LOCK() held */ BDG_RLOCK(b); - if (b->bdg_ops.config != NULL) - error = b->bdg_ops.config((struct nm_ifreq *)nmr); + if (b->bdg_ops->config != NULL) + error = b->bdg_ops->config(nr); BDG_RUNLOCK(b); return error; } @@ -1436,7 +1644,7 @@ netmap_vp_krings_create(struct netmap_adapter *na) leases = na->tailroom; for (i = 0; i < nrx; i++) { /* Receive rings */ - na->rx_rings[i].nkr_leases = leases; + na->rx_rings[i]->nkr_leases = leases; leases += na->num_rx_desc; } @@ -1502,6 +1710,7 @@ nm_bdg_preflush(struct netmap_kring *kring, u_int end) ft[ft_i].ft_len = slot->len; ft[ft_i].ft_flags = slot->flags; + ft[ft_i].ft_offset = 0; ND("flags is 0x%x", slot->flags); /* we do not use the buf changed flag, but we still need to reset it */ @@ -1606,7 +1815,7 @@ netmap_vp_reg(struct netmap_adapter *na, int onoff) if (onoff) { for_rx_tx(t) { for (i = 0; i < netmap_real_rings(na, t); i++) { - struct netmap_kring *kring = &NMR(na, t)[i]; + struct netmap_kring *kring = NMR(na, t)[i]; if (nm_kring_pending_on(kring)) kring->nr_mode = NKR_NETMAP_ON; @@ -1622,7 +1831,7 @@ netmap_vp_reg(struct netmap_adapter *na, int onoff) na->na_flags &= ~NAF_NETMAP_ON; for_rx_tx(t) { for (i = 0; i < netmap_real_rings(na, t); i++) { - struct netmap_kring *kring = &NMR(na, t)[i]; + struct netmap_kring *kring = NMR(na, t)[i]; if (nm_kring_pending_off(kring)) kring->nr_mode = NKR_NETMAP_OFF; @@ -1641,30 +1850,19 @@ netmap_vp_reg(struct netmap_adapter *na, int onoff) * and then returns the destination port index, and the * ring in *dst_ring (at the moment, always use ring 0) */ -u_int +uint32_t netmap_bdg_learning(struct nm_bdg_fwd *ft, uint8_t *dst_ring, - struct netmap_vp_adapter *na) + struct netmap_vp_adapter *na, void *private_data) { - uint8_t *buf = ft->ft_buf; - u_int buf_len = ft->ft_len; - struct nm_hash_ent *ht = na->na_bdg->ht; + uint8_t *buf = ((uint8_t *)ft->ft_buf) + ft->ft_offset; + u_int buf_len = ft->ft_len - ft->ft_offset; + struct nm_hash_ent *ht = private_data; 
uint32_t sh, dh; u_int dst, mysrc = na->bdg_port; uint64_t smac, dmac; uint8_t indbuf[12]; - /* safety check, unfortunately we have many cases */ - if (buf_len >= 14 + na->up.virt_hdr_len) { - /* virthdr + mac_hdr in the same slot */ - buf += na->up.virt_hdr_len; - buf_len -= na->up.virt_hdr_len; - } else if (buf_len == na->up.virt_hdr_len && ft->ft_flags & NS_MOREFRAG) { - /* only header in first fragment */ - ft++; - buf = ft->ft_buf; - buf_len = ft->ft_len; - } else { - RD(5, "invalid buf format, length %d", buf_len); + if (buf_len < 14) { return NM_BDG_NOPORT; } @@ -1803,13 +2001,23 @@ nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n, struct netmap_vp_adapter *na, uint8_t dst_ring = ring_nr; /* default, same ring as origin */ uint16_t dst_port, d_i; struct nm_bdg_q *d; + struct nm_bdg_fwd *start_ft = NULL; ND("slot %d frags %d", i, ft[i].ft_frags); - /* Drop the packet if the virtio-net header is not into the first - fragment nor at the very beginning of the second. */ - if (unlikely(na->up.virt_hdr_len > ft[i].ft_len)) + + if (na->up.virt_hdr_len < ft[i].ft_len) { + ft[i].ft_offset = na->up.virt_hdr_len; + start_ft = &ft[i]; + } else if (na->up.virt_hdr_len == ft[i].ft_len && ft[i].ft_flags & NS_MOREFRAG) { + ft[i].ft_offset = ft[i].ft_len; + start_ft = &ft[i+1]; + } else { + /* Drop the packet if the virtio-net header is not into the first + * fragment nor at the very beginning of the second. 
+ */ continue; - dst_port = b->bdg_ops.lookup(&ft[i], &dst_ring, na); + } + dst_port = b->bdg_ops->lookup(start_ft, &dst_ring, na, b->private_data); if (netmap_verbose > 255) RD(5, "slot %d port %d -> %d", i, me, dst_port); if (dst_port >= NM_BDG_NOPORT) @@ -1940,7 +2148,7 @@ nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n, struct netmap_vp_adapter *na, nrings = dst_na->up.num_rx_rings; if (dst_nr >= nrings) dst_nr = dst_nr % nrings; - kring = &dst_na->up.rx_rings[dst_nr]; + kring = dst_na->up.rx_rings[dst_nr]; ring = kring->ring; /* the destination ring may have not been opened for RX */ if (unlikely(ring == NULL || kring->nr_mode != NKR_NETMAP_ON)) @@ -2224,8 +2432,9 @@ netmap_vp_bdg_attach(const char *name, struct netmap_adapter *na) { struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter *)na; - if (vpna->na_bdg) + if (vpna->na_bdg) { return netmap_bwrap_attach(name, na); + } na->na_vp = vpna; strncpy(na->name, name, sizeof(na->name)); na->na_hostvp = NULL; @@ -2236,14 +2445,19 @@ netmap_vp_bdg_attach(const char *name, struct netmap_adapter *na) * Only persistent VALE ports have a non-null ifp. 
*/ static int -netmap_vp_create(struct nmreq *nmr, struct ifnet *ifp, - struct netmap_mem_d *nmd, - struct netmap_vp_adapter **ret) +netmap_vp_create(struct nmreq_header *hdr, struct ifnet *ifp, + struct netmap_mem_d *nmd, struct netmap_vp_adapter **ret) { + struct nmreq_register *req = (struct nmreq_register *)hdr->nr_body; struct netmap_vp_adapter *vpna; struct netmap_adapter *na; int error = 0; u_int npipes = 0; + u_int extrabufs = 0; + + if (hdr->nr_reqtype != NETMAP_REQ_REGISTER) { + return EINVAL; + } vpna = nm_os_malloc(sizeof(*vpna)); if (vpna == NULL) @@ -2252,31 +2466,30 @@ netmap_vp_create(struct nmreq *nmr, struct ifnet *ifp, na = &vpna->up; na->ifp = ifp; - strncpy(na->name, nmr->nr_name, sizeof(na->name)); + strncpy(na->name, hdr->nr_name, sizeof(na->name)); /* bound checking */ - na->num_tx_rings = nmr->nr_tx_rings; + na->num_tx_rings = req->nr_tx_rings; nm_bound_var(&na->num_tx_rings, 1, 1, NM_BDG_MAXRINGS, NULL); - nmr->nr_tx_rings = na->num_tx_rings; // write back - na->num_rx_rings = nmr->nr_rx_rings; + req->nr_tx_rings = na->num_tx_rings; /* write back */ + na->num_rx_rings = req->nr_rx_rings; nm_bound_var(&na->num_rx_rings, 1, 1, NM_BDG_MAXRINGS, NULL); - nmr->nr_rx_rings = na->num_rx_rings; // write back - nm_bound_var(&nmr->nr_tx_slots, NM_BRIDGE_RINGSIZE, + req->nr_rx_rings = na->num_rx_rings; /* write back */ + nm_bound_var(&req->nr_tx_slots, NM_BRIDGE_RINGSIZE, 1, NM_BDG_MAXSLOTS, NULL); - na->num_tx_desc = nmr->nr_tx_slots; - nm_bound_var(&nmr->nr_rx_slots, NM_BRIDGE_RINGSIZE, + na->num_tx_desc = req->nr_tx_slots; + nm_bound_var(&req->nr_rx_slots, NM_BRIDGE_RINGSIZE, 1, NM_BDG_MAXSLOTS, NULL); /* validate number of pipes. We want at least 1, * but probably can do with some more. 
* So let's use 2 as default (when 0 is supplied) */ - npipes = nmr->nr_arg1; nm_bound_var(&npipes, 2, 1, NM_MAXPIPES, NULL); - nmr->nr_arg1 = npipes; /* write back */ /* validate extra bufs */ - nm_bound_var(&nmr->nr_arg3, 0, 0, + nm_bound_var(&extrabufs, 0, 0, 128*NM_BDG_MAXSLOTS, NULL); - na->num_rx_desc = nmr->nr_rx_slots; + req->nr_extra_bufs = extrabufs; /* write back */ + na->num_rx_desc = req->nr_rx_slots; /* Set the mfs to a default value, as it is needed on the VALE * mismatch datapath. XXX We should set it according to the MTU * known to the kernel. */ @@ -2299,13 +2512,13 @@ netmap_vp_create(struct nmreq *nmr, struct ifnet *ifp, na->nm_krings_create = netmap_vp_krings_create; na->nm_krings_delete = netmap_vp_krings_delete; na->nm_dtor = netmap_vp_dtor; - D("nr_arg2 %d", nmr->nr_arg2); + ND("nr_mem_id %d", req->nr_mem_id); na->nm_mem = nmd ? netmap_mem_get(nmd): netmap_mem_private_new( na->num_tx_rings, na->num_tx_desc, na->num_rx_rings, na->num_rx_desc, - nmr->nr_arg3, npipes, &error); + req->nr_extra_bufs, npipes, &error); if (na->nm_mem == NULL) goto err; na->nm_bdg_attach = netmap_vp_bdg_attach; @@ -2373,8 +2586,9 @@ netmap_bwrap_dtor(struct netmap_adapter *na) ND("na %p", na); na->ifp = NULL; bna->host.up.ifp = NULL; + hwna->na_vp = bna->saved_na_vp; + hwna->na_hostvp = NULL; hwna->na_private = NULL; - hwna->na_vp = hwna->na_hostvp = NULL; hwna->na_flags &= ~NAF_BUSY; netmap_adapter_put(hwna); @@ -2414,7 +2628,7 @@ netmap_bwrap_intr_notify(struct netmap_kring *kring, int flags) if (netmap_verbose) D("%s %s 0x%x", na->name, kring->name, flags); - bkring = &vpna->up.tx_rings[ring_nr]; + bkring = vpna->up.tx_rings[ring_nr]; /* make sure the ring is not disabled */ if (nm_kr_tryget(kring, 0 /* can't sleep */, NULL)) { @@ -2497,8 +2711,8 @@ netmap_bwrap_reg(struct netmap_adapter *na, int onoff) /* pass down the pending ring state information */ for_rx_tx(t) { for (i = 0; i < nma_get_nrings(na, t) + 1; i++) - NMR(hwna, t)[i].nr_pending_mode = - NMR(na, 
t)[i].nr_pending_mode; + NMR(hwna, t)[i]->nr_pending_mode = + NMR(na, t)[i]->nr_pending_mode; } /* forward the request to the hwna */ @@ -2509,8 +2723,8 @@ netmap_bwrap_reg(struct netmap_adapter *na, int onoff) /* copy up the current ring state information */ for_rx_tx(t) { for (i = 0; i < nma_get_nrings(na, t) + 1; i++) { - struct netmap_kring *kring = &NMR(hwna, t)[i]; - NMR(na, t)[i].nr_mode = kring->nr_mode; + struct netmap_kring *kring = NMR(hwna, t)[i]; + NMR(na, t)[i]->nr_mode = kring->nr_mode; } } @@ -2523,15 +2737,15 @@ netmap_bwrap_reg(struct netmap_adapter *na, int onoff) u_int i; /* intercept the hwna nm_nofify callback on the hw rings */ for (i = 0; i < hwna->num_rx_rings; i++) { - hwna->rx_rings[i].save_notify = hwna->rx_rings[i].nm_notify; - hwna->rx_rings[i].nm_notify = netmap_bwrap_intr_notify; + hwna->rx_rings[i]->save_notify = hwna->rx_rings[i]->nm_notify; + hwna->rx_rings[i]->nm_notify = netmap_bwrap_intr_notify; } i = hwna->num_rx_rings; /* for safety */ /* save the host ring notify unconditionally */ - hwna->rx_rings[i].save_notify = hwna->rx_rings[i].nm_notify; + hwna->rx_rings[i]->save_notify = hwna->rx_rings[i]->nm_notify; if (hostna->na_bdg) { /* also intercept the host ring notify */ - hwna->rx_rings[i].nm_notify = netmap_bwrap_intr_notify; + hwna->rx_rings[i]->nm_notify = netmap_bwrap_intr_notify; } if (na->active_fds == 0) na->na_flags |= NAF_NETMAP_ON; @@ -2543,17 +2757,18 @@ netmap_bwrap_reg(struct netmap_adapter *na, int onoff) /* reset all notify callbacks (including host ring) */ for (i = 0; i <= hwna->num_rx_rings; i++) { - hwna->rx_rings[i].nm_notify = hwna->rx_rings[i].save_notify; - hwna->rx_rings[i].save_notify = NULL; + hwna->rx_rings[i]->nm_notify = hwna->rx_rings[i]->save_notify; + hwna->rx_rings[i]->save_notify = NULL; } hwna->na_lut.lut = NULL; + hwna->na_lut.plut = NULL; hwna->na_lut.objtotal = 0; hwna->na_lut.objsize = 0; /* pass ownership of the netmap rings to the hwna */ for_rx_tx(t) { for (i = 0; i < 
nma_get_nrings(na, t) + 1; i++) { - NMR(na, t)[i].ring = NULL; + NMR(na, t)[i]->ring = NULL; } } @@ -2564,8 +2779,7 @@ netmap_bwrap_reg(struct netmap_adapter *na, int onoff) /* nm_config callback for bwrap */ static int -netmap_bwrap_config(struct netmap_adapter *na, u_int *txr, u_int *txd, - u_int *rxr, u_int *rxd) +netmap_bwrap_config(struct netmap_adapter *na, struct nm_config_info *info) { struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter *)na; @@ -2573,11 +2787,12 @@ netmap_bwrap_config(struct netmap_adapter *na, u_int *txr, u_int *txd, /* forward the request */ netmap_update_config(hwna); - /* swap the results */ - *txr = hwna->num_rx_rings; - *txd = hwna->num_rx_desc; - *rxr = hwna->num_tx_rings; - *rxd = hwna->num_rx_desc; + /* swap the results and propagate */ + info->num_tx_rings = hwna->num_rx_rings; + info->num_tx_descs = hwna->num_rx_desc; + info->num_rx_rings = hwna->num_tx_rings; + info->num_rx_descs = hwna->num_tx_desc; + info->rx_buf_maxsize = hwna->rx_buf_maxsize; return 0; } @@ -2610,7 +2825,7 @@ netmap_bwrap_krings_create(struct netmap_adapter *na) /* increment the usage counter for all the hwna krings */ for_rx_tx(t) { for (i = 0; i < nma_get_nrings(hwna, t) + 1; i++) { - NMR(hwna, t)[i].users++; + NMR(hwna, t)[i]->users++; } } @@ -2627,8 +2842,8 @@ netmap_bwrap_krings_create(struct netmap_adapter *na) for_rx_tx(t) { enum txrx r = nm_txrx_swap(t); /* swap NR_TX <-> NR_RX */ for (i = 0; i < nma_get_nrings(hwna, r) + 1; i++) { - NMR(na, t)[i].nkr_num_slots = NMR(hwna, r)[i].nkr_num_slots; - NMR(na, t)[i].ring = NMR(hwna, r)[i].ring; + NMR(na, t)[i]->nkr_num_slots = NMR(hwna, r)[i]->nkr_num_slots; + NMR(na, t)[i]->ring = NMR(hwna, r)[i]->ring; } } @@ -2638,16 +2853,16 @@ netmap_bwrap_krings_create(struct netmap_adapter *na) * hostna */ hostna->tx_rings = &na->tx_rings[na->num_tx_rings]; - hostna->tx_rings[0].na = hostna; + hostna->tx_rings[0]->na = hostna; hostna->rx_rings = &na->rx_rings[na->num_rx_rings]; - hostna->rx_rings[0].na = 
hostna; + hostna->rx_rings[0]->na = hostna; } return 0; err_dec_users: for_rx_tx(t) { - NMR(hwna, t)[i].users--; + NMR(hwna, t)[i]->users--; } hwna->nm_krings_delete(hwna); err_del_vp_rings: @@ -2671,7 +2886,7 @@ netmap_bwrap_krings_delete(struct netmap_adapter *na) /* decrement the usage counter for all the hwna krings */ for_rx_tx(t) { for (i = 0; i < nma_get_nrings(hwna, t) + 1; i++) { - NMR(hwna, t)[i].users--; + NMR(hwna, t)[i]->users--; } } @@ -2698,7 +2913,7 @@ netmap_bwrap_notify(struct netmap_kring *kring, int flags) (kring ? kring->name : "NULL!"), (na ? na->name : "NULL!"), (hwna ? hwna->name : "NULL!")); - hw_kring = &hwna->tx_rings[ring_n]; + hw_kring = hwna->tx_rings[ring_n]; if (nm_kr_tryget(hw_kring, 0, NULL)) { return ENXIO; @@ -2746,13 +2961,22 @@ put_out: * directed to hwna. */ static int -netmap_bwrap_bdg_ctl(struct netmap_adapter *na, struct nmreq *nmr, int attach) +netmap_bwrap_bdg_ctl(struct nmreq_header *hdr, struct netmap_adapter *na) { struct netmap_priv_d *npriv; struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter*)na; int error = 0; - if (attach) { + if (hdr->nr_reqtype == NETMAP_REQ_VALE_ATTACH) { + struct nmreq_vale_attach *req = + (struct nmreq_vale_attach *)hdr->nr_body; + if (req->reg.nr_ringid != 0 || + (req->reg.nr_mode != NR_REG_ALL_NIC && + req->reg.nr_mode != NR_REG_NIC_SW)) { + /* We only support attaching all the NIC rings + * and/or the host stack. 
*/ + return EINVAL; + } if (NETMAP_OWNED_BY_ANY(na)) { return EBUSY; } @@ -2764,7 +2988,8 @@ netmap_bwrap_bdg_ctl(struct netmap_adapter *na, struct nmreq *nmr, int attach) if (npriv == NULL) return ENOMEM; npriv->np_ifp = na->ifp; /* let the priv destructor release the ref */ - error = netmap_do_regif(npriv, na, nmr->nr_ringid, nmr->nr_flags); + error = netmap_do_regif(npriv, na, req->reg.nr_mode, + req->reg.nr_ringid, req->reg.nr_flags); if (error) { netmap_priv_delete(npriv); return error; @@ -2778,8 +3003,8 @@ netmap_bwrap_bdg_ctl(struct netmap_adapter *na, struct nmreq *nmr, int attach) bna->na_kpriv = NULL; na->na_flags &= ~NAF_BUSY; } - return error; + return error; } /* attach a bridge wrapper to the 'real' device */ @@ -2837,7 +3062,9 @@ netmap_bwrap_attach(const char *nr_name, struct netmap_adapter *hwna) bna->hwna = hwna; netmap_adapter_get(hwna); hwna->na_private = bna; /* weak reference */ + bna->saved_na_vp = hwna->na_vp; hwna->na_vp = &bna->up; + bna->up.up.na_vp = &(bna->up); if (hwna->na_flags & NAF_HOST_RINGS) { if (hwna->na_flags & NAF_SW_ONLY) |
