diff options
| author | Vincenzo Maffione <vmaffione@FreeBSD.org> | 2018-04-09 09:24:26 +0000 |
|---|---|---|
| committer | Vincenzo Maffione <vmaffione@FreeBSD.org> | 2018-04-09 09:24:26 +0000 |
| commit | 4f80b14ce2b17100b12dc3a346fb9e6e76764e11 (patch) | |
| tree | e7c1347079629914a4d8c369d8d70121ee53904f /sys/dev/netmap | |
| parent | df4531ffd910985c8ec5a288a69adff34ceb6c03 (diff) | |
Notes
Diffstat (limited to 'sys/dev/netmap')
| -rw-r--r-- | sys/dev/netmap/if_em_netmap.h | 4 | ||||
| -rw-r--r-- | sys/dev/netmap/if_igb_netmap.h | 4 | ||||
| -rw-r--r-- | sys/dev/netmap/if_ixl_netmap.h | 3 | ||||
| -rw-r--r-- | sys/dev/netmap/if_lem_netmap.h | 4 | ||||
| -rw-r--r-- | sys/dev/netmap/if_ptnet.c | 14 | ||||
| -rw-r--r-- | sys/dev/netmap/if_re_netmap.h | 3 | ||||
| -rw-r--r-- | sys/dev/netmap/if_vtnet_netmap.h | 38 | ||||
| -rw-r--r-- | sys/dev/netmap/ixgbe_netmap.h | 3 | ||||
| -rw-r--r-- | sys/dev/netmap/netmap.c | 296 | ||||
| -rw-r--r-- | sys/dev/netmap/netmap_freebsd.c | 24 | ||||
| -rw-r--r-- | sys/dev/netmap/netmap_generic.c | 45 | ||||
| -rw-r--r-- | sys/dev/netmap/netmap_kern.h | 208 | ||||
| -rw-r--r-- | sys/dev/netmap/netmap_mem2.c | 758 | ||||
| -rw-r--r-- | sys/dev/netmap/netmap_mem2.h | 13 | ||||
| -rw-r--r-- | sys/dev/netmap/netmap_monitor.c | 8 | ||||
| -rw-r--r-- | sys/dev/netmap/netmap_offloadings.c | 24 | ||||
| -rw-r--r-- | sys/dev/netmap/netmap_pipe.c | 18 | ||||
| -rw-r--r-- | sys/dev/netmap/netmap_pt.c | 24 | ||||
| -rw-r--r-- | sys/dev/netmap/netmap_vale.c | 202 |
19 files changed, 1230 insertions, 463 deletions
diff --git a/sys/dev/netmap/if_em_netmap.h b/sys/dev/netmap/if_em_netmap.h index 26f85fdf321d5..299bc3837d5ea 100644 --- a/sys/dev/netmap/if_em_netmap.h +++ b/sys/dev/netmap/if_em_netmap.h @@ -235,8 +235,6 @@ em_netmap_rxsync(struct netmap_kring *kring, int flags) * First part: import newly received packets. */ if (netmap_no_pendintr || force_update) { - uint16_t slot_flags = kring->nkr_slot_flags; - nic_i = rxr->next_to_check; nm_i = netmap_idx_n2k(kring, nic_i); @@ -247,7 +245,7 @@ em_netmap_rxsync(struct netmap_kring *kring, int flags) if ((staterr & E1000_RXD_STAT_DD) == 0) break; ring->slot[nm_i].len = le16toh(curr->wb.upper.length); - ring->slot[nm_i].flags = slot_flags; + ring->slot[nm_i].flags = 0; bus_dmamap_sync(rxr->rxtag, rxr->rx_buffers[nic_i].map, BUS_DMASYNC_POSTREAD); nm_i = nm_next(nm_i, lim); diff --git a/sys/dev/netmap/if_igb_netmap.h b/sys/dev/netmap/if_igb_netmap.h index cd80f2663bc70..df15ceee7d8cb 100644 --- a/sys/dev/netmap/if_igb_netmap.h +++ b/sys/dev/netmap/if_igb_netmap.h @@ -217,8 +217,6 @@ igb_netmap_rxsync(struct netmap_kring *kring, int flags) * First part: import newly received packets. */ if (netmap_no_pendintr || force_update) { - uint16_t slot_flags = kring->nkr_slot_flags; - nic_i = rxr->next_to_check; nm_i = netmap_idx_n2k(kring, nic_i); @@ -229,7 +227,7 @@ igb_netmap_rxsync(struct netmap_kring *kring, int flags) if ((staterr & E1000_RXD_STAT_DD) == 0) break; ring->slot[nm_i].len = le16toh(curr->wb.upper.length); - ring->slot[nm_i].flags = slot_flags; + ring->slot[nm_i].flags = 0; bus_dmamap_sync(rxr->ptag, rxr->rx_buffers[nic_i].pmap, BUS_DMASYNC_POSTREAD); nm_i = nm_next(nm_i, lim); diff --git a/sys/dev/netmap/if_ixl_netmap.h b/sys/dev/netmap/if_ixl_netmap.h index ea0bf35dea67e..e9b036d34e874 100644 --- a/sys/dev/netmap/if_ixl_netmap.h +++ b/sys/dev/netmap/if_ixl_netmap.h @@ -331,7 +331,6 @@ ixl_netmap_rxsync(struct netmap_kring *kring, int flags) */ if (netmap_no_pendintr || force_update) { int crclen = ixl_crcstrip ? 
0 : 4; - uint16_t slot_flags = kring->nkr_slot_flags; nic_i = rxr->next_check; // or also k2n(kring->nr_hwtail) nm_i = netmap_idx_n2k(kring, nic_i); @@ -346,7 +345,7 @@ ixl_netmap_rxsync(struct netmap_kring *kring, int flags) break; ring->slot[nm_i].len = ((qword & I40E_RXD_QW1_LENGTH_PBUF_MASK) >> I40E_RXD_QW1_LENGTH_PBUF_SHIFT) - crclen; - ring->slot[nm_i].flags = slot_flags; + ring->slot[nm_i].flags = 0; bus_dmamap_sync(rxr->ptag, rxr->buffers[nic_i].pmap, BUS_DMASYNC_POSTREAD); nm_i = nm_next(nm_i, lim); diff --git a/sys/dev/netmap/if_lem_netmap.h b/sys/dev/netmap/if_lem_netmap.h index 56d65733a4f5f..f8ba2bb716c85 100644 --- a/sys/dev/netmap/if_lem_netmap.h +++ b/sys/dev/netmap/if_lem_netmap.h @@ -216,8 +216,6 @@ lem_netmap_rxsync(struct netmap_kring *kring, int flags) * First part: import newly received packets. */ if (netmap_no_pendintr || force_update) { - uint16_t slot_flags = kring->nkr_slot_flags; - nic_i = adapter->next_rx_desc_to_check; nm_i = netmap_idx_n2k(kring, nic_i); @@ -234,7 +232,7 @@ lem_netmap_rxsync(struct netmap_kring *kring, int flags) len = 0; } ring->slot[nm_i].len = len; - ring->slot[nm_i].flags = slot_flags; + ring->slot[nm_i].flags = 0; bus_dmamap_sync(adapter->rxtag, adapter->rx_buffer_area[nic_i].map, BUS_DMASYNC_POSTREAD); diff --git a/sys/dev/netmap/if_ptnet.c b/sys/dev/netmap/if_ptnet.c index becf7e4e8f04b..1805a7f31e48d 100644 --- a/sys/dev/netmap/if_ptnet.c +++ b/sys/dev/netmap/if_ptnet.c @@ -216,6 +216,7 @@ static void ptnet_update_vnet_hdr(struct ptnet_softc *sc); static int ptnet_nm_register(struct netmap_adapter *na, int onoff); static int ptnet_nm_txsync(struct netmap_kring *kring, int flags); static int ptnet_nm_rxsync(struct netmap_kring *kring, int flags); +static void ptnet_nm_intr(struct netmap_adapter *na, int onoff); static void ptnet_tx_intr(void *opaque); static void ptnet_rx_intr(void *opaque); @@ -477,6 +478,7 @@ ptnet_attach(device_t dev) na_arg.nm_krings_create = ptnet_nm_krings_create; na_arg.nm_krings_delete 
= ptnet_nm_krings_delete; na_arg.nm_dtor = ptnet_nm_dtor; + na_arg.nm_intr = ptnet_nm_intr; na_arg.nm_register = ptnet_nm_register; na_arg.nm_txsync = ptnet_nm_txsync; na_arg.nm_rxsync = ptnet_nm_rxsync; @@ -1299,6 +1301,18 @@ ptnet_nm_rxsync(struct netmap_kring *kring, int flags) } static void +ptnet_nm_intr(struct netmap_adapter *na, int onoff) +{ + struct ptnet_softc *sc = if_getsoftc(na->ifp); + int i; + + for (i = 0; i < sc->num_rings; i++) { + struct ptnet_queue *pq = sc->queues + i; + pq->ptgh->guest_need_kick = onoff; + } +} + +static void ptnet_tx_intr(void *opaque) { struct ptnet_queue *pq = opaque; diff --git a/sys/dev/netmap/if_re_netmap.h b/sys/dev/netmap/if_re_netmap.h index 2cb3454c5f312..e7dd087acc676 100644 --- a/sys/dev/netmap/if_re_netmap.h +++ b/sys/dev/netmap/if_re_netmap.h @@ -201,7 +201,6 @@ re_netmap_rxsync(struct netmap_kring *kring, int flags) * is to stop right before nm_hwcur. */ if (netmap_no_pendintr || force_update) { - uint16_t slot_flags = kring->nkr_slot_flags; uint32_t stop_i = nm_prev(kring->nr_hwcur, lim); nic_i = sc->rl_ldata.rl_rx_prodidx; /* next pkt to check */ @@ -218,7 +217,7 @@ re_netmap_rxsync(struct netmap_kring *kring, int flags) /* XXX subtract crc */ total_len = (total_len < 4) ? 0 : total_len - 4; ring->slot[nm_i].len = total_len; - ring->slot[nm_i].flags = slot_flags; + ring->slot[nm_i].flags = 0; /* sync was in re_newbuf() */ bus_dmamap_sync(sc->rl_ldata.rl_rx_mtag, rxd[nic_i].rx_dmamap, BUS_DMASYNC_POSTREAD); diff --git a/sys/dev/netmap/if_vtnet_netmap.h b/sys/dev/netmap/if_vtnet_netmap.h index 4bed0e718dd49..10789c53d1f06 100644 --- a/sys/dev/netmap/if_vtnet_netmap.h +++ b/sys/dev/netmap/if_vtnet_netmap.h @@ -122,6 +122,7 @@ vtnet_netmap_txsync(struct netmap_kring *kring, int flags) struct SOFTC_T *sc = ifp->if_softc; struct vtnet_txq *txq = &sc->vtnet_txqs[ring_nr]; struct virtqueue *vq = txq->vtntx_vq; + int interrupts = !(kring->nr_kflags & NKR_NOINTR); /* * First part: process new packets to send. 
@@ -179,7 +180,9 @@ vtnet_netmap_txsync(struct netmap_kring *kring, int flags) ring->head, ring->tail, virtqueue_nused(vq), (virtqueue_dump(vq), 1)); virtqueue_notify(vq); - virtqueue_enable_intr(vq); // like postpone with 0 + if (interrupts) { + virtqueue_enable_intr(vq); // like postpone with 0 + } } @@ -209,7 +212,7 @@ vtnet_netmap_txsync(struct netmap_kring *kring, int flags) if (nm_i != kring->nr_hwtail /* && vtnet_txq_below_threshold(txq) == 0*/) { ND(3, "disable intr, hwcur %d", nm_i); virtqueue_disable_intr(vq); - } else { + } else if (interrupts) { ND(3, "enable intr, hwcur %d", nm_i); virtqueue_postpone_intr(vq, VQ_POSTPONE_SHORT); } @@ -277,6 +280,7 @@ vtnet_netmap_rxsync(struct netmap_kring *kring, int flags) u_int const lim = kring->nkr_num_slots - 1; u_int const head = kring->rhead; int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR; + int interrupts = !(kring->nr_kflags & NKR_NOINTR); /* device-specific */ struct SOFTC_T *sc = ifp->if_softc; @@ -297,7 +301,6 @@ vtnet_netmap_rxsync(struct netmap_kring *kring, int flags) * and vtnet_netmap_init_buffers(). */ if (netmap_no_pendintr || force_update) { - uint16_t slot_flags = kring->nkr_slot_flags; struct netmap_adapter *token; nm_i = kring->nr_hwtail; @@ -309,7 +312,7 @@ vtnet_netmap_rxsync(struct netmap_kring *kring, int flags) break; if (likely(token == (void *)rxq)) { ring->slot[nm_i].len = len; - ring->slot[nm_i].flags = slot_flags; + ring->slot[nm_i].flags = 0; nm_i = nm_next(nm_i, lim); n++; } else { @@ -334,7 +337,9 @@ vtnet_netmap_rxsync(struct netmap_kring *kring, int flags) kring->nr_hwcur = err; virtqueue_notify(vq); /* After draining the queue may need an intr from the hypervisor */ - vtnet_rxq_enable_intr(rxq); + if (interrupts) { + vtnet_rxq_enable_intr(rxq); + } } ND("[C] h %d c %d t %d hwcur %d hwtail %d", @@ -345,6 +350,28 @@ vtnet_netmap_rxsync(struct netmap_kring *kring, int flags) } +/* Enable/disable interrupts on all virtqueues. 
*/ +static void +vtnet_netmap_intr(struct netmap_adapter *na, int onoff) +{ + struct SOFTC_T *sc = na->ifp->if_softc; + int i; + + for (i = 0; i < sc->vtnet_max_vq_pairs; i++) { + struct vtnet_rxq *rxq = &sc->vtnet_rxqs[i]; + struct vtnet_txq *txq = &sc->vtnet_txqs[i]; + struct virtqueue *txvq = txq->vtntx_vq; + + if (onoff) { + vtnet_rxq_enable_intr(rxq); + virtqueue_enable_intr(txvq); + } else { + vtnet_rxq_disable_intr(rxq); + virtqueue_disable_intr(txvq); + } + } +} + /* Make RX virtqueues buffers pointing to netmap buffers. */ static int vtnet_netmap_init_rx_buffers(struct SOFTC_T *sc) @@ -417,6 +444,7 @@ vtnet_netmap_attach(struct SOFTC_T *sc) na.nm_txsync = vtnet_netmap_txsync; na.nm_rxsync = vtnet_netmap_rxsync; na.nm_config = vtnet_netmap_config; + na.nm_intr = vtnet_netmap_intr; na.num_tx_rings = na.num_rx_rings = sc->vtnet_max_vq_pairs; D("max rings %d", sc->vtnet_max_vq_pairs); netmap_attach(&na); diff --git a/sys/dev/netmap/ixgbe_netmap.h b/sys/dev/netmap/ixgbe_netmap.h index 419e08f0f6def..30da631917750 100644 --- a/sys/dev/netmap/ixgbe_netmap.h +++ b/sys/dev/netmap/ixgbe_netmap.h @@ -397,7 +397,6 @@ ixgbe_netmap_rxsync(struct netmap_kring *kring, int flags) */ if (netmap_no_pendintr || force_update) { int crclen = (ix_crcstrip || IXGBE_IS_VF(adapter) ) ? 
0 : 4; - uint16_t slot_flags = kring->nkr_slot_flags; nic_i = rxr->next_to_check; // or also k2n(kring->nr_hwtail) nm_i = netmap_idx_n2k(kring, nic_i); @@ -409,7 +408,7 @@ ixgbe_netmap_rxsync(struct netmap_kring *kring, int flags) if ((staterr & IXGBE_RXD_STAT_DD) == 0) break; ring->slot[nm_i].len = le16toh(curr->wb.upper.length) - crclen; - ring->slot[nm_i].flags = slot_flags; + ring->slot[nm_i].flags = 0; bus_dmamap_sync(rxr->ptag, rxr->rx_buffers[nic_i].pmap, BUS_DMASYNC_POSTREAD); nm_i = nm_next(nm_i, lim); diff --git a/sys/dev/netmap/netmap.c b/sys/dev/netmap/netmap.c index edcc308e8d870..3c5551bad1569 100644 --- a/sys/dev/netmap/netmap.c +++ b/sys/dev/netmap/netmap.c @@ -482,10 +482,8 @@ ports attached to the switch) int netmap_verbose; static int netmap_no_timestamp; /* don't timestamp on rxsync */ -int netmap_mitigate = 1; int netmap_no_pendintr = 1; int netmap_txsync_retry = 2; -int netmap_flags = 0; /* debug flags */ static int netmap_fwd = 0; /* force transparent forwarding */ /* @@ -515,7 +513,9 @@ int netmap_generic_mit = 100*1000; * Anyway users looking for the best performance should * use native adapters. */ +#ifdef linux int netmap_generic_txqdisc = 1; +#endif /* Default number of slots and queues for generic adapters. 
*/ int netmap_generic_ringsize = 1024; @@ -539,21 +539,32 @@ SYSCTL_INT(_dev_netmap, OID_AUTO, verbose, CTLFLAG_RW, &netmap_verbose, 0, "Verbose mode"); SYSCTL_INT(_dev_netmap, OID_AUTO, no_timestamp, CTLFLAG_RW, &netmap_no_timestamp, 0, "no_timestamp"); -SYSCTL_INT(_dev_netmap, OID_AUTO, mitigate, CTLFLAG_RW, &netmap_mitigate, 0, ""); -SYSCTL_INT(_dev_netmap, OID_AUTO, no_pendintr, - CTLFLAG_RW, &netmap_no_pendintr, 0, "Always look for new received packets."); +SYSCTL_INT(_dev_netmap, OID_AUTO, no_pendintr, CTLFLAG_RW, &netmap_no_pendintr, + 0, "Always look for new received packets."); SYSCTL_INT(_dev_netmap, OID_AUTO, txsync_retry, CTLFLAG_RW, - &netmap_txsync_retry, 0 , "Number of txsync loops in bridge's flush."); + &netmap_txsync_retry, 0, "Number of txsync loops in bridge's flush."); -SYSCTL_INT(_dev_netmap, OID_AUTO, flags, CTLFLAG_RW, &netmap_flags, 0 , ""); -SYSCTL_INT(_dev_netmap, OID_AUTO, fwd, CTLFLAG_RW, &netmap_fwd, 0 , ""); -SYSCTL_INT(_dev_netmap, OID_AUTO, admode, CTLFLAG_RW, &netmap_admode, 0 , ""); -SYSCTL_INT(_dev_netmap, OID_AUTO, generic_mit, CTLFLAG_RW, &netmap_generic_mit, 0 , ""); -SYSCTL_INT(_dev_netmap, OID_AUTO, generic_ringsize, CTLFLAG_RW, &netmap_generic_ringsize, 0 , ""); -SYSCTL_INT(_dev_netmap, OID_AUTO, generic_rings, CTLFLAG_RW, &netmap_generic_rings, 0 , ""); -SYSCTL_INT(_dev_netmap, OID_AUTO, generic_txqdisc, CTLFLAG_RW, &netmap_generic_txqdisc, 0 , ""); -SYSCTL_INT(_dev_netmap, OID_AUTO, ptnet_vnet_hdr, CTLFLAG_RW, &ptnet_vnet_hdr, 0 , ""); -SYSCTL_INT(_dev_netmap, OID_AUTO, ptnetmap_tx_workers, CTLFLAG_RW, &ptnetmap_tx_workers, 0 , ""); +SYSCTL_INT(_dev_netmap, OID_AUTO, fwd, CTLFLAG_RW, &netmap_fwd, 0, + "Force NR_FORWARD mode"); +SYSCTL_INT(_dev_netmap, OID_AUTO, admode, CTLFLAG_RW, &netmap_admode, 0, + "Adapter mode. 
0 selects the best option available," + "1 forces native adapter, 2 forces emulated adapter"); +SYSCTL_INT(_dev_netmap, OID_AUTO, generic_mit, CTLFLAG_RW, &netmap_generic_mit, + 0, "RX notification interval in nanoseconds"); +SYSCTL_INT(_dev_netmap, OID_AUTO, generic_ringsize, CTLFLAG_RW, + &netmap_generic_ringsize, 0, + "Number of per-ring slots for emulated netmap mode"); +SYSCTL_INT(_dev_netmap, OID_AUTO, generic_rings, CTLFLAG_RW, + &netmap_generic_rings, 0, + "Number of TX/RX queues for emulated netmap adapters"); +#ifdef linux +SYSCTL_INT(_dev_netmap, OID_AUTO, generic_txqdisc, CTLFLAG_RW, + &netmap_generic_txqdisc, 0, "Use qdisc for generic adapters"); +#endif +SYSCTL_INT(_dev_netmap, OID_AUTO, ptnet_vnet_hdr, CTLFLAG_RW, &ptnet_vnet_hdr, + 0, "Allow ptnet devices to use virtio-net headers"); +SYSCTL_INT(_dev_netmap, OID_AUTO, ptnetmap_tx_workers, CTLFLAG_RW, + &ptnetmap_tx_workers, 0, "Use worker threads for pnetmap TX processing"); SYSEND; @@ -912,7 +923,19 @@ netmap_hw_krings_delete(struct netmap_adapter *na) netmap_krings_delete(na); } - +static void +netmap_mem_drop(struct netmap_adapter *na) +{ + int last = netmap_mem_deref(na->nm_mem, na); + /* if the native allocator had been overridden on regif, + * restore it now and drop the temporary one + */ + if (last && na->nm_mem_prev) { + netmap_mem_put(na->nm_mem); + na->nm_mem = na->nm_mem_prev; + na->nm_mem_prev = NULL; + } +} /* * Undo everything that was done in netmap_do_regif().
In particular, @@ -980,7 +1003,7 @@ netmap_do_unregif(struct netmap_priv_d *priv) /* delete the nifp */ netmap_mem_if_delete(na, priv->np_nifp); /* drop the allocator */ - netmap_mem_deref(na->nm_mem, na); + netmap_mem_drop(na); /* mark the priv as unregistered */ priv->np_na = NULL; priv->np_nifp = NULL; @@ -1289,7 +1312,7 @@ netmap_rxsync_from_host(struct netmap_kring *kring, int flags) D("%s", nm_dump_buf(NMB(na, slot),len, 128, NULL)); slot->len = len; - slot->flags = kring->nkr_slot_flags; + slot->flags = 0; nm_i = nm_next(nm_i, lim); mbq_enqueue(&fq, m); } @@ -1409,7 +1432,7 @@ netmap_get_hw_na(struct ifnet *ifp, struct netmap_mem_d *nmd, struct netmap_adap assign_mem: if (nmd != NULL && !((*na)->na_flags & NAF_MEM_OWNER) && (*na)->active_fds == 0 && ((*na)->nm_mem != nmd)) { - netmap_mem_put((*na)->nm_mem); + (*na)->nm_mem_prev = (*na)->nm_mem; (*na)->nm_mem = netmap_mem_get(nmd); } @@ -1896,7 +1919,8 @@ netmap_krings_get(struct netmap_priv_d *priv) int excl = (priv->np_flags & NR_EXCLUSIVE); enum txrx t; - ND("%s: grabbing tx [%d, %d) rx [%d, %d)", + if (netmap_verbose) + D("%s: grabbing tx [%d, %d) rx [%d, %d)", na->name, priv->np_qfirst[NR_TX], priv->np_qlast[NR_TX], @@ -2059,9 +2083,57 @@ netmap_do_regif(struct netmap_priv_d *priv, struct netmap_adapter *na, if (na->active_fds == 0) { /* * If this is the first registration of the adapter, - * create the in-kernel view of the netmap rings, - * the netmap krings. + * perform sanity checks and create the in-kernel view + * of the netmap rings (the netmap krings). */ + if (na->ifp) { + /* This netmap adapter is attached to an ifnet. */ + unsigned nbs = netmap_mem_bufsize(na->nm_mem); + unsigned mtu = nm_os_ifnet_mtu(na->ifp); + /* The maximum amount of bytes that a single + * receive or transmit NIC descriptor can hold. */ + unsigned hw_max_slot_len = 4096; + + if (mtu <= hw_max_slot_len) { + /* The MTU fits a single NIC slot. We only + * Need to check that netmap buffers are + * large enough to hold an MTU. 
NS_MOREFRAG + * cannot be used in this case. */ + if (nbs < mtu) { + nm_prerr("error: netmap buf size (%u) " + "< device MTU (%u)", nbs, mtu); + error = EINVAL; + goto err_drop_mem; + } + } else { + /* More NIC slots may be needed to receive + * or transmit a single packet. Check that + * the adapter supports NS_MOREFRAG and that + * netmap buffers are large enough to hold + * the maximum per-slot size. */ + if (!(na->na_flags & NAF_MOREFRAG)) { + nm_prerr("error: large MTU (%d) needed " + "but %s does not support " + "NS_MOREFRAG", mtu, + na->ifp->if_xname); + error = EINVAL; + goto err_drop_mem; + } else if (nbs < hw_max_slot_len) { + nm_prerr("error: using NS_MOREFRAG on " + "%s requires netmap buf size " + ">= %u", na->ifp->if_xname, + hw_max_slot_len); + error = EINVAL; + goto err_drop_mem; + } else { + nm_prinf("info: netmap application on " + "%s needs to support " + "NS_MOREFRAG " + "(MTU=%u,netmap_buf_size=%u)", + na->ifp->if_xname, mtu, nbs); + } + } + } /* * Depending on the adapter, this may also create @@ -2128,15 +2200,15 @@ err_put_lut: memset(&na->na_lut, 0, sizeof(na->na_lut)); err_del_if: netmap_mem_if_delete(na, nifp); -err_rel_excl: - netmap_krings_put(priv); err_del_rings: netmap_mem_rings_delete(na); +err_rel_excl: + netmap_krings_put(priv); err_del_krings: if (na->active_fds == 0) na->nm_krings_delete(na); err_drop_mem: - netmap_mem_deref(na->nm_mem, na); + netmap_mem_drop(na); err: priv->np_na = NULL; return error; @@ -2224,6 +2296,7 @@ netmap_ioctl(struct netmap_priv_d *priv, u_long cmd, caddr_t data, struct thread do { /* memsize is always valid */ u_int memflags; + uint64_t memsize; if (nmr->nr_name[0] != '\0') { @@ -2243,10 +2316,11 @@ netmap_ioctl(struct netmap_priv_d *priv, u_long cmd, caddr_t data, struct thread } } - error = netmap_mem_get_info(nmd, &nmr->nr_memsize, &memflags, + error = netmap_mem_get_info(nmd, &memsize, &memflags, &nmr->nr_arg2); if (error) break; + nmr->nr_memsize = (uint32_t)memsize; if (na == NULL) /* only 
memory info */ break; nmr->nr_offset = 0; @@ -2304,6 +2378,17 @@ netmap_ioctl(struct netmap_priv_d *priv, u_long cmd, caddr_t data, struct thread } NMG_UNLOCK(); break; + } else if (i == NETMAP_POOLS_CREATE) { + nmd = netmap_mem_ext_create(nmr, &error); + if (nmd == NULL) + break; + /* reset the fields used by POOLS_CREATE to + * avoid confusing the rest of the code + */ + nmr->nr_cmd = 0; + nmr->nr_arg1 = 0; + nmr->nr_arg2 = 0; + nmr->nr_arg3 = 0; } else if (i != 0) { D("nr_cmd must be 0 not %d", i); error = EINVAL; @@ -2314,7 +2399,7 @@ netmap_ioctl(struct netmap_priv_d *priv, u_long cmd, caddr_t data, struct thread NMG_LOCK(); do { u_int memflags; - struct ifnet *ifp; + uint64_t memsize; if (priv->np_nifp != NULL) { /* thread already registered */ error = EBUSY; @@ -2356,12 +2441,13 @@ netmap_ioctl(struct netmap_priv_d *priv, u_long cmd, caddr_t data, struct thread nmr->nr_tx_rings = na->num_tx_rings; nmr->nr_rx_slots = na->num_rx_desc; nmr->nr_tx_slots = na->num_tx_desc; - error = netmap_mem_get_info(na->nm_mem, &nmr->nr_memsize, &memflags, + error = netmap_mem_get_info(na->nm_mem, &memsize, &memflags, &nmr->nr_arg2); if (error) { netmap_do_unregif(priv); break; } + nmr->nr_memsize = (uint32_t)memsize; if (memflags & NETMAP_MEM_PRIVATE) { *(uint32_t *)(uintptr_t)&nifp->ni_flags |= NI_PRIV_MEM; } @@ -2533,7 +2619,6 @@ netmap_poll(struct netmap_priv_d *priv, int events, NM_SELRECORD_T *sr) #define want_tx want[NR_TX] #define want_rx want[NR_RX] struct mbq q; /* packets from RX hw queues to host stack */ - enum txrx t; /* * In order to avoid nested locks, we need to "double check" @@ -2585,14 +2670,15 @@ netmap_poll(struct netmap_priv_d *priv, int events, NM_SELRECORD_T *sr) check_all_tx = nm_si_user(priv, NR_TX); check_all_rx = nm_si_user(priv, NR_RX); +#ifdef __FreeBSD__ /* * We start with a lock free round which is cheap if we have * slots available. If this fails, then lock and call the sync - * routines. + * routines. 
We can't do this on Linux, as the contract says + * that we must call nm_os_selrecord() unconditionally. */ -#if 1 /* new code- call rx if any of the ring needs to release or read buffers */ if (want_tx) { - t = NR_TX; + enum txrx t = NR_TX; for (i = priv->np_qfirst[t]; want[t] && i < priv->np_qlast[t]; i++) { kring = &NMR(na, t)[i]; /* XXX compare ring->cur and kring->tail */ @@ -2603,8 +2689,8 @@ netmap_poll(struct netmap_priv_d *priv, int events, NM_SELRECORD_T *sr) } } if (want_rx) { + enum txrx t = NR_RX; want_rx = 0; /* look for a reason to run the handlers */ - t = NR_RX; for (i = priv->np_qfirst[t]; i < priv->np_qlast[t]; i++) { kring = &NMR(na, t)[i]; if (kring->ring->cur == kring->ring->tail /* try fetch new buffers */ @@ -2615,24 +2701,20 @@ netmap_poll(struct netmap_priv_d *priv, int events, NM_SELRECORD_T *sr) if (!want_rx) revents |= events & (POLLIN | POLLRDNORM); /* we have data */ } -#else /* old code */ - for_rx_tx(t) { - for (i = priv->np_qfirst[t]; want[t] && i < priv->np_qlast[t]; i++) { - kring = &NMR(na, t)[i]; - /* XXX compare ring->cur and kring->tail */ - if (!nm_ring_empty(kring->ring)) { - revents |= want[t]; - want[t] = 0; /* also breaks the loop */ - } - } - } -#endif /* old code */ +#endif + +#ifdef linux + /* The selrecord must be unconditional on linux. */ + nm_os_selrecord(sr, check_all_tx ? + &na->si[NR_TX] : &na->tx_rings[priv->np_qfirst[NR_TX]].si); + nm_os_selrecord(sr, check_all_rx ? + &na->si[NR_RX] : &na->rx_rings[priv->np_qfirst[NR_RX]].si); +#endif /* linux */ /* * If we want to push packets out (priv->np_txpoll) or * want_tx is still set, we must issue txsync calls * (on all rings, to avoid that the tx rings stall). - * XXX should also check cur != hwcur on the tx rings. * Fortunately, normal tx mode has np_txpoll set. 
*/ if (priv->np_txpoll || want_tx) { @@ -2649,6 +2731,12 @@ flush_tx: kring = &na->tx_rings[i]; ring = kring->ring; + /* + * Don't try to txsync this TX ring if we already found some + * space in some of the TX rings (want_tx == 0) and there are no + * TX slots in this ring that need to be flushed to the NIC + * (cur == hwcur). + */ if (!send_down && !want_tx && ring->cur == kring->nr_hwcur) continue; @@ -2676,14 +2764,18 @@ flush_tx: if (found) { /* notify other listeners */ revents |= want_tx; want_tx = 0; +#ifndef linux kring->nm_notify(kring, 0); +#endif /* linux */ } } /* if there were any packet to forward we must have handled them by now */ send_down = 0; if (want_tx && retry_tx && sr) { +#ifndef linux nm_os_selrecord(sr, check_all_tx ? &na->si[NR_TX] : &na->tx_rings[priv->np_qfirst[NR_TX]].si); +#endif /* !linux */ retry_tx = 0; goto flush_tx; } @@ -2734,14 +2826,18 @@ do_retry_rx: if (found) { revents |= want_rx; retry_rx = 0; +#ifndef linux kring->nm_notify(kring, 0); +#endif /* linux */ } } +#ifndef linux if (retry_rx && sr) { nm_os_selrecord(sr, check_all_rx ? &na->si[NR_RX] : &na->rx_rings[priv->np_qfirst[NR_RX]].si); } +#endif /* !linux */ if (send_down || retry_rx) { retry_rx = 0; if (send_down) @@ -2766,6 +2862,44 @@ do_retry_rx: #undef want_rx } +int +nma_intr_enable(struct netmap_adapter *na, int onoff) +{ + bool changed = false; + enum txrx t; + int i; + + for_rx_tx(t) { + for (i = 0; i < nma_get_nrings(na, t); i++) { + struct netmap_kring *kring = &NMR(na, t)[i]; + int on = !(kring->nr_kflags & NKR_NOINTR); + + if (!!onoff != !!on) { + changed = true; + } + if (onoff) { + kring->nr_kflags &= ~NKR_NOINTR; + } else { + kring->nr_kflags |= NKR_NOINTR; + } + } + } + + if (!changed) { + return 0; /* nothing to do */ + } + + if (!na->nm_intr) { + D("Cannot %s interrupts for %s", onoff ? 
"enable" : "disable", + na->name); + return -1; + } + + na->nm_intr(na, onoff); + + return 0; +} + /*-------------------- driver support routines -------------------*/ @@ -2804,6 +2938,7 @@ netmap_attach_common(struct netmap_adapter *na) if (na->na_flags & NAF_HOST_RINGS && na->ifp) { na->if_input = na->ifp->if_input; /* for netmap_send_up */ } + na->pdev = na; /* make sure netmap_mem_map() is called */ #endif /* __FreeBSD__ */ if (na->nm_krings_create == NULL) { /* we assume that we have been called by a driver, @@ -2832,22 +2967,6 @@ netmap_attach_common(struct netmap_adapter *na) return 0; } - -/* standard cleanup, called by all destructors */ -void -netmap_detach_common(struct netmap_adapter *na) -{ - if (na->tx_rings) { /* XXX should not happen */ - D("freeing leftover tx_rings"); - na->nm_krings_delete(na); - } - netmap_pipe_dealloc(na); - if (na->nm_mem) - netmap_mem_put(na->nm_mem); - bzero(na, sizeof(*na)); - nm_os_free(na); -} - /* Wrapper for the register callback provided netmap-enabled * hardware drivers. * nm_iszombie(na) means that the driver module has been @@ -2900,7 +3019,7 @@ netmap_hw_dtor(struct netmap_adapter *na) * Return 0 on success, ENOMEM otherwise. */ int -netmap_attach_ext(struct netmap_adapter *arg, size_t size) +netmap_attach_ext(struct netmap_adapter *arg, size_t size, int override_reg) { struct netmap_hw_adapter *hwna = NULL; struct ifnet *ifp = NULL; @@ -2912,15 +3031,27 @@ netmap_attach_ext(struct netmap_adapter *arg, size_t size) if (arg == NULL || arg->ifp == NULL) goto fail; + ifp = arg->ifp; + if (NA(ifp) && !NM_NA_VALID(ifp)) { + /* If NA(ifp) is not null but there is no valid netmap + * adapter it means that someone else is using the same + * pointer (e.g. ax25_ptr on linux). This happens for + * instance when also PF_RING is in use. 
*/ + D("Error: netmap adapter hook is busy"); + return EBUSY; + } + hwna = nm_os_malloc(size); if (hwna == NULL) goto fail; hwna->up = *arg; hwna->up.na_flags |= NAF_HOST_RINGS | NAF_NATIVE; strncpy(hwna->up.name, ifp->if_xname, sizeof(hwna->up.name)); - hwna->nm_hw_register = hwna->up.nm_register; - hwna->up.nm_register = netmap_hw_reg; + if (override_reg) { + hwna->nm_hw_register = hwna->up.nm_register; + hwna->up.nm_register = netmap_hw_reg; + } if (netmap_attach_common(&hwna->up)) { nm_os_free(hwna); goto fail; @@ -2939,6 +3070,7 @@ netmap_attach_ext(struct netmap_adapter *arg, size_t size) #endif /* NETMAP_LINUX_HAVE_NETDEV_OPS */ } hwna->nm_ndo.ndo_start_xmit = linux_netmap_start_xmit; + hwna->nm_ndo.ndo_change_mtu = linux_netmap_change_mtu; if (ifp->ethtool_ops) { hwna->nm_eto = *ifp->ethtool_ops; } @@ -2968,7 +3100,8 @@ fail: int netmap_attach(struct netmap_adapter *arg) { - return netmap_attach_ext(arg, sizeof(struct netmap_hw_adapter)); + return netmap_attach_ext(arg, sizeof(struct netmap_hw_adapter), + 1 /* override nm_reg */); } @@ -2996,7 +3129,15 @@ NM_DBG(netmap_adapter_put)(struct netmap_adapter *na) if (na->nm_dtor) na->nm_dtor(na); - netmap_detach_common(na); + if (na->tx_rings) { /* XXX should not happen */ + D("freeing leftover tx_rings"); + na->nm_krings_delete(na); + } + netmap_pipe_dealloc(na); + if (na->nm_mem) + netmap_mem_put(na->nm_mem); + bzero(na, sizeof(*na)); + nm_os_free(na); return 1; } @@ -3029,15 +3170,14 @@ netmap_detach(struct ifnet *ifp) NMG_LOCK(); netmap_set_all_rings(na, NM_KR_LOCKED); - na->na_flags |= NAF_ZOMBIE; /* * if the netmap adapter is not native, somebody * changed it, so we can not release it here. * The NAF_ZOMBIE flag will notify the new owner that * the driver is gone. 
*/ - if (na->na_flags & NAF_NATIVE) { - netmap_adapter_put(na); + if (!(na->na_flags & NAF_NATIVE) || !netmap_adapter_put(na)) { + na->na_flags |= NAF_ZOMBIE; } /* give active users a chance to notice that NAF_ZOMBIE has been * turned on, so that they can stop and return an error to userspace. @@ -3116,9 +3256,9 @@ netmap_transmit(struct ifnet *ifp, struct mbuf *m) */ mbq_lock(q); - busy = kring->nr_hwtail - kring->nr_hwcur; - if (busy < 0) - busy += kring->nkr_num_slots; + busy = kring->nr_hwtail - kring->nr_hwcur; + if (busy < 0) + busy += kring->nkr_num_slots; if (busy + mbq_len(q) >= kring->nkr_num_slots - 1) { RD(2, "%s full hwcur %d hwtail %d qlen %d", na->name, kring->nr_hwcur, kring->nr_hwtail, mbq_len(q)); @@ -3216,16 +3356,6 @@ netmap_reset(struct netmap_adapter *na, enum txrx tx, u_int n, kring->nr_hwtail -= lim + 1; } -#if 0 // def linux - /* XXX check that the mappings are correct */ - /* need ring_nr, adapter->pdev, direction */ - buffer_info->dma = dma_map_single(&pdev->dev, addr, adapter->rx_buffer_len, DMA_FROM_DEVICE); - if (dma_mapping_error(&adapter->pdev->dev, buffer_info->dma)) { - D("error mapping rx netmap buffer %d", i); - // XXX fix error handling - } - -#endif /* linux */ /* * Wakeup on the individual and global selwait * We do the wakeup here, but the ring is not yet reconfigured. 
diff --git a/sys/dev/netmap/netmap_freebsd.c b/sys/dev/netmap/netmap_freebsd.c index b811a017822b8..c122dc64bed26 100644 --- a/sys/dev/netmap/netmap_freebsd.c +++ b/sys/dev/netmap/netmap_freebsd.c @@ -173,6 +173,16 @@ nm_os_ifnet_fini(void) nm_ifnet_dh_tag); } +unsigned +nm_os_ifnet_mtu(struct ifnet *ifp) +{ +#if __FreeBSD_version < 1100030 + return ifp->if_data.ifi_mtu; +#else /* __FreeBSD_version >= 1100030 */ + return ifp->if_mtu; +#endif +} + rawsum_t nm_os_csum_raw(uint8_t *data, size_t len, rawsum_t cur_sum) { @@ -294,24 +304,30 @@ nm_os_catch_rx(struct netmap_generic_adapter *gna, int intercept) { struct netmap_adapter *na = &gna->up.up; struct ifnet *ifp = na->ifp; + int ret = 0; + nm_os_ifnet_lock(); if (intercept) { if (gna->save_if_input) { D("cannot intercept again"); - return EINVAL; /* already set */ + ret = EINVAL; /* already set */ + goto out; } gna->save_if_input = ifp->if_input; ifp->if_input = freebsd_generic_rx_handler; } else { if (!gna->save_if_input){ D("cannot restore"); - return EINVAL; /* not saved */ + ret = EINVAL; /* not saved */ + goto out; } ifp->if_input = gna->save_if_input; gna->save_if_input = NULL; } +out: + nm_os_ifnet_unlock(); - return 0; + return ret; } @@ -327,12 +343,14 @@ nm_os_catch_tx(struct netmap_generic_adapter *gna, int intercept) struct netmap_adapter *na = &gna->up.up; struct ifnet *ifp = netmap_generic_getifp(gna); + nm_os_ifnet_lock(); if (intercept) { na->if_transmit = ifp->if_transmit; ifp->if_transmit = netmap_transmit; } else { ifp->if_transmit = na->if_transmit; } + nm_os_ifnet_unlock(); return 0; } diff --git a/sys/dev/netmap/netmap_generic.c b/sys/dev/netmap/netmap_generic.c index 1276a3a0c46cb..2ed251a557756 100644 --- a/sys/dev/netmap/netmap_generic.c +++ b/sys/dev/netmap/netmap_generic.c @@ -86,8 +86,6 @@ __FBSDID("$FreeBSD$"); #include <dev/netmap/netmap_kern.h> #include <dev/netmap/netmap_mem2.h> -#define rtnl_lock() ND("rtnl_lock called") -#define rtnl_unlock() ND("rtnl_unlock called") #define 
MBUF_RXQ(m) ((m)->m_pkthdr.flowid) #define smp_mb() @@ -168,7 +166,13 @@ nm_os_get_mbuf(struct ifnet *ifp, int len) * has a KASSERT(), checking that the mbuf dtor function is not NULL. */ +#if __FreeBSD_version <= 1200050 +static void void_mbuf_dtor(struct mbuf *m, void *arg1, void *arg2) { } +#else /* __FreeBSD_version >= 1200051 */ +/* The arg1 and arg2 pointers argument were removed by r324446, which + * in included since version 1200051. */ static void void_mbuf_dtor(struct mbuf *m) { } +#endif /* __FreeBSD_version >= 1200051 */ #define SET_MBUF_DESTRUCTOR(m, fn) do { \ (m)->m_ext.ext_free = (fn != NULL) ? \ @@ -200,8 +204,6 @@ nm_os_get_mbuf(struct ifnet *ifp, int len) #include "win_glue.h" -#define rtnl_lock() ND("rtnl_lock called") -#define rtnl_unlock() ND("rtnl_unlock called") #define MBUF_TXQ(m) 0//((m)->m_pkthdr.flowid) #define MBUF_RXQ(m) 0//((m)->m_pkthdr.flowid) #define smp_mb() //XXX: to be correctly defined @@ -210,7 +212,6 @@ nm_os_get_mbuf(struct ifnet *ifp, int len) #include "bsd_glue.h" -#include <linux/rtnetlink.h> /* rtnl_[un]lock() */ #include <linux/ethtool.h> /* struct ethtool_ops, get_ringparam */ #include <linux/hrtimer.h> @@ -339,17 +340,13 @@ generic_netmap_unregister(struct netmap_adapter *na) int i, r; if (na->active_fds == 0) { - rtnl_lock(); - na->na_flags &= ~NAF_NETMAP_ON; - /* Release packet steering control. */ - nm_os_catch_tx(gna, 0); - /* Stop intercepting packets on the RX path. */ nm_os_catch_rx(gna, 0); - rtnl_unlock(); + /* Release packet steering control. */ + nm_os_catch_tx(gna, 0); } for_each_rx_kring_h(r, kring, na) { @@ -510,24 +507,20 @@ generic_netmap_register(struct netmap_adapter *na, int enable) } if (na->active_fds == 0) { - rtnl_lock(); - /* Prepare to intercept incoming traffic. */ error = nm_os_catch_rx(gna, 1); if (error) { D("nm_os_catch_rx(1) failed (%d)", error); - goto register_handler; + goto free_tx_pools; } - /* Make netmap control the packet steering. */ + /* Let netmap control the packet steering. 
*/ error = nm_os_catch_tx(gna, 1); if (error) { D("nm_os_catch_tx(1) failed (%d)", error); goto catch_rx; } - rtnl_unlock(); - na->na_flags |= NAF_NETMAP_ON; #ifdef RATE_GENERIC @@ -548,8 +541,6 @@ generic_netmap_register(struct netmap_adapter *na, int enable) /* Here (na->active_fds == 0) holds. */ catch_rx: nm_os_catch_rx(gna, 0); -register_handler: - rtnl_unlock(); free_tx_pools: for_each_tx_kring(r, kring, na) { mtx_destroy(&kring->tx_event_lock); @@ -626,7 +617,11 @@ generic_mbuf_destructor(struct mbuf *m) * txsync. */ netmap_generic_irq(na, r, NULL); #ifdef __FreeBSD__ +#if __FreeBSD_version <= 1200050 + void_mbuf_dtor(m, NULL, NULL); +#else /* __FreeBSD_version >= 1200051 */ void_mbuf_dtor(m); +#endif /* __FreeBSD_version >= 1200051 */ #endif } @@ -1017,7 +1012,6 @@ generic_netmap_rxsync(struct netmap_kring *kring, int flags) int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR; /* Adapter-specific variables. */ - uint16_t slot_flags = kring->nkr_slot_flags; u_int nm_buf_len = NETMAP_BUF_SIZE(na); struct mbq tmpq; struct mbuf *m; @@ -1096,7 +1090,7 @@ generic_netmap_rxsync(struct netmap_kring *kring, int flags) avail -= nm_buf_len; ring->slot[nm_i].len = copy; - ring->slot[nm_i].flags = slot_flags | (mlen ? NS_MOREFRAG : 0); + ring->slot[nm_i].flags = (mlen ? NS_MOREFRAG : 0); nm_i = nm_next(nm_i, lim); } @@ -1208,6 +1202,15 @@ generic_netmap_attach(struct ifnet *ifp) } #endif + if (NA(ifp) && !NM_NA_VALID(ifp)) { + /* If NA(ifp) is not null but there is no valid netmap + * adapter it means that someone else is using the same + * pointer (e.g. ax25_ptr on linux). This happens for + * instance when also PF_RING is in use. 
*/ + D("Error: netmap adapter hook is busy"); + return EBUSY; + } + num_tx_desc = num_rx_desc = netmap_generic_ringsize; /* starting point */ nm_os_generic_find_num_desc(ifp, &num_tx_desc, &num_rx_desc); /* ignore errors */ diff --git a/sys/dev/netmap/netmap_kern.h b/sys/dev/netmap/netmap_kern.h index 268c980ff1746..3e64510913242 100644 --- a/sys/dev/netmap/netmap_kern.h +++ b/sys/dev/netmap/netmap_kern.h @@ -39,6 +39,9 @@ #if defined(linux) +#if defined(CONFIG_NETMAP_EXTMEM) +#define WITH_EXTMEM +#endif #if defined(CONFIG_NETMAP_VALE) #define WITH_VALE #endif @@ -90,6 +93,7 @@ #define NM_MTX_INIT(m) sx_init(&(m), #m) #define NM_MTX_DESTROY(m) sx_destroy(&(m)) #define NM_MTX_LOCK(m) sx_xlock(&(m)) +#define NM_MTX_SPINLOCK(m) while (!sx_try_xlock(&(m))) ; #define NM_MTX_UNLOCK(m) sx_xunlock(&(m)) #define NM_MTX_ASSERT(m) sx_assert(&(m), SA_XLOCKED) @@ -100,7 +104,7 @@ #define MBUF_TRANSMIT(na, ifp, m) ((na)->if_transmit(ifp, m)) #define GEN_TX_MBUF_IFP(m) ((m)->m_pkthdr.rcvif) -#define NM_ATOMIC_T volatile int // XXX ? +#define NM_ATOMIC_T volatile int /* required by atomic/bitops.h */ /* atomic operations */ #include <machine/atomic.h> #define NM_ATOMIC_TEST_AND_SET(p) (!atomic_cmpset_acq_int((p), 0, 1)) @@ -132,13 +136,10 @@ struct nm_selinfo { }; -// XXX linux struct, not used in FreeBSD -struct net_device_ops { -}; -struct ethtool_ops { -}; struct hrtimer { + /* Not used in FreeBSD. 
*/ }; + #define NM_BNS_GET(b) #define NM_BNS_PUT(b) @@ -202,14 +203,6 @@ struct hrtimer { #define NETMAP_KERNEL_XCHANGE_POINTERS _IO('i', 180) #define NETMAP_KERNEL_SEND_SHUTDOWN_SIGNAL _IO_direct('i', 195) -//Empty data structures are not permitted by MSVC compiler -//XXX_ale, try to solve this problem -struct net_device_ops{ - char data[1]; -}; -typedef struct ethtool_ops{ - char data[1]; -}; typedef struct hrtimer{ KTIMER timer; BOOLEAN active; @@ -297,6 +290,8 @@ void nm_os_ifnet_fini(void); void nm_os_ifnet_lock(void); void nm_os_ifnet_unlock(void); +unsigned nm_os_ifnet_mtu(struct ifnet *ifp); + void nm_os_get_module(void); void nm_os_put_module(void); @@ -305,8 +300,10 @@ void netmap_undo_zombie(struct ifnet *); /* os independent alloc/realloc/free */ void *nm_os_malloc(size_t); +void *nm_os_vmalloc(size_t); void *nm_os_realloc(void *, size_t new_size, size_t old_size); void nm_os_free(void *); +void nm_os_vfree(void *); /* passes a packet up to the host stack. * If the packet is sent (or dropped) immediately it returns NULL, @@ -371,8 +368,7 @@ struct netmap_zmon_list { * TX rings: hwcur + hwofs coincides with next_to_send * * For received packets, slot->flags is set to nkr_slot_flags - * so we can provide a proper initial value (e.g. set NS_FORWARD - * when operating in 'transparent' mode). + * so we can provide a proper initial value. * * The following fields are used to implement lock-free copy of packets * from input to output ports in VALE switch: @@ -427,6 +423,7 @@ struct netmap_kring { * (used internally by pipes and * by ptnetmap host ports) */ +#define NKR_NOINTR 0x10 /* don't use interrupts on this ring */ uint32_t nr_mode; uint32_t nr_pending_mode; @@ -442,8 +439,6 @@ struct netmap_kring { */ int32_t nkr_hwofs; - uint16_t nkr_slot_flags; /* initial value for flags */ - /* last_reclaim is opaque marker to help reduce the frequency * of operations such as reclaiming tx buffers. 
A possible use * is set it to ticks and do the reclaim only once per tick. @@ -580,7 +575,7 @@ nm_prev(uint32_t i, uint32_t lim) +-----------------+ +-----------------+ | | | | - |XXX free slot XXX| |XXX free slot XXX| + | free | | free | +-----------------+ +-----------------+ head->| owned by user |<-hwcur | not sent to nic |<-hwcur | | | yet | @@ -621,9 +616,14 @@ tail->| |<-hwtail | |<-hwlease * a circular array where completions should be reported. */ +struct lut_entry; +#ifdef __FreeBSD__ +#define plut_entry lut_entry +#endif struct netmap_lut { struct lut_entry *lut; + struct plut_entry *plut; uint32_t objtotal; /* max buffer index */ uint32_t objsize; /* buffer size */ }; @@ -671,6 +671,7 @@ struct netmap_adapter { #define NAF_HOST_RINGS 64 /* the adapter supports the host rings */ #define NAF_FORCE_NATIVE 128 /* the adapter is always NATIVE */ #define NAF_PTNETMAP_HOST 256 /* the adapter supports ptnetmap in the host */ +#define NAF_MOREFRAG 512 /* the adapter supports NS_MOREFRAG */ #define NAF_ZOMBIE (1U<<30) /* the nic driver has been unloaded */ #define NAF_BUSY (1U<<31) /* the adapter is used internally and * cannot be registered from userspace @@ -711,9 +712,8 @@ struct netmap_adapter { /* copy of if_input for netmap_send_up() */ void (*if_input)(struct ifnet *, struct mbuf *); - /* references to the ifnet and device routines, used by - * the generic netmap functions. - */ + /* Back reference to the parent ifnet struct. Used for + * hardware ports (emulated netmap included). */ struct ifnet *ifp; /* adapter is ifp->if_softc */ /*---- callbacks for this netmap adapter -----*/ @@ -806,6 +806,7 @@ struct netmap_adapter { * buffer addresses, the total number of buffers and the buffer size. */ struct netmap_mem_d *nm_mem; + struct netmap_mem_d *nm_mem_prev; struct netmap_lut na_lut; /* additional information attached to this adapter @@ -861,6 +862,8 @@ NMR(struct netmap_adapter *na, enum txrx t) return (t == NR_TX ? 
na->tx_rings : na->rx_rings); } +int nma_intr_enable(struct netmap_adapter *na, int onoff); + /* * If the NIC is owned by the kernel * (i.e., bridge), neither another bridge nor user can use it; @@ -898,8 +901,10 @@ struct netmap_vp_adapter { /* VALE software port */ struct netmap_hw_adapter { /* physical device */ struct netmap_adapter up; - struct net_device_ops nm_ndo; // XXX linux only - struct ethtool_ops nm_eto; // XXX linux only +#ifdef linux + struct net_device_ops nm_ndo; + struct ethtool_ops nm_eto; +#endif const struct ethtool_ops* save_ethtool; int (*nm_hw_register)(struct netmap_adapter *, int onoff); @@ -920,12 +925,10 @@ struct netmap_generic_adapter { /* emulated device */ /* Pointer to a previously used netmap adapter. */ struct netmap_adapter *prev; - /* generic netmap adapters support: - * a net_device_ops struct overrides ndo_select_queue(), - * save_if_input saves the if_input hook (FreeBSD), - * mit implements rx interrupt mitigation, + /* Emulated netmap adapters support: + * - save_if_input saves the if_input hook (FreeBSD); + * - mit implements rx interrupt mitigation; */ - struct net_device_ops generic_ndo; void (*save_if_input)(struct ifnet *, struct mbuf *); struct nm_generic_mit *mit; @@ -1186,7 +1189,7 @@ static __inline void nm_kr_start(struct netmap_kring *kr) * virtual ports (vale, pipes, monitor) */ int netmap_attach(struct netmap_adapter *); -int netmap_attach_ext(struct netmap_adapter *, size_t size); +int netmap_attach_ext(struct netmap_adapter *, size_t size, int override_reg); void netmap_detach(struct ifnet *); int netmap_transmit(struct ifnet *, struct mbuf *); struct netmap_slot *netmap_reset(struct netmap_adapter *na, @@ -1279,15 +1282,12 @@ nm_set_native_flags(struct netmap_adapter *na) ifp->if_transmit = netmap_transmit; #elif defined (_WIN32) (void)ifp; /* prevent a warning */ - //XXX_ale can we just comment those? 
- //na->if_transmit = ifp->if_transmit; - //ifp->if_transmit = netmap_transmit; -#else +#elif defined (linux) na->if_transmit = (void *)ifp->netdev_ops; ifp->netdev_ops = &((struct netmap_hw_adapter *)na)->nm_ndo; ((struct netmap_hw_adapter *)na)->save_ethtool = ifp->ethtool_ops; ifp->ethtool_ops = &((struct netmap_hw_adapter*)na)->nm_eto; -#endif +#endif /* linux */ nm_update_hostrings_mode(na); } @@ -1308,8 +1308,6 @@ nm_clear_native_flags(struct netmap_adapter *na) ifp->if_transmit = na->if_transmit; #elif defined(_WIN32) (void)ifp; /* prevent a warning */ - //XXX_ale can we just comment those? - //ifp->if_transmit = na->if_transmit; #else ifp->netdev_ops = (void *)na->if_transmit; ifp->ethtool_ops = ((struct netmap_hw_adapter*)na)->save_ethtool; @@ -1374,8 +1372,6 @@ uint32_t nm_rxsync_prologue(struct netmap_kring *, struct netmap_ring *); * - provide defaults for the setup callbacks and the memory allocator */ int netmap_attach_common(struct netmap_adapter *); -/* common actions to be performed on netmap adapter destruction */ -void netmap_detach_common(struct netmap_adapter *); /* fill priv->np_[tr]xq{first,last} using the ringid and flags information * coming from a struct nmreq */ @@ -1431,8 +1427,8 @@ int netmap_get_hw_na(struct ifnet *ifp, * * VALE only supports unicast or broadcast. The lookup * function can return 0 .. NM_BDG_MAXPORTS-1 for regular ports, - * NM_BDG_MAXPORTS for broadcast, NM_BDG_MAXPORTS+1 for unknown. - * XXX in practice "unknown" might be handled same as broadcast. + * NM_BDG_MAXPORTS for broadcast, NM_BDG_MAXPORTS+1 to indicate + * drop. */ typedef u_int (*bdg_lookup_fn_t)(struct nm_bdg_fwd *ft, uint8_t *ring_nr, struct netmap_vp_adapter *); @@ -1471,7 +1467,7 @@ int netmap_bdg_config(struct nmreq *nmr); #ifdef WITH_PIPES /* max number of pipes per device */ -#define NM_MAXPIPES 64 /* XXX how many? 
*/ +#define NM_MAXPIPES 64 /* XXX this should probably be a sysctl */ void netmap_pipe_dealloc(struct netmap_adapter *); int netmap_get_pipe_na(struct nmreq *nmr, struct netmap_adapter **na, struct netmap_mem_d *nmd, int create); @@ -1573,7 +1569,9 @@ extern int netmap_flags; extern int netmap_generic_mit; extern int netmap_generic_ringsize; extern int netmap_generic_rings; +#ifdef linux extern int netmap_generic_txqdisc; +#endif extern int ptnetmap_tx_workers; /* @@ -1618,13 +1616,14 @@ static void netmap_dmamap_cb(__unused void *arg, /* bus_dmamap_load wrapper: call aforementioned function if map != NULL. * XXX can we do it without a callback ? */ -static inline void +static inline int netmap_load_map(struct netmap_adapter *na, bus_dma_tag_t tag, bus_dmamap_t map, void *buf) { if (map) bus_dmamap_load(tag, map, buf, NETMAP_BUF_SIZE(na), netmap_dmamap_cb, NULL, BUS_DMA_NOWAIT); + return 0; } static inline void @@ -1635,6 +1634,8 @@ netmap_unload_map(struct netmap_adapter *na, bus_dmamap_unload(tag, map); } +#define netmap_sync_map(na, tag, map, sz, t) + /* update the map when a buffer changes. 
*/ static inline void netmap_reload_map(struct netmap_adapter *na, @@ -1654,22 +1655,52 @@ netmap_reload_map(struct netmap_adapter *na, int nm_iommu_group_id(bus_dma_tag_t dev); #include <linux/dma-mapping.h> -static inline void +/* + * on linux we need + * dma_map_single(&pdev->dev, virt_addr, len, direction) + * dma_unmap_single(&adapter->pdev->dev, phys_addr, len, direction) + */ +#if 0 + struct e1000_buffer *buffer_info = &tx_ring->buffer_info[l]; + /* set time_stamp *before* dma to help avoid a possible race */ + buffer_info->time_stamp = jiffies; + buffer_info->mapped_as_page = false; + buffer_info->length = len; + //buffer_info->next_to_watch = l; + /* reload dma map */ + dma_unmap_single(&adapter->pdev->dev, buffer_info->dma, + NETMAP_BUF_SIZE, DMA_TO_DEVICE); + buffer_info->dma = dma_map_single(&adapter->pdev->dev, + addr, NETMAP_BUF_SIZE, DMA_TO_DEVICE); + + if (dma_mapping_error(&adapter->pdev->dev, buffer_info->dma)) { + D("dma mapping error"); + /* goto dma_error; See e1000_put_txbuf() */ + /* XXX reset */ + } + tx_desc->buffer_addr = htole64(buffer_info->dma); //XXX + +#endif + +static inline int netmap_load_map(struct netmap_adapter *na, - bus_dma_tag_t tag, bus_dmamap_t map, void *buf) + bus_dma_tag_t tag, bus_dmamap_t map, void *buf, u_int size) { - if (0 && map) { - *map = dma_map_single(na->pdev, buf, NETMAP_BUF_SIZE(na), + if (map) { + *map = dma_map_single(na->pdev, buf, size, DMA_BIDIRECTIONAL); + if (dma_mapping_error(na->pdev, *map)) { + *map = 0; + return ENOMEM; + } } + return 0; } static inline void netmap_unload_map(struct netmap_adapter *na, - bus_dma_tag_t tag, bus_dmamap_t map) + bus_dma_tag_t tag, bus_dmamap_t map, u_int sz) { - u_int sz = NETMAP_BUF_SIZE(na); - if (*map) { dma_unmap_single(na->pdev, *map, sz, DMA_BIDIRECTIONAL); @@ -1677,6 +1708,20 @@ netmap_unload_map(struct netmap_adapter *na, } static inline void +netmap_sync_map(struct netmap_adapter *na, + bus_dma_tag_t tag, bus_dmamap_t map, u_int sz, enum txrx t) +{ + if 
(*map) { + if (t == NR_RX) + dma_sync_single_for_cpu(na->pdev, *map, sz, + DMA_FROM_DEVICE); + else + dma_sync_single_for_device(na->pdev, *map, sz, + DMA_TO_DEVICE); + } +} + +static inline void netmap_reload_map(struct netmap_adapter *na, bus_dma_tag_t tag, bus_dmamap_t map, void *buf) { @@ -1691,44 +1736,6 @@ netmap_reload_map(struct netmap_adapter *na, DMA_BIDIRECTIONAL); } -/* - * XXX How do we redefine these functions: - * - * on linux we need - * dma_map_single(&pdev->dev, virt_addr, len, direction) - * dma_unmap_single(&adapter->pdev->dev, phys_addr, len, direction - * The len can be implicit (on netmap it is NETMAP_BUF_SIZE) - * unfortunately the direction is not, so we need to change - * something to have a cross API - */ - -#if 0 - struct e1000_buffer *buffer_info = &tx_ring->buffer_info[l]; - /* set time_stamp *before* dma to help avoid a possible race */ - buffer_info->time_stamp = jiffies; - buffer_info->mapped_as_page = false; - buffer_info->length = len; - //buffer_info->next_to_watch = l; - /* reload dma map */ - dma_unmap_single(&adapter->pdev->dev, buffer_info->dma, - NETMAP_BUF_SIZE, DMA_TO_DEVICE); - buffer_info->dma = dma_map_single(&adapter->pdev->dev, - addr, NETMAP_BUF_SIZE, DMA_TO_DEVICE); - - if (dma_mapping_error(&adapter->pdev->dev, buffer_info->dma)) { - D("dma mapping error"); - /* goto dma_error; See e1000_put_txbuf() */ - /* XXX reset */ - } - tx_desc->buffer_addr = htole64(buffer_info->dma); //XXX - -#endif - -/* - * The bus_dmamap_sync() can be one of wmb() or rmb() depending on direction. - */ -#define bus_dmamap_sync(_a, _b, _c) - #endif /* linux */ @@ -1764,10 +1771,26 @@ netmap_idx_k2n(struct netmap_kring *kr, int idx) /* Entries of the look-up table. */ +#ifdef __FreeBSD__ +struct lut_entry { + void *vaddr; /* virtual address. */ + vm_paddr_t paddr; /* physical address. 
*/ +}; +#else /* linux & _WIN32 */ +/* dma-mapping in linux can assign a buffer a different address + * depending on the device, so we need to have a separate + * physical-address look-up table for each na. + * We can still share the vaddrs, though, therefore we split + * the lut_entry structure. + */ struct lut_entry { void *vaddr; /* virtual address. */ +}; + +struct plut_entry { vm_paddr_t paddr; /* physical address. */ }; +#endif /* linux & _WIN32 */ struct netmap_obj_pool; @@ -1789,12 +1812,13 @@ PNMB(struct netmap_adapter *na, struct netmap_slot *slot, uint64_t *pp) { uint32_t i = slot->buf_idx; struct lut_entry *lut = na->na_lut.lut; + struct plut_entry *plut = na->na_lut.plut; void *ret = (i >= na->na_lut.objtotal) ? lut[0].vaddr : lut[i].vaddr; -#ifndef _WIN32 - *pp = (i >= na->na_lut.objtotal) ? lut[0].paddr : lut[i].paddr; +#ifdef _WIN32 + *pp = (i >= na->na_lut.objtotal) ? (uint64_t)plut[0].paddr.QuadPart : (uint64_t)plut[i].paddr.QuadPart; #else - *pp = (i >= na->na_lut.objtotal) ? (uint64_t)lut[0].paddr.QuadPart : (uint64_t)lut[i].paddr.QuadPart; + *pp = (i >= na->na_lut.objtotal) ? plut[0].paddr : plut[i].paddr; #endif return ret; } @@ -1823,7 +1847,7 @@ struct netmap_priv_d { uint32_t np_flags; /* from the ioctl */ u_int np_qfirst[NR_TXRX], np_qlast[NR_TXRX]; /* range of tx/rx rings to scan */ - uint16_t np_txpoll; /* XXX and also np_rxpoll ? 
*/ + uint16_t np_txpoll; int np_sync_flags; /* to be passed to nm_sync */ int np_refs; /* use with NMG_LOCK held */ diff --git a/sys/dev/netmap/netmap_mem2.c b/sys/dev/netmap/netmap_mem2.c index a39aa1b3f042d..1f206a1b02927 100644 --- a/sys/dev/netmap/netmap_mem2.c +++ b/sys/dev/netmap/netmap_mem2.c @@ -108,6 +108,7 @@ struct netmap_obj_pool { struct lut_entry *lut; /* virt,phys addresses, objtotal entries */ uint32_t *bitmap; /* one bit per buffer, 1 means free */ + uint32_t *invalid_bitmap;/* one bit per buffer, 1 means invalid */ uint32_t bitmap_slots; /* number of uint32 entries in bitmap */ /* ---------------------------------------------------*/ @@ -134,7 +135,7 @@ struct netmap_obj_pool { struct netmap_mem_ops { int (*nmd_get_lut)(struct netmap_mem_d *, struct netmap_lut*); - int (*nmd_get_info)(struct netmap_mem_d *, u_int *size, + int (*nmd_get_info)(struct netmap_mem_d *, uint64_t *size, u_int *memflags, uint16_t *id); vm_paddr_t (*nmd_ofstophys)(struct netmap_mem_d *, vm_ooffset_t); @@ -217,7 +218,7 @@ netmap_mem_##name(struct netmap_adapter *na, t1 a1) \ } NMD_DEFCB1(int, get_lut, struct netmap_lut *); -NMD_DEFCB3(int, get_info, u_int *, u_int *, uint16_t *); +NMD_DEFCB3(int, get_info, uint64_t *, u_int *, uint16_t *); NMD_DEFCB1(vm_paddr_t, ofstophys, vm_ooffset_t); static int netmap_mem_config(struct netmap_mem_d *); NMD_DEFCB(int, config); @@ -243,6 +244,7 @@ netmap_mem_get_id(struct netmap_mem_d *nmd) #define NMA_LOCK_INIT(n) NM_MTX_INIT((n)->nm_mtx) #define NMA_LOCK_DESTROY(n) NM_MTX_DESTROY((n)->nm_mtx) #define NMA_LOCK(n) NM_MTX_LOCK((n)->nm_mtx) +#define NMA_SPINLOCK(n) NM_MTX_SPINLOCK((n)->nm_mtx) #define NMA_UNLOCK(n) NM_MTX_UNLOCK((n)->nm_mtx) #ifdef NM_DEBUG_MEM_PUTGET @@ -291,68 +293,115 @@ netmap_mem_finalize(struct netmap_mem_d *nmd, struct netmap_adapter *na) NMA_UNLOCK(nmd); } - if (!nmd->lasterr && na->pdev) - netmap_mem_map(&nmd->pools[NETMAP_BUF_POOL], na); + if (!nmd->lasterr && na->pdev) { + nmd->lasterr = 
netmap_mem_map(&nmd->pools[NETMAP_BUF_POOL], na); + if (nmd->lasterr) { + netmap_mem_deref(nmd, na); + } + } return nmd->lasterr; } -void +static int +nm_isset(uint32_t *bitmap, u_int i) +{ + return bitmap[ (i>>5) ] & ( 1U << (i & 31U) ); +} + + +static int +netmap_init_obj_allocator_bitmap(struct netmap_obj_pool *p) +{ + u_int n, j; + + if (p->bitmap == NULL) { + /* Allocate the bitmap */ + n = (p->objtotal + 31) / 32; + p->bitmap = nm_os_malloc(sizeof(uint32_t) * n); + if (p->bitmap == NULL) { + D("Unable to create bitmap (%d entries) for allocator '%s'", (int)n, + p->name); + return ENOMEM; + } + p->bitmap_slots = n; + } else { + memset(p->bitmap, 0, p->bitmap_slots); + } + + p->objfree = 0; + /* + * Set all the bits in the bitmap that have + * corresponding buffers to 1 to indicate they are + * free. + */ + for (j = 0; j < p->objtotal; j++) { + if (p->invalid_bitmap && nm_isset(p->invalid_bitmap, j)) { + D("skipping %s %d", p->name, j); + continue; + } + p->bitmap[ (j>>5) ] |= ( 1U << (j & 31U) ); + p->objfree++; + } + + ND("%s free %u", p->name, p->objfree); + if (p->objfree == 0) + return ENOMEM; + + return 0; +} + +static int +netmap_mem_init_bitmaps(struct netmap_mem_d *nmd) +{ + int i, error = 0; + + for (i = 0; i < NETMAP_POOLS_NR; i++) { + struct netmap_obj_pool *p = &nmd->pools[i]; + + error = netmap_init_obj_allocator_bitmap(p); + if (error) + return error; + } + + /* + * buffers 0 and 1 are reserved + */ + if (nmd->pools[NETMAP_BUF_POOL].objfree < 2) { + return ENOMEM; + } + + nmd->pools[NETMAP_BUF_POOL].objfree -= 2; + if (nmd->pools[NETMAP_BUF_POOL].bitmap) { + /* XXX This check is a workaround that prevents a + * NULL pointer crash which currently happens only + * with ptnetmap guests. + * Removed shared-info --> is the bug still there? 
*/ + nmd->pools[NETMAP_BUF_POOL].bitmap[0] = ~3U; + } + return 0; +} + +int netmap_mem_deref(struct netmap_mem_d *nmd, struct netmap_adapter *na) { + int last_user = 0; NMA_LOCK(nmd); - netmap_mem_unmap(&nmd->pools[NETMAP_BUF_POOL], na); + if (na->active_fds <= 0) + netmap_mem_unmap(&nmd->pools[NETMAP_BUF_POOL], na); if (nmd->active == 1) { - u_int i; - + last_user = 1; /* * Reset the allocator when it falls out of use so that any * pool resources leaked by unclean application exits are * reclaimed. */ - for (i = 0; i < NETMAP_POOLS_NR; i++) { - struct netmap_obj_pool *p; - u_int j; - - p = &nmd->pools[i]; - p->objfree = p->objtotal; - /* - * Reproduce the net effect of the M_ZERO malloc() - * and marking of free entries in the bitmap that - * occur in finalize_obj_allocator() - */ - memset(p->bitmap, - '\0', - sizeof(uint32_t) * ((p->objtotal + 31) / 32)); - - /* - * Set all the bits in the bitmap that have - * corresponding buffers to 1 to indicate they are - * free. - */ - for (j = 0; j < p->objtotal; j++) { - if (p->lut[j].vaddr != NULL) { - p->bitmap[ (j>>5) ] |= ( 1 << (j & 31) ); - } - } - } - - /* - * Per netmap_mem_finalize_all(), - * buffers 0 and 1 are reserved - */ - nmd->pools[NETMAP_BUF_POOL].objfree -= 2; - if (nmd->pools[NETMAP_BUF_POOL].bitmap) { - /* XXX This check is a workaround that prevents a - * NULL pointer crash which currently happens only - * with ptnetmap guests. - * Removed shared-info --> is the bug still there? */ - nmd->pools[NETMAP_BUF_POOL].bitmap[0] = ~3; - } + netmap_mem_init_bitmaps(nmd); } nmd->ops->nmd_deref(nmd); NMA_UNLOCK(nmd); + return last_user; } @@ -361,6 +410,9 @@ static int netmap_mem2_get_lut(struct netmap_mem_d *nmd, struct netmap_lut *lut) { lut->lut = nmd->pools[NETMAP_BUF_POOL].lut; +#ifdef __FreeBSD__ + lut->plut = lut->lut; +#endif lut->objtotal = nmd->pools[NETMAP_BUF_POOL].objtotal; lut->objsize = nmd->pools[NETMAP_BUF_POOL]._objsize; @@ -442,7 +494,6 @@ struct netmap_mem_d nm_mem = { /* Our memory allocator. 
*/ /* blueprint for the private memory allocators */ -extern struct netmap_mem_ops netmap_mem_private_ops; /* forward */ /* XXX clang is not happy about using name as a print format */ static const struct netmap_mem_d nm_blueprint = { .pools = { @@ -601,6 +652,48 @@ nm_mem_assign_group(struct netmap_mem_d *nmd, struct device *dev) return err; } +static struct lut_entry * +nm_alloc_lut(u_int nobj) +{ + size_t n = sizeof(struct lut_entry) * nobj; + struct lut_entry *lut; +#ifdef linux + lut = vmalloc(n); +#else + lut = nm_os_malloc(n); +#endif + return lut; +} + +static void +nm_free_lut(struct lut_entry *lut, u_int objtotal) +{ + bzero(lut, sizeof(struct lut_entry) * objtotal); +#ifdef linux + vfree(lut); +#else + nm_os_free(lut); +#endif +} + +#if defined(linux) || defined(_WIN32) +static struct plut_entry * +nm_alloc_plut(u_int nobj) +{ + size_t n = sizeof(struct plut_entry) * nobj; + struct plut_entry *lut; + lut = vmalloc(n); + return lut; +} + +static void +nm_free_plut(struct plut_entry * lut) +{ + vfree(lut); +} +#endif /* linux or _WIN32 */ + + /* * First, find the allocator that contains the requested offset, * then locate the cluster through a lookup table. @@ -613,7 +706,14 @@ netmap_mem2_ofstophys(struct netmap_mem_d* nmd, vm_ooffset_t offset) vm_paddr_t pa; struct netmap_obj_pool *p; +#if defined(__FreeBSD__) + /* This function is called by netmap_dev_pager_fault(), which holds a + * non-sleepable lock since FreeBSD 12. Since we cannot sleep, we + * spin on the trylock. 
*/ + NMA_SPINLOCK(nmd); +#else NMA_LOCK(nmd); +#endif p = nmd->pools; for (i = 0; i < NETMAP_POOLS_NR; offset -= p[i].memtotal, i++) { @@ -640,7 +740,7 @@ netmap_mem2_ofstophys(struct netmap_mem_d* nmd, vm_ooffset_t offset) + p[NETMAP_BUF_POOL].memtotal); NMA_UNLOCK(nmd); #ifndef _WIN32 - return 0; // XXX bad address + return 0; /* bad address */ #else vm_paddr_t res; res.QuadPart = 0; @@ -676,7 +776,8 @@ PMDL win32_build_user_vm_map(struct netmap_mem_d* nmd) { int i, j; - u_int memsize, memflags, ofs = 0; + size_t memsize; + u_int memflags, ofs = 0; PMDL mainMdl, tempMdl; if (netmap_mem_get_info(nmd, &memsize, &memflags, NULL)) { @@ -746,7 +847,7 @@ netmap_mem2_get_pool_info(struct netmap_mem_d* nmd, u_int pool, u_int *clustsize } static int -netmap_mem2_get_info(struct netmap_mem_d* nmd, u_int* size, u_int *memflags, +netmap_mem2_get_info(struct netmap_mem_d* nmd, uint64_t* size, u_int *memflags, nm_memid_t *id) { int error = 0; @@ -835,7 +936,6 @@ netmap_obj_malloc(struct netmap_obj_pool *p, u_int len, uint32_t *start, uint32_ if (len > p->_objsize) { D("%s request size %d too large", p->name, len); - // XXX cannot reduce the size return NULL; } @@ -911,7 +1011,7 @@ netmap_obj_free_va(struct netmap_obj_pool *p, void *vaddr) ssize_t relofs = (ssize_t) vaddr - (ssize_t) base; /* Given address, is out of the scope of the current cluster.*/ - if (vaddr < base || relofs >= p->_clustsize) + if (base == NULL || vaddr < base || relofs >= p->_clustsize) continue; j = j + relofs / p->_objsize; @@ -923,8 +1023,11 @@ netmap_obj_free_va(struct netmap_obj_pool *p, void *vaddr) vaddr, p->name); } -#define netmap_mem_bufsize(n) \ - ((n)->pools[NETMAP_BUF_POOL]._objsize) +unsigned +netmap_mem_bufsize(struct netmap_mem_d *nmd) +{ + return nmd->pools[NETMAP_BUF_POOL]._objsize; +} #define netmap_if_malloc(n, len) netmap_obj_malloc(&(n)->pools[NETMAP_IF_POOL], len, NULL, NULL) #define netmap_if_free(n, v) netmap_obj_free_va(&(n)->pools[NETMAP_IF_POOL], (v)) @@ -934,7 +1037,7 @@ 
netmap_obj_free_va(struct netmap_obj_pool *p, void *vaddr) netmap_obj_malloc(&(n)->pools[NETMAP_BUF_POOL], netmap_mem_bufsize(n), _pos, _index) -#if 0 // XXX unused +#if 0 /* currently unused */ /* Return the index associated to the given packet buffer */ #define netmap_buf_index(n, v) \ (netmap_obj_offset(&(n)->pools[NETMAP_BUF_POOL], (v)) / NETMAP_BDG_BUF_SIZE(n)) @@ -1012,6 +1115,7 @@ netmap_new_bufs(struct netmap_mem_d *nmd, struct netmap_slot *slot, u_int n) slot[i].buf_idx = index; slot[i].len = p->_objsize; slot[i].flags = 0; + slot[i].ptr = 0; } ND("allocated %d buffers, %d available, first at %d", n, p->objfree, pos); @@ -1073,6 +1177,9 @@ netmap_reset_obj_allocator(struct netmap_obj_pool *p) if (p->bitmap) nm_os_free(p->bitmap); p->bitmap = NULL; + if (p->invalid_bitmap) + nm_os_free(p->invalid_bitmap); + p->invalid_bitmap = NULL; if (p->lut) { u_int i; @@ -1083,15 +1190,9 @@ netmap_reset_obj_allocator(struct netmap_obj_pool *p) * in the lut. */ for (i = 0; i < p->objtotal; i += p->_clustentries) { - if (p->lut[i].vaddr) - contigfree(p->lut[i].vaddr, p->_clustsize, M_NETMAP); + contigfree(p->lut[i].vaddr, p->_clustsize, M_NETMAP); } - bzero(p->lut, sizeof(struct lut_entry) * p->objtotal); -#ifdef linux - vfree(p->lut); -#else - nm_os_free(p->lut); -#endif + nm_free_lut(p->lut, p->objtotal); } p->lut = NULL; p->objtotal = 0; @@ -1201,19 +1302,6 @@ netmap_config_obj_allocator(struct netmap_obj_pool *p, u_int objtotal, u_int obj return 0; } -static struct lut_entry * -nm_alloc_lut(u_int nobj) -{ - size_t n = sizeof(struct lut_entry) * nobj; - struct lut_entry *lut; -#ifdef linux - lut = vmalloc(n); -#else - lut = nm_os_malloc(n); -#endif - return lut; -} - /* call with NMA_LOCK held */ static int netmap_finalize_obj_allocator(struct netmap_obj_pool *p) @@ -1221,6 +1309,11 @@ netmap_finalize_obj_allocator(struct netmap_obj_pool *p) int i; /* must be signed */ size_t n; + if (p->lut) { + /* already finalized, nothing to do */ + return 0; + } + /* 
optimistically assume we have enough memory */ p->numclusters = p->_numclusters; p->objtotal = p->_objtotal; @@ -1231,18 +1324,8 @@ netmap_finalize_obj_allocator(struct netmap_obj_pool *p) goto clean; } - /* Allocate the bitmap */ - n = (p->objtotal + 31) / 32; - p->bitmap = nm_os_malloc(sizeof(uint32_t) * n); - if (p->bitmap == NULL) { - D("Unable to create bitmap (%d entries) for allocator '%s'", (int)n, - p->name); - goto clean; - } - p->bitmap_slots = n; - /* - * Allocate clusters, init pointers and bitmap + * Allocate clusters, init pointers */ n = p->_clustsize; @@ -1270,7 +1353,6 @@ netmap_finalize_obj_allocator(struct netmap_obj_pool *p) goto out; lim = i / 2; for (i--; i >= lim; i--) { - p->bitmap[ (i>>5) ] &= ~( 1 << (i & 31) ); if (i % p->_clustentries == 0 && p->lut[i].vaddr) contigfree(p->lut[i].vaddr, n, M_NETMAP); @@ -1283,8 +1365,7 @@ netmap_finalize_obj_allocator(struct netmap_obj_pool *p) break; } /* - * Set bitmap and lut state for all buffers in the current - * cluster. + * Set lut state for all buffers in the current cluster. * * [i, lim) is the set of buffer indexes that cover the * current cluster. @@ -1294,15 +1375,13 @@ netmap_finalize_obj_allocator(struct netmap_obj_pool *p) * of p->_objsize. 
*/ for (; i < lim; i++, clust += p->_objsize) { - p->bitmap[ (i>>5) ] |= ( 1 << (i & 31) ); p->lut[i].vaddr = clust; +#if !defined(linux) && !defined(_WIN32) p->lut[i].paddr = vtophys(clust); +#endif } } - p->objfree = p->objtotal; p->memtotal = p->numclusters * p->_clustsize; - if (p->objfree == 0) - goto clean; if (netmap_verbose) D("Pre-allocated %d clusters (%d/%dKB) for '%s'", p->numclusters, p->_clustsize >> 10, @@ -1348,6 +1427,7 @@ static int netmap_mem_unmap(struct netmap_obj_pool *p, struct netmap_adapter *na) { int i, lim = p->_objtotal; + struct netmap_lut *lut = &na->na_lut; if (na == NULL || na->pdev == NULL) return 0; @@ -1355,16 +1435,23 @@ netmap_mem_unmap(struct netmap_obj_pool *p, struct netmap_adapter *na) #if defined(__FreeBSD__) (void)i; (void)lim; + (void)lut; D("unsupported on FreeBSD"); - #elif defined(_WIN32) (void)i; (void)lim; - D("unsupported on Windows"); //XXX_ale, really? + (void)lut; + D("unsupported on Windows"); #else /* linux */ - for (i = 2; i < lim; i++) { - netmap_unload_map(na, (bus_dma_tag_t) na->pdev, &p->lut[i].paddr); + ND("unmapping and freeing plut for %s", na->name); + if (lut->plut == NULL) + return 0; + for (i = 0; i < lim; i += p->_clustentries) { + if (lut->plut[i].paddr) + netmap_unload_map(na, (bus_dma_tag_t) na->pdev, &lut->plut[i].paddr, p->_clustsize); } + nm_free_plut(lut->plut); + lut->plut = NULL; #endif /* linux */ return 0; @@ -1373,23 +1460,65 @@ netmap_mem_unmap(struct netmap_obj_pool *p, struct netmap_adapter *na) static int netmap_mem_map(struct netmap_obj_pool *p, struct netmap_adapter *na) { + int error = 0; + int i, lim = p->objtotal; + struct netmap_lut *lut = &na->na_lut; + + if (na->pdev == NULL) + return 0; + #if defined(__FreeBSD__) + (void)i; + (void)lim; + (void)lut; D("unsupported on FreeBSD"); #elif defined(_WIN32) - D("unsupported on Windows"); //XXX_ale, really? 
+ (void)i; + (void)lim; + (void)lut; + D("unsupported on Windows"); #else /* linux */ - int i, lim = p->_objtotal; - if (na->pdev == NULL) + if (lut->plut != NULL) { + ND("plut already allocated for %s", na->name); return 0; + } - for (i = 2; i < lim; i++) { - netmap_load_map(na, (bus_dma_tag_t) na->pdev, &p->lut[i].paddr, - p->lut[i].vaddr); + ND("allocating physical lut for %s", na->name); + lut->plut = nm_alloc_plut(lim); + if (lut->plut == NULL) { + D("Failed to allocate physical lut for %s", na->name); + return ENOMEM; + } + + for (i = 0; i < lim; i += p->_clustentries) { + lut->plut[i].paddr = 0; } + + for (i = 0; i < lim; i += p->_clustentries) { + int j; + + if (p->lut[i].vaddr == NULL) + continue; + + error = netmap_load_map(na, (bus_dma_tag_t) na->pdev, &lut->plut[i].paddr, + p->lut[i].vaddr, p->_clustsize); + if (error) { + D("Failed to map cluster #%d from the %s pool", i, p->name); + break; + } + + for (j = 1; j < p->_clustentries; j++) { + lut->plut[i + j].paddr = lut->plut[i + j - 1].paddr + p->_objsize; + } + } + + if (error) + netmap_mem_unmap(p, na); + #endif /* linux */ - return 0; + return error; } static int @@ -1406,9 +1535,10 @@ netmap_mem_finalize_all(struct netmap_mem_d *nmd) goto error; nmd->nm_totalsize += nmd->pools[i].memtotal; } - /* buffers 0 and 1 are reserved */ - nmd->pools[NETMAP_BUF_POOL].objfree -= 2; - nmd->pools[NETMAP_BUF_POOL].bitmap[0] = ~3; + nmd->lasterr = netmap_mem_init_bitmaps(nmd); + if (nmd->lasterr) + goto error; + nmd->flags |= NETMAP_MEM_FINALIZED; if (netmap_verbose) @@ -1430,23 +1560,25 @@ error: /* * allocator for private memory */ -static struct netmap_mem_d * -_netmap_mem_private_new(struct netmap_obj_params *p, int *perr) +static void * +_netmap_mem_private_new(size_t size, struct netmap_obj_params *p, + struct netmap_mem_ops *ops, int *perr) { struct netmap_mem_d *d = NULL; int i, err = 0; - d = nm_os_malloc(sizeof(struct netmap_mem_d)); + d = nm_os_malloc(size); if (d == NULL) { err = ENOMEM; goto error; } 
*d = nm_blueprint; + d->ops = ops; err = nm_mem_assign_id(d); if (err) - goto error; + goto error_free; snprintf(d->name, NM_MEM_NAMESZ, "%d", d->nm_id); for (i = 0; i < NETMAP_POOLS_NR; i++) { @@ -1461,14 +1593,18 @@ _netmap_mem_private_new(struct netmap_obj_params *p, int *perr) err = netmap_mem_config(d); if (err) - goto error; + goto error_rel_id; d->flags &= ~NETMAP_MEM_FINALIZED; return d; +error_rel_id: + NMA_LOCK_DESTROY(d); + nm_mem_release_id(d); +error_free: + nm_os_free(d); error: - netmap_mem_delete(d); if (perr) *perr = err; return NULL; @@ -1480,7 +1616,7 @@ netmap_mem_private_new(u_int txr, u_int txd, u_int rxr, u_int rxd, { struct netmap_mem_d *d = NULL; struct netmap_obj_params p[NETMAP_POOLS_NR]; - int i, err = 0; + int i; u_int v, maxd; /* account for the fake host rings */ txr++; @@ -1527,16 +1663,9 @@ netmap_mem_private_new(u_int txr, u_int txd, u_int rxr, u_int rxd, p[NETMAP_BUF_POOL].num, p[NETMAP_BUF_POOL].size); - d = _netmap_mem_private_new(p, perr); - if (d == NULL) - goto error; + d = _netmap_mem_private_new(sizeof(*d), p, &netmap_mem_global_ops, perr); return d; -error: - netmap_mem_delete(d); - if (perr) - *perr = err; - return NULL; } @@ -1581,14 +1710,14 @@ netmap_mem2_finalize(struct netmap_mem_d *nmd) int err; /* update configuration if changed */ - if (netmap_mem2_config(nmd)) + if (netmap_mem_config(nmd)) goto out1; nmd->active++; if (nmd->flags & NETMAP_MEM_FINALIZED) { /* may happen if config is not changed */ - ND("nothing to do"); + D("nothing to do"); goto out; } @@ -1621,12 +1750,21 @@ netmap_mem2_delete(struct netmap_mem_d *nmd) nm_os_free(nmd); } +#ifdef WITH_EXTMEM +/* doubly linekd list of all existing external allocators */ +static struct netmap_mem_ext *netmap_mem_ext_list = NULL; +NM_MTX_T nm_mem_ext_list_lock; +#endif /* WITH_EXTMEM */ + int netmap_mem_init(void) { NM_MTX_INIT(nm_mem_list_lock); NMA_LOCK_INIT(&nm_mem); netmap_mem_get(&nm_mem); +#ifdef WITH_EXTMEM + NM_MTX_INIT(nm_mem_ext_list_lock); +#endif /* 
WITH_EXTMEM */ return (0); } @@ -1648,10 +1786,13 @@ netmap_free_rings(struct netmap_adapter *na) struct netmap_ring *ring = kring->ring; if (ring == NULL || kring->users > 0 || (kring->nr_kflags & NKR_NEEDRING)) { - ND("skipping ring %s (ring %p, users %d)", - kring->name, ring, kring->users); + if (netmap_verbose) + D("NOT deleting ring %s (ring %p, users %d neekring %d)", + kring->name, ring, kring->users, kring->nr_kflags & NKR_NEEDRING); continue; } + if (netmap_verbose) + D("deleting ring %s", kring->name); if (i != nma_get_nrings(na, t) || na->na_flags & NAF_HOST_RINGS) netmap_free_bufs(na->nm_mem, ring->slot, kring->nkr_num_slots); netmap_ring_free(na->nm_mem, ring); @@ -1684,9 +1825,13 @@ netmap_mem2_rings_create(struct netmap_adapter *na) if (ring || (!kring->users && !(kring->nr_kflags & NKR_NEEDRING))) { /* uneeded, or already created by somebody else */ - ND("skipping ring %s", kring->name); + if (netmap_verbose) + D("NOT creating ring %s (ring %p, users %d neekring %d)", + kring->name, ring, kring->users, kring->nr_kflags & NKR_NEEDRING); continue; } + if (netmap_verbose) + D("creating %s", kring->name); ndesc = kring->nkr_num_slots; len = sizeof(struct netmap_ring) + ndesc * sizeof(struct netmap_slot); @@ -1707,7 +1852,7 @@ netmap_mem2_rings_create(struct netmap_adapter *na) ring->head = kring->rhead; ring->cur = kring->rcur; ring->tail = kring->rtail; - *(uint16_t *)(uintptr_t)&ring->nr_buf_size = + *(uint32_t *)(uintptr_t)&ring->nr_buf_size = netmap_mem_bufsize(na->nm_mem); ND("%s h %d c %d t %d", kring->name, ring->head, ring->cur, ring->tail); @@ -1876,7 +2021,7 @@ netmap_mem_pools_info_get(struct nmreq *nmr, struct netmap_mem_d *nmd) uintptr_t *pp = (uintptr_t *)&nmr->nr_arg1; struct netmap_pools_info *upi = (struct netmap_pools_info *)(*pp); struct netmap_pools_info pi; - unsigned int memsize; + uint64_t memsize; uint16_t memid; int ret; @@ -1910,6 +2055,340 @@ netmap_mem_pools_info_get(struct nmreq *nmr, struct netmap_mem_d *nmd) return 0; } 
+#ifdef WITH_EXTMEM +struct netmap_mem_ext { + struct netmap_mem_d up; + + struct page **pages; + int nr_pages; + struct netmap_mem_ext *next, *prev; +}; + +/* call with nm_mem_list_lock held */ +static void +netmap_mem_ext_register(struct netmap_mem_ext *e) +{ + NM_MTX_LOCK(nm_mem_ext_list_lock); + if (netmap_mem_ext_list) + netmap_mem_ext_list->prev = e; + e->next = netmap_mem_ext_list; + netmap_mem_ext_list = e; + e->prev = NULL; + NM_MTX_UNLOCK(nm_mem_ext_list_lock); +} + +/* call with nm_mem_list_lock held */ +static void +netmap_mem_ext_unregister(struct netmap_mem_ext *e) +{ + if (e->prev) + e->prev->next = e->next; + else + netmap_mem_ext_list = e->next; + if (e->next) + e->next->prev = e->prev; + e->prev = e->next = NULL; +} + +static int +netmap_mem_ext_same_pages(struct netmap_mem_ext *e, struct page **pages, int nr_pages) +{ + int i; + + if (e->nr_pages != nr_pages) + return 0; + + for (i = 0; i < nr_pages; i++) + if (pages[i] != e->pages[i]) + return 0; + + return 1; +} + +static struct netmap_mem_ext * +netmap_mem_ext_search(struct page **pages, int nr_pages) +{ + struct netmap_mem_ext *e; + + NM_MTX_LOCK(nm_mem_ext_list_lock); + for (e = netmap_mem_ext_list; e; e = e->next) { + if (netmap_mem_ext_same_pages(e, pages, nr_pages)) { + netmap_mem_get(&e->up); + break; + } + } + NM_MTX_UNLOCK(nm_mem_ext_list_lock); + return e; +} + + +static void +netmap_mem_ext_free_pages(struct page **pages, int nr_pages) +{ + int i; + + for (i = 0; i < nr_pages; i++) { + kunmap(pages[i]); + put_page(pages[i]); + } + nm_os_vfree(pages); +} + +static void +netmap_mem_ext_delete(struct netmap_mem_d *d) +{ + int i; + struct netmap_mem_ext *e = + (struct netmap_mem_ext *)d; + + netmap_mem_ext_unregister(e); + + for (i = 0; i < NETMAP_POOLS_NR; i++) { + struct netmap_obj_pool *p = &d->pools[i]; + + if (p->lut) { + nm_free_lut(p->lut, p->objtotal); + p->lut = NULL; + } + } + if (e->pages) { + netmap_mem_ext_free_pages(e->pages, e->nr_pages); + e->pages = NULL; + e->nr_pages = 
0; + } + netmap_mem2_delete(d); +} + +static int +netmap_mem_ext_config(struct netmap_mem_d *nmd) +{ + return 0; +} + +struct netmap_mem_ops netmap_mem_ext_ops = { + .nmd_get_lut = netmap_mem2_get_lut, + .nmd_get_info = netmap_mem2_get_info, + .nmd_ofstophys = netmap_mem2_ofstophys, + .nmd_config = netmap_mem_ext_config, + .nmd_finalize = netmap_mem2_finalize, + .nmd_deref = netmap_mem2_deref, + .nmd_delete = netmap_mem_ext_delete, + .nmd_if_offset = netmap_mem2_if_offset, + .nmd_if_new = netmap_mem2_if_new, + .nmd_if_delete = netmap_mem2_if_delete, + .nmd_rings_create = netmap_mem2_rings_create, + .nmd_rings_delete = netmap_mem2_rings_delete +}; + +struct netmap_mem_d * +netmap_mem_ext_create(struct nmreq *nmr, int *perror) +{ + uintptr_t p = *(uintptr_t *)&nmr->nr_arg1; + struct netmap_pools_info pi; + int error = 0; + unsigned long end, start; + int nr_pages, res, i, j; + struct page **pages = NULL; + struct netmap_mem_ext *nme; + char *clust; + size_t off; + + error = copyin((void *)p, &pi, sizeof(pi)); + if (error) + goto out; + + // XXX sanity checks + if (pi.if_pool_objtotal == 0) + pi.if_pool_objtotal = netmap_min_priv_params[NETMAP_IF_POOL].num; + if (pi.if_pool_objsize == 0) + pi.if_pool_objsize = netmap_min_priv_params[NETMAP_IF_POOL].size; + if (pi.ring_pool_objtotal == 0) + pi.ring_pool_objtotal = netmap_min_priv_params[NETMAP_RING_POOL].num; + if (pi.ring_pool_objsize == 0) + pi.ring_pool_objsize = netmap_min_priv_params[NETMAP_RING_POOL].size; + if (pi.buf_pool_objtotal == 0) + pi.buf_pool_objtotal = netmap_min_priv_params[NETMAP_BUF_POOL].num; + if (pi.buf_pool_objsize == 0) + pi.buf_pool_objsize = netmap_min_priv_params[NETMAP_BUF_POOL].size; + D("if %d %d ring %d %d buf %d %d", + pi.if_pool_objtotal, pi.if_pool_objsize, + pi.ring_pool_objtotal, pi.ring_pool_objsize, + pi.buf_pool_objtotal, pi.buf_pool_objsize); + + end = (p + pi.memsize + PAGE_SIZE - 1) >> PAGE_SHIFT; + start = p >> PAGE_SHIFT; + nr_pages = end - start; + + pages = 
nm_os_vmalloc(nr_pages * sizeof(*pages)); + if (pages == NULL) { + error = ENOMEM; + goto out; + } + +#ifdef NETMAP_LINUX_HAVE_GUP_4ARGS + res = get_user_pages_unlocked( + p, + nr_pages, + pages, + FOLL_WRITE | FOLL_GET | FOLL_SPLIT | FOLL_POPULATE); // XXX check other flags +#elif defined(NETMAP_LINUX_HAVE_GUP_5ARGS) + res = get_user_pages_unlocked( + p, + nr_pages, + 1, /* write */ + 0, /* don't force */ + pages); +#elif defined(NETMAP_LINUX_HAVE_GUP_7ARGS) + res = get_user_pages_unlocked( + current, + current->mm, + p, + nr_pages, + 1, /* write */ + 0, /* don't force */ + pages); +#else + down_read(&current->mm->mmap_sem); + res = get_user_pages( + current, + current->mm, + p, + nr_pages, + 1, /* write */ + 0, /* don't force */ + pages, + NULL); + up_read(&current->mm->mmap_sem); +#endif /* NETMAP_LINUX_GUP */ + + if (res < nr_pages) { + error = EFAULT; + goto out_unmap; + } + + nme = netmap_mem_ext_search(pages, nr_pages); + if (nme) { + netmap_mem_ext_free_pages(pages, nr_pages); + return &nme->up; + } + D("not found, creating new"); + + nme = _netmap_mem_private_new(sizeof(*nme), + (struct netmap_obj_params[]){ + { pi.if_pool_objsize, pi.if_pool_objtotal }, + { pi.ring_pool_objsize, pi.ring_pool_objtotal }, + { pi.buf_pool_objsize, pi.buf_pool_objtotal }}, + &netmap_mem_ext_ops, + &error); + if (nme == NULL) + goto out_unmap; + + /* from now on pages will be released by nme destructor; + * we let res = 0 to prevent release in out_unmap below + */ + res = 0; + nme->pages = pages; + nme->nr_pages = nr_pages; + nme->up.flags |= NETMAP_MEM_EXT; + + clust = kmap(*pages); + off = 0; + for (i = 0; i < NETMAP_POOLS_NR; i++) { + struct netmap_obj_pool *p = &nme->up.pools[i]; + struct netmap_obj_params *o = &nme->up.params[i]; + + p->_objsize = o->size; + p->_clustsize = o->size; + p->_clustentries = 1; + + p->lut = nm_alloc_lut(o->num); + if (p->lut == NULL) { + error = ENOMEM; + goto out_delete; + } + + p->bitmap_slots = (o->num + sizeof(uint32_t) - 1) / sizeof(uint32_t); + 
p->invalid_bitmap = nm_os_malloc(sizeof(uint32_t) * p->bitmap_slots); + if (p->invalid_bitmap == NULL) { + error = ENOMEM; + goto out_delete; + } + + if (nr_pages == 0) { + p->objtotal = 0; + p->memtotal = 0; + p->objfree = 0; + continue; + } + + for (j = 0; j < o->num && nr_pages > 0; j++) { + size_t noff; + size_t skip; + + p->lut[j].vaddr = clust + off; + ND("%s %d at %p", p->name, j, p->lut[j].vaddr); + noff = off + p->_objsize; + if (noff < PAGE_SIZE) { + off = noff; + continue; + } + ND("too big, recomputing offset..."); + skip = PAGE_SIZE - (off & PAGE_MASK); + while (noff >= PAGE_SIZE) { + noff -= skip; + pages++; + nr_pages--; + ND("noff %zu page %p nr_pages %d", noff, + page_to_virt(*pages), nr_pages); + if (noff > 0 && !nm_isset(p->invalid_bitmap, j) && + (nr_pages == 0 || *pages != *(pages - 1) + 1)) + { + /* out of space or non contiguous, + * drop this object + * */ + p->invalid_bitmap[ (j>>5) ] |= 1U << (j & 31U); + ND("non contiguous at off %zu, drop", noff); + } + if (nr_pages == 0) + break; + skip = PAGE_SIZE; + } + off = noff; + if (nr_pages > 0) + clust = kmap(*pages); + } + p->objtotal = j; + p->numclusters = p->objtotal; + p->memtotal = j * p->_objsize; + ND("%d memtotal %u", j, p->memtotal); + } + + /* skip the first netmap_if, where the pools info reside */ + { + struct netmap_obj_pool *p = &nme->up.pools[NETMAP_IF_POOL]; + p->invalid_bitmap[0] |= 1U; + } + + netmap_mem_ext_register(nme); + + return &nme->up; + +out_delete: + netmap_mem_put(&nme->up); +out_unmap: + for (i = 0; i < res; i++) + put_page(pages[i]); + if (res) + nm_os_free(pages); +out: + if (perror) + *perror = error; + return NULL; + +} +#endif /* WITH_EXTMEM */ + + #ifdef WITH_PTNETMAP_GUEST struct mem_pt_if { struct mem_pt_if *next; @@ -2020,7 +2499,7 @@ netmap_mem_pt_guest_get_lut(struct netmap_mem_d *nmd, struct netmap_lut *lut) } static int -netmap_mem_pt_guest_get_info(struct netmap_mem_d *nmd, u_int *size, +netmap_mem_pt_guest_get_info(struct netmap_mem_d *nmd, uint64_t 
*size, u_int *memflags, uint16_t *id) { int error = 0; @@ -2118,7 +2597,6 @@ netmap_mem_pt_guest_finalize(struct netmap_mem_d *nmd) for (i = 0; i < nbuffers; i++) { ptnmd->buf_lut.lut[i].vaddr = vaddr; - ptnmd->buf_lut.lut[i].paddr = paddr; vaddr += bufsize; paddr += bufsize; } @@ -2256,11 +2734,17 @@ out: static void netmap_mem_pt_guest_rings_delete(struct netmap_adapter *na) { - /* TODO: remove?? */ #if 0 - struct netmap_mem_ptg *ptnmd = (struct netmap_mem_ptg *)na->nm_mem; - struct mem_pt_if *ptif = netmap_mem_pt_guest_ifp_lookup(na->nm_mem, - na->ifp); + enum txrx t; + + for_rx_tx(t) { + u_int i; + for (i = 0; i < nma_get_nrings(na, t) + 1; i++) { + struct netmap_kring *kring = &NMR(na, t)[i]; + + kring->ring = NULL; + } + } #endif } diff --git a/sys/dev/netmap/netmap_mem2.h b/sys/dev/netmap/netmap_mem2.h index 81f601c4ca9f4..f0bee7a33fd53 100644 --- a/sys/dev/netmap/netmap_mem2.h +++ b/sys/dev/netmap/netmap_mem2.h @@ -136,9 +136,9 @@ struct netmap_if * netmap_mem_if_new(struct netmap_adapter *, struct netmap_priv void netmap_mem_if_delete(struct netmap_adapter *, struct netmap_if *); int netmap_mem_rings_create(struct netmap_adapter *); void netmap_mem_rings_delete(struct netmap_adapter *); -void netmap_mem_deref(struct netmap_mem_d *, struct netmap_adapter *); +int netmap_mem_deref(struct netmap_mem_d *, struct netmap_adapter *); int netmap_mem2_get_pool_info(struct netmap_mem_d *, u_int, u_int *, u_int *); -int netmap_mem_get_info(struct netmap_mem_d *, u_int *size, u_int *memflags, uint16_t *id); +int netmap_mem_get_info(struct netmap_mem_d *, uint64_t *size, u_int *memflags, uint16_t *id); ssize_t netmap_mem_if_offset(struct netmap_mem_d *, const void *vaddr); struct netmap_mem_d* netmap_mem_private_new( u_int txr, u_int txd, u_int rxr, u_int rxd, u_int extra_bufs, u_int npipes, int* error); @@ -149,6 +149,14 @@ void netmap_mem_delete(struct netmap_mem_d *); struct netmap_mem_d* __netmap_mem_get(struct netmap_mem_d *, const char *, int); void 
__netmap_mem_put(struct netmap_mem_d *, const char *, int); struct netmap_mem_d* netmap_mem_find(nm_memid_t); +unsigned netmap_mem_bufsize(struct netmap_mem_d *nmd); + +#ifdef WITH_EXTMEM +struct netmap_mem_d* netmap_mem_ext_create(struct nmreq *, int *); +#else /* !WITH_EXTMEM */ +#define netmap_mem_ext_create(nmr, _perr) \ + ({ int *perr = _perr; if (perr) *(perr) = EOPNOTSUPP; NULL; }) +#endif /* WITH_EXTMEM */ #ifdef WITH_PTNETMAP_GUEST struct netmap_mem_d* netmap_mem_pt_guest_new(struct ifnet *, @@ -163,6 +171,7 @@ int netmap_mem_pools_info_get(struct nmreq *, struct netmap_mem_d *); #define NETMAP_MEM_PRIVATE 0x2 /* allocator uses private address space */ #define NETMAP_MEM_IO 0x4 /* the underlying memory is mmapped I/O */ +#define NETMAP_MEM_EXT 0x10 /* external memory (not remappable) */ uint32_t netmap_extra_alloc(struct netmap_adapter *, uint32_t *, uint32_t n); diff --git a/sys/dev/netmap/netmap_monitor.c b/sys/dev/netmap/netmap_monitor.c index 8b788920ff806..e7cc05f5ab0f0 100644 --- a/sys/dev/netmap/netmap_monitor.c +++ b/sys/dev/netmap/netmap_monitor.c @@ -66,9 +66,7 @@ * has released them. In most cases, the consumer is a userspace * application which may have modified the frame contents. * - * Several copy monitors may be active on any ring. Zero-copy monitors, - * instead, need exclusive access to each of the monitored rings. This may - * change in the future, if we implement zero-copy monitor chaining. + * Several copy or zero-copy monitors may be active on any ring. 
* */ @@ -263,7 +261,7 @@ netmap_monitor_add(struct netmap_kring *mkring, struct netmap_kring *kring, int if (zmon && z->prev != NULL) kring = z->prev; - /* sinchronize with concurrently running nm_sync()s */ + /* synchronize with concurrently running nm_sync()s */ nm_kr_stop(kring, NM_KR_LOCKED); if (nm_monitor_none(kring)) { @@ -329,7 +327,7 @@ netmap_monitor_del(struct netmap_kring *mkring, struct netmap_kring *kring) if (zmon && mz->prev != NULL) kring = mz->prev; - /* sinchronize with concurrently running nm_sync()s */ + /* synchronize with concurrently running nm_sync()s */ nm_kr_stop(kring, NM_KR_LOCKED); if (zmon) { diff --git a/sys/dev/netmap/netmap_offloadings.c b/sys/dev/netmap/netmap_offloadings.c index 6dc32b13ff8e3..e0b96a8e52a26 100644 --- a/sys/dev/netmap/netmap_offloadings.c +++ b/sys/dev/netmap/netmap_offloadings.c @@ -132,7 +132,7 @@ gso_fix_segment(uint8_t *pkt, size_t len, u_int ipv4, u_int iphlen, u_int tcp, ND("TCP/UDP csum %x", be16toh(*check)); } -static int +static inline int vnet_hdr_is_bad(struct nm_vnet_hdr *vh) { uint8_t gso_type = vh->gso_type & ~VIRTIO_NET_HDR_GSO_ECN; @@ -170,7 +170,7 @@ bdg_mismatch_datapath(struct netmap_vp_adapter *na, u_int dst_slots = 0; if (unlikely(ft_p == ft_end)) { - RD(3, "No source slots to process"); + RD(1, "No source slots to process"); return; } @@ -189,11 +189,11 @@ bdg_mismatch_datapath(struct netmap_vp_adapter *na, /* Initial sanity check on the source virtio-net header. If * something seems wrong, just drop the packet. 
*/ if (src_len < na->up.virt_hdr_len) { - RD(3, "Short src vnet header, dropping"); + RD(1, "Short src vnet header, dropping"); return; } - if (vnet_hdr_is_bad(vh)) { - RD(3, "Bad src vnet header, dropping"); + if (unlikely(vnet_hdr_is_bad(vh))) { + RD(1, "Bad src vnet header, dropping"); return; } } @@ -266,7 +266,7 @@ bdg_mismatch_datapath(struct netmap_vp_adapter *na, if (dst_slots >= *howmany) { /* We still have work to do, but we've run out of * dst slots, so we have to drop the packet. */ - RD(3, "Not enough slots, dropping GSO packet"); + ND(1, "Not enough slots, dropping GSO packet"); return; } @@ -281,7 +281,7 @@ bdg_mismatch_datapath(struct netmap_vp_adapter *na, * encapsulation. */ for (;;) { if (src_len < ethhlen) { - RD(3, "Short GSO fragment [eth], dropping"); + RD(1, "Short GSO fragment [eth], dropping"); return; } ethertype = be16toh(*((uint16_t *) @@ -297,7 +297,7 @@ bdg_mismatch_datapath(struct netmap_vp_adapter *na, (gso_hdr + ethhlen); if (src_len < ethhlen + 20) { - RD(3, "Short GSO fragment " + RD(1, "Short GSO fragment " "[IPv4], dropping"); return; } @@ -310,14 +310,14 @@ bdg_mismatch_datapath(struct netmap_vp_adapter *na, iphlen = 40; break; default: - RD(3, "Unsupported ethertype, " + RD(1, "Unsupported ethertype, " "dropping GSO packet"); return; } ND(3, "type=%04x", ethertype); if (src_len < ethhlen + iphlen) { - RD(3, "Short GSO fragment [IP], dropping"); + RD(1, "Short GSO fragment [IP], dropping"); return; } @@ -329,7 +329,7 @@ bdg_mismatch_datapath(struct netmap_vp_adapter *na, (gso_hdr + ethhlen + iphlen); if (src_len < ethhlen + iphlen + 20) { - RD(3, "Short GSO fragment " + RD(1, "Short GSO fragment " "[TCP], dropping"); return; } @@ -340,7 +340,7 @@ bdg_mismatch_datapath(struct netmap_vp_adapter *na, } if (src_len < gso_hdr_len) { - RD(3, "Short GSO fragment [TCP/UDP], dropping"); + RD(1, "Short GSO fragment [TCP/UDP], dropping"); return; } diff --git a/sys/dev/netmap/netmap_pipe.c b/sys/dev/netmap/netmap_pipe.c index 
80843403b996a..48dde5382f77a 100644 --- a/sys/dev/netmap/netmap_pipe.c +++ b/sys/dev/netmap/netmap_pipe.c @@ -81,7 +81,8 @@ static int netmap_default_pipes = 0; /* ignored, kept for compatibility */ SYSBEGIN(vars_pipes); SYSCTL_DECL(_dev_netmap); -SYSCTL_INT(_dev_netmap, OID_AUTO, default_pipes, CTLFLAG_RW, &netmap_default_pipes, 0 , ""); +SYSCTL_INT(_dev_netmap, OID_AUTO, default_pipes, CTLFLAG_RW, + &netmap_default_pipes, 0, "For compatibility only"); SYSEND; /* allocate the pipe array in the parent adapter */ @@ -182,6 +183,7 @@ netmap_pipe_txsync(struct netmap_kring *txkring, int flags) u_int j, k, lim_tx = txkring->nkr_num_slots - 1, lim_rx = rxkring->nkr_num_slots - 1; int m, busy; + struct netmap_ring *txring = txkring->ring, *rxring = rxkring->ring; ND("%p: %s %x -> %s", txkring, txkring->name, flags, rxkring->name); ND(2, "before: hwcur %d hwtail %d cur %d head %d tail %d", txkring->nr_hwcur, txkring->nr_hwtail, @@ -208,18 +210,18 @@ netmap_pipe_txsync(struct netmap_kring *txkring, int flags) } while (limit-- > 0) { - struct netmap_slot *rs = &rxkring->ring->slot[j]; - struct netmap_slot *ts = &txkring->ring->slot[k]; + struct netmap_slot *rs = &rxring->slot[j]; + struct netmap_slot *ts = &txring->slot[k]; struct netmap_slot tmp; - /* swap the slots */ + __builtin_prefetch(ts + 1); + + /* swap the slots and report the buffer change */ tmp = *rs; + tmp.flags |= NS_BUF_CHANGED; *rs = *ts; - *ts = tmp; - - /* report the buffer change */ - ts->flags |= NS_BUF_CHANGED; rs->flags |= NS_BUF_CHANGED; + *ts = tmp; j = nm_next(j, lim_rx); k = nm_next(k, lim_tx); diff --git a/sys/dev/netmap/netmap_pt.c b/sys/dev/netmap/netmap_pt.c index d3544a5b1728a..edb49dc504acd 100644 --- a/sys/dev/netmap/netmap_pt.c +++ b/sys/dev/netmap/netmap_pt.c @@ -169,19 +169,19 @@ rate_batch_stats_update(struct rate_batch_stats *bf, uint32_t pre_tail, #endif /* RATE */ struct ptnetmap_state { - /* Kthreads. */ - struct nm_kctx **kctxs; + /* Kthreads. 
*/ + struct nm_kctx **kctxs; /* Shared memory with the guest (TX/RX) */ struct ptnet_csb_gh __user *csb_gh; struct ptnet_csb_hg __user *csb_hg; - bool stopped; + bool stopped; - /* Netmap adapter wrapping the backend. */ - struct netmap_pt_host_adapter *pth_na; + /* Netmap adapter wrapping the backend. */ + struct netmap_pt_host_adapter *pth_na; - IFRATE(struct rate_context rate_ctx;) + IFRATE(struct rate_context rate_ctx;) }; static inline void @@ -1268,13 +1268,11 @@ netmap_get_pt_host_na(struct nmreq *nmr, struct netmap_adapter **na, } *na = &pth_na->up; - netmap_adapter_get(*na); - /* set parent busy, because attached for ptnetmap */ parent->na_flags |= NAF_BUSY; - strncpy(pth_na->up.name, parent->name, sizeof(pth_na->up.name)); strcat(pth_na->up.name, "-PTN"); + netmap_adapter_get(*na); DBG(D("%s ptnetmap request DONE", pth_na->up.name)); @@ -1350,7 +1348,7 @@ netmap_pt_guest_txsync(struct ptnet_csb_gh *ptgh, struct ptnet_csb_hg *pthg, * go to sleep and we need to be notified by the host when more free * space is available. */ - if (nm_kr_txempty(kring)) { + if (nm_kr_txempty(kring) && !(kring->nr_kflags & NKR_NOINTR)) { /* Reenable notifications. */ ptgh->guest_need_kick = 1; /* Double check */ @@ -1415,7 +1413,7 @@ netmap_pt_guest_rxsync(struct ptnet_csb_gh *ptgh, struct ptnet_csb_hg *pthg, * we need to be notified by the host when more RX slots have been * completed. */ - if (nm_kr_rxempty(kring)) { + if (nm_kr_rxempty(kring) && !(kring->nr_kflags & NKR_NOINTR)) { /* Reenable notifications. 
*/ ptgh->guest_need_kick = 1; /* Double check */ @@ -1504,7 +1502,7 @@ netmap_pt_guest_attach(struct netmap_adapter *arg, if (arg->nm_mem == NULL) return ENOMEM; arg->na_flags |= NAF_MEM_OWNER; - error = netmap_attach_ext(arg, sizeof(struct netmap_pt_guest_adapter)); + error = netmap_attach_ext(arg, sizeof(struct netmap_pt_guest_adapter), 1); if (error) return error; @@ -1517,7 +1515,7 @@ netmap_pt_guest_attach(struct netmap_adapter *arg, memset(&ptna->dr, 0, sizeof(ptna->dr)); ptna->dr.up.ifp = ifp; ptna->dr.up.nm_mem = netmap_mem_get(ptna->hwup.up.nm_mem); - ptna->dr.up.nm_config = ptna->hwup.up.nm_config; + ptna->dr.up.nm_config = ptna->hwup.up.nm_config; ptna->backend_regifs = 0; diff --git a/sys/dev/netmap/netmap_vale.c b/sys/dev/netmap/netmap_vale.c index 0df3d08f2a69c..d364699bce269 100644 --- a/sys/dev/netmap/netmap_vale.c +++ b/sys/dev/netmap/netmap_vale.c @@ -150,6 +150,8 @@ __FBSDID("$FreeBSD$"); #define NM_BDG_BATCH_MAX (NM_BDG_BATCH + NM_MULTISEG) /* NM_FT_NULL terminates a list of slots in the ft */ #define NM_FT_NULL NM_BDG_BATCH_MAX +/* Default size for the Maximum Frame Size. */ +#define NM_BDG_MFS_DEFAULT 1514 /* @@ -160,7 +162,8 @@ __FBSDID("$FreeBSD$"); static int bridge_batch = NM_BDG_BATCH; /* bridge batch size */ SYSBEGIN(vars_vale); SYSCTL_DECL(_dev_netmap); -SYSCTL_INT(_dev_netmap, OID_AUTO, bridge_batch, CTLFLAG_RW, &bridge_batch, 0 , ""); +SYSCTL_INT(_dev_netmap, OID_AUTO, bridge_batch, CTLFLAG_RW, &bridge_batch, 0, + "Max batch size to be used in the bridge"); SYSEND; static int netmap_vp_create(struct nmreq *, struct ifnet *, @@ -226,9 +229,9 @@ struct nm_bridge { /* the forwarding table, MAC+ports. 
* XXX should be changed to an argument to be passed to - * the lookup function, and allocated on attach + * the lookup function */ - struct nm_hash_ent ht[NM_BDG_HASH]; + struct nm_hash_ent *ht; // allocated on attach #ifdef CONFIG_NET_NS struct net *ns; @@ -365,17 +368,20 @@ nm_find_bridge(const char *name, int create) } if (i == num_bridges && b) { /* name not found, can create entry */ /* initialize the bridge */ - strncpy(b->bdg_basename, name, namelen); ND("create new bridge %s with ports %d", b->bdg_basename, b->bdg_active_ports); + b->ht = nm_os_malloc(sizeof(struct nm_hash_ent) * NM_BDG_HASH); + if (b->ht == NULL) { + D("failed to allocate hash table"); + return NULL; + } + strncpy(b->bdg_basename, name, namelen); b->bdg_namelen = namelen; b->bdg_active_ports = 0; for (i = 0; i < NM_BDG_MAXPORTS; i++) b->bdg_port_index[i] = i; /* set the default function */ b->bdg_ops.lookup = netmap_bdg_learning; - /* reset the MAC address table */ - bzero(b->ht, sizeof(struct nm_hash_ent) * NM_BDG_HASH); NM_BNS_GET(b); } return b; @@ -503,6 +509,7 @@ netmap_bdg_detach_common(struct nm_bridge *b, int hw, int sw) ND("now %d active ports", lim); if (lim == 0) { ND("marking bridge %s as free", b->bdg_basename); + nm_os_free(b->ht); bzero(&b->bdg_ops, sizeof(b->bdg_ops)); NM_BNS_PUT(b); } @@ -542,11 +549,14 @@ netmap_vp_dtor(struct netmap_adapter *na) netmap_bdg_detach_common(b, vpna->bdg_port, -1); } - if (vpna->autodelete && na->ifp != NULL) { - ND("releasing %s", na->ifp->if_xname); - NMG_UNLOCK(); - nm_os_vi_detach(na->ifp); - NMG_LOCK(); + if (na->ifp != NULL && !nm_iszombie(na)) { + WNA(na->ifp) = NULL; + if (vpna->autodelete) { + ND("releasing %s", na->ifp->if_xname); + NMG_UNLOCK(); + nm_os_vi_detach(na->ifp); + NMG_LOCK(); + } } } @@ -603,11 +613,15 @@ err: static int nm_update_info(struct nmreq *nmr, struct netmap_adapter *na) { + uint64_t memsize; + int ret; nmr->nr_rx_rings = na->num_rx_rings; nmr->nr_tx_rings = na->num_tx_rings; nmr->nr_rx_slots = na->num_rx_desc; 
nmr->nr_tx_slots = na->num_tx_desc; - return netmap_mem_get_info(na->nm_mem, &nmr->nr_memsize, NULL, &nmr->nr_arg2); + ret = netmap_mem_get_info(na->nm_mem, &memsize, NULL, &nmr->nr_arg2); + nmr->nr_memsize = (uint32_t)memsize; + return ret; } /* @@ -736,7 +750,6 @@ netmap_get_bdg_na(struct nmreq *nmr, struct netmap_adapter **na, for (j = 0; j < b->bdg_active_ports; j++) { i = b->bdg_port_index[j]; vpna = b->bdg_ports[i]; - // KASSERT(na != NULL); ND("checking %s", vpna->up.name); if (!strcmp(vpna->up.name, nr_name)) { netmap_adapter_get(&vpna->up); @@ -788,6 +801,18 @@ netmap_get_bdg_na(struct nmreq *nmr, struct netmap_adapter **na, } else { struct netmap_adapter *hw; + /* the vale:nic syntax is only valid for some commands */ + switch (nmr->nr_cmd) { + case NETMAP_BDG_ATTACH: + case NETMAP_BDG_DETACH: + case NETMAP_BDG_POLLING_ON: + case NETMAP_BDG_POLLING_OFF: + break; /* ok */ + default: + error = EINVAL; + goto out; + } + error = netmap_get_hw_na(ifp, nmd, &hw); if (error || hw == NULL) goto out; @@ -848,6 +873,12 @@ nm_bdg_ctl_attach(struct nmreq *nmr) } } + /* XXX check existing one */ + error = netmap_get_bdg_na(nmr, &na, nmd, 0); + if (!error) { + error = EBUSY; + goto unref_exit; + } error = netmap_get_bdg_na(nmr, &na, nmd, 1 /* create if not exists */); if (error) /* no device */ goto unlock_exit; @@ -1149,9 +1180,8 @@ nm_bdg_ctl_polling_start(struct nmreq *nmr, struct netmap_adapter *na) bna->na_polling_state = bps; bps->bna = bna; - /* disable interrupt if possible */ - if (bna->hwna->nm_intr) - bna->hwna->nm_intr(bna->hwna, 0); + /* disable interrupts if possible */ + nma_intr_enable(bna->hwna, 0); /* start kthread now */ error = nm_bdg_polling_start_kthreads(bps); if (error) { @@ -1159,8 +1189,7 @@ nm_bdg_ctl_polling_start(struct nmreq *nmr, struct netmap_adapter *na) nm_os_free(bps->kthreads); nm_os_free(bps); bna->na_polling_state = NULL; - if (bna->hwna->nm_intr) - bna->hwna->nm_intr(bna->hwna, 1); + nma_intr_enable(bna->hwna, 1); } return error; 
} @@ -1180,9 +1209,8 @@ nm_bdg_ctl_polling_stop(struct nmreq *nmr, struct netmap_adapter *na) bps->configured = false; nm_os_free(bps); bna->na_polling_state = NULL; - /* reenable interrupt */ - if (bna->hwna->nm_intr) - bna->hwna->nm_intr(bna->hwna, 1); + /* reenable interrupts */ + nma_intr_enable(bna->hwna, 1); return 0; } @@ -1577,7 +1605,7 @@ netmap_vp_reg(struct netmap_adapter *na, int onoff) BDG_WLOCK(vpna->na_bdg); if (onoff) { for_rx_tx(t) { - for (i = 0; i < nma_get_nrings(na, t) + 1; i++) { + for (i = 0; i < netmap_real_rings(na, t); i++) { struct netmap_kring *kring = &NMR(na, t)[i]; if (nm_kring_pending_on(kring)) @@ -1593,7 +1621,7 @@ netmap_vp_reg(struct netmap_adapter *na, int onoff) if (na->active_fds == 0) na->na_flags &= ~NAF_NETMAP_ON; for_rx_tx(t) { - for (i = 0; i < nma_get_nrings(na, t) + 1; i++) { + for (i = 0; i < netmap_real_rings(na, t); i++) { struct netmap_kring *kring = &NMR(na, t)[i]; if (nm_kring_pending_off(kring)) @@ -1657,7 +1685,7 @@ netmap_bdg_learning(struct nm_bdg_fwd *ft, uint8_t *dst_ring, */ if (((buf[6] & 1) == 0) && (na->last_smac != smac)) { /* valid src */ uint8_t *s = buf+6; - sh = nm_bridge_rthash(s); // XXX hash of source + sh = nm_bridge_rthash(s); /* hash of source */ /* update source port forwarding entry */ na->last_smac = ht[sh].mac = smac; /* XXX expire ? */ ht[sh].ports = mysrc; @@ -1667,11 +1695,10 @@ netmap_bdg_learning(struct nm_bdg_fwd *ft, uint8_t *dst_ring, } dst = NM_BDG_BROADCAST; if ((buf[0] & 1) == 0) { /* unicast */ - dh = nm_bridge_rthash(buf); // XXX hash of dst + dh = nm_bridge_rthash(buf); /* hash of dst */ if (ht[dh].mac == dmac) { /* found dst */ dst = ht[dh].ports; } - /* XXX otherwise return NM_BDG_UNKNOWN ? 
*/ } return dst; } @@ -1785,10 +1812,8 @@ nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n, struct netmap_vp_adapter *na, dst_port = b->bdg_ops.lookup(&ft[i], &dst_ring, na); if (netmap_verbose > 255) RD(5, "slot %d port %d -> %d", i, me, dst_port); - if (dst_port == NM_BDG_NOPORT) + if (dst_port >= NM_BDG_NOPORT) continue; /* this packet is identified to be dropped */ - else if (unlikely(dst_port > NM_BDG_MAXPORTS)) - continue; else if (dst_port == NM_BDG_BROADCAST) dst_ring = 0; /* broadcasts always go to ring 0 */ else if (unlikely(dst_port == me || @@ -1882,10 +1907,10 @@ nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n, struct netmap_vp_adapter *na, needed = d->bq_len + brddst->bq_len; if (unlikely(dst_na->up.virt_hdr_len != na->up.virt_hdr_len)) { - if (netmap_verbose) { - RD(3, "virt_hdr_mismatch, src %d dst %d", na->up.virt_hdr_len, - dst_na->up.virt_hdr_len); - } + if (netmap_verbose) { + RD(3, "virt_hdr_mismatch, src %d dst %d", na->up.virt_hdr_len, + dst_na->up.virt_hdr_len); + } /* There is a virtio-net header/offloadings mismatch between * source and destination. The slower mismatch datapath will * be used to cope with all the mismatches. @@ -1902,6 +1927,7 @@ nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n, struct netmap_vp_adapter *na, * TCPv4 we must account for ethernet header, IP header * and TCPv4 header). 
*/ + KASSERT(dst_na->mfs > 0, ("vpna->mfs is 0")); needed = (needed * na->mfs) / (dst_na->mfs - WORST_CASE_GSO_HEADER) + 1; ND(3, "srcmtu=%u, dstmtu=%u, x=%u", na->mfs, dst_na->mfs, needed); @@ -1916,6 +1942,9 @@ nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n, struct netmap_vp_adapter *na, dst_nr = dst_nr % nrings; kring = &dst_na->up.rx_rings[dst_nr]; ring = kring->ring; + /* the destination ring may have not been opened for RX */ + if (unlikely(ring == NULL || kring->nr_mode != NKR_NETMAP_ON)) + goto cleanup; lim = kring->nkr_num_slots - 1; retry: @@ -2196,7 +2225,7 @@ netmap_vp_bdg_attach(const char *name, struct netmap_adapter *na) struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter *)na; if (vpna->na_bdg) - return EBUSY; + return netmap_bwrap_attach(name, na); na->na_vp = vpna; strncpy(na->name, name, sizeof(na->name)); na->na_hostvp = NULL; @@ -2248,7 +2277,10 @@ netmap_vp_create(struct nmreq *nmr, struct ifnet *ifp, nm_bound_var(&nmr->nr_arg3, 0, 0, 128*NM_BDG_MAXSLOTS, NULL); na->num_rx_desc = nmr->nr_rx_slots; - vpna->mfs = 1514; + /* Set the mfs to a default value, as it is needed on the VALE + * mismatch datapath. XXX We should set it according to the MTU + * known to the kernel. */ + vpna->mfs = NM_BDG_MFS_DEFAULT; vpna->last_smac = ~0llu; /*if (vpna->mfs > netmap_buf_size) TODO netmap_buf_size is zero?? vpna->mfs = netmap_buf_size; */ @@ -2330,7 +2362,8 @@ netmap_bwrap_dtor(struct netmap_adapter *na) struct nm_bridge *b = bna->up.na_bdg, *bh = bna->host.na_bdg; - netmap_mem_put(bna->host.up.nm_mem); + if (bna->host.up.nm_mem) + netmap_mem_put(bna->host.up.nm_mem); if (b) { netmap_bdg_detach_common(b, bna->up.bdg_port, @@ -2459,28 +2492,6 @@ netmap_bwrap_reg(struct netmap_adapter *na, int onoff) hostna->up.na_lut = na->na_lut; } - /* cross-link the netmap rings - * The original number of rings comes from hwna, - * rx rings on one side equals tx rings on the other. 
- */ - for_rx_tx(t) { - enum txrx r = nm_txrx_swap(t); /* swap NR_TX <-> NR_RX */ - for (i = 0; i < nma_get_nrings(hwna, r) + 1; i++) { - NMR(hwna, r)[i].ring = NMR(na, t)[i].ring; - } - } - - if (na->na_flags & NAF_HOST_RINGS) { - struct netmap_adapter *hna = &hostna->up; - /* the hostna rings are the host rings of the bwrap. - * The corresponding krings must point back to the - * hostna - */ - hna->tx_rings = &na->tx_rings[na->num_tx_rings]; - hna->tx_rings[0].na = hna; - hna->rx_rings = &na->rx_rings[na->num_rx_rings]; - hna->rx_rings[0].na = hna; - } } /* pass down the pending ring state information */ @@ -2497,9 +2508,10 @@ netmap_bwrap_reg(struct netmap_adapter *na, int onoff) /* copy up the current ring state information */ for_rx_tx(t) { - for (i = 0; i < nma_get_nrings(na, t) + 1; i++) - NMR(na, t)[i].nr_mode = - NMR(hwna, t)[i].nr_mode; + for (i = 0; i < nma_get_nrings(na, t) + 1; i++) { + struct netmap_kring *kring = &NMR(hwna, t)[i]; + NMR(na, t)[i].nr_mode = kring->nr_mode; + } } /* impersonate a netmap_vp_adapter */ @@ -2537,6 +2549,14 @@ netmap_bwrap_reg(struct netmap_adapter *na, int onoff) hwna->na_lut.lut = NULL; hwna->na_lut.objtotal = 0; hwna->na_lut.objsize = 0; + + /* pass ownership of the netmap rings to the hwna */ + for_rx_tx(t) { + for (i = 0; i < nma_get_nrings(na, t) + 1; i++) { + NMR(na, t)[i].ring = NULL; + } + } + } return 0; @@ -2570,6 +2590,7 @@ netmap_bwrap_krings_create(struct netmap_adapter *na) struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter *)na; struct netmap_adapter *hwna = bna->hwna; + struct netmap_adapter *hostna = &bna->host.up; int i, error = 0; enum txrx t; @@ -2586,16 +2607,49 @@ netmap_bwrap_krings_create(struct netmap_adapter *na) goto err_del_vp_rings; } - /* get each ring slot number from the corresponding hwna ring */ - for_rx_tx(t) { - enum txrx r = nm_txrx_swap(t); /* swap NR_TX <-> NR_RX */ - for (i = 0; i < nma_get_nrings(hwna, r) + 1; i++) { - NMR(na, t)[i].nkr_num_slots = NMR(hwna, 
r)[i].nkr_num_slots; + /* increment the usage counter for all the hwna krings */ + for_rx_tx(t) { + for (i = 0; i < nma_get_nrings(hwna, t) + 1; i++) { + NMR(hwna, t)[i].users++; } + } + + /* now create the actual rings */ + error = netmap_mem_rings_create(hwna); + if (error) { + goto err_dec_users; + } + + /* cross-link the netmap rings + * The original number of rings comes from hwna, + * rx rings on one side equals tx rings on the other. + */ + for_rx_tx(t) { + enum txrx r = nm_txrx_swap(t); /* swap NR_TX <-> NR_RX */ + for (i = 0; i < nma_get_nrings(hwna, r) + 1; i++) { + NMR(na, t)[i].nkr_num_slots = NMR(hwna, r)[i].nkr_num_slots; + NMR(na, t)[i].ring = NMR(hwna, r)[i].ring; + } + } + + if (na->na_flags & NAF_HOST_RINGS) { + /* the hostna rings are the host rings of the bwrap. + * The corresponding krings must point back to the + * hostna + */ + hostna->tx_rings = &na->tx_rings[na->num_tx_rings]; + hostna->tx_rings[0].na = hostna; + hostna->rx_rings = &na->rx_rings[na->num_rx_rings]; + hostna->rx_rings[0].na = hostna; } return 0; +err_dec_users: + for_rx_tx(t) { + NMR(hwna, t)[i].users--; + } + hwna->nm_krings_delete(hwna); err_del_vp_rings: netmap_vp_krings_delete(na); @@ -2609,9 +2663,20 @@ netmap_bwrap_krings_delete(struct netmap_adapter *na) struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter *)na; struct netmap_adapter *hwna = bna->hwna; + enum txrx t; + int i; ND("%s", na->name); + /* decrement the usage counter for all the hwna krings */ + for_rx_tx(t) { + for (i = 0; i < nma_get_nrings(hwna, t) + 1; i++) { + NMR(hwna, t)[i].users--; + } + } + + /* delete any netmap rings that are no longer needed */ + netmap_mem_rings_delete(hwna); hwna->nm_krings_delete(hwna); netmap_vp_krings_delete(na); } @@ -2699,7 +2764,7 @@ netmap_bwrap_bdg_ctl(struct netmap_adapter *na, struct nmreq *nmr, int attach) if (npriv == NULL) return ENOMEM; npriv->np_ifp = na->ifp; /* let the priv destructor release the ref */ - error = netmap_do_regif(npriv, na, 0, 
NR_REG_NIC_SW); + error = netmap_do_regif(npriv, na, nmr->nr_ringid, nmr->nr_flags); if (error) { netmap_priv_delete(npriv); return error; @@ -2766,6 +2831,8 @@ netmap_bwrap_attach(const char *nr_name, struct netmap_adapter *hwna) na->nm_mem = netmap_mem_get(hwna->nm_mem); na->virt_hdr_len = hwna->virt_hdr_len; bna->up.retry = 1; /* XXX maybe this should depend on the hwna */ + /* Set the mfs, needed on the VALE mismatch datapath. */ + bna->up.mfs = NM_BDG_MFS_DEFAULT; bna->hwna = hwna; netmap_adapter_get(hwna); @@ -2793,6 +2860,7 @@ netmap_bwrap_attach(const char *nr_name, struct netmap_adapter *hwna) na->na_hostvp = hwna->na_hostvp = hostna->na_hostvp = &bna->host; hostna->na_flags = NAF_BUSY; /* prevent NIOCREGIF */ + bna->host.mfs = NM_BDG_MFS_DEFAULT; } ND("%s<->%s txr %d txd %d rxr %d rxd %d", |
