aboutsummaryrefslogtreecommitdiff
path: root/sys/dev/netmap
diff options
context:
space:
mode:
author Vincenzo Maffione <vmaffione@FreeBSD.org> 2018-04-09 09:24:26 +0000
committer Vincenzo Maffione <vmaffione@FreeBSD.org> 2018-04-09 09:24:26 +0000
commit 4f80b14ce2b17100b12dc3a346fb9e6e76764e11 (patch)
tree e7c1347079629914a4d8c369d8d70121ee53904f /sys/dev/netmap
parent df4531ffd910985c8ec5a288a69adff34ceb6c03 (diff)
Notes
Diffstat (limited to 'sys/dev/netmap')
-rw-r--r-- sys/dev/netmap/if_em_netmap.h 4
-rw-r--r-- sys/dev/netmap/if_igb_netmap.h 4
-rw-r--r-- sys/dev/netmap/if_ixl_netmap.h 3
-rw-r--r-- sys/dev/netmap/if_lem_netmap.h 4
-rw-r--r-- sys/dev/netmap/if_ptnet.c 14
-rw-r--r-- sys/dev/netmap/if_re_netmap.h 3
-rw-r--r-- sys/dev/netmap/if_vtnet_netmap.h 38
-rw-r--r-- sys/dev/netmap/ixgbe_netmap.h 3
-rw-r--r-- sys/dev/netmap/netmap.c 296
-rw-r--r-- sys/dev/netmap/netmap_freebsd.c 24
-rw-r--r-- sys/dev/netmap/netmap_generic.c 45
-rw-r--r-- sys/dev/netmap/netmap_kern.h 208
-rw-r--r-- sys/dev/netmap/netmap_mem2.c 758
-rw-r--r-- sys/dev/netmap/netmap_mem2.h 13
-rw-r--r-- sys/dev/netmap/netmap_monitor.c 8
-rw-r--r-- sys/dev/netmap/netmap_offloadings.c 24
-rw-r--r-- sys/dev/netmap/netmap_pipe.c 18
-rw-r--r-- sys/dev/netmap/netmap_pt.c 24
-rw-r--r-- sys/dev/netmap/netmap_vale.c 202
19 files changed, 1230 insertions, 463 deletions
diff --git a/sys/dev/netmap/if_em_netmap.h b/sys/dev/netmap/if_em_netmap.h
index 26f85fdf321d5..299bc3837d5ea 100644
--- a/sys/dev/netmap/if_em_netmap.h
+++ b/sys/dev/netmap/if_em_netmap.h
@@ -235,8 +235,6 @@ em_netmap_rxsync(struct netmap_kring *kring, int flags)
* First part: import newly received packets.
*/
if (netmap_no_pendintr || force_update) {
- uint16_t slot_flags = kring->nkr_slot_flags;
-
nic_i = rxr->next_to_check;
nm_i = netmap_idx_n2k(kring, nic_i);
@@ -247,7 +245,7 @@ em_netmap_rxsync(struct netmap_kring *kring, int flags)
if ((staterr & E1000_RXD_STAT_DD) == 0)
break;
ring->slot[nm_i].len = le16toh(curr->wb.upper.length);
- ring->slot[nm_i].flags = slot_flags;
+ ring->slot[nm_i].flags = 0;
bus_dmamap_sync(rxr->rxtag, rxr->rx_buffers[nic_i].map,
BUS_DMASYNC_POSTREAD);
nm_i = nm_next(nm_i, lim);
diff --git a/sys/dev/netmap/if_igb_netmap.h b/sys/dev/netmap/if_igb_netmap.h
index cd80f2663bc70..df15ceee7d8cb 100644
--- a/sys/dev/netmap/if_igb_netmap.h
+++ b/sys/dev/netmap/if_igb_netmap.h
@@ -217,8 +217,6 @@ igb_netmap_rxsync(struct netmap_kring *kring, int flags)
* First part: import newly received packets.
*/
if (netmap_no_pendintr || force_update) {
- uint16_t slot_flags = kring->nkr_slot_flags;
-
nic_i = rxr->next_to_check;
nm_i = netmap_idx_n2k(kring, nic_i);
@@ -229,7 +227,7 @@ igb_netmap_rxsync(struct netmap_kring *kring, int flags)
if ((staterr & E1000_RXD_STAT_DD) == 0)
break;
ring->slot[nm_i].len = le16toh(curr->wb.upper.length);
- ring->slot[nm_i].flags = slot_flags;
+ ring->slot[nm_i].flags = 0;
bus_dmamap_sync(rxr->ptag,
rxr->rx_buffers[nic_i].pmap, BUS_DMASYNC_POSTREAD);
nm_i = nm_next(nm_i, lim);
diff --git a/sys/dev/netmap/if_ixl_netmap.h b/sys/dev/netmap/if_ixl_netmap.h
index ea0bf35dea67e..e9b036d34e874 100644
--- a/sys/dev/netmap/if_ixl_netmap.h
+++ b/sys/dev/netmap/if_ixl_netmap.h
@@ -331,7 +331,6 @@ ixl_netmap_rxsync(struct netmap_kring *kring, int flags)
*/
if (netmap_no_pendintr || force_update) {
int crclen = ixl_crcstrip ? 0 : 4;
- uint16_t slot_flags = kring->nkr_slot_flags;
nic_i = rxr->next_check; // or also k2n(kring->nr_hwtail)
nm_i = netmap_idx_n2k(kring, nic_i);
@@ -346,7 +345,7 @@ ixl_netmap_rxsync(struct netmap_kring *kring, int flags)
break;
ring->slot[nm_i].len = ((qword & I40E_RXD_QW1_LENGTH_PBUF_MASK)
>> I40E_RXD_QW1_LENGTH_PBUF_SHIFT) - crclen;
- ring->slot[nm_i].flags = slot_flags;
+ ring->slot[nm_i].flags = 0;
bus_dmamap_sync(rxr->ptag,
rxr->buffers[nic_i].pmap, BUS_DMASYNC_POSTREAD);
nm_i = nm_next(nm_i, lim);
diff --git a/sys/dev/netmap/if_lem_netmap.h b/sys/dev/netmap/if_lem_netmap.h
index 56d65733a4f5f..f8ba2bb716c85 100644
--- a/sys/dev/netmap/if_lem_netmap.h
+++ b/sys/dev/netmap/if_lem_netmap.h
@@ -216,8 +216,6 @@ lem_netmap_rxsync(struct netmap_kring *kring, int flags)
* First part: import newly received packets.
*/
if (netmap_no_pendintr || force_update) {
- uint16_t slot_flags = kring->nkr_slot_flags;
-
nic_i = adapter->next_rx_desc_to_check;
nm_i = netmap_idx_n2k(kring, nic_i);
@@ -234,7 +232,7 @@ lem_netmap_rxsync(struct netmap_kring *kring, int flags)
len = 0;
}
ring->slot[nm_i].len = len;
- ring->slot[nm_i].flags = slot_flags;
+ ring->slot[nm_i].flags = 0;
bus_dmamap_sync(adapter->rxtag,
adapter->rx_buffer_area[nic_i].map,
BUS_DMASYNC_POSTREAD);
diff --git a/sys/dev/netmap/if_ptnet.c b/sys/dev/netmap/if_ptnet.c
index becf7e4e8f04b..1805a7f31e48d 100644
--- a/sys/dev/netmap/if_ptnet.c
+++ b/sys/dev/netmap/if_ptnet.c
@@ -216,6 +216,7 @@ static void ptnet_update_vnet_hdr(struct ptnet_softc *sc);
static int ptnet_nm_register(struct netmap_adapter *na, int onoff);
static int ptnet_nm_txsync(struct netmap_kring *kring, int flags);
static int ptnet_nm_rxsync(struct netmap_kring *kring, int flags);
+static void ptnet_nm_intr(struct netmap_adapter *na, int onoff);
static void ptnet_tx_intr(void *opaque);
static void ptnet_rx_intr(void *opaque);
@@ -477,6 +478,7 @@ ptnet_attach(device_t dev)
na_arg.nm_krings_create = ptnet_nm_krings_create;
na_arg.nm_krings_delete = ptnet_nm_krings_delete;
na_arg.nm_dtor = ptnet_nm_dtor;
+ na_arg.nm_intr = ptnet_nm_intr;
na_arg.nm_register = ptnet_nm_register;
na_arg.nm_txsync = ptnet_nm_txsync;
na_arg.nm_rxsync = ptnet_nm_rxsync;
@@ -1299,6 +1301,18 @@ ptnet_nm_rxsync(struct netmap_kring *kring, int flags)
}
static void
+ptnet_nm_intr(struct netmap_adapter *na, int onoff)
+{
+ struct ptnet_softc *sc = if_getsoftc(na->ifp);
+ int i;
+
+ for (i = 0; i < sc->num_rings; i++) {
+ struct ptnet_queue *pq = sc->queues + i;
+ pq->ptgh->guest_need_kick = onoff;
+ }
+}
+
+static void
ptnet_tx_intr(void *opaque)
{
struct ptnet_queue *pq = opaque;
diff --git a/sys/dev/netmap/if_re_netmap.h b/sys/dev/netmap/if_re_netmap.h
index 2cb3454c5f312..e7dd087acc676 100644
--- a/sys/dev/netmap/if_re_netmap.h
+++ b/sys/dev/netmap/if_re_netmap.h
@@ -201,7 +201,6 @@ re_netmap_rxsync(struct netmap_kring *kring, int flags)
* is to stop right before nm_hwcur.
*/
if (netmap_no_pendintr || force_update) {
- uint16_t slot_flags = kring->nkr_slot_flags;
uint32_t stop_i = nm_prev(kring->nr_hwcur, lim);
nic_i = sc->rl_ldata.rl_rx_prodidx; /* next pkt to check */
@@ -218,7 +217,7 @@ re_netmap_rxsync(struct netmap_kring *kring, int flags)
/* XXX subtract crc */
total_len = (total_len < 4) ? 0 : total_len - 4;
ring->slot[nm_i].len = total_len;
- ring->slot[nm_i].flags = slot_flags;
+ ring->slot[nm_i].flags = 0;
/* sync was in re_newbuf() */
bus_dmamap_sync(sc->rl_ldata.rl_rx_mtag,
rxd[nic_i].rx_dmamap, BUS_DMASYNC_POSTREAD);
diff --git a/sys/dev/netmap/if_vtnet_netmap.h b/sys/dev/netmap/if_vtnet_netmap.h
index 4bed0e718dd49..10789c53d1f06 100644
--- a/sys/dev/netmap/if_vtnet_netmap.h
+++ b/sys/dev/netmap/if_vtnet_netmap.h
@@ -122,6 +122,7 @@ vtnet_netmap_txsync(struct netmap_kring *kring, int flags)
struct SOFTC_T *sc = ifp->if_softc;
struct vtnet_txq *txq = &sc->vtnet_txqs[ring_nr];
struct virtqueue *vq = txq->vtntx_vq;
+ int interrupts = !(kring->nr_kflags & NKR_NOINTR);
/*
* First part: process new packets to send.
@@ -179,7 +180,9 @@ vtnet_netmap_txsync(struct netmap_kring *kring, int flags)
ring->head, ring->tail, virtqueue_nused(vq),
(virtqueue_dump(vq), 1));
virtqueue_notify(vq);
- virtqueue_enable_intr(vq); // like postpone with 0
+ if (interrupts) {
+ virtqueue_enable_intr(vq); // like postpone with 0
+ }
}
@@ -209,7 +212,7 @@ vtnet_netmap_txsync(struct netmap_kring *kring, int flags)
if (nm_i != kring->nr_hwtail /* && vtnet_txq_below_threshold(txq) == 0*/) {
ND(3, "disable intr, hwcur %d", nm_i);
virtqueue_disable_intr(vq);
- } else {
+ } else if (interrupts) {
ND(3, "enable intr, hwcur %d", nm_i);
virtqueue_postpone_intr(vq, VQ_POSTPONE_SHORT);
}
@@ -277,6 +280,7 @@ vtnet_netmap_rxsync(struct netmap_kring *kring, int flags)
u_int const lim = kring->nkr_num_slots - 1;
u_int const head = kring->rhead;
int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR;
+ int interrupts = !(kring->nr_kflags & NKR_NOINTR);
/* device-specific */
struct SOFTC_T *sc = ifp->if_softc;
@@ -297,7 +301,6 @@ vtnet_netmap_rxsync(struct netmap_kring *kring, int flags)
* and vtnet_netmap_init_buffers().
*/
if (netmap_no_pendintr || force_update) {
- uint16_t slot_flags = kring->nkr_slot_flags;
struct netmap_adapter *token;
nm_i = kring->nr_hwtail;
@@ -309,7 +312,7 @@ vtnet_netmap_rxsync(struct netmap_kring *kring, int flags)
break;
if (likely(token == (void *)rxq)) {
ring->slot[nm_i].len = len;
- ring->slot[nm_i].flags = slot_flags;
+ ring->slot[nm_i].flags = 0;
nm_i = nm_next(nm_i, lim);
n++;
} else {
@@ -334,7 +337,9 @@ vtnet_netmap_rxsync(struct netmap_kring *kring, int flags)
kring->nr_hwcur = err;
virtqueue_notify(vq);
/* After draining the queue may need an intr from the hypervisor */
- vtnet_rxq_enable_intr(rxq);
+ if (interrupts) {
+ vtnet_rxq_enable_intr(rxq);
+ }
}
ND("[C] h %d c %d t %d hwcur %d hwtail %d",
@@ -345,6 +350,28 @@ vtnet_netmap_rxsync(struct netmap_kring *kring, int flags)
}
+/* Enable/disable interrupts on all virtqueues. */
+static void
+vtnet_netmap_intr(struct netmap_adapter *na, int onoff)
+{
+ struct SOFTC_T *sc = na->ifp->if_softc;
+ int i;
+
+ for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
+ struct vtnet_rxq *rxq = &sc->vtnet_rxqs[i];
+ struct vtnet_txq *txq = &sc->vtnet_txqs[i];
+ struct virtqueue *txvq = txq->vtntx_vq;
+
+ if (onoff) {
+ vtnet_rxq_enable_intr(rxq);
+ virtqueue_enable_intr(txvq);
+ } else {
+ vtnet_rxq_disable_intr(rxq);
+ virtqueue_disable_intr(txvq);
+ }
+ }
+}
+
/* Make RX virtqueues buffers pointing to netmap buffers. */
static int
vtnet_netmap_init_rx_buffers(struct SOFTC_T *sc)
@@ -417,6 +444,7 @@ vtnet_netmap_attach(struct SOFTC_T *sc)
na.nm_txsync = vtnet_netmap_txsync;
na.nm_rxsync = vtnet_netmap_rxsync;
na.nm_config = vtnet_netmap_config;
+ na.nm_intr = vtnet_netmap_intr;
na.num_tx_rings = na.num_rx_rings = sc->vtnet_max_vq_pairs;
D("max rings %d", sc->vtnet_max_vq_pairs);
netmap_attach(&na);
diff --git a/sys/dev/netmap/ixgbe_netmap.h b/sys/dev/netmap/ixgbe_netmap.h
index 419e08f0f6def..30da631917750 100644
--- a/sys/dev/netmap/ixgbe_netmap.h
+++ b/sys/dev/netmap/ixgbe_netmap.h
@@ -397,7 +397,6 @@ ixgbe_netmap_rxsync(struct netmap_kring *kring, int flags)
*/
if (netmap_no_pendintr || force_update) {
int crclen = (ix_crcstrip || IXGBE_IS_VF(adapter) ) ? 0 : 4;
- uint16_t slot_flags = kring->nkr_slot_flags;
nic_i = rxr->next_to_check; // or also k2n(kring->nr_hwtail)
nm_i = netmap_idx_n2k(kring, nic_i);
@@ -409,7 +408,7 @@ ixgbe_netmap_rxsync(struct netmap_kring *kring, int flags)
if ((staterr & IXGBE_RXD_STAT_DD) == 0)
break;
ring->slot[nm_i].len = le16toh(curr->wb.upper.length) - crclen;
- ring->slot[nm_i].flags = slot_flags;
+ ring->slot[nm_i].flags = 0;
bus_dmamap_sync(rxr->ptag,
rxr->rx_buffers[nic_i].pmap, BUS_DMASYNC_POSTREAD);
nm_i = nm_next(nm_i, lim);
diff --git a/sys/dev/netmap/netmap.c b/sys/dev/netmap/netmap.c
index edcc308e8d870..3c5551bad1569 100644
--- a/sys/dev/netmap/netmap.c
+++ b/sys/dev/netmap/netmap.c
@@ -482,10 +482,8 @@ ports attached to the switch)
int netmap_verbose;
static int netmap_no_timestamp; /* don't timestamp on rxsync */
-int netmap_mitigate = 1;
int netmap_no_pendintr = 1;
int netmap_txsync_retry = 2;
-int netmap_flags = 0; /* debug flags */
static int netmap_fwd = 0; /* force transparent forwarding */
/*
@@ -515,7 +513,9 @@ int netmap_generic_mit = 100*1000;
* Anyway users looking for the best performance should
* use native adapters.
*/
+#ifdef linux
int netmap_generic_txqdisc = 1;
+#endif
/* Default number of slots and queues for generic adapters. */
int netmap_generic_ringsize = 1024;
@@ -539,21 +539,32 @@ SYSCTL_INT(_dev_netmap, OID_AUTO, verbose,
CTLFLAG_RW, &netmap_verbose, 0, "Verbose mode");
SYSCTL_INT(_dev_netmap, OID_AUTO, no_timestamp,
CTLFLAG_RW, &netmap_no_timestamp, 0, "no_timestamp");
-SYSCTL_INT(_dev_netmap, OID_AUTO, mitigate, CTLFLAG_RW, &netmap_mitigate, 0, "");
-SYSCTL_INT(_dev_netmap, OID_AUTO, no_pendintr,
- CTLFLAG_RW, &netmap_no_pendintr, 0, "Always look for new received packets.");
+SYSCTL_INT(_dev_netmap, OID_AUTO, no_pendintr, CTLFLAG_RW, &netmap_no_pendintr,
+ 0, "Always look for new received packets.");
SYSCTL_INT(_dev_netmap, OID_AUTO, txsync_retry, CTLFLAG_RW,
- &netmap_txsync_retry, 0 , "Number of txsync loops in bridge's flush.");
+ &netmap_txsync_retry, 0, "Number of txsync loops in bridge's flush.");
-SYSCTL_INT(_dev_netmap, OID_AUTO, flags, CTLFLAG_RW, &netmap_flags, 0 , "");
-SYSCTL_INT(_dev_netmap, OID_AUTO, fwd, CTLFLAG_RW, &netmap_fwd, 0 , "");
-SYSCTL_INT(_dev_netmap, OID_AUTO, admode, CTLFLAG_RW, &netmap_admode, 0 , "");
-SYSCTL_INT(_dev_netmap, OID_AUTO, generic_mit, CTLFLAG_RW, &netmap_generic_mit, 0 , "");
-SYSCTL_INT(_dev_netmap, OID_AUTO, generic_ringsize, CTLFLAG_RW, &netmap_generic_ringsize, 0 , "");
-SYSCTL_INT(_dev_netmap, OID_AUTO, generic_rings, CTLFLAG_RW, &netmap_generic_rings, 0 , "");
-SYSCTL_INT(_dev_netmap, OID_AUTO, generic_txqdisc, CTLFLAG_RW, &netmap_generic_txqdisc, 0 , "");
-SYSCTL_INT(_dev_netmap, OID_AUTO, ptnet_vnet_hdr, CTLFLAG_RW, &ptnet_vnet_hdr, 0 , "");
-SYSCTL_INT(_dev_netmap, OID_AUTO, ptnetmap_tx_workers, CTLFLAG_RW, &ptnetmap_tx_workers, 0 , "");
+SYSCTL_INT(_dev_netmap, OID_AUTO, fwd, CTLFLAG_RW, &netmap_fwd, 0,
+ "Force NR_FORWARD mode");
+SYSCTL_INT(_dev_netmap, OID_AUTO, admode, CTLFLAG_RW, &netmap_admode, 0,
+ "Adapter mode. 0 selects the best option available, "
+ "1 forces native adapter, 2 forces emulated adapter");
+SYSCTL_INT(_dev_netmap, OID_AUTO, generic_mit, CTLFLAG_RW, &netmap_generic_mit,
+ 0, "RX notification interval in nanoseconds");
+SYSCTL_INT(_dev_netmap, OID_AUTO, generic_ringsize, CTLFLAG_RW,
+ &netmap_generic_ringsize, 0,
+ "Number of per-ring slots for emulated netmap mode");
+SYSCTL_INT(_dev_netmap, OID_AUTO, generic_rings, CTLFLAG_RW,
+ &netmap_generic_rings, 0,
+ "Number of TX/RX queues for emulated netmap adapters");
+#ifdef linux
+SYSCTL_INT(_dev_netmap, OID_AUTO, generic_txqdisc, CTLFLAG_RW,
+ &netmap_generic_txqdisc, 0, "Use qdisc for generic adapters");
+#endif
+SYSCTL_INT(_dev_netmap, OID_AUTO, ptnet_vnet_hdr, CTLFLAG_RW, &ptnet_vnet_hdr,
+ 0, "Allow ptnet devices to use virtio-net headers");
+SYSCTL_INT(_dev_netmap, OID_AUTO, ptnetmap_tx_workers, CTLFLAG_RW,
+ &ptnetmap_tx_workers, 0, "Use worker threads for ptnetmap TX processing");
SYSEND;
@@ -912,7 +923,19 @@ netmap_hw_krings_delete(struct netmap_adapter *na)
netmap_krings_delete(na);
}
-
+static void
+netmap_mem_drop(struct netmap_adapter *na)
+{
+ int last = netmap_mem_deref(na->nm_mem, na);
+ /* if the native allocator had been overridden on regif,
+ * restore it now and drop the temporary one
+ */
+ if (last && na->nm_mem_prev) {
+ netmap_mem_put(na->nm_mem);
+ na->nm_mem = na->nm_mem_prev;
+ na->nm_mem_prev = NULL;
+ }
+}
/*
* Undo everything that was done in netmap_do_regif(). In particular,
@@ -980,7 +1003,7 @@ netmap_do_unregif(struct netmap_priv_d *priv)
/* delete the nifp */
netmap_mem_if_delete(na, priv->np_nifp);
/* drop the allocator */
- netmap_mem_deref(na->nm_mem, na);
+ netmap_mem_drop(na);
/* mark the priv as unregistered */
priv->np_na = NULL;
priv->np_nifp = NULL;
@@ -1289,7 +1312,7 @@ netmap_rxsync_from_host(struct netmap_kring *kring, int flags)
D("%s", nm_dump_buf(NMB(na, slot),len, 128, NULL));
slot->len = len;
- slot->flags = kring->nkr_slot_flags;
+ slot->flags = 0;
nm_i = nm_next(nm_i, lim);
mbq_enqueue(&fq, m);
}
@@ -1409,7 +1432,7 @@ netmap_get_hw_na(struct ifnet *ifp, struct netmap_mem_d *nmd, struct netmap_adap
assign_mem:
if (nmd != NULL && !((*na)->na_flags & NAF_MEM_OWNER) &&
(*na)->active_fds == 0 && ((*na)->nm_mem != nmd)) {
- netmap_mem_put((*na)->nm_mem);
+ (*na)->nm_mem_prev = (*na)->nm_mem;
(*na)->nm_mem = netmap_mem_get(nmd);
}
@@ -1896,7 +1919,8 @@ netmap_krings_get(struct netmap_priv_d *priv)
int excl = (priv->np_flags & NR_EXCLUSIVE);
enum txrx t;
- ND("%s: grabbing tx [%d, %d) rx [%d, %d)",
+ if (netmap_verbose)
+ D("%s: grabbing tx [%d, %d) rx [%d, %d)",
na->name,
priv->np_qfirst[NR_TX],
priv->np_qlast[NR_TX],
@@ -2059,9 +2083,57 @@ netmap_do_regif(struct netmap_priv_d *priv, struct netmap_adapter *na,
if (na->active_fds == 0) {
/*
* If this is the first registration of the adapter,
- * create the in-kernel view of the netmap rings,
- * the netmap krings.
+ * perform sanity checks and create the in-kernel view
+ * of the netmap rings (the netmap krings).
*/
+ if (na->ifp) {
+ /* This netmap adapter is attached to an ifnet. */
+ unsigned nbs = netmap_mem_bufsize(na->nm_mem);
+ unsigned mtu = nm_os_ifnet_mtu(na->ifp);
+ /* The maximum amount of bytes that a single
+ * receive or transmit NIC descriptor can hold. */
+ unsigned hw_max_slot_len = 4096;
+
+ if (mtu <= hw_max_slot_len) {
+ /* The MTU fits a single NIC slot. We only
+ * need to check that netmap buffers are
+ * large enough to hold an MTU. NS_MOREFRAG
+ * cannot be used in this case. */
+ if (nbs < mtu) {
+ nm_prerr("error: netmap buf size (%u) "
+ "< device MTU (%u)", nbs, mtu);
+ error = EINVAL;
+ goto err_drop_mem;
+ }
+ } else {
+ /* More NIC slots may be needed to receive
+ * or transmit a single packet. Check that
+ * the adapter supports NS_MOREFRAG and that
+ * netmap buffers are large enough to hold
+ * the maximum per-slot size. */
+ if (!(na->na_flags & NAF_MOREFRAG)) {
+ nm_prerr("error: large MTU (%d) needed "
+ "but %s does not support "
+ "NS_MOREFRAG", mtu,
+ na->ifp->if_xname);
+ error = EINVAL;
+ goto err_drop_mem;
+ } else if (nbs < hw_max_slot_len) {
+ nm_prerr("error: using NS_MOREFRAG on "
+ "%s requires netmap buf size "
+ ">= %u", na->ifp->if_xname,
+ hw_max_slot_len);
+ error = EINVAL;
+ goto err_drop_mem;
+ } else {
+ nm_prinf("info: netmap application on "
+ "%s needs to support "
+ "NS_MOREFRAG "
+ "(MTU=%u,netmap_buf_size=%u)",
+ na->ifp->if_xname, mtu, nbs);
+ }
+ }
+ }
/*
* Depending on the adapter, this may also create
@@ -2128,15 +2200,15 @@ err_put_lut:
memset(&na->na_lut, 0, sizeof(na->na_lut));
err_del_if:
netmap_mem_if_delete(na, nifp);
-err_rel_excl:
- netmap_krings_put(priv);
err_del_rings:
netmap_mem_rings_delete(na);
+err_rel_excl:
+ netmap_krings_put(priv);
err_del_krings:
if (na->active_fds == 0)
na->nm_krings_delete(na);
err_drop_mem:
- netmap_mem_deref(na->nm_mem, na);
+ netmap_mem_drop(na);
err:
priv->np_na = NULL;
return error;
@@ -2224,6 +2296,7 @@ netmap_ioctl(struct netmap_priv_d *priv, u_long cmd, caddr_t data, struct thread
do {
/* memsize is always valid */
u_int memflags;
+ uint64_t memsize;
if (nmr->nr_name[0] != '\0') {
@@ -2243,10 +2316,11 @@ netmap_ioctl(struct netmap_priv_d *priv, u_long cmd, caddr_t data, struct thread
}
}
- error = netmap_mem_get_info(nmd, &nmr->nr_memsize, &memflags,
+ error = netmap_mem_get_info(nmd, &memsize, &memflags,
&nmr->nr_arg2);
if (error)
break;
+ nmr->nr_memsize = (uint32_t)memsize;
if (na == NULL) /* only memory info */
break;
nmr->nr_offset = 0;
@@ -2304,6 +2378,17 @@ netmap_ioctl(struct netmap_priv_d *priv, u_long cmd, caddr_t data, struct thread
}
NMG_UNLOCK();
break;
+ } else if (i == NETMAP_POOLS_CREATE) {
+ nmd = netmap_mem_ext_create(nmr, &error);
+ if (nmd == NULL)
+ break;
+ /* reset the fields used by POOLS_CREATE to
+ * avoid confusing the rest of the code
+ */
+ nmr->nr_cmd = 0;
+ nmr->nr_arg1 = 0;
+ nmr->nr_arg2 = 0;
+ nmr->nr_arg3 = 0;
} else if (i != 0) {
D("nr_cmd must be 0 not %d", i);
error = EINVAL;
@@ -2314,7 +2399,7 @@ netmap_ioctl(struct netmap_priv_d *priv, u_long cmd, caddr_t data, struct thread
NMG_LOCK();
do {
u_int memflags;
- struct ifnet *ifp;
+ uint64_t memsize;
if (priv->np_nifp != NULL) { /* thread already registered */
error = EBUSY;
@@ -2356,12 +2441,13 @@ netmap_ioctl(struct netmap_priv_d *priv, u_long cmd, caddr_t data, struct thread
nmr->nr_tx_rings = na->num_tx_rings;
nmr->nr_rx_slots = na->num_rx_desc;
nmr->nr_tx_slots = na->num_tx_desc;
- error = netmap_mem_get_info(na->nm_mem, &nmr->nr_memsize, &memflags,
+ error = netmap_mem_get_info(na->nm_mem, &memsize, &memflags,
&nmr->nr_arg2);
if (error) {
netmap_do_unregif(priv);
break;
}
+ nmr->nr_memsize = (uint32_t)memsize;
if (memflags & NETMAP_MEM_PRIVATE) {
*(uint32_t *)(uintptr_t)&nifp->ni_flags |= NI_PRIV_MEM;
}
@@ -2533,7 +2619,6 @@ netmap_poll(struct netmap_priv_d *priv, int events, NM_SELRECORD_T *sr)
#define want_tx want[NR_TX]
#define want_rx want[NR_RX]
struct mbq q; /* packets from RX hw queues to host stack */
- enum txrx t;
/*
* In order to avoid nested locks, we need to "double check"
@@ -2585,14 +2670,15 @@ netmap_poll(struct netmap_priv_d *priv, int events, NM_SELRECORD_T *sr)
check_all_tx = nm_si_user(priv, NR_TX);
check_all_rx = nm_si_user(priv, NR_RX);
+#ifdef __FreeBSD__
/*
* We start with a lock free round which is cheap if we have
* slots available. If this fails, then lock and call the sync
- * routines.
+ * routines. We can't do this on Linux, as the contract says
+ * that we must call nm_os_selrecord() unconditionally.
*/
-#if 1 /* new code- call rx if any of the ring needs to release or read buffers */
if (want_tx) {
- t = NR_TX;
+ enum txrx t = NR_TX;
for (i = priv->np_qfirst[t]; want[t] && i < priv->np_qlast[t]; i++) {
kring = &NMR(na, t)[i];
/* XXX compare ring->cur and kring->tail */
@@ -2603,8 +2689,8 @@ netmap_poll(struct netmap_priv_d *priv, int events, NM_SELRECORD_T *sr)
}
}
if (want_rx) {
+ enum txrx t = NR_RX;
want_rx = 0; /* look for a reason to run the handlers */
- t = NR_RX;
for (i = priv->np_qfirst[t]; i < priv->np_qlast[t]; i++) {
kring = &NMR(na, t)[i];
if (kring->ring->cur == kring->ring->tail /* try fetch new buffers */
@@ -2615,24 +2701,20 @@ netmap_poll(struct netmap_priv_d *priv, int events, NM_SELRECORD_T *sr)
if (!want_rx)
revents |= events & (POLLIN | POLLRDNORM); /* we have data */
}
-#else /* old code */
- for_rx_tx(t) {
- for (i = priv->np_qfirst[t]; want[t] && i < priv->np_qlast[t]; i++) {
- kring = &NMR(na, t)[i];
- /* XXX compare ring->cur and kring->tail */
- if (!nm_ring_empty(kring->ring)) {
- revents |= want[t];
- want[t] = 0; /* also breaks the loop */
- }
- }
- }
-#endif /* old code */
+#endif
+
+#ifdef linux
+ /* The selrecord must be unconditional on linux. */
+ nm_os_selrecord(sr, check_all_tx ?
+ &na->si[NR_TX] : &na->tx_rings[priv->np_qfirst[NR_TX]].si);
+ nm_os_selrecord(sr, check_all_rx ?
+ &na->si[NR_RX] : &na->rx_rings[priv->np_qfirst[NR_RX]].si);
+#endif /* linux */
/*
* If we want to push packets out (priv->np_txpoll) or
* want_tx is still set, we must issue txsync calls
* (on all rings, to avoid that the tx rings stall).
- * XXX should also check cur != hwcur on the tx rings.
* Fortunately, normal tx mode has np_txpoll set.
*/
if (priv->np_txpoll || want_tx) {
@@ -2649,6 +2731,12 @@ flush_tx:
kring = &na->tx_rings[i];
ring = kring->ring;
+ /*
+ * Don't try to txsync this TX ring if we already found some
+ * space in some of the TX rings (want_tx == 0) and there are no
+ * TX slots in this ring that need to be flushed to the NIC
+ * (cur == hwcur).
+ */
if (!send_down && !want_tx && ring->cur == kring->nr_hwcur)
continue;
@@ -2676,14 +2764,18 @@ flush_tx:
if (found) { /* notify other listeners */
revents |= want_tx;
want_tx = 0;
+#ifndef linux
kring->nm_notify(kring, 0);
+#endif /* !linux */
}
}
/* if there were any packet to forward we must have handled them by now */
send_down = 0;
if (want_tx && retry_tx && sr) {
+#ifndef linux
nm_os_selrecord(sr, check_all_tx ?
&na->si[NR_TX] : &na->tx_rings[priv->np_qfirst[NR_TX]].si);
+#endif /* !linux */
retry_tx = 0;
goto flush_tx;
}
@@ -2734,14 +2826,18 @@ do_retry_rx:
if (found) {
revents |= want_rx;
retry_rx = 0;
+#ifndef linux
kring->nm_notify(kring, 0);
+#endif /* !linux */
}
}
+#ifndef linux
if (retry_rx && sr) {
nm_os_selrecord(sr, check_all_rx ?
&na->si[NR_RX] : &na->rx_rings[priv->np_qfirst[NR_RX]].si);
}
+#endif /* !linux */
if (send_down || retry_rx) {
retry_rx = 0;
if (send_down)
@@ -2766,6 +2862,44 @@ do_retry_rx:
#undef want_rx
}
+int
+nma_intr_enable(struct netmap_adapter *na, int onoff)
+{
+ bool changed = false;
+ enum txrx t;
+ int i;
+
+ for_rx_tx(t) {
+ for (i = 0; i < nma_get_nrings(na, t); i++) {
+ struct netmap_kring *kring = &NMR(na, t)[i];
+ int on = !(kring->nr_kflags & NKR_NOINTR);
+
+ if (!!onoff != !!on) {
+ changed = true;
+ }
+ if (onoff) {
+ kring->nr_kflags &= ~NKR_NOINTR;
+ } else {
+ kring->nr_kflags |= NKR_NOINTR;
+ }
+ }
+ }
+
+ if (!changed) {
+ return 0; /* nothing to do */
+ }
+
+ if (!na->nm_intr) {
+ D("Cannot %s interrupts for %s", onoff ? "enable" : "disable",
+ na->name);
+ return -1;
+ }
+
+ na->nm_intr(na, onoff);
+
+ return 0;
+}
+
/*-------------------- driver support routines -------------------*/
@@ -2804,6 +2938,7 @@ netmap_attach_common(struct netmap_adapter *na)
if (na->na_flags & NAF_HOST_RINGS && na->ifp) {
na->if_input = na->ifp->if_input; /* for netmap_send_up */
}
+ na->pdev = na; /* make sure netmap_mem_map() is called */
#endif /* __FreeBSD__ */
if (na->nm_krings_create == NULL) {
/* we assume that we have been called by a driver,
@@ -2832,22 +2967,6 @@ netmap_attach_common(struct netmap_adapter *na)
return 0;
}
-
-/* standard cleanup, called by all destructors */
-void
-netmap_detach_common(struct netmap_adapter *na)
-{
- if (na->tx_rings) { /* XXX should not happen */
- D("freeing leftover tx_rings");
- na->nm_krings_delete(na);
- }
- netmap_pipe_dealloc(na);
- if (na->nm_mem)
- netmap_mem_put(na->nm_mem);
- bzero(na, sizeof(*na));
- nm_os_free(na);
-}
-
/* Wrapper for the register callback provided netmap-enabled
* hardware drivers.
* nm_iszombie(na) means that the driver module has been
@@ -2900,7 +3019,7 @@ netmap_hw_dtor(struct netmap_adapter *na)
* Return 0 on success, ENOMEM otherwise.
*/
int
-netmap_attach_ext(struct netmap_adapter *arg, size_t size)
+netmap_attach_ext(struct netmap_adapter *arg, size_t size, int override_reg)
{
struct netmap_hw_adapter *hwna = NULL;
struct ifnet *ifp = NULL;
@@ -2912,15 +3031,27 @@ netmap_attach_ext(struct netmap_adapter *arg, size_t size)
if (arg == NULL || arg->ifp == NULL)
goto fail;
+
ifp = arg->ifp;
+ if (NA(ifp) && !NM_NA_VALID(ifp)) {
+ /* If NA(ifp) is not null but there is no valid netmap
+ * adapter it means that someone else is using the same
+ * pointer (e.g. ax25_ptr on linux). This happens for
+ * instance when also PF_RING is in use. */
+ D("Error: netmap adapter hook is busy");
+ return EBUSY;
+ }
+
hwna = nm_os_malloc(size);
if (hwna == NULL)
goto fail;
hwna->up = *arg;
hwna->up.na_flags |= NAF_HOST_RINGS | NAF_NATIVE;
strncpy(hwna->up.name, ifp->if_xname, sizeof(hwna->up.name));
- hwna->nm_hw_register = hwna->up.nm_register;
- hwna->up.nm_register = netmap_hw_reg;
+ if (override_reg) {
+ hwna->nm_hw_register = hwna->up.nm_register;
+ hwna->up.nm_register = netmap_hw_reg;
+ }
if (netmap_attach_common(&hwna->up)) {
nm_os_free(hwna);
goto fail;
@@ -2939,6 +3070,7 @@ netmap_attach_ext(struct netmap_adapter *arg, size_t size)
#endif /* NETMAP_LINUX_HAVE_NETDEV_OPS */
}
hwna->nm_ndo.ndo_start_xmit = linux_netmap_start_xmit;
+ hwna->nm_ndo.ndo_change_mtu = linux_netmap_change_mtu;
if (ifp->ethtool_ops) {
hwna->nm_eto = *ifp->ethtool_ops;
}
@@ -2968,7 +3100,8 @@ fail:
int
netmap_attach(struct netmap_adapter *arg)
{
- return netmap_attach_ext(arg, sizeof(struct netmap_hw_adapter));
+ return netmap_attach_ext(arg, sizeof(struct netmap_hw_adapter),
+ 1 /* override nm_reg */);
}
@@ -2996,7 +3129,15 @@ NM_DBG(netmap_adapter_put)(struct netmap_adapter *na)
if (na->nm_dtor)
na->nm_dtor(na);
- netmap_detach_common(na);
+ if (na->tx_rings) { /* XXX should not happen */
+ D("freeing leftover tx_rings");
+ na->nm_krings_delete(na);
+ }
+ netmap_pipe_dealloc(na);
+ if (na->nm_mem)
+ netmap_mem_put(na->nm_mem);
+ bzero(na, sizeof(*na));
+ nm_os_free(na);
return 1;
}
@@ -3029,15 +3170,14 @@ netmap_detach(struct ifnet *ifp)
NMG_LOCK();
netmap_set_all_rings(na, NM_KR_LOCKED);
- na->na_flags |= NAF_ZOMBIE;
/*
* if the netmap adapter is not native, somebody
* changed it, so we can not release it here.
* The NAF_ZOMBIE flag will notify the new owner that
* the driver is gone.
*/
- if (na->na_flags & NAF_NATIVE) {
- netmap_adapter_put(na);
+ if (!(na->na_flags & NAF_NATIVE) || !netmap_adapter_put(na)) {
+ na->na_flags |= NAF_ZOMBIE;
}
/* give active users a chance to notice that NAF_ZOMBIE has been
* turned on, so that they can stop and return an error to userspace.
@@ -3116,9 +3256,9 @@ netmap_transmit(struct ifnet *ifp, struct mbuf *m)
*/
mbq_lock(q);
- busy = kring->nr_hwtail - kring->nr_hwcur;
- if (busy < 0)
- busy += kring->nkr_num_slots;
+ busy = kring->nr_hwtail - kring->nr_hwcur;
+ if (busy < 0)
+ busy += kring->nkr_num_slots;
if (busy + mbq_len(q) >= kring->nkr_num_slots - 1) {
RD(2, "%s full hwcur %d hwtail %d qlen %d", na->name,
kring->nr_hwcur, kring->nr_hwtail, mbq_len(q));
@@ -3216,16 +3356,6 @@ netmap_reset(struct netmap_adapter *na, enum txrx tx, u_int n,
kring->nr_hwtail -= lim + 1;
}
-#if 0 // def linux
- /* XXX check that the mappings are correct */
- /* need ring_nr, adapter->pdev, direction */
- buffer_info->dma = dma_map_single(&pdev->dev, addr, adapter->rx_buffer_len, DMA_FROM_DEVICE);
- if (dma_mapping_error(&adapter->pdev->dev, buffer_info->dma)) {
- D("error mapping rx netmap buffer %d", i);
- // XXX fix error handling
- }
-
-#endif /* linux */
/*
* Wakeup on the individual and global selwait
* We do the wakeup here, but the ring is not yet reconfigured.
diff --git a/sys/dev/netmap/netmap_freebsd.c b/sys/dev/netmap/netmap_freebsd.c
index b811a017822b8..c122dc64bed26 100644
--- a/sys/dev/netmap/netmap_freebsd.c
+++ b/sys/dev/netmap/netmap_freebsd.c
@@ -173,6 +173,16 @@ nm_os_ifnet_fini(void)
nm_ifnet_dh_tag);
}
+unsigned
+nm_os_ifnet_mtu(struct ifnet *ifp)
+{
+#if __FreeBSD_version < 1100030
+ return ifp->if_data.ifi_mtu;
+#else /* __FreeBSD_version >= 1100030 */
+ return ifp->if_mtu;
+#endif
+}
+
rawsum_t
nm_os_csum_raw(uint8_t *data, size_t len, rawsum_t cur_sum)
{
@@ -294,24 +304,30 @@ nm_os_catch_rx(struct netmap_generic_adapter *gna, int intercept)
{
struct netmap_adapter *na = &gna->up.up;
struct ifnet *ifp = na->ifp;
+ int ret = 0;
+ nm_os_ifnet_lock();
if (intercept) {
if (gna->save_if_input) {
D("cannot intercept again");
- return EINVAL; /* already set */
+ ret = EINVAL; /* already set */
+ goto out;
}
gna->save_if_input = ifp->if_input;
ifp->if_input = freebsd_generic_rx_handler;
} else {
if (!gna->save_if_input){
D("cannot restore");
- return EINVAL; /* not saved */
+ ret = EINVAL; /* not saved */
+ goto out;
}
ifp->if_input = gna->save_if_input;
gna->save_if_input = NULL;
}
+out:
+ nm_os_ifnet_unlock();
- return 0;
+ return ret;
}
@@ -327,12 +343,14 @@ nm_os_catch_tx(struct netmap_generic_adapter *gna, int intercept)
struct netmap_adapter *na = &gna->up.up;
struct ifnet *ifp = netmap_generic_getifp(gna);
+ nm_os_ifnet_lock();
if (intercept) {
na->if_transmit = ifp->if_transmit;
ifp->if_transmit = netmap_transmit;
} else {
ifp->if_transmit = na->if_transmit;
}
+ nm_os_ifnet_unlock();
return 0;
}
diff --git a/sys/dev/netmap/netmap_generic.c b/sys/dev/netmap/netmap_generic.c
index 1276a3a0c46cb..2ed251a557756 100644
--- a/sys/dev/netmap/netmap_generic.c
+++ b/sys/dev/netmap/netmap_generic.c
@@ -86,8 +86,6 @@ __FBSDID("$FreeBSD$");
#include <dev/netmap/netmap_kern.h>
#include <dev/netmap/netmap_mem2.h>
-#define rtnl_lock() ND("rtnl_lock called")
-#define rtnl_unlock() ND("rtnl_unlock called")
#define MBUF_RXQ(m) ((m)->m_pkthdr.flowid)
#define smp_mb()
@@ -168,7 +166,13 @@ nm_os_get_mbuf(struct ifnet *ifp, int len)
* has a KASSERT(), checking that the mbuf dtor function is not NULL.
*/
+#if __FreeBSD_version <= 1200050
+static void void_mbuf_dtor(struct mbuf *m, void *arg1, void *arg2) { }
+#else /* __FreeBSD_version >= 1200051 */
+/* The arg1 and arg2 pointer arguments were removed by r324446, which
+ * is included since version 1200051. */
static void void_mbuf_dtor(struct mbuf *m) { }
+#endif /* __FreeBSD_version >= 1200051 */
#define SET_MBUF_DESTRUCTOR(m, fn) do { \
(m)->m_ext.ext_free = (fn != NULL) ? \
@@ -200,8 +204,6 @@ nm_os_get_mbuf(struct ifnet *ifp, int len)
#include "win_glue.h"
-#define rtnl_lock() ND("rtnl_lock called")
-#define rtnl_unlock() ND("rtnl_unlock called")
#define MBUF_TXQ(m) 0//((m)->m_pkthdr.flowid)
#define MBUF_RXQ(m) 0//((m)->m_pkthdr.flowid)
#define smp_mb() //XXX: to be correctly defined
@@ -210,7 +212,6 @@ nm_os_get_mbuf(struct ifnet *ifp, int len)
#include "bsd_glue.h"
-#include <linux/rtnetlink.h> /* rtnl_[un]lock() */
#include <linux/ethtool.h> /* struct ethtool_ops, get_ringparam */
#include <linux/hrtimer.h>
@@ -339,17 +340,13 @@ generic_netmap_unregister(struct netmap_adapter *na)
int i, r;
if (na->active_fds == 0) {
- rtnl_lock();
-
na->na_flags &= ~NAF_NETMAP_ON;
- /* Release packet steering control. */
- nm_os_catch_tx(gna, 0);
-
/* Stop intercepting packets on the RX path. */
nm_os_catch_rx(gna, 0);
- rtnl_unlock();
+ /* Release packet steering control. */
+ nm_os_catch_tx(gna, 0);
}
for_each_rx_kring_h(r, kring, na) {
@@ -510,24 +507,20 @@ generic_netmap_register(struct netmap_adapter *na, int enable)
}
if (na->active_fds == 0) {
- rtnl_lock();
-
/* Prepare to intercept incoming traffic. */
error = nm_os_catch_rx(gna, 1);
if (error) {
D("nm_os_catch_rx(1) failed (%d)", error);
- goto register_handler;
+ goto free_tx_pools;
}
- /* Make netmap control the packet steering. */
+ /* Let netmap control the packet steering. */
error = nm_os_catch_tx(gna, 1);
if (error) {
D("nm_os_catch_tx(1) failed (%d)", error);
goto catch_rx;
}
- rtnl_unlock();
-
na->na_flags |= NAF_NETMAP_ON;
#ifdef RATE_GENERIC
@@ -548,8 +541,6 @@ generic_netmap_register(struct netmap_adapter *na, int enable)
/* Here (na->active_fds == 0) holds. */
catch_rx:
nm_os_catch_rx(gna, 0);
-register_handler:
- rtnl_unlock();
free_tx_pools:
for_each_tx_kring(r, kring, na) {
mtx_destroy(&kring->tx_event_lock);
@@ -626,7 +617,11 @@ generic_mbuf_destructor(struct mbuf *m)
* txsync. */
netmap_generic_irq(na, r, NULL);
#ifdef __FreeBSD__
+#if __FreeBSD_version <= 1200050
+ void_mbuf_dtor(m, NULL, NULL);
+#else /* __FreeBSD_version >= 1200051 */
void_mbuf_dtor(m);
+#endif /* __FreeBSD_version >= 1200051 */
#endif
}
@@ -1017,7 +1012,6 @@ generic_netmap_rxsync(struct netmap_kring *kring, int flags)
int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR;
/* Adapter-specific variables. */
- uint16_t slot_flags = kring->nkr_slot_flags;
u_int nm_buf_len = NETMAP_BUF_SIZE(na);
struct mbq tmpq;
struct mbuf *m;
@@ -1096,7 +1090,7 @@ generic_netmap_rxsync(struct netmap_kring *kring, int flags)
avail -= nm_buf_len;
ring->slot[nm_i].len = copy;
- ring->slot[nm_i].flags = slot_flags | (mlen ? NS_MOREFRAG : 0);
+ ring->slot[nm_i].flags = (mlen ? NS_MOREFRAG : 0);
nm_i = nm_next(nm_i, lim);
}
@@ -1208,6 +1202,15 @@ generic_netmap_attach(struct ifnet *ifp)
}
#endif
+ if (NA(ifp) && !NM_NA_VALID(ifp)) {
+ /* If NA(ifp) is not null but there is no valid netmap
+ * adapter it means that someone else is using the same
+ * pointer (e.g. ax25_ptr on linux). This happens for
+ * instance when also PF_RING is in use. */
+ D("Error: netmap adapter hook is busy");
+ return EBUSY;
+ }
+
num_tx_desc = num_rx_desc = netmap_generic_ringsize; /* starting point */
nm_os_generic_find_num_desc(ifp, &num_tx_desc, &num_rx_desc); /* ignore errors */
diff --git a/sys/dev/netmap/netmap_kern.h b/sys/dev/netmap/netmap_kern.h
index 268c980ff1746..3e64510913242 100644
--- a/sys/dev/netmap/netmap_kern.h
+++ b/sys/dev/netmap/netmap_kern.h
@@ -39,6 +39,9 @@
#if defined(linux)
+#if defined(CONFIG_NETMAP_EXTMEM)
+#define WITH_EXTMEM
+#endif
#if defined(CONFIG_NETMAP_VALE)
#define WITH_VALE
#endif
@@ -90,6 +93,7 @@
#define NM_MTX_INIT(m) sx_init(&(m), #m)
#define NM_MTX_DESTROY(m) sx_destroy(&(m))
#define NM_MTX_LOCK(m) sx_xlock(&(m))
+#define NM_MTX_SPINLOCK(m) while (!sx_try_xlock(&(m))) ;
#define NM_MTX_UNLOCK(m) sx_xunlock(&(m))
#define NM_MTX_ASSERT(m) sx_assert(&(m), SA_XLOCKED)
@@ -100,7 +104,7 @@
#define MBUF_TRANSMIT(na, ifp, m) ((na)->if_transmit(ifp, m))
#define GEN_TX_MBUF_IFP(m) ((m)->m_pkthdr.rcvif)
-#define NM_ATOMIC_T volatile int // XXX ?
+#define NM_ATOMIC_T volatile int /* required by atomic/bitops.h */
/* atomic operations */
#include <machine/atomic.h>
#define NM_ATOMIC_TEST_AND_SET(p) (!atomic_cmpset_acq_int((p), 0, 1))
@@ -132,13 +136,10 @@ struct nm_selinfo {
};
-// XXX linux struct, not used in FreeBSD
-struct net_device_ops {
-};
-struct ethtool_ops {
-};
struct hrtimer {
+ /* Not used in FreeBSD. */
};
+
#define NM_BNS_GET(b)
#define NM_BNS_PUT(b)
@@ -202,14 +203,6 @@ struct hrtimer {
#define NETMAP_KERNEL_XCHANGE_POINTERS _IO('i', 180)
#define NETMAP_KERNEL_SEND_SHUTDOWN_SIGNAL _IO_direct('i', 195)
-//Empty data structures are not permitted by MSVC compiler
-//XXX_ale, try to solve this problem
-struct net_device_ops{
- char data[1];
-};
-typedef struct ethtool_ops{
- char data[1];
-};
typedef struct hrtimer{
KTIMER timer;
BOOLEAN active;
@@ -297,6 +290,8 @@ void nm_os_ifnet_fini(void);
void nm_os_ifnet_lock(void);
void nm_os_ifnet_unlock(void);
+unsigned nm_os_ifnet_mtu(struct ifnet *ifp);
+
void nm_os_get_module(void);
void nm_os_put_module(void);
@@ -305,8 +300,10 @@ void netmap_undo_zombie(struct ifnet *);
/* os independent alloc/realloc/free */
void *nm_os_malloc(size_t);
+void *nm_os_vmalloc(size_t);
void *nm_os_realloc(void *, size_t new_size, size_t old_size);
void nm_os_free(void *);
+void nm_os_vfree(void *);
/* passes a packet up to the host stack.
* If the packet is sent (or dropped) immediately it returns NULL,
@@ -371,8 +368,7 @@ struct netmap_zmon_list {
* TX rings: hwcur + hwofs coincides with next_to_send
*
* For received packets, slot->flags is set to nkr_slot_flags
- * so we can provide a proper initial value (e.g. set NS_FORWARD
- * when operating in 'transparent' mode).
+ * so we can provide a proper initial value.
*
* The following fields are used to implement lock-free copy of packets
* from input to output ports in VALE switch:
@@ -427,6 +423,7 @@ struct netmap_kring {
* (used internally by pipes and
* by ptnetmap host ports)
*/
+#define NKR_NOINTR 0x10 /* don't use interrupts on this ring */
uint32_t nr_mode;
uint32_t nr_pending_mode;
@@ -442,8 +439,6 @@ struct netmap_kring {
*/
int32_t nkr_hwofs;
- uint16_t nkr_slot_flags; /* initial value for flags */
-
/* last_reclaim is opaque marker to help reduce the frequency
* of operations such as reclaiming tx buffers. A possible use
* is set it to ticks and do the reclaim only once per tick.
@@ -580,7 +575,7 @@ nm_prev(uint32_t i, uint32_t lim)
+-----------------+ +-----------------+
| | | |
- |XXX free slot XXX| |XXX free slot XXX|
+ | free | | free |
+-----------------+ +-----------------+
head->| owned by user |<-hwcur | not sent to nic |<-hwcur
| | | yet |
@@ -621,9 +616,14 @@ tail->| |<-hwtail | |<-hwlease
* a circular array where completions should be reported.
*/
+struct lut_entry;
+#ifdef __FreeBSD__
+#define plut_entry lut_entry
+#endif
struct netmap_lut {
struct lut_entry *lut;
+ struct plut_entry *plut;
uint32_t objtotal; /* max buffer index */
uint32_t objsize; /* buffer size */
};
@@ -671,6 +671,7 @@ struct netmap_adapter {
#define NAF_HOST_RINGS 64 /* the adapter supports the host rings */
#define NAF_FORCE_NATIVE 128 /* the adapter is always NATIVE */
#define NAF_PTNETMAP_HOST 256 /* the adapter supports ptnetmap in the host */
+#define NAF_MOREFRAG 512 /* the adapter supports NS_MOREFRAG */
#define NAF_ZOMBIE (1U<<30) /* the nic driver has been unloaded */
#define NAF_BUSY (1U<<31) /* the adapter is used internally and
* cannot be registered from userspace
@@ -711,9 +712,8 @@ struct netmap_adapter {
/* copy of if_input for netmap_send_up() */
void (*if_input)(struct ifnet *, struct mbuf *);
- /* references to the ifnet and device routines, used by
- * the generic netmap functions.
- */
+ /* Back reference to the parent ifnet struct. Used for
+ * hardware ports (emulated netmap included). */
struct ifnet *ifp; /* adapter is ifp->if_softc */
/*---- callbacks for this netmap adapter -----*/
@@ -806,6 +806,7 @@ struct netmap_adapter {
* buffer addresses, the total number of buffers and the buffer size.
*/
struct netmap_mem_d *nm_mem;
+ struct netmap_mem_d *nm_mem_prev;
struct netmap_lut na_lut;
/* additional information attached to this adapter
@@ -861,6 +862,8 @@ NMR(struct netmap_adapter *na, enum txrx t)
return (t == NR_TX ? na->tx_rings : na->rx_rings);
}
+int nma_intr_enable(struct netmap_adapter *na, int onoff);
+
/*
* If the NIC is owned by the kernel
* (i.e., bridge), neither another bridge nor user can use it;
@@ -898,8 +901,10 @@ struct netmap_vp_adapter { /* VALE software port */
struct netmap_hw_adapter { /* physical device */
struct netmap_adapter up;
- struct net_device_ops nm_ndo; // XXX linux only
- struct ethtool_ops nm_eto; // XXX linux only
+#ifdef linux
+ struct net_device_ops nm_ndo;
+ struct ethtool_ops nm_eto;
+#endif
const struct ethtool_ops* save_ethtool;
int (*nm_hw_register)(struct netmap_adapter *, int onoff);
@@ -920,12 +925,10 @@ struct netmap_generic_adapter { /* emulated device */
/* Pointer to a previously used netmap adapter. */
struct netmap_adapter *prev;
- /* generic netmap adapters support:
- * a net_device_ops struct overrides ndo_select_queue(),
- * save_if_input saves the if_input hook (FreeBSD),
- * mit implements rx interrupt mitigation,
+ /* Emulated netmap adapters support:
+ * - save_if_input saves the if_input hook (FreeBSD);
+ * - mit implements rx interrupt mitigation;
*/
- struct net_device_ops generic_ndo;
void (*save_if_input)(struct ifnet *, struct mbuf *);
struct nm_generic_mit *mit;
@@ -1186,7 +1189,7 @@ static __inline void nm_kr_start(struct netmap_kring *kr)
* virtual ports (vale, pipes, monitor)
*/
int netmap_attach(struct netmap_adapter *);
-int netmap_attach_ext(struct netmap_adapter *, size_t size);
+int netmap_attach_ext(struct netmap_adapter *, size_t size, int override_reg);
void netmap_detach(struct ifnet *);
int netmap_transmit(struct ifnet *, struct mbuf *);
struct netmap_slot *netmap_reset(struct netmap_adapter *na,
@@ -1279,15 +1282,12 @@ nm_set_native_flags(struct netmap_adapter *na)
ifp->if_transmit = netmap_transmit;
#elif defined (_WIN32)
(void)ifp; /* prevent a warning */
- //XXX_ale can we just comment those?
- //na->if_transmit = ifp->if_transmit;
- //ifp->if_transmit = netmap_transmit;
-#else
+#elif defined (linux)
na->if_transmit = (void *)ifp->netdev_ops;
ifp->netdev_ops = &((struct netmap_hw_adapter *)na)->nm_ndo;
((struct netmap_hw_adapter *)na)->save_ethtool = ifp->ethtool_ops;
ifp->ethtool_ops = &((struct netmap_hw_adapter*)na)->nm_eto;
-#endif
+#endif /* linux */
nm_update_hostrings_mode(na);
}
@@ -1308,8 +1308,6 @@ nm_clear_native_flags(struct netmap_adapter *na)
ifp->if_transmit = na->if_transmit;
#elif defined(_WIN32)
(void)ifp; /* prevent a warning */
- //XXX_ale can we just comment those?
- //ifp->if_transmit = na->if_transmit;
#else
ifp->netdev_ops = (void *)na->if_transmit;
ifp->ethtool_ops = ((struct netmap_hw_adapter*)na)->save_ethtool;
@@ -1374,8 +1372,6 @@ uint32_t nm_rxsync_prologue(struct netmap_kring *, struct netmap_ring *);
* - provide defaults for the setup callbacks and the memory allocator
*/
int netmap_attach_common(struct netmap_adapter *);
-/* common actions to be performed on netmap adapter destruction */
-void netmap_detach_common(struct netmap_adapter *);
/* fill priv->np_[tr]xq{first,last} using the ringid and flags information
* coming from a struct nmreq
*/
@@ -1431,8 +1427,8 @@ int netmap_get_hw_na(struct ifnet *ifp,
*
* VALE only supports unicast or broadcast. The lookup
* function can return 0 .. NM_BDG_MAXPORTS-1 for regular ports,
- * NM_BDG_MAXPORTS for broadcast, NM_BDG_MAXPORTS+1 for unknown.
- * XXX in practice "unknown" might be handled same as broadcast.
+ * NM_BDG_MAXPORTS for broadcast, NM_BDG_MAXPORTS+1 to indicate
+ * drop.
*/
typedef u_int (*bdg_lookup_fn_t)(struct nm_bdg_fwd *ft, uint8_t *ring_nr,
struct netmap_vp_adapter *);
@@ -1471,7 +1467,7 @@ int netmap_bdg_config(struct nmreq *nmr);
#ifdef WITH_PIPES
/* max number of pipes per device */
-#define NM_MAXPIPES 64 /* XXX how many? */
+#define NM_MAXPIPES 64 /* XXX this should probably be a sysctl */
void netmap_pipe_dealloc(struct netmap_adapter *);
int netmap_get_pipe_na(struct nmreq *nmr, struct netmap_adapter **na,
struct netmap_mem_d *nmd, int create);
@@ -1573,7 +1569,9 @@ extern int netmap_flags;
extern int netmap_generic_mit;
extern int netmap_generic_ringsize;
extern int netmap_generic_rings;
+#ifdef linux
extern int netmap_generic_txqdisc;
+#endif
extern int ptnetmap_tx_workers;
/*
@@ -1618,13 +1616,14 @@ static void netmap_dmamap_cb(__unused void *arg,
/* bus_dmamap_load wrapper: call aforementioned function if map != NULL.
* XXX can we do it without a callback ?
*/
-static inline void
+static inline int
netmap_load_map(struct netmap_adapter *na,
bus_dma_tag_t tag, bus_dmamap_t map, void *buf)
{
if (map)
bus_dmamap_load(tag, map, buf, NETMAP_BUF_SIZE(na),
netmap_dmamap_cb, NULL, BUS_DMA_NOWAIT);
+ return 0;
}
static inline void
@@ -1635,6 +1634,8 @@ netmap_unload_map(struct netmap_adapter *na,
bus_dmamap_unload(tag, map);
}
+#define netmap_sync_map(na, tag, map, sz, t)
+
/* update the map when a buffer changes. */
static inline void
netmap_reload_map(struct netmap_adapter *na,
@@ -1654,22 +1655,52 @@ netmap_reload_map(struct netmap_adapter *na,
int nm_iommu_group_id(bus_dma_tag_t dev);
#include <linux/dma-mapping.h>
-static inline void
+/*
+ * on linux we need
+ * dma_map_single(&pdev->dev, virt_addr, len, direction)
+ * dma_unmap_single(&adapter->pdev->dev, phys_addr, len, direction)
+ */
+#if 0
+ struct e1000_buffer *buffer_info = &tx_ring->buffer_info[l];
+ /* set time_stamp *before* dma to help avoid a possible race */
+ buffer_info->time_stamp = jiffies;
+ buffer_info->mapped_as_page = false;
+ buffer_info->length = len;
+ //buffer_info->next_to_watch = l;
+ /* reload dma map */
+ dma_unmap_single(&adapter->pdev->dev, buffer_info->dma,
+ NETMAP_BUF_SIZE, DMA_TO_DEVICE);
+ buffer_info->dma = dma_map_single(&adapter->pdev->dev,
+ addr, NETMAP_BUF_SIZE, DMA_TO_DEVICE);
+
+ if (dma_mapping_error(&adapter->pdev->dev, buffer_info->dma)) {
+ D("dma mapping error");
+ /* goto dma_error; See e1000_put_txbuf() */
+ /* XXX reset */
+ }
+ tx_desc->buffer_addr = htole64(buffer_info->dma); //XXX
+
+#endif
+
+static inline int
netmap_load_map(struct netmap_adapter *na,
- bus_dma_tag_t tag, bus_dmamap_t map, void *buf)
+ bus_dma_tag_t tag, bus_dmamap_t map, void *buf, u_int size)
{
- if (0 && map) {
- *map = dma_map_single(na->pdev, buf, NETMAP_BUF_SIZE(na),
+ if (map) {
+ *map = dma_map_single(na->pdev, buf, size,
DMA_BIDIRECTIONAL);
+ if (dma_mapping_error(na->pdev, *map)) {
+ *map = 0;
+ return ENOMEM;
+ }
}
+ return 0;
}
static inline void
netmap_unload_map(struct netmap_adapter *na,
- bus_dma_tag_t tag, bus_dmamap_t map)
+ bus_dma_tag_t tag, bus_dmamap_t map, u_int sz)
{
- u_int sz = NETMAP_BUF_SIZE(na);
-
if (*map) {
dma_unmap_single(na->pdev, *map, sz,
DMA_BIDIRECTIONAL);
@@ -1677,6 +1708,20 @@ netmap_unload_map(struct netmap_adapter *na,
}
static inline void /* linux variant: sync sz bytes of the DMA mapping stored in *map; tag is unused here */
+netmap_sync_map(struct netmap_adapter *na,
+ bus_dma_tag_t tag, bus_dmamap_t map, u_int sz, enum txrx t)
+{
+ if (*map) { /* a zero *map is treated as "not mapped": nothing to sync */
+ if (t == NR_RX) /* RX ring: sync for the CPU (DMA_FROM_DEVICE) */
+ dma_sync_single_for_cpu(na->pdev, *map, sz,
+ DMA_FROM_DEVICE);
+ else /* any other ring type: sync for the device (DMA_TO_DEVICE) */
+ dma_sync_single_for_device(na->pdev, *map, sz,
+ DMA_TO_DEVICE);
+ }
+}
+
+static inline void
netmap_reload_map(struct netmap_adapter *na,
bus_dma_tag_t tag, bus_dmamap_t map, void *buf)
{
@@ -1691,44 +1736,6 @@ netmap_reload_map(struct netmap_adapter *na,
DMA_BIDIRECTIONAL);
}
-/*
- * XXX How do we redefine these functions:
- *
- * on linux we need
- * dma_map_single(&pdev->dev, virt_addr, len, direction)
- * dma_unmap_single(&adapter->pdev->dev, phys_addr, len, direction
- * The len can be implicit (on netmap it is NETMAP_BUF_SIZE)
- * unfortunately the direction is not, so we need to change
- * something to have a cross API
- */
-
-#if 0
- struct e1000_buffer *buffer_info = &tx_ring->buffer_info[l];
- /* set time_stamp *before* dma to help avoid a possible race */
- buffer_info->time_stamp = jiffies;
- buffer_info->mapped_as_page = false;
- buffer_info->length = len;
- //buffer_info->next_to_watch = l;
- /* reload dma map */
- dma_unmap_single(&adapter->pdev->dev, buffer_info->dma,
- NETMAP_BUF_SIZE, DMA_TO_DEVICE);
- buffer_info->dma = dma_map_single(&adapter->pdev->dev,
- addr, NETMAP_BUF_SIZE, DMA_TO_DEVICE);
-
- if (dma_mapping_error(&adapter->pdev->dev, buffer_info->dma)) {
- D("dma mapping error");
- /* goto dma_error; See e1000_put_txbuf() */
- /* XXX reset */
- }
- tx_desc->buffer_addr = htole64(buffer_info->dma); //XXX
-
-#endif
-
-/*
- * The bus_dmamap_sync() can be one of wmb() or rmb() depending on direction.
- */
-#define bus_dmamap_sync(_a, _b, _c)
-
#endif /* linux */
@@ -1764,10 +1771,26 @@ netmap_idx_k2n(struct netmap_kring *kr, int idx)
/* Entries of the look-up table. */
+#ifdef __FreeBSD__
+struct lut_entry {
+ void *vaddr; /* virtual address. */
+ vm_paddr_t paddr; /* physical address. */
+};
+#else /* linux & _WIN32 */
+/* dma-mapping in linux can assign a buffer a different address
+ * depending on the device, so we need to have a separate
+ * physical-address look-up table for each na.
+ * We can still share the vaddrs, though, therefore we split
+ * the lut_entry structure.
+ */
struct lut_entry {
void *vaddr; /* virtual address. */
+};
+
+struct plut_entry {
vm_paddr_t paddr; /* physical address. */
};
+#endif /* linux & _WIN32 */
struct netmap_obj_pool;
@@ -1789,12 +1812,13 @@ PNMB(struct netmap_adapter *na, struct netmap_slot *slot, uint64_t *pp)
{
uint32_t i = slot->buf_idx;
struct lut_entry *lut = na->na_lut.lut;
+ struct plut_entry *plut = na->na_lut.plut;
void *ret = (i >= na->na_lut.objtotal) ? lut[0].vaddr : lut[i].vaddr;
-#ifndef _WIN32
- *pp = (i >= na->na_lut.objtotal) ? lut[0].paddr : lut[i].paddr;
+#ifdef _WIN32
+ *pp = (i >= na->na_lut.objtotal) ? (uint64_t)plut[0].paddr.QuadPart : (uint64_t)plut[i].paddr.QuadPart;
#else
- *pp = (i >= na->na_lut.objtotal) ? (uint64_t)lut[0].paddr.QuadPart : (uint64_t)lut[i].paddr.QuadPart;
+ *pp = (i >= na->na_lut.objtotal) ? plut[0].paddr : plut[i].paddr;
#endif
return ret;
}
@@ -1823,7 +1847,7 @@ struct netmap_priv_d {
uint32_t np_flags; /* from the ioctl */
u_int np_qfirst[NR_TXRX],
np_qlast[NR_TXRX]; /* range of tx/rx rings to scan */
- uint16_t np_txpoll; /* XXX and also np_rxpoll ? */
+ uint16_t np_txpoll;
int np_sync_flags; /* to be passed to nm_sync */
int np_refs; /* use with NMG_LOCK held */
diff --git a/sys/dev/netmap/netmap_mem2.c b/sys/dev/netmap/netmap_mem2.c
index a39aa1b3f042d..1f206a1b02927 100644
--- a/sys/dev/netmap/netmap_mem2.c
+++ b/sys/dev/netmap/netmap_mem2.c
@@ -108,6 +108,7 @@ struct netmap_obj_pool {
struct lut_entry *lut; /* virt,phys addresses, objtotal entries */
uint32_t *bitmap; /* one bit per buffer, 1 means free */
+ uint32_t *invalid_bitmap;/* one bit per buffer, 1 means invalid */
uint32_t bitmap_slots; /* number of uint32 entries in bitmap */
/* ---------------------------------------------------*/
@@ -134,7 +135,7 @@ struct netmap_obj_pool {
struct netmap_mem_ops {
int (*nmd_get_lut)(struct netmap_mem_d *, struct netmap_lut*);
- int (*nmd_get_info)(struct netmap_mem_d *, u_int *size,
+ int (*nmd_get_info)(struct netmap_mem_d *, uint64_t *size,
u_int *memflags, uint16_t *id);
vm_paddr_t (*nmd_ofstophys)(struct netmap_mem_d *, vm_ooffset_t);
@@ -217,7 +218,7 @@ netmap_mem_##name(struct netmap_adapter *na, t1 a1) \
}
NMD_DEFCB1(int, get_lut, struct netmap_lut *);
-NMD_DEFCB3(int, get_info, u_int *, u_int *, uint16_t *);
+NMD_DEFCB3(int, get_info, uint64_t *, u_int *, uint16_t *);
NMD_DEFCB1(vm_paddr_t, ofstophys, vm_ooffset_t);
static int netmap_mem_config(struct netmap_mem_d *);
NMD_DEFCB(int, config);
@@ -243,6 +244,7 @@ netmap_mem_get_id(struct netmap_mem_d *nmd)
#define NMA_LOCK_INIT(n) NM_MTX_INIT((n)->nm_mtx)
#define NMA_LOCK_DESTROY(n) NM_MTX_DESTROY((n)->nm_mtx)
#define NMA_LOCK(n) NM_MTX_LOCK((n)->nm_mtx)
+#define NMA_SPINLOCK(n) NM_MTX_SPINLOCK((n)->nm_mtx)
#define NMA_UNLOCK(n) NM_MTX_UNLOCK((n)->nm_mtx)
#ifdef NM_DEBUG_MEM_PUTGET
@@ -291,68 +293,115 @@ netmap_mem_finalize(struct netmap_mem_d *nmd, struct netmap_adapter *na)
NMA_UNLOCK(nmd);
}
- if (!nmd->lasterr && na->pdev)
- netmap_mem_map(&nmd->pools[NETMAP_BUF_POOL], na);
+ if (!nmd->lasterr && na->pdev) {
+ nmd->lasterr = netmap_mem_map(&nmd->pools[NETMAP_BUF_POOL], na);
+ if (nmd->lasterr) {
+ netmap_mem_deref(nmd, na);
+ }
+ }
return nmd->lasterr;
}
-void
+static int /* returns non-zero iff bit i of bitmap is set (bitmap is an array of 32-bit words) */
+nm_isset(uint32_t *bitmap, u_int i)
+{
+ return bitmap[ (i>>5) ] & ( 1U << (i & 31U) ); /* word i/32, bit i%32; the result is the masked word, not 0/1 */
+}
+
+
+/*
+ * (Re)build the free-object bitmap of pool p.
+ *
+ * On first use the bitmap is allocated (one bit per object, rounded up
+ * to whole 32-bit words) and p->bitmap_slots records its size in words;
+ * on later calls the existing bitmap is cleared in full. Every object
+ * not flagged in p->invalid_bitmap is then marked free and counted in
+ * p->objfree.
+ *
+ * Returns 0 on success, ENOMEM if the bitmap cannot be allocated or if
+ * no object ends up free.
+ */
+static int
+netmap_init_obj_allocator_bitmap(struct netmap_obj_pool *p)
+{
+	u_int n, j;
+
+	if (p->bitmap == NULL) {
+		/* Allocate the bitmap.
+		 * NOTE(review): the |= loop below presumes the allocation
+		 * comes back zeroed -- confirm for nm_os_malloc(). */
+		n = (p->objtotal + 31) / 32;
+		p->bitmap = nm_os_malloc(sizeof(p->bitmap[0]) * n);
+		if (p->bitmap == NULL) {
+			D("Unable to create bitmap (%d entries) for allocator '%s'", (int)n,
+			    p->name);
+			return ENOMEM;
+		}
+		p->bitmap_slots = n;
+	} else {
+		/* bitmap_slots counts 32-bit words while memset() takes a
+		 * byte count: scale it, otherwise only the first quarter of
+		 * the bitmap is cleared and stale "free" bits survive. */
+		memset(p->bitmap, 0, p->bitmap_slots * sizeof(p->bitmap[0]));
+	}
+
+	p->objfree = 0;
+	/*
+	 * Set all the bits in the bitmap that have
+	 * corresponding buffers to 1 to indicate they are
+	 * free.
+	 */
+	for (j = 0; j < p->objtotal; j++) {
+		if (p->invalid_bitmap && nm_isset(p->invalid_bitmap, j)) {
+			D("skipping %s %d", p->name, j);
+			continue;
+		}
+		p->bitmap[ (j>>5) ] |= ( 1U << (j & 31U) );
+		p->objfree++;
+	}
+
+	ND("%s free %u", p->name, p->objfree);
+	if (p->objfree == 0)
+		return ENOMEM;
+
+	return 0;
+}
+
+static int /* (re)initialize the free bitmaps of every pool of nmd; returns 0 or an errno value */
+netmap_mem_init_bitmaps(struct netmap_mem_d *nmd)
+{
+ int i, error = 0;
+
+ for (i = 0; i < NETMAP_POOLS_NR; i++) {
+ struct netmap_obj_pool *p = &nmd->pools[i];
+
+ error = netmap_init_obj_allocator_bitmap(p);
+ if (error) /* stop at the first pool that fails; earlier pools are left initialized */
+ return error;
+ }
+
+ /*
+ * buffers 0 and 1 are reserved
+ */
+ if (nmd->pools[NETMAP_BUF_POOL].objfree < 2) {
+ return ENOMEM;
+ }
+
+ nmd->pools[NETMAP_BUF_POOL].objfree -= 2; /* the two reserved buffers are no longer counted as free */
+ if (nmd->pools[NETMAP_BUF_POOL].bitmap) {
+ /* XXX This check is a workaround that prevents a
+ * NULL pointer crash which currently happens only
+ * with ptnetmap guests.
+ * Removed shared-info --> is the bug still there? */
+ nmd->pools[NETMAP_BUF_POOL].bitmap[0] = ~3U; /* clears bits 0-1, sets bits 2..31; NOTE(review): this overwrites the whole first word, including any bit skipped because of invalid_bitmap -- confirm intended */
+ }
+ return 0;
+}
+
+int
netmap_mem_deref(struct netmap_mem_d *nmd, struct netmap_adapter *na)
{
+ int last_user = 0;
NMA_LOCK(nmd);
- netmap_mem_unmap(&nmd->pools[NETMAP_BUF_POOL], na);
+ if (na->active_fds <= 0)
+ netmap_mem_unmap(&nmd->pools[NETMAP_BUF_POOL], na);
if (nmd->active == 1) {
- u_int i;
-
+ last_user = 1;
/*
* Reset the allocator when it falls out of use so that any
* pool resources leaked by unclean application exits are
* reclaimed.
*/
- for (i = 0; i < NETMAP_POOLS_NR; i++) {
- struct netmap_obj_pool *p;
- u_int j;
-
- p = &nmd->pools[i];
- p->objfree = p->objtotal;
- /*
- * Reproduce the net effect of the M_ZERO malloc()
- * and marking of free entries in the bitmap that
- * occur in finalize_obj_allocator()
- */
- memset(p->bitmap,
- '\0',
- sizeof(uint32_t) * ((p->objtotal + 31) / 32));
-
- /*
- * Set all the bits in the bitmap that have
- * corresponding buffers to 1 to indicate they are
- * free.
- */
- for (j = 0; j < p->objtotal; j++) {
- if (p->lut[j].vaddr != NULL) {
- p->bitmap[ (j>>5) ] |= ( 1 << (j & 31) );
- }
- }
- }
-
- /*
- * Per netmap_mem_finalize_all(),
- * buffers 0 and 1 are reserved
- */
- nmd->pools[NETMAP_BUF_POOL].objfree -= 2;
- if (nmd->pools[NETMAP_BUF_POOL].bitmap) {
- /* XXX This check is a workaround that prevents a
- * NULL pointer crash which currently happens only
- * with ptnetmap guests.
- * Removed shared-info --> is the bug still there? */
- nmd->pools[NETMAP_BUF_POOL].bitmap[0] = ~3;
- }
+ netmap_mem_init_bitmaps(nmd);
}
nmd->ops->nmd_deref(nmd);
NMA_UNLOCK(nmd);
+ return last_user;
}
@@ -361,6 +410,9 @@ static int
netmap_mem2_get_lut(struct netmap_mem_d *nmd, struct netmap_lut *lut)
{
lut->lut = nmd->pools[NETMAP_BUF_POOL].lut;
+#ifdef __FreeBSD__
+ lut->plut = lut->lut;
+#endif
lut->objtotal = nmd->pools[NETMAP_BUF_POOL].objtotal;
lut->objsize = nmd->pools[NETMAP_BUF_POOL]._objsize;
@@ -442,7 +494,6 @@ struct netmap_mem_d nm_mem = { /* Our memory allocator. */
/* blueprint for the private memory allocators */
-extern struct netmap_mem_ops netmap_mem_private_ops; /* forward */
/* XXX clang is not happy about using name as a print format */
static const struct netmap_mem_d nm_blueprint = {
.pools = {
@@ -601,6 +652,48 @@ nm_mem_assign_group(struct netmap_mem_d *nmd, struct device *dev)
return err;
}
+static struct lut_entry * /* allocate a look-up table with room for nobj entries; result is whatever the underlying allocator returns, so callers should check it */
+nm_alloc_lut(u_int nobj)
+{
+ size_t n = sizeof(struct lut_entry) * nobj; /* table size in bytes */
+ struct lut_entry *lut;
+#ifdef linux
+ lut = vmalloc(n); /* linux: vmalloc(); must be released with vfree(), see nm_free_lut() */
+#else
+ lut = nm_os_malloc(n); /* elsewhere: the OS-independent allocator */
+#endif
+ return lut;
+}
+
+static void /* wipe and release a table of objtotal entries obtained from nm_alloc_lut() */
+nm_free_lut(struct lut_entry *lut, u_int objtotal)
+{
+ bzero(lut, sizeof(struct lut_entry) * objtotal); /* zero the entries before freeing; lut must not be NULL here */
+#ifdef linux
+ vfree(lut); /* pairs with vmalloc() in nm_alloc_lut() */
+#else
+ nm_os_free(lut); /* pairs with nm_os_malloc() in nm_alloc_lut() */
+#endif
+}
+
+#if defined(linux) || defined(_WIN32)
+static struct plut_entry * /* allocate a physical-address table with room for nobj entries; callers should check the result */
+nm_alloc_plut(u_int nobj)
+{
+ size_t n = sizeof(struct plut_entry) * nobj; /* table size in bytes */
+ struct plut_entry *lut;
+ lut = vmalloc(n); /* NOTE(review): compiled for linux and _WIN32 alike but calls vmalloc() directly -- confirm it is available on Windows */
+ return lut;
+}
+
+static void /* release a table obtained from nm_alloc_plut() */
+nm_free_plut(struct plut_entry * lut)
+{
+ vfree(lut); /* pairs with vmalloc() in nm_alloc_plut() */
+}
+#endif /* linux or _WIN32 */
+
+
/*
* First, find the allocator that contains the requested offset,
* then locate the cluster through a lookup table.
@@ -613,7 +706,14 @@ netmap_mem2_ofstophys(struct netmap_mem_d* nmd, vm_ooffset_t offset)
vm_paddr_t pa;
struct netmap_obj_pool *p;
+#if defined(__FreeBSD__)
+ /* This function is called by netmap_dev_pager_fault(), which holds a
+ * non-sleepable lock since FreeBSD 12. Since we cannot sleep, we
+ * spin on the trylock. */
+ NMA_SPINLOCK(nmd);
+#else
NMA_LOCK(nmd);
+#endif
p = nmd->pools;
for (i = 0; i < NETMAP_POOLS_NR; offset -= p[i].memtotal, i++) {
@@ -640,7 +740,7 @@ netmap_mem2_ofstophys(struct netmap_mem_d* nmd, vm_ooffset_t offset)
+ p[NETMAP_BUF_POOL].memtotal);
NMA_UNLOCK(nmd);
#ifndef _WIN32
- return 0; // XXX bad address
+ return 0; /* bad address */
#else
vm_paddr_t res;
res.QuadPart = 0;
@@ -676,7 +776,8 @@ PMDL
win32_build_user_vm_map(struct netmap_mem_d* nmd)
{
int i, j;
- u_int memsize, memflags, ofs = 0;
+ size_t memsize;
+ u_int memflags, ofs = 0;
PMDL mainMdl, tempMdl;
if (netmap_mem_get_info(nmd, &memsize, &memflags, NULL)) {
@@ -746,7 +847,7 @@ netmap_mem2_get_pool_info(struct netmap_mem_d* nmd, u_int pool, u_int *clustsize
}
static int
-netmap_mem2_get_info(struct netmap_mem_d* nmd, u_int* size, u_int *memflags,
+netmap_mem2_get_info(struct netmap_mem_d* nmd, uint64_t* size, u_int *memflags,
nm_memid_t *id)
{
int error = 0;
@@ -835,7 +936,6 @@ netmap_obj_malloc(struct netmap_obj_pool *p, u_int len, uint32_t *start, uint32_
if (len > p->_objsize) {
D("%s request size %d too large", p->name, len);
- // XXX cannot reduce the size
return NULL;
}
@@ -911,7 +1011,7 @@ netmap_obj_free_va(struct netmap_obj_pool *p, void *vaddr)
ssize_t relofs = (ssize_t) vaddr - (ssize_t) base;
/* Given address, is out of the scope of the current cluster.*/
- if (vaddr < base || relofs >= p->_clustsize)
+ if (base == NULL || vaddr < base || relofs >= p->_clustsize)
continue;
j = j + relofs / p->_objsize;
@@ -923,8 +1023,11 @@ netmap_obj_free_va(struct netmap_obj_pool *p, void *vaddr)
vaddr, p->name);
}
-#define netmap_mem_bufsize(n) \
- ((n)->pools[NETMAP_BUF_POOL]._objsize)
+unsigned /* returns the object size of nmd's buffer pool (NETMAP_BUF_POOL) */
+netmap_mem_bufsize(struct netmap_mem_d *nmd)
+{
+ return nmd->pools[NETMAP_BUF_POOL]._objsize; /* no locking is done here */
+}
#define netmap_if_malloc(n, len) netmap_obj_malloc(&(n)->pools[NETMAP_IF_POOL], len, NULL, NULL)
#define netmap_if_free(n, v) netmap_obj_free_va(&(n)->pools[NETMAP_IF_POOL], (v))
@@ -934,7 +1037,7 @@ netmap_obj_free_va(struct netmap_obj_pool *p, void *vaddr)
netmap_obj_malloc(&(n)->pools[NETMAP_BUF_POOL], netmap_mem_bufsize(n), _pos, _index)
-#if 0 // XXX unused
+#if 0 /* currently unused */
/* Return the index associated to the given packet buffer */
#define netmap_buf_index(n, v) \
(netmap_obj_offset(&(n)->pools[NETMAP_BUF_POOL], (v)) / NETMAP_BDG_BUF_SIZE(n))
@@ -1012,6 +1115,7 @@ netmap_new_bufs(struct netmap_mem_d *nmd, struct netmap_slot *slot, u_int n)
slot[i].buf_idx = index;
slot[i].len = p->_objsize;
slot[i].flags = 0;
+ slot[i].ptr = 0;
}
ND("allocated %d buffers, %d available, first at %d", n, p->objfree, pos);
@@ -1073,6 +1177,9 @@ netmap_reset_obj_allocator(struct netmap_obj_pool *p)
if (p->bitmap)
nm_os_free(p->bitmap);
p->bitmap = NULL;
+ if (p->invalid_bitmap)
+ nm_os_free(p->invalid_bitmap);
+ p->invalid_bitmap = NULL;
if (p->lut) {
u_int i;
@@ -1083,15 +1190,9 @@ netmap_reset_obj_allocator(struct netmap_obj_pool *p)
* in the lut.
*/
for (i = 0; i < p->objtotal; i += p->_clustentries) {
- if (p->lut[i].vaddr)
- contigfree(p->lut[i].vaddr, p->_clustsize, M_NETMAP);
+ contigfree(p->lut[i].vaddr, p->_clustsize, M_NETMAP);
}
- bzero(p->lut, sizeof(struct lut_entry) * p->objtotal);
-#ifdef linux
- vfree(p->lut);
-#else
- nm_os_free(p->lut);
-#endif
+ nm_free_lut(p->lut, p->objtotal);
}
p->lut = NULL;
p->objtotal = 0;
@@ -1201,19 +1302,6 @@ netmap_config_obj_allocator(struct netmap_obj_pool *p, u_int objtotal, u_int obj
return 0;
}
-static struct lut_entry *
-nm_alloc_lut(u_int nobj)
-{
- size_t n = sizeof(struct lut_entry) * nobj;
- struct lut_entry *lut;
-#ifdef linux
- lut = vmalloc(n);
-#else
- lut = nm_os_malloc(n);
-#endif
- return lut;
-}
-
/* call with NMA_LOCK held */
static int
netmap_finalize_obj_allocator(struct netmap_obj_pool *p)
@@ -1221,6 +1309,11 @@ netmap_finalize_obj_allocator(struct netmap_obj_pool *p)
int i; /* must be signed */
size_t n;
+ if (p->lut) {
+ /* already finalized, nothing to do */
+ return 0;
+ }
+
/* optimistically assume we have enough memory */
p->numclusters = p->_numclusters;
p->objtotal = p->_objtotal;
@@ -1231,18 +1324,8 @@ netmap_finalize_obj_allocator(struct netmap_obj_pool *p)
goto clean;
}
- /* Allocate the bitmap */
- n = (p->objtotal + 31) / 32;
- p->bitmap = nm_os_malloc(sizeof(uint32_t) * n);
- if (p->bitmap == NULL) {
- D("Unable to create bitmap (%d entries) for allocator '%s'", (int)n,
- p->name);
- goto clean;
- }
- p->bitmap_slots = n;
-
/*
- * Allocate clusters, init pointers and bitmap
+ * Allocate clusters, init pointers
*/
n = p->_clustsize;
@@ -1270,7 +1353,6 @@ netmap_finalize_obj_allocator(struct netmap_obj_pool *p)
goto out;
lim = i / 2;
for (i--; i >= lim; i--) {
- p->bitmap[ (i>>5) ] &= ~( 1 << (i & 31) );
if (i % p->_clustentries == 0 && p->lut[i].vaddr)
contigfree(p->lut[i].vaddr,
n, M_NETMAP);
@@ -1283,8 +1365,7 @@ netmap_finalize_obj_allocator(struct netmap_obj_pool *p)
break;
}
/*
- * Set bitmap and lut state for all buffers in the current
- * cluster.
+ * Set lut state for all buffers in the current cluster.
*
* [i, lim) is the set of buffer indexes that cover the
* current cluster.
@@ -1294,15 +1375,13 @@ netmap_finalize_obj_allocator(struct netmap_obj_pool *p)
* of p->_objsize.
*/
for (; i < lim; i++, clust += p->_objsize) {
- p->bitmap[ (i>>5) ] |= ( 1 << (i & 31) );
p->lut[i].vaddr = clust;
+#if !defined(linux) && !defined(_WIN32)
p->lut[i].paddr = vtophys(clust);
+#endif
}
}
- p->objfree = p->objtotal;
p->memtotal = p->numclusters * p->_clustsize;
- if (p->objfree == 0)
- goto clean;
if (netmap_verbose)
D("Pre-allocated %d clusters (%d/%dKB) for '%s'",
p->numclusters, p->_clustsize >> 10,
@@ -1348,6 +1427,7 @@ static int
netmap_mem_unmap(struct netmap_obj_pool *p, struct netmap_adapter *na)
{
int i, lim = p->_objtotal;
+ struct netmap_lut *lut = &na->na_lut;
if (na == NULL || na->pdev == NULL)
return 0;
@@ -1355,16 +1435,23 @@ netmap_mem_unmap(struct netmap_obj_pool *p, struct netmap_adapter *na)
#if defined(__FreeBSD__)
(void)i;
(void)lim;
+ (void)lut;
D("unsupported on FreeBSD");
-
#elif defined(_WIN32)
(void)i;
(void)lim;
- D("unsupported on Windows"); //XXX_ale, really?
+ (void)lut;
+ D("unsupported on Windows");
#else /* linux */
- for (i = 2; i < lim; i++) {
- netmap_unload_map(na, (bus_dma_tag_t) na->pdev, &p->lut[i].paddr);
+ ND("unmapping and freeing plut for %s", na->name);
+ if (lut->plut == NULL)
+ return 0;
+ for (i = 0; i < lim; i += p->_clustentries) {
+ if (lut->plut[i].paddr)
+ netmap_unload_map(na, (bus_dma_tag_t) na->pdev, &lut->plut[i].paddr, p->_clustsize);
}
+ nm_free_plut(lut->plut);
+ lut->plut = NULL;
#endif /* linux */
return 0;
@@ -1373,23 +1460,65 @@ netmap_mem_unmap(struct netmap_obj_pool *p, struct netmap_adapter *na)
static int
netmap_mem_map(struct netmap_obj_pool *p, struct netmap_adapter *na)
{
+ int error = 0;
+ int i, lim = p->objtotal;
+ struct netmap_lut *lut = &na->na_lut;
+
+ if (na->pdev == NULL)
+ return 0;
+
#if defined(__FreeBSD__)
+ (void)i;
+ (void)lim;
+ (void)lut;
D("unsupported on FreeBSD");
#elif defined(_WIN32)
- D("unsupported on Windows"); //XXX_ale, really?
+ (void)i;
+ (void)lim;
+ (void)lut;
+ D("unsupported on Windows");
#else /* linux */
- int i, lim = p->_objtotal;
- if (na->pdev == NULL)
+ if (lut->plut != NULL) {
+ ND("plut already allocated for %s", na->name);
return 0;
+ }
- for (i = 2; i < lim; i++) {
- netmap_load_map(na, (bus_dma_tag_t) na->pdev, &p->lut[i].paddr,
- p->lut[i].vaddr);
+ ND("allocating physical lut for %s", na->name);
+ lut->plut = nm_alloc_plut(lim);
+ if (lut->plut == NULL) {
+ D("Failed to allocate physical lut for %s", na->name);
+ return ENOMEM;
+ }
+
+ for (i = 0; i < lim; i += p->_clustentries) {
+ lut->plut[i].paddr = 0;
}
+
+ for (i = 0; i < lim; i += p->_clustentries) {
+ int j;
+
+ if (p->lut[i].vaddr == NULL)
+ continue;
+
+ error = netmap_load_map(na, (bus_dma_tag_t) na->pdev, &lut->plut[i].paddr,
+ p->lut[i].vaddr, p->_clustsize);
+ if (error) {
+ D("Failed to map cluster #%d from the %s pool", i, p->name);
+ break;
+ }
+
+ for (j = 1; j < p->_clustentries; j++) {
+ lut->plut[i + j].paddr = lut->plut[i + j - 1].paddr + p->_objsize;
+ }
+ }
+
+ if (error)
+ netmap_mem_unmap(p, na);
+
#endif /* linux */
- return 0;
+ return error;
}
static int
@@ -1406,9 +1535,10 @@ netmap_mem_finalize_all(struct netmap_mem_d *nmd)
goto error;
nmd->nm_totalsize += nmd->pools[i].memtotal;
}
- /* buffers 0 and 1 are reserved */
- nmd->pools[NETMAP_BUF_POOL].objfree -= 2;
- nmd->pools[NETMAP_BUF_POOL].bitmap[0] = ~3;
+ nmd->lasterr = netmap_mem_init_bitmaps(nmd);
+ if (nmd->lasterr)
+ goto error;
+
nmd->flags |= NETMAP_MEM_FINALIZED;
if (netmap_verbose)
@@ -1430,23 +1560,25 @@ error:
/*
* allocator for private memory
*/
-static struct netmap_mem_d *
-_netmap_mem_private_new(struct netmap_obj_params *p, int *perr)
+static void *
+_netmap_mem_private_new(size_t size, struct netmap_obj_params *p,
+ struct netmap_mem_ops *ops, int *perr)
{
struct netmap_mem_d *d = NULL;
int i, err = 0;
- d = nm_os_malloc(sizeof(struct netmap_mem_d));
+ d = nm_os_malloc(size);
if (d == NULL) {
err = ENOMEM;
goto error;
}
*d = nm_blueprint;
+ d->ops = ops;
err = nm_mem_assign_id(d);
if (err)
- goto error;
+ goto error_free;
snprintf(d->name, NM_MEM_NAMESZ, "%d", d->nm_id);
for (i = 0; i < NETMAP_POOLS_NR; i++) {
@@ -1461,14 +1593,18 @@ _netmap_mem_private_new(struct netmap_obj_params *p, int *perr)
err = netmap_mem_config(d);
if (err)
- goto error;
+ goto error_rel_id;
d->flags &= ~NETMAP_MEM_FINALIZED;
return d;
+error_rel_id:
+ NMA_LOCK_DESTROY(d);
+ nm_mem_release_id(d);
+error_free:
+ nm_os_free(d);
error:
- netmap_mem_delete(d);
if (perr)
*perr = err;
return NULL;
@@ -1480,7 +1616,7 @@ netmap_mem_private_new(u_int txr, u_int txd, u_int rxr, u_int rxd,
{
struct netmap_mem_d *d = NULL;
struct netmap_obj_params p[NETMAP_POOLS_NR];
- int i, err = 0;
+ int i;
u_int v, maxd;
/* account for the fake host rings */
txr++;
@@ -1527,16 +1663,9 @@ netmap_mem_private_new(u_int txr, u_int txd, u_int rxr, u_int rxd,
p[NETMAP_BUF_POOL].num,
p[NETMAP_BUF_POOL].size);
- d = _netmap_mem_private_new(p, perr);
- if (d == NULL)
- goto error;
+ d = _netmap_mem_private_new(sizeof(*d), p, &netmap_mem_global_ops, perr);
return d;
-error:
- netmap_mem_delete(d);
- if (perr)
- *perr = err;
- return NULL;
}
@@ -1581,14 +1710,14 @@ netmap_mem2_finalize(struct netmap_mem_d *nmd)
int err;
/* update configuration if changed */
- if (netmap_mem2_config(nmd))
+ if (netmap_mem_config(nmd))
goto out1;
nmd->active++;
if (nmd->flags & NETMAP_MEM_FINALIZED) {
/* may happen if config is not changed */
- ND("nothing to do");
+ D("nothing to do");
goto out;
}
@@ -1621,12 +1750,21 @@ netmap_mem2_delete(struct netmap_mem_d *nmd)
nm_os_free(nmd);
}
+#ifdef WITH_EXTMEM
+/* doubly linked list of all existing external allocators */
+static struct netmap_mem_ext *netmap_mem_ext_list = NULL;
+NM_MTX_T nm_mem_ext_list_lock;
+#endif /* WITH_EXTMEM */
+
int
netmap_mem_init(void)
{
NM_MTX_INIT(nm_mem_list_lock);
NMA_LOCK_INIT(&nm_mem);
netmap_mem_get(&nm_mem);
+#ifdef WITH_EXTMEM
+ NM_MTX_INIT(nm_mem_ext_list_lock);
+#endif /* WITH_EXTMEM */
return (0);
}
@@ -1648,10 +1786,13 @@ netmap_free_rings(struct netmap_adapter *na)
struct netmap_ring *ring = kring->ring;
if (ring == NULL || kring->users > 0 || (kring->nr_kflags & NKR_NEEDRING)) {
- ND("skipping ring %s (ring %p, users %d)",
- kring->name, ring, kring->users);
+ if (netmap_verbose)
+ D("NOT deleting ring %s (ring %p, users %d neekring %d)",
+ kring->name, ring, kring->users, kring->nr_kflags & NKR_NEEDRING);
continue;
}
+ if (netmap_verbose)
+ D("deleting ring %s", kring->name);
if (i != nma_get_nrings(na, t) || na->na_flags & NAF_HOST_RINGS)
netmap_free_bufs(na->nm_mem, ring->slot, kring->nkr_num_slots);
netmap_ring_free(na->nm_mem, ring);
@@ -1684,9 +1825,13 @@ netmap_mem2_rings_create(struct netmap_adapter *na)
if (ring || (!kring->users && !(kring->nr_kflags & NKR_NEEDRING))) {
/* uneeded, or already created by somebody else */
- ND("skipping ring %s", kring->name);
+ if (netmap_verbose)
+ D("NOT creating ring %s (ring %p, users %d neekring %d)",
+ kring->name, ring, kring->users, kring->nr_kflags & NKR_NEEDRING);
continue;
}
+ if (netmap_verbose)
+ D("creating %s", kring->name);
ndesc = kring->nkr_num_slots;
len = sizeof(struct netmap_ring) +
ndesc * sizeof(struct netmap_slot);
@@ -1707,7 +1852,7 @@ netmap_mem2_rings_create(struct netmap_adapter *na)
ring->head = kring->rhead;
ring->cur = kring->rcur;
ring->tail = kring->rtail;
- *(uint16_t *)(uintptr_t)&ring->nr_buf_size =
+ *(uint32_t *)(uintptr_t)&ring->nr_buf_size =
netmap_mem_bufsize(na->nm_mem);
ND("%s h %d c %d t %d", kring->name,
ring->head, ring->cur, ring->tail);
@@ -1876,7 +2021,7 @@ netmap_mem_pools_info_get(struct nmreq *nmr, struct netmap_mem_d *nmd)
uintptr_t *pp = (uintptr_t *)&nmr->nr_arg1;
struct netmap_pools_info *upi = (struct netmap_pools_info *)(*pp);
struct netmap_pools_info pi;
- unsigned int memsize;
+ uint64_t memsize;
uint16_t memid;
int ret;
@@ -1910,6 +2055,340 @@ netmap_mem_pools_info_get(struct nmreq *nmr, struct netmap_mem_d *nmd)
return 0;
}
+#ifdef WITH_EXTMEM
+struct netmap_mem_ext {
+ struct netmap_mem_d up;
+
+ struct page **pages;
+ int nr_pages;
+ struct netmap_mem_ext *next, *prev;
+};
+
+/* inserts e at the head of the list; takes nm_mem_ext_list_lock internally */
+static void
+netmap_mem_ext_register(struct netmap_mem_ext *e)
+{
+ NM_MTX_LOCK(nm_mem_ext_list_lock);
+ if (netmap_mem_ext_list)
+ netmap_mem_ext_list->prev = e;
+ e->next = netmap_mem_ext_list;
+ netmap_mem_ext_list = e;
+ e->prev = NULL;
+ NM_MTX_UNLOCK(nm_mem_ext_list_lock);
+}
+
+/* call with nm_mem_list_lock held */
+static void
+netmap_mem_ext_unregister(struct netmap_mem_ext *e)
+{
+ if (e->prev)
+ e->prev->next = e->next;
+ else
+ netmap_mem_ext_list = e->next;
+ if (e->next)
+ e->next->prev = e->prev;
+ e->prev = e->next = NULL;
+}
+
+static int
+netmap_mem_ext_same_pages(struct netmap_mem_ext *e, struct page **pages, int nr_pages)
+{
+ int i;
+
+ if (e->nr_pages != nr_pages)
+ return 0;
+
+ for (i = 0; i < nr_pages; i++)
+ if (pages[i] != e->pages[i])
+ return 0;
+
+ return 1;
+}
+
+static struct netmap_mem_ext *
+netmap_mem_ext_search(struct page **pages, int nr_pages)
+{
+ struct netmap_mem_ext *e;
+
+ NM_MTX_LOCK(nm_mem_ext_list_lock);
+ for (e = netmap_mem_ext_list; e; e = e->next) {
+ if (netmap_mem_ext_same_pages(e, pages, nr_pages)) {
+ netmap_mem_get(&e->up);
+ break;
+ }
+ }
+ NM_MTX_UNLOCK(nm_mem_ext_list_lock);
+ return e;
+}
+
+
+static void
+netmap_mem_ext_free_pages(struct page **pages, int nr_pages)
+{
+ int i;
+
+ for (i = 0; i < nr_pages; i++) {
+ kunmap(pages[i]);
+ put_page(pages[i]);
+ }
+ nm_os_vfree(pages);
+}
+
+static void
+netmap_mem_ext_delete(struct netmap_mem_d *d)
+{
+ int i;
+ struct netmap_mem_ext *e =
+ (struct netmap_mem_ext *)d;
+
+ netmap_mem_ext_unregister(e);
+
+ for (i = 0; i < NETMAP_POOLS_NR; i++) {
+ struct netmap_obj_pool *p = &d->pools[i];
+
+ if (p->lut) {
+ nm_free_lut(p->lut, p->objtotal);
+ p->lut = NULL;
+ }
+ }
+ if (e->pages) {
+ netmap_mem_ext_free_pages(e->pages, e->nr_pages);
+ e->pages = NULL;
+ e->nr_pages = 0;
+ }
+ netmap_mem2_delete(d);
+}
+
+static int
+netmap_mem_ext_config(struct netmap_mem_d *nmd)
+{
+ return 0;
+}
+
+struct netmap_mem_ops netmap_mem_ext_ops = {
+ .nmd_get_lut = netmap_mem2_get_lut,
+ .nmd_get_info = netmap_mem2_get_info,
+ .nmd_ofstophys = netmap_mem2_ofstophys,
+ .nmd_config = netmap_mem_ext_config,
+ .nmd_finalize = netmap_mem2_finalize,
+ .nmd_deref = netmap_mem2_deref,
+ .nmd_delete = netmap_mem_ext_delete,
+ .nmd_if_offset = netmap_mem2_if_offset,
+ .nmd_if_new = netmap_mem2_if_new,
+ .nmd_if_delete = netmap_mem2_if_delete,
+ .nmd_rings_create = netmap_mem2_rings_create,
+ .nmd_rings_delete = netmap_mem2_rings_delete
+};
+
+struct netmap_mem_d *
+netmap_mem_ext_create(struct nmreq *nmr, int *perror)
+{
+ uintptr_t p = *(uintptr_t *)&nmr->nr_arg1;
+ struct netmap_pools_info pi;
+ int error = 0;
+ unsigned long end, start;
+ int nr_pages, res, i, j;
+ struct page **pages = NULL;
+ struct netmap_mem_ext *nme;
+ char *clust;
+ size_t off;
+
+ error = copyin((void *)p, &pi, sizeof(pi));
+ if (error)
+ goto out;
+
+ // XXX sanity checks
+ if (pi.if_pool_objtotal == 0)
+ pi.if_pool_objtotal = netmap_min_priv_params[NETMAP_IF_POOL].num;
+ if (pi.if_pool_objsize == 0)
+ pi.if_pool_objsize = netmap_min_priv_params[NETMAP_IF_POOL].size;
+ if (pi.ring_pool_objtotal == 0)
+ pi.ring_pool_objtotal = netmap_min_priv_params[NETMAP_RING_POOL].num;
+ if (pi.ring_pool_objsize == 0)
+ pi.ring_pool_objsize = netmap_min_priv_params[NETMAP_RING_POOL].size;
+ if (pi.buf_pool_objtotal == 0)
+ pi.buf_pool_objtotal = netmap_min_priv_params[NETMAP_BUF_POOL].num;
+ if (pi.buf_pool_objsize == 0)
+ pi.buf_pool_objsize = netmap_min_priv_params[NETMAP_BUF_POOL].size;
+ D("if %d %d ring %d %d buf %d %d",
+ pi.if_pool_objtotal, pi.if_pool_objsize,
+ pi.ring_pool_objtotal, pi.ring_pool_objsize,
+ pi.buf_pool_objtotal, pi.buf_pool_objsize);
+
+ end = (p + pi.memsize + PAGE_SIZE - 1) >> PAGE_SHIFT;
+ start = p >> PAGE_SHIFT;
+ nr_pages = end - start;
+
+ pages = nm_os_vmalloc(nr_pages * sizeof(*pages));
+ if (pages == NULL) {
+ error = ENOMEM;
+ goto out;
+ }
+
+#ifdef NETMAP_LINUX_HAVE_GUP_4ARGS
+ res = get_user_pages_unlocked(
+ p,
+ nr_pages,
+ pages,
+ FOLL_WRITE | FOLL_GET | FOLL_SPLIT | FOLL_POPULATE); // XXX check other flags
+#elif defined(NETMAP_LINUX_HAVE_GUP_5ARGS)
+ res = get_user_pages_unlocked(
+ p,
+ nr_pages,
+ 1, /* write */
+ 0, /* don't force */
+ pages);
+#elif defined(NETMAP_LINUX_HAVE_GUP_7ARGS)
+ res = get_user_pages_unlocked(
+ current,
+ current->mm,
+ p,
+ nr_pages,
+ 1, /* write */
+ 0, /* don't force */
+ pages);
+#else
+ down_read(&current->mm->mmap_sem);
+ res = get_user_pages(
+ current,
+ current->mm,
+ p,
+ nr_pages,
+ 1, /* write */
+ 0, /* don't force */
+ pages,
+ NULL);
+ up_read(&current->mm->mmap_sem);
+#endif /* NETMAP_LINUX_GUP */
+
+ if (res < nr_pages) {
+ error = EFAULT;
+ goto out_unmap;
+ }
+
+ nme = netmap_mem_ext_search(pages, nr_pages);
+ if (nme) {
+ netmap_mem_ext_free_pages(pages, nr_pages);
+ return &nme->up;
+ }
+ D("not found, creating new");
+
+ nme = _netmap_mem_private_new(sizeof(*nme),
+ (struct netmap_obj_params[]){
+ { pi.if_pool_objsize, pi.if_pool_objtotal },
+ { pi.ring_pool_objsize, pi.ring_pool_objtotal },
+ { pi.buf_pool_objsize, pi.buf_pool_objtotal }},
+ &netmap_mem_ext_ops,
+ &error);
+ if (nme == NULL)
+ goto out_unmap;
+
+ /* from now on pages will be released by nme destructor;
+ * we set res = 0 to prevent release in out_unmap below
+ */
+ res = 0;
+ nme->pages = pages;
+ nme->nr_pages = nr_pages;
+ nme->up.flags |= NETMAP_MEM_EXT;
+
+ clust = kmap(*pages);
+ off = 0;
+ for (i = 0; i < NETMAP_POOLS_NR; i++) {
+ struct netmap_obj_pool *p = &nme->up.pools[i];
+ struct netmap_obj_params *o = &nme->up.params[i];
+
+ p->_objsize = o->size;
+ p->_clustsize = o->size;
+ p->_clustentries = 1;
+
+ p->lut = nm_alloc_lut(o->num);
+ if (p->lut == NULL) {
+ error = ENOMEM;
+ goto out_delete;
+ }
+
+ p->bitmap_slots = (o->num + sizeof(uint32_t) - 1) / sizeof(uint32_t);
+ p->invalid_bitmap = nm_os_malloc(sizeof(uint32_t) * p->bitmap_slots);
+ if (p->invalid_bitmap == NULL) {
+ error = ENOMEM;
+ goto out_delete;
+ }
+
+ if (nr_pages == 0) {
+ p->objtotal = 0;
+ p->memtotal = 0;
+ p->objfree = 0;
+ continue;
+ }
+
+ for (j = 0; j < o->num && nr_pages > 0; j++) {
+ size_t noff;
+ size_t skip;
+
+ p->lut[j].vaddr = clust + off;
+ ND("%s %d at %p", p->name, j, p->lut[j].vaddr);
+ noff = off + p->_objsize;
+ if (noff < PAGE_SIZE) {
+ off = noff;
+ continue;
+ }
+ ND("too big, recomputing offset...");
+ skip = PAGE_SIZE - (off & PAGE_MASK);
+ while (noff >= PAGE_SIZE) {
+ noff -= skip;
+ pages++;
+ nr_pages--;
+ ND("noff %zu page %p nr_pages %d", noff,
+ page_to_virt(*pages), nr_pages);
+ if (noff > 0 && !nm_isset(p->invalid_bitmap, j) &&
+ (nr_pages == 0 || *pages != *(pages - 1) + 1))
+ {
+ /* out of space or non contiguous,
+ * drop this object
+ * */
+ p->invalid_bitmap[ (j>>5) ] |= 1U << (j & 31U);
+ ND("non contiguous at off %zu, drop", noff);
+ }
+ if (nr_pages == 0)
+ break;
+ skip = PAGE_SIZE;
+ }
+ off = noff;
+ if (nr_pages > 0)
+ clust = kmap(*pages);
+ }
+ p->objtotal = j;
+ p->numclusters = p->objtotal;
+ p->memtotal = j * p->_objsize;
+ ND("%d memtotal %u", j, p->memtotal);
+ }
+
+ /* skip the first netmap_if, where the pools info reside */
+ {
+ struct netmap_obj_pool *p = &nme->up.pools[NETMAP_IF_POOL];
+ p->invalid_bitmap[0] |= 1U;
+ }
+
+ netmap_mem_ext_register(nme);
+
+ return &nme->up;
+
+out_delete:
+ netmap_mem_put(&nme->up);
+out_unmap:
+ for (i = 0; i < res; i++)
+ put_page(pages[i]);
+ if (res)
+ nm_os_free(pages);
+out:
+ if (perror)
+ *perror = error;
+ return NULL;
+
+}
+#endif /* WITH_EXTMEM */
+
+
#ifdef WITH_PTNETMAP_GUEST
struct mem_pt_if {
struct mem_pt_if *next;
@@ -2020,7 +2499,7 @@ netmap_mem_pt_guest_get_lut(struct netmap_mem_d *nmd, struct netmap_lut *lut)
}
static int
-netmap_mem_pt_guest_get_info(struct netmap_mem_d *nmd, u_int *size,
+netmap_mem_pt_guest_get_info(struct netmap_mem_d *nmd, uint64_t *size,
u_int *memflags, uint16_t *id)
{
int error = 0;
@@ -2118,7 +2597,6 @@ netmap_mem_pt_guest_finalize(struct netmap_mem_d *nmd)
for (i = 0; i < nbuffers; i++) {
ptnmd->buf_lut.lut[i].vaddr = vaddr;
- ptnmd->buf_lut.lut[i].paddr = paddr;
vaddr += bufsize;
paddr += bufsize;
}
@@ -2256,11 +2734,17 @@ out:
static void
netmap_mem_pt_guest_rings_delete(struct netmap_adapter *na)
{
- /* TODO: remove?? */
#if 0
- struct netmap_mem_ptg *ptnmd = (struct netmap_mem_ptg *)na->nm_mem;
- struct mem_pt_if *ptif = netmap_mem_pt_guest_ifp_lookup(na->nm_mem,
- na->ifp);
+ enum txrx t;
+
+ for_rx_tx(t) {
+ u_int i;
+ for (i = 0; i < nma_get_nrings(na, t) + 1; i++) {
+ struct netmap_kring *kring = &NMR(na, t)[i];
+
+ kring->ring = NULL;
+ }
+ }
#endif
}
diff --git a/sys/dev/netmap/netmap_mem2.h b/sys/dev/netmap/netmap_mem2.h
index 81f601c4ca9f4..f0bee7a33fd53 100644
--- a/sys/dev/netmap/netmap_mem2.h
+++ b/sys/dev/netmap/netmap_mem2.h
@@ -136,9 +136,9 @@ struct netmap_if * netmap_mem_if_new(struct netmap_adapter *, struct netmap_priv
void netmap_mem_if_delete(struct netmap_adapter *, struct netmap_if *);
int netmap_mem_rings_create(struct netmap_adapter *);
void netmap_mem_rings_delete(struct netmap_adapter *);
-void netmap_mem_deref(struct netmap_mem_d *, struct netmap_adapter *);
+int netmap_mem_deref(struct netmap_mem_d *, struct netmap_adapter *);
int netmap_mem2_get_pool_info(struct netmap_mem_d *, u_int, u_int *, u_int *);
-int netmap_mem_get_info(struct netmap_mem_d *, u_int *size, u_int *memflags, uint16_t *id);
+int netmap_mem_get_info(struct netmap_mem_d *, uint64_t *size, u_int *memflags, uint16_t *id);
ssize_t netmap_mem_if_offset(struct netmap_mem_d *, const void *vaddr);
struct netmap_mem_d* netmap_mem_private_new( u_int txr, u_int txd, u_int rxr, u_int rxd,
u_int extra_bufs, u_int npipes, int* error);
@@ -149,6 +149,14 @@ void netmap_mem_delete(struct netmap_mem_d *);
struct netmap_mem_d* __netmap_mem_get(struct netmap_mem_d *, const char *, int);
void __netmap_mem_put(struct netmap_mem_d *, const char *, int);
struct netmap_mem_d* netmap_mem_find(nm_memid_t);
+unsigned netmap_mem_bufsize(struct netmap_mem_d *nmd);
+
+#ifdef WITH_EXTMEM
+struct netmap_mem_d* netmap_mem_ext_create(struct nmreq *, int *);
+#else /* !WITH_EXTMEM */
+#define netmap_mem_ext_create(nmr, _perr) \
+ ({ int *perr = _perr; if (perr) *(perr) = EOPNOTSUPP; NULL; })
+#endif /* WITH_EXTMEM */
#ifdef WITH_PTNETMAP_GUEST
struct netmap_mem_d* netmap_mem_pt_guest_new(struct ifnet *,
@@ -163,6 +171,7 @@ int netmap_mem_pools_info_get(struct nmreq *, struct netmap_mem_d *);
#define NETMAP_MEM_PRIVATE 0x2 /* allocator uses private address space */
#define NETMAP_MEM_IO 0x4 /* the underlying memory is mmapped I/O */
+#define NETMAP_MEM_EXT 0x10 /* external memory (not remappable) */
uint32_t netmap_extra_alloc(struct netmap_adapter *, uint32_t *, uint32_t n);
diff --git a/sys/dev/netmap/netmap_monitor.c b/sys/dev/netmap/netmap_monitor.c
index 8b788920ff806..e7cc05f5ab0f0 100644
--- a/sys/dev/netmap/netmap_monitor.c
+++ b/sys/dev/netmap/netmap_monitor.c
@@ -66,9 +66,7 @@
* has released them. In most cases, the consumer is a userspace
* application which may have modified the frame contents.
*
- * Several copy monitors may be active on any ring. Zero-copy monitors,
- * instead, need exclusive access to each of the monitored rings. This may
- * change in the future, if we implement zero-copy monitor chaining.
+ * Several copy or zero-copy monitors may be active on any ring.
*
*/
@@ -263,7 +261,7 @@ netmap_monitor_add(struct netmap_kring *mkring, struct netmap_kring *kring, int
if (zmon && z->prev != NULL)
kring = z->prev;
- /* sinchronize with concurrently running nm_sync()s */
+ /* synchronize with concurrently running nm_sync()s */
nm_kr_stop(kring, NM_KR_LOCKED);
if (nm_monitor_none(kring)) {
@@ -329,7 +327,7 @@ netmap_monitor_del(struct netmap_kring *mkring, struct netmap_kring *kring)
if (zmon && mz->prev != NULL)
kring = mz->prev;
- /* sinchronize with concurrently running nm_sync()s */
+ /* synchronize with concurrently running nm_sync()s */
nm_kr_stop(kring, NM_KR_LOCKED);
if (zmon) {
diff --git a/sys/dev/netmap/netmap_offloadings.c b/sys/dev/netmap/netmap_offloadings.c
index 6dc32b13ff8e3..e0b96a8e52a26 100644
--- a/sys/dev/netmap/netmap_offloadings.c
+++ b/sys/dev/netmap/netmap_offloadings.c
@@ -132,7 +132,7 @@ gso_fix_segment(uint8_t *pkt, size_t len, u_int ipv4, u_int iphlen, u_int tcp,
ND("TCP/UDP csum %x", be16toh(*check));
}
-static int
+static inline int
vnet_hdr_is_bad(struct nm_vnet_hdr *vh)
{
uint8_t gso_type = vh->gso_type & ~VIRTIO_NET_HDR_GSO_ECN;
@@ -170,7 +170,7 @@ bdg_mismatch_datapath(struct netmap_vp_adapter *na,
u_int dst_slots = 0;
if (unlikely(ft_p == ft_end)) {
- RD(3, "No source slots to process");
+ RD(1, "No source slots to process");
return;
}
@@ -189,11 +189,11 @@ bdg_mismatch_datapath(struct netmap_vp_adapter *na,
/* Initial sanity check on the source virtio-net header. If
* something seems wrong, just drop the packet. */
if (src_len < na->up.virt_hdr_len) {
- RD(3, "Short src vnet header, dropping");
+ RD(1, "Short src vnet header, dropping");
return;
}
- if (vnet_hdr_is_bad(vh)) {
- RD(3, "Bad src vnet header, dropping");
+ if (unlikely(vnet_hdr_is_bad(vh))) {
+ RD(1, "Bad src vnet header, dropping");
return;
}
}
@@ -266,7 +266,7 @@ bdg_mismatch_datapath(struct netmap_vp_adapter *na,
if (dst_slots >= *howmany) {
/* We still have work to do, but we've run out of
* dst slots, so we have to drop the packet. */
- RD(3, "Not enough slots, dropping GSO packet");
+ ND(1, "Not enough slots, dropping GSO packet");
return;
}
@@ -281,7 +281,7 @@ bdg_mismatch_datapath(struct netmap_vp_adapter *na,
* encapsulation. */
for (;;) {
if (src_len < ethhlen) {
- RD(3, "Short GSO fragment [eth], dropping");
+ RD(1, "Short GSO fragment [eth], dropping");
return;
}
ethertype = be16toh(*((uint16_t *)
@@ -297,7 +297,7 @@ bdg_mismatch_datapath(struct netmap_vp_adapter *na,
(gso_hdr + ethhlen);
if (src_len < ethhlen + 20) {
- RD(3, "Short GSO fragment "
+ RD(1, "Short GSO fragment "
"[IPv4], dropping");
return;
}
@@ -310,14 +310,14 @@ bdg_mismatch_datapath(struct netmap_vp_adapter *na,
iphlen = 40;
break;
default:
- RD(3, "Unsupported ethertype, "
+ RD(1, "Unsupported ethertype, "
"dropping GSO packet");
return;
}
ND(3, "type=%04x", ethertype);
if (src_len < ethhlen + iphlen) {
- RD(3, "Short GSO fragment [IP], dropping");
+ RD(1, "Short GSO fragment [IP], dropping");
return;
}
@@ -329,7 +329,7 @@ bdg_mismatch_datapath(struct netmap_vp_adapter *na,
(gso_hdr + ethhlen + iphlen);
if (src_len < ethhlen + iphlen + 20) {
- RD(3, "Short GSO fragment "
+ RD(1, "Short GSO fragment "
"[TCP], dropping");
return;
}
@@ -340,7 +340,7 @@ bdg_mismatch_datapath(struct netmap_vp_adapter *na,
}
if (src_len < gso_hdr_len) {
- RD(3, "Short GSO fragment [TCP/UDP], dropping");
+ RD(1, "Short GSO fragment [TCP/UDP], dropping");
return;
}
diff --git a/sys/dev/netmap/netmap_pipe.c b/sys/dev/netmap/netmap_pipe.c
index 80843403b996a..48dde5382f77a 100644
--- a/sys/dev/netmap/netmap_pipe.c
+++ b/sys/dev/netmap/netmap_pipe.c
@@ -81,7 +81,8 @@
static int netmap_default_pipes = 0; /* ignored, kept for compatibility */
SYSBEGIN(vars_pipes);
SYSCTL_DECL(_dev_netmap);
-SYSCTL_INT(_dev_netmap, OID_AUTO, default_pipes, CTLFLAG_RW, &netmap_default_pipes, 0 , "");
+SYSCTL_INT(_dev_netmap, OID_AUTO, default_pipes, CTLFLAG_RW,
+ &netmap_default_pipes, 0, "For compatibility only");
SYSEND;
/* allocate the pipe array in the parent adapter */
@@ -182,6 +183,7 @@ netmap_pipe_txsync(struct netmap_kring *txkring, int flags)
u_int j, k, lim_tx = txkring->nkr_num_slots - 1,
lim_rx = rxkring->nkr_num_slots - 1;
int m, busy;
+ struct netmap_ring *txring = txkring->ring, *rxring = rxkring->ring;
ND("%p: %s %x -> %s", txkring, txkring->name, flags, rxkring->name);
ND(2, "before: hwcur %d hwtail %d cur %d head %d tail %d", txkring->nr_hwcur, txkring->nr_hwtail,
@@ -208,18 +210,18 @@ netmap_pipe_txsync(struct netmap_kring *txkring, int flags)
}
while (limit-- > 0) {
- struct netmap_slot *rs = &rxkring->ring->slot[j];
- struct netmap_slot *ts = &txkring->ring->slot[k];
+ struct netmap_slot *rs = &rxring->slot[j];
+ struct netmap_slot *ts = &txring->slot[k];
struct netmap_slot tmp;
- /* swap the slots */
+ __builtin_prefetch(ts + 1);
+
+ /* swap the slots and report the buffer change */
tmp = *rs;
+ tmp.flags |= NS_BUF_CHANGED;
*rs = *ts;
- *ts = tmp;
-
- /* report the buffer change */
- ts->flags |= NS_BUF_CHANGED;
rs->flags |= NS_BUF_CHANGED;
+ *ts = tmp;
j = nm_next(j, lim_rx);
k = nm_next(k, lim_tx);
diff --git a/sys/dev/netmap/netmap_pt.c b/sys/dev/netmap/netmap_pt.c
index d3544a5b1728a..edb49dc504acd 100644
--- a/sys/dev/netmap/netmap_pt.c
+++ b/sys/dev/netmap/netmap_pt.c
@@ -169,19 +169,19 @@ rate_batch_stats_update(struct rate_batch_stats *bf, uint32_t pre_tail,
#endif /* RATE */
struct ptnetmap_state {
- /* Kthreads. */
- struct nm_kctx **kctxs;
+ /* Kthreads. */
+ struct nm_kctx **kctxs;
/* Shared memory with the guest (TX/RX) */
struct ptnet_csb_gh __user *csb_gh;
struct ptnet_csb_hg __user *csb_hg;
- bool stopped;
+ bool stopped;
- /* Netmap adapter wrapping the backend. */
- struct netmap_pt_host_adapter *pth_na;
+ /* Netmap adapter wrapping the backend. */
+ struct netmap_pt_host_adapter *pth_na;
- IFRATE(struct rate_context rate_ctx;)
+ IFRATE(struct rate_context rate_ctx;)
};
static inline void
@@ -1268,13 +1268,11 @@ netmap_get_pt_host_na(struct nmreq *nmr, struct netmap_adapter **na,
}
*na = &pth_na->up;
- netmap_adapter_get(*na);
-
/* set parent busy, because attached for ptnetmap */
parent->na_flags |= NAF_BUSY;
-
strncpy(pth_na->up.name, parent->name, sizeof(pth_na->up.name));
strcat(pth_na->up.name, "-PTN");
+ netmap_adapter_get(*na);
DBG(D("%s ptnetmap request DONE", pth_na->up.name));
@@ -1350,7 +1348,7 @@ netmap_pt_guest_txsync(struct ptnet_csb_gh *ptgh, struct ptnet_csb_hg *pthg,
* go to sleep and we need to be notified by the host when more free
* space is available.
*/
- if (nm_kr_txempty(kring)) {
+ if (nm_kr_txempty(kring) && !(kring->nr_kflags & NKR_NOINTR)) {
/* Reenable notifications. */
ptgh->guest_need_kick = 1;
/* Double check */
@@ -1415,7 +1413,7 @@ netmap_pt_guest_rxsync(struct ptnet_csb_gh *ptgh, struct ptnet_csb_hg *pthg,
* we need to be notified by the host when more RX slots have been
* completed.
*/
- if (nm_kr_rxempty(kring)) {
+ if (nm_kr_rxempty(kring) && !(kring->nr_kflags & NKR_NOINTR)) {
/* Reenable notifications. */
ptgh->guest_need_kick = 1;
/* Double check */
@@ -1504,7 +1502,7 @@ netmap_pt_guest_attach(struct netmap_adapter *arg,
if (arg->nm_mem == NULL)
return ENOMEM;
arg->na_flags |= NAF_MEM_OWNER;
- error = netmap_attach_ext(arg, sizeof(struct netmap_pt_guest_adapter));
+ error = netmap_attach_ext(arg, sizeof(struct netmap_pt_guest_adapter), 1);
if (error)
return error;
@@ -1517,7 +1515,7 @@ netmap_pt_guest_attach(struct netmap_adapter *arg,
memset(&ptna->dr, 0, sizeof(ptna->dr));
ptna->dr.up.ifp = ifp;
ptna->dr.up.nm_mem = netmap_mem_get(ptna->hwup.up.nm_mem);
- ptna->dr.up.nm_config = ptna->hwup.up.nm_config;
+ ptna->dr.up.nm_config = ptna->hwup.up.nm_config;
ptna->backend_regifs = 0;
diff --git a/sys/dev/netmap/netmap_vale.c b/sys/dev/netmap/netmap_vale.c
index 0df3d08f2a69c..d364699bce269 100644
--- a/sys/dev/netmap/netmap_vale.c
+++ b/sys/dev/netmap/netmap_vale.c
@@ -150,6 +150,8 @@ __FBSDID("$FreeBSD$");
#define NM_BDG_BATCH_MAX (NM_BDG_BATCH + NM_MULTISEG)
/* NM_FT_NULL terminates a list of slots in the ft */
#define NM_FT_NULL NM_BDG_BATCH_MAX
+/* Default size for the Maximum Frame Size. */
+#define NM_BDG_MFS_DEFAULT 1514
/*
@@ -160,7 +162,8 @@ __FBSDID("$FreeBSD$");
static int bridge_batch = NM_BDG_BATCH; /* bridge batch size */
SYSBEGIN(vars_vale);
SYSCTL_DECL(_dev_netmap);
-SYSCTL_INT(_dev_netmap, OID_AUTO, bridge_batch, CTLFLAG_RW, &bridge_batch, 0 , "");
+SYSCTL_INT(_dev_netmap, OID_AUTO, bridge_batch, CTLFLAG_RW, &bridge_batch, 0,
+ "Max batch size to be used in the bridge");
SYSEND;
static int netmap_vp_create(struct nmreq *, struct ifnet *,
@@ -226,9 +229,9 @@ struct nm_bridge {
/* the forwarding table, MAC+ports.
* XXX should be changed to an argument to be passed to
- * the lookup function, and allocated on attach
+ * the lookup function
*/
- struct nm_hash_ent ht[NM_BDG_HASH];
+ struct nm_hash_ent *ht; // allocated on attach
#ifdef CONFIG_NET_NS
struct net *ns;
@@ -365,17 +368,20 @@ nm_find_bridge(const char *name, int create)
}
if (i == num_bridges && b) { /* name not found, can create entry */
/* initialize the bridge */
- strncpy(b->bdg_basename, name, namelen);
ND("create new bridge %s with ports %d", b->bdg_basename,
b->bdg_active_ports);
+ b->ht = nm_os_malloc(sizeof(struct nm_hash_ent) * NM_BDG_HASH);
+ if (b->ht == NULL) {
+ D("failed to allocate hash table");
+ return NULL;
+ }
+ strncpy(b->bdg_basename, name, namelen);
b->bdg_namelen = namelen;
b->bdg_active_ports = 0;
for (i = 0; i < NM_BDG_MAXPORTS; i++)
b->bdg_port_index[i] = i;
/* set the default function */
b->bdg_ops.lookup = netmap_bdg_learning;
- /* reset the MAC address table */
- bzero(b->ht, sizeof(struct nm_hash_ent) * NM_BDG_HASH);
NM_BNS_GET(b);
}
return b;
@@ -503,6 +509,7 @@ netmap_bdg_detach_common(struct nm_bridge *b, int hw, int sw)
ND("now %d active ports", lim);
if (lim == 0) {
ND("marking bridge %s as free", b->bdg_basename);
+ nm_os_free(b->ht);
bzero(&b->bdg_ops, sizeof(b->bdg_ops));
NM_BNS_PUT(b);
}
@@ -542,11 +549,14 @@ netmap_vp_dtor(struct netmap_adapter *na)
netmap_bdg_detach_common(b, vpna->bdg_port, -1);
}
- if (vpna->autodelete && na->ifp != NULL) {
- ND("releasing %s", na->ifp->if_xname);
- NMG_UNLOCK();
- nm_os_vi_detach(na->ifp);
- NMG_LOCK();
+ if (na->ifp != NULL && !nm_iszombie(na)) {
+ WNA(na->ifp) = NULL;
+ if (vpna->autodelete) {
+ ND("releasing %s", na->ifp->if_xname);
+ NMG_UNLOCK();
+ nm_os_vi_detach(na->ifp);
+ NMG_LOCK();
+ }
}
}
@@ -603,11 +613,15 @@ err:
static int
nm_update_info(struct nmreq *nmr, struct netmap_adapter *na)
{
+ uint64_t memsize;
+ int ret;
nmr->nr_rx_rings = na->num_rx_rings;
nmr->nr_tx_rings = na->num_tx_rings;
nmr->nr_rx_slots = na->num_rx_desc;
nmr->nr_tx_slots = na->num_tx_desc;
- return netmap_mem_get_info(na->nm_mem, &nmr->nr_memsize, NULL, &nmr->nr_arg2);
+ ret = netmap_mem_get_info(na->nm_mem, &memsize, NULL, &nmr->nr_arg2);
+ nmr->nr_memsize = (uint32_t)memsize;
+ return ret;
}
/*
@@ -736,7 +750,6 @@ netmap_get_bdg_na(struct nmreq *nmr, struct netmap_adapter **na,
for (j = 0; j < b->bdg_active_ports; j++) {
i = b->bdg_port_index[j];
vpna = b->bdg_ports[i];
- // KASSERT(na != NULL);
ND("checking %s", vpna->up.name);
if (!strcmp(vpna->up.name, nr_name)) {
netmap_adapter_get(&vpna->up);
@@ -788,6 +801,18 @@ netmap_get_bdg_na(struct nmreq *nmr, struct netmap_adapter **na,
} else {
struct netmap_adapter *hw;
+ /* the vale:nic syntax is only valid for some commands */
+ switch (nmr->nr_cmd) {
+ case NETMAP_BDG_ATTACH:
+ case NETMAP_BDG_DETACH:
+ case NETMAP_BDG_POLLING_ON:
+ case NETMAP_BDG_POLLING_OFF:
+ break; /* ok */
+ default:
+ error = EINVAL;
+ goto out;
+ }
+
error = netmap_get_hw_na(ifp, nmd, &hw);
if (error || hw == NULL)
goto out;
@@ -848,6 +873,12 @@ nm_bdg_ctl_attach(struct nmreq *nmr)
}
}
+ /* XXX look up without creating: if the adapter already exists, fail with EBUSY */
+ error = netmap_get_bdg_na(nmr, &na, nmd, 0);
+ if (!error) {
+ error = EBUSY;
+ goto unref_exit;
+ }
error = netmap_get_bdg_na(nmr, &na, nmd, 1 /* create if not exists */);
if (error) /* no device */
goto unlock_exit;
@@ -1149,9 +1180,8 @@ nm_bdg_ctl_polling_start(struct nmreq *nmr, struct netmap_adapter *na)
bna->na_polling_state = bps;
bps->bna = bna;
- /* disable interrupt if possible */
- if (bna->hwna->nm_intr)
- bna->hwna->nm_intr(bna->hwna, 0);
+ /* disable interrupts if possible */
+ nma_intr_enable(bna->hwna, 0);
/* start kthread now */
error = nm_bdg_polling_start_kthreads(bps);
if (error) {
@@ -1159,8 +1189,7 @@ nm_bdg_ctl_polling_start(struct nmreq *nmr, struct netmap_adapter *na)
nm_os_free(bps->kthreads);
nm_os_free(bps);
bna->na_polling_state = NULL;
- if (bna->hwna->nm_intr)
- bna->hwna->nm_intr(bna->hwna, 1);
+ nma_intr_enable(bna->hwna, 1);
}
return error;
}
@@ -1180,9 +1209,8 @@ nm_bdg_ctl_polling_stop(struct nmreq *nmr, struct netmap_adapter *na)
bps->configured = false;
nm_os_free(bps);
bna->na_polling_state = NULL;
- /* reenable interrupt */
- if (bna->hwna->nm_intr)
- bna->hwna->nm_intr(bna->hwna, 1);
+ /* reenable interrupts */
+ nma_intr_enable(bna->hwna, 1);
return 0;
}
@@ -1577,7 +1605,7 @@ netmap_vp_reg(struct netmap_adapter *na, int onoff)
BDG_WLOCK(vpna->na_bdg);
if (onoff) {
for_rx_tx(t) {
- for (i = 0; i < nma_get_nrings(na, t) + 1; i++) {
+ for (i = 0; i < netmap_real_rings(na, t); i++) {
struct netmap_kring *kring = &NMR(na, t)[i];
if (nm_kring_pending_on(kring))
@@ -1593,7 +1621,7 @@ netmap_vp_reg(struct netmap_adapter *na, int onoff)
if (na->active_fds == 0)
na->na_flags &= ~NAF_NETMAP_ON;
for_rx_tx(t) {
- for (i = 0; i < nma_get_nrings(na, t) + 1; i++) {
+ for (i = 0; i < netmap_real_rings(na, t); i++) {
struct netmap_kring *kring = &NMR(na, t)[i];
if (nm_kring_pending_off(kring))
@@ -1657,7 +1685,7 @@ netmap_bdg_learning(struct nm_bdg_fwd *ft, uint8_t *dst_ring,
*/
if (((buf[6] & 1) == 0) && (na->last_smac != smac)) { /* valid src */
uint8_t *s = buf+6;
- sh = nm_bridge_rthash(s); // XXX hash of source
+ sh = nm_bridge_rthash(s); /* hash of source */
/* update source port forwarding entry */
na->last_smac = ht[sh].mac = smac; /* XXX expire ? */
ht[sh].ports = mysrc;
@@ -1667,11 +1695,10 @@ netmap_bdg_learning(struct nm_bdg_fwd *ft, uint8_t *dst_ring,
}
dst = NM_BDG_BROADCAST;
if ((buf[0] & 1) == 0) { /* unicast */
- dh = nm_bridge_rthash(buf); // XXX hash of dst
+ dh = nm_bridge_rthash(buf); /* hash of dst */
if (ht[dh].mac == dmac) { /* found dst */
dst = ht[dh].ports;
}
- /* XXX otherwise return NM_BDG_UNKNOWN ? */
}
return dst;
}
@@ -1785,10 +1812,8 @@ nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n, struct netmap_vp_adapter *na,
dst_port = b->bdg_ops.lookup(&ft[i], &dst_ring, na);
if (netmap_verbose > 255)
RD(5, "slot %d port %d -> %d", i, me, dst_port);
- if (dst_port == NM_BDG_NOPORT)
+ if (dst_port >= NM_BDG_NOPORT)
continue; /* this packet is identified to be dropped */
- else if (unlikely(dst_port > NM_BDG_MAXPORTS))
- continue;
else if (dst_port == NM_BDG_BROADCAST)
dst_ring = 0; /* broadcasts always go to ring 0 */
else if (unlikely(dst_port == me ||
@@ -1882,10 +1907,10 @@ nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n, struct netmap_vp_adapter *na,
needed = d->bq_len + brddst->bq_len;
if (unlikely(dst_na->up.virt_hdr_len != na->up.virt_hdr_len)) {
- if (netmap_verbose) {
- RD(3, "virt_hdr_mismatch, src %d dst %d", na->up.virt_hdr_len,
- dst_na->up.virt_hdr_len);
- }
+ if (netmap_verbose) {
+ RD(3, "virt_hdr_mismatch, src %d dst %d", na->up.virt_hdr_len,
+ dst_na->up.virt_hdr_len);
+ }
/* There is a virtio-net header/offloadings mismatch between
* source and destination. The slower mismatch datapath will
* be used to cope with all the mismatches.
@@ -1902,6 +1927,7 @@ nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n, struct netmap_vp_adapter *na,
* TCPv4 we must account for ethernet header, IP header
* and TCPv4 header).
*/
+ KASSERT(dst_na->mfs > 0, ("vpna->mfs is 0"));
needed = (needed * na->mfs) /
(dst_na->mfs - WORST_CASE_GSO_HEADER) + 1;
ND(3, "srcmtu=%u, dstmtu=%u, x=%u", na->mfs, dst_na->mfs, needed);
@@ -1916,6 +1942,9 @@ nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n, struct netmap_vp_adapter *na,
dst_nr = dst_nr % nrings;
kring = &dst_na->up.rx_rings[dst_nr];
ring = kring->ring;
+ /* the destination ring may not have been opened for RX */
+ if (unlikely(ring == NULL || kring->nr_mode != NKR_NETMAP_ON))
+ goto cleanup;
lim = kring->nkr_num_slots - 1;
retry:
@@ -2196,7 +2225,7 @@ netmap_vp_bdg_attach(const char *name, struct netmap_adapter *na)
struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter *)na;
if (vpna->na_bdg)
- return EBUSY;
+ return netmap_bwrap_attach(name, na);
na->na_vp = vpna;
strncpy(na->name, name, sizeof(na->name));
na->na_hostvp = NULL;
@@ -2248,7 +2277,10 @@ netmap_vp_create(struct nmreq *nmr, struct ifnet *ifp,
nm_bound_var(&nmr->nr_arg3, 0, 0,
128*NM_BDG_MAXSLOTS, NULL);
na->num_rx_desc = nmr->nr_rx_slots;
- vpna->mfs = 1514;
+ /* Set the mfs to a default value, as it is needed on the VALE
+ * mismatch datapath. XXX We should set it according to the MTU
+ * known to the kernel. */
+ vpna->mfs = NM_BDG_MFS_DEFAULT;
vpna->last_smac = ~0llu;
/*if (vpna->mfs > netmap_buf_size) TODO netmap_buf_size is zero??
vpna->mfs = netmap_buf_size; */
@@ -2330,7 +2362,8 @@ netmap_bwrap_dtor(struct netmap_adapter *na)
struct nm_bridge *b = bna->up.na_bdg,
*bh = bna->host.na_bdg;
- netmap_mem_put(bna->host.up.nm_mem);
+ if (bna->host.up.nm_mem)
+ netmap_mem_put(bna->host.up.nm_mem);
if (b) {
netmap_bdg_detach_common(b, bna->up.bdg_port,
@@ -2459,28 +2492,6 @@ netmap_bwrap_reg(struct netmap_adapter *na, int onoff)
hostna->up.na_lut = na->na_lut;
}
- /* cross-link the netmap rings
- * The original number of rings comes from hwna,
- * rx rings on one side equals tx rings on the other.
- */
- for_rx_tx(t) {
- enum txrx r = nm_txrx_swap(t); /* swap NR_TX <-> NR_RX */
- for (i = 0; i < nma_get_nrings(hwna, r) + 1; i++) {
- NMR(hwna, r)[i].ring = NMR(na, t)[i].ring;
- }
- }
-
- if (na->na_flags & NAF_HOST_RINGS) {
- struct netmap_adapter *hna = &hostna->up;
- /* the hostna rings are the host rings of the bwrap.
- * The corresponding krings must point back to the
- * hostna
- */
- hna->tx_rings = &na->tx_rings[na->num_tx_rings];
- hna->tx_rings[0].na = hna;
- hna->rx_rings = &na->rx_rings[na->num_rx_rings];
- hna->rx_rings[0].na = hna;
- }
}
/* pass down the pending ring state information */
@@ -2497,9 +2508,10 @@ netmap_bwrap_reg(struct netmap_adapter *na, int onoff)
/* copy up the current ring state information */
for_rx_tx(t) {
- for (i = 0; i < nma_get_nrings(na, t) + 1; i++)
- NMR(na, t)[i].nr_mode =
- NMR(hwna, t)[i].nr_mode;
+ for (i = 0; i < nma_get_nrings(na, t) + 1; i++) {
+ struct netmap_kring *kring = &NMR(hwna, t)[i];
+ NMR(na, t)[i].nr_mode = kring->nr_mode;
+ }
}
/* impersonate a netmap_vp_adapter */
@@ -2537,6 +2549,14 @@ netmap_bwrap_reg(struct netmap_adapter *na, int onoff)
hwna->na_lut.lut = NULL;
hwna->na_lut.objtotal = 0;
hwna->na_lut.objsize = 0;
+
+ /* pass ownership of the netmap rings to the hwna */
+ for_rx_tx(t) {
+ for (i = 0; i < nma_get_nrings(na, t) + 1; i++) {
+ NMR(na, t)[i].ring = NULL;
+ }
+ }
+
}
return 0;
@@ -2570,6 +2590,7 @@ netmap_bwrap_krings_create(struct netmap_adapter *na)
struct netmap_bwrap_adapter *bna =
(struct netmap_bwrap_adapter *)na;
struct netmap_adapter *hwna = bna->hwna;
+ struct netmap_adapter *hostna = &bna->host.up;
int i, error = 0;
enum txrx t;
@@ -2586,16 +2607,49 @@ netmap_bwrap_krings_create(struct netmap_adapter *na)
goto err_del_vp_rings;
}
- /* get each ring slot number from the corresponding hwna ring */
- for_rx_tx(t) {
- enum txrx r = nm_txrx_swap(t); /* swap NR_TX <-> NR_RX */
- for (i = 0; i < nma_get_nrings(hwna, r) + 1; i++) {
- NMR(na, t)[i].nkr_num_slots = NMR(hwna, r)[i].nkr_num_slots;
+ /* increment the usage counter for all the hwna krings */
+ for_rx_tx(t) {
+ for (i = 0; i < nma_get_nrings(hwna, t) + 1; i++) {
+ NMR(hwna, t)[i].users++;
}
+ }
+
+ /* now create the actual rings */
+ error = netmap_mem_rings_create(hwna);
+ if (error) {
+ goto err_dec_users;
+ }
+
+ /* cross-link the netmap rings
+ * The original number of rings comes from hwna,
+ * rx rings on one side equals tx rings on the other.
+ */
+ for_rx_tx(t) {
+ enum txrx r = nm_txrx_swap(t); /* swap NR_TX <-> NR_RX */
+ for (i = 0; i < nma_get_nrings(hwna, r) + 1; i++) {
+ NMR(na, t)[i].nkr_num_slots = NMR(hwna, r)[i].nkr_num_slots;
+ NMR(na, t)[i].ring = NMR(hwna, r)[i].ring;
+ }
+ }
+
+ if (na->na_flags & NAF_HOST_RINGS) {
+ /* the hostna rings are the host rings of the bwrap.
+ * The corresponding krings must point back to the
+ * hostna
+ */
+ hostna->tx_rings = &na->tx_rings[na->num_tx_rings];
+ hostna->tx_rings[0].na = hostna;
+ hostna->rx_rings = &na->rx_rings[na->num_rx_rings];
+ hostna->rx_rings[0].na = hostna;
}
return 0;
+err_dec_users:
+ for_rx_tx(t) {
+ NMR(hwna, t)[i].users--;
+ }
+ hwna->nm_krings_delete(hwna);
err_del_vp_rings:
netmap_vp_krings_delete(na);
@@ -2609,9 +2663,20 @@ netmap_bwrap_krings_delete(struct netmap_adapter *na)
struct netmap_bwrap_adapter *bna =
(struct netmap_bwrap_adapter *)na;
struct netmap_adapter *hwna = bna->hwna;
+ enum txrx t;
+ int i;
ND("%s", na->name);
+ /* decrement the usage counter for all the hwna krings */
+ for_rx_tx(t) {
+ for (i = 0; i < nma_get_nrings(hwna, t) + 1; i++) {
+ NMR(hwna, t)[i].users--;
+ }
+ }
+
+ /* delete any netmap rings that are no longer needed */
+ netmap_mem_rings_delete(hwna);
hwna->nm_krings_delete(hwna);
netmap_vp_krings_delete(na);
}
@@ -2699,7 +2764,7 @@ netmap_bwrap_bdg_ctl(struct netmap_adapter *na, struct nmreq *nmr, int attach)
if (npriv == NULL)
return ENOMEM;
npriv->np_ifp = na->ifp; /* let the priv destructor release the ref */
- error = netmap_do_regif(npriv, na, 0, NR_REG_NIC_SW);
+ error = netmap_do_regif(npriv, na, nmr->nr_ringid, nmr->nr_flags);
if (error) {
netmap_priv_delete(npriv);
return error;
@@ -2766,6 +2831,8 @@ netmap_bwrap_attach(const char *nr_name, struct netmap_adapter *hwna)
na->nm_mem = netmap_mem_get(hwna->nm_mem);
na->virt_hdr_len = hwna->virt_hdr_len;
bna->up.retry = 1; /* XXX maybe this should depend on the hwna */
+ /* Set the mfs, needed on the VALE mismatch datapath. */
+ bna->up.mfs = NM_BDG_MFS_DEFAULT;
bna->hwna = hwna;
netmap_adapter_get(hwna);
@@ -2793,6 +2860,7 @@ netmap_bwrap_attach(const char *nr_name, struct netmap_adapter *hwna)
na->na_hostvp = hwna->na_hostvp =
hostna->na_hostvp = &bna->host;
hostna->na_flags = NAF_BUSY; /* prevent NIOCREGIF */
+ bna->host.mfs = NM_BDG_MFS_DEFAULT;
}
ND("%s<->%s txr %d txd %d rxr %d rxd %d",