aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--sys/conf/files1
-rw-r--r--sys/dev/netmap/netmap.c125
-rw-r--r--sys/dev/netmap/netmap_bdg.c1827
-rw-r--r--sys/dev/netmap/netmap_bdg.h155
-rw-r--r--sys/dev/netmap/netmap_freebsd.c38
-rw-r--r--sys/dev/netmap/netmap_generic.c120
-rw-r--r--sys/dev/netmap/netmap_kern.h327
-rw-r--r--sys/dev/netmap/netmap_mem2.c12
-rw-r--r--sys/dev/netmap/netmap_monitor.c231
-rw-r--r--sys/dev/netmap/netmap_pipe.c60
-rw-r--r--sys/dev/netmap/netmap_vale.c2172
-rw-r--r--sys/net/netmap.h2
-rw-r--r--sys/net/netmap_user.h27
13 files changed, 2764 insertions, 2333 deletions
diff --git a/sys/conf/files b/sys/conf/files
index 4f5ff7bdf1b9..833c5f17b788 100644
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -2522,6 +2522,7 @@ dev/netmap/netmap_pipe.c optional netmap
dev/netmap/netmap_pt.c optional netmap
dev/netmap/netmap_vale.c optional netmap
dev/netmap/netmap_legacy.c optional netmap
+dev/netmap/netmap_bdg.c optional netmap
# compile-with "${NORMAL_C} -Wconversion -Wextra"
dev/nfsmb/nfsmb.c optional nfsmb pci
dev/nge/if_nge.c optional nge
diff --git a/sys/dev/netmap/netmap.c b/sys/dev/netmap/netmap.c
index fef919f4ed57..42697e2874aa 100644
--- a/sys/dev/netmap/netmap.c
+++ b/sys/dev/netmap/netmap.c
@@ -521,6 +521,9 @@ int netmap_generic_txqdisc = 1;
int netmap_generic_ringsize = 1024;
int netmap_generic_rings = 1;
+/* Non-zero to enable checksum offloading in NIC drivers */
+int netmap_generic_hwcsum = 0;
+
/* Non-zero if ptnet devices are allowed to use virtio-net headers. */
int ptnet_vnet_hdr = 1;
@@ -549,6 +552,9 @@ SYSCTL_INT(_dev_netmap, OID_AUTO, fwd, CTLFLAG_RW, &netmap_fwd, 0,
SYSCTL_INT(_dev_netmap, OID_AUTO, admode, CTLFLAG_RW, &netmap_admode, 0,
"Adapter mode. 0 selects the best option available,"
"1 forces native adapter, 2 forces emulated adapter");
+SYSCTL_INT(_dev_netmap, OID_AUTO, generic_hwcsum, CTLFLAG_RW, &netmap_generic_hwcsum,
+ 0, "Hardware checksums. 0 to disable checksum generation by the NIC (default),"
+ "1 to enable checksum generation by the NIC");
SYSCTL_INT(_dev_netmap, OID_AUTO, generic_mit, CTLFLAG_RW, &netmap_generic_mit,
0, "RX notification interval in nanoseconds");
SYSCTL_INT(_dev_netmap, OID_AUTO, generic_ringsize, CTLFLAG_RW,
@@ -827,8 +833,8 @@ netmap_krings_create(struct netmap_adapter *na, u_int tailroom)
}
/* account for the (possibly fake) host rings */
- n[NR_TX] = na->num_tx_rings + 1;
- n[NR_RX] = na->num_rx_rings + 1;
+ n[NR_TX] = netmap_all_rings(na, NR_TX);
+ n[NR_RX] = netmap_all_rings(na, NR_RX);
len = (n[NR_TX] + n[NR_RX]) *
(sizeof(struct netmap_kring) + sizeof(struct netmap_kring *))
@@ -930,11 +936,14 @@ netmap_krings_delete(struct netmap_adapter *na)
void
netmap_hw_krings_delete(struct netmap_adapter *na)
{
- struct mbq *q = &na->rx_rings[na->num_rx_rings]->rx_queue;
+ u_int lim = netmap_real_rings(na, NR_RX), i;
- ND("destroy sw mbq with len %d", mbq_len(q));
- mbq_purge(q);
- mbq_safe_fini(q);
+ for (i = nma_get_nrings(na, NR_RX); i < lim; i++) {
+ struct mbq *q = &NMR(na, NR_RX)[i]->rx_queue;
+ ND("destroy sw mbq with len %d", mbq_len(q));
+ mbq_purge(q);
+ mbq_safe_fini(q);
+ }
netmap_krings_delete(na);
}
@@ -1535,7 +1544,7 @@ netmap_get_na(struct nmreq_header *hdr,
goto out;
/* try to see if this is a bridge port */
- error = netmap_get_bdg_na(hdr, na, nmd, create);
+ error = netmap_get_vale_na(hdr, na, nmd, create);
if (error)
goto out;
@@ -1827,7 +1836,7 @@ netmap_interp_ringid(struct netmap_priv_d *priv, uint32_t nr_mode,
}
priv->np_qfirst[t] = (nr_mode == NR_REG_SW ?
nma_get_nrings(na, t) : 0);
- priv->np_qlast[t] = nma_get_nrings(na, t) + 1;
+ priv->np_qlast[t] = netmap_all_rings(na, t);
ND("%s: %s %d %d", nr_mode == NR_REG_SW ? "SW" : "NIC+SW",
nm_txrx2str(t),
priv->np_qfirst[t], priv->np_qlast[t]);
@@ -2543,7 +2552,7 @@ netmap_ioctl(struct netmap_priv_d *priv, u_long cmd, caddr_t data,
NMG_LOCK();
hdr->nr_reqtype = NETMAP_REQ_REGISTER;
hdr->nr_body = (uintptr_t)&regreq;
- error = netmap_get_bdg_na(hdr, &na, NULL, 0);
+ error = netmap_get_vale_na(hdr, &na, NULL, 0);
hdr->nr_reqtype = NETMAP_REQ_PORT_HDR_SET;
hdr->nr_body = (uintptr_t)req;
if (na && !error) {
@@ -3336,6 +3345,12 @@ netmap_attach_common(struct netmap_adapter *na)
}
na->pdev = na; /* make sure netmap_mem_map() is called */
#endif /* __FreeBSD__ */
+ if (na->na_flags & NAF_HOST_RINGS) {
+ if (na->num_host_rx_rings == 0)
+ na->num_host_rx_rings = 1;
+ if (na->num_host_tx_rings == 0)
+ na->num_host_tx_rings = 1;
+ }
if (na->nm_krings_create == NULL) {
/* we assume that we have been called by a driver,
* since other port types all provide their own
@@ -3357,7 +3372,7 @@ netmap_attach_common(struct netmap_adapter *na)
/* no special nm_bdg_attach callback. On VALE
* attach, we need to interpose a bwrap
*/
- na->nm_bdg_attach = netmap_bwrap_attach;
+ na->nm_bdg_attach = netmap_default_bdg_attach;
#endif
return 0;
@@ -3399,10 +3414,10 @@ out:
static void
netmap_hw_dtor(struct netmap_adapter *na)
{
- if (nm_iszombie(na) || na->ifp == NULL)
+ if (na->ifp == NULL)
return;
- WNA(na->ifp) = NULL;
+ NM_DETACH_NA(na->ifp);
}
@@ -3426,10 +3441,10 @@ netmap_attach_ext(struct netmap_adapter *arg, size_t size, int override_reg)
}
if (arg == NULL || arg->ifp == NULL)
- goto fail;
+ return EINVAL;
ifp = arg->ifp;
- if (NA(ifp) && !NM_NA_VALID(ifp)) {
+ if (NM_NA_CLASH(ifp)) {
/* If NA(ifp) is not null but there is no valid netmap
* adapter it means that someone else is using the same
* pointer (e.g. ax25_ptr on linux). This happens for
@@ -3456,28 +3471,8 @@ netmap_attach_ext(struct netmap_adapter *arg, size_t size, int override_reg)
NM_ATTACH_NA(ifp, &hwna->up);
-#ifdef linux
- if (ifp->netdev_ops) {
- /* prepare a clone of the netdev ops */
-#ifndef NETMAP_LINUX_HAVE_NETDEV_OPS
- hwna->nm_ndo.ndo_start_xmit = ifp->netdev_ops;
-#else
- hwna->nm_ndo = *ifp->netdev_ops;
-#endif /* NETMAP_LINUX_HAVE_NETDEV_OPS */
- }
- hwna->nm_ndo.ndo_start_xmit = linux_netmap_start_xmit;
- hwna->nm_ndo.ndo_change_mtu = linux_netmap_change_mtu;
- if (ifp->ethtool_ops) {
- hwna->nm_eto = *ifp->ethtool_ops;
- }
- hwna->nm_eto.set_ringparam = linux_netmap_set_ringparam;
-#ifdef NETMAP_LINUX_HAVE_SET_CHANNELS
- hwna->nm_eto.set_channels = linux_netmap_set_channels;
-#endif /* NETMAP_LINUX_HAVE_SET_CHANNELS */
- if (arg->nm_config == NULL) {
- hwna->up.nm_config = netmap_linux_config;
- }
-#endif /* linux */
+ nm_os_onattach(ifp);
+
if (arg->nm_dtor == NULL) {
hwna->up.nm_dtor = netmap_hw_dtor;
}
@@ -3545,7 +3540,10 @@ netmap_hw_krings_create(struct netmap_adapter *na)
int ret = netmap_krings_create(na, 0);
if (ret == 0) {
/* initialize the mbq for the sw rx ring */
- mbq_safe_init(&na->rx_rings[na->num_rx_rings]->rx_queue);
+ u_int lim = netmap_real_rings(na, NR_RX), i;
+ for (i = na->num_rx_rings; i < lim; i++) {
+ mbq_safe_init(&NMR(na, NR_RX)[i]->rx_queue);
+ }
ND("initialized sw rx queue %d", na->num_rx_rings);
}
return ret;
@@ -3608,8 +3606,14 @@ netmap_transmit(struct ifnet *ifp, struct mbuf *m)
unsigned int txr;
struct mbq *q;
int busy;
+ u_int i;
+
+ i = MBUF_TXQ(m);
+ if (i >= na->num_host_rx_rings) {
+ i = i % na->num_host_rx_rings;
+ }
+ kring = NMR(na, NR_RX)[nma_get_nrings(na, NR_RX) + i];
- kring = na->rx_rings[na->num_rx_rings];
// XXX [Linux] we do not need this lock
// if we follow the down/configure/up protocol -gl
// mtx_lock(&na->core_lock);
@@ -3639,8 +3643,15 @@ netmap_transmit(struct ifnet *ifp, struct mbuf *m)
goto done;
}
- if (nm_os_mbuf_has_offld(m)) {
- RD(1, "%s drop mbuf that needs offloadings", na->name);
+ if (!netmap_generic_hwcsum) {
+ if (nm_os_mbuf_has_csum_offld(m)) {
+ RD(1, "%s drop mbuf that needs checksum offload", na->name);
+ goto done;
+ }
+ }
+
+ if (nm_os_mbuf_has_seg_offld(m)) {
+ RD(1, "%s drop mbuf that needs generic segmentation offload", na->name);
goto done;
}
@@ -3845,6 +3856,40 @@ netmap_rx_irq(struct ifnet *ifp, u_int q, u_int *work_done)
return netmap_common_irq(na, q, work_done);
}
+/* set/clear native flags and if_transmit/netdev_ops */
+void
+nm_set_native_flags(struct netmap_adapter *na)
+{
+ struct ifnet *ifp = na->ifp;
+
+ /* We do the setup for intercepting packets only if we are the
+ * first user of this adapter. */
+ if (na->active_fds > 0) {
+ return;
+ }
+
+ na->na_flags |= NAF_NETMAP_ON;
+ nm_os_onenter(ifp);
+ nm_update_hostrings_mode(na);
+}
+
+void
+nm_clear_native_flags(struct netmap_adapter *na)
+{
+ struct ifnet *ifp = na->ifp;
+
+ /* We undo the setup for intercepting packets only if we are the
+ * last user of this adapter. */
+ if (na->active_fds > 0) {
+ return;
+ }
+
+ nm_update_hostrings_mode(na);
+ nm_os_onexit(ifp);
+
+ na->na_flags &= ~NAF_NETMAP_ON;
+}
+
/*
* Module loader and unloader
diff --git a/sys/dev/netmap/netmap_bdg.c b/sys/dev/netmap/netmap_bdg.c
new file mode 100644
index 000000000000..dd64b805cbf1
--- /dev/null
+++ b/sys/dev/netmap/netmap_bdg.c
@@ -0,0 +1,1827 @@
+/*
+ * Copyright (C) 2013-2016 Universita` di Pisa
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+
+/*
+ * This module implements the VALE switch for netmap
+
+--- VALE SWITCH ---
+
+NMG_LOCK() serializes all modifications to switches and ports.
+A switch cannot be deleted until all ports are gone.
+
+For each switch, an SX lock (RWlock on linux) protects
+deletion of ports. When configuring or deleting a new port, the
+lock is acquired in exclusive mode (after holding NMG_LOCK).
+When forwarding, the lock is acquired in shared mode (without NMG_LOCK).
+The lock is held throughout the entire forwarding cycle,
+during which the thread may incur a page fault.
+Hence it is important that sleepable shared locks are used.
+
+On the rx ring, the per-port lock is grabbed initially to reserve
+a number of slot in the ring, then the lock is released,
+packets are copied from source to destination, and then
+the lock is acquired again and the receive ring is updated.
+(A similar thing is done on the tx ring for NIC and host stack
+ports attached to the switch)
+
+ */
+
+/*
+ * OS-specific code that is used only within this file.
+ * Other OS-specific code that must be accessed by drivers
+ * is present in netmap_kern.h
+ */
+
+#if defined(__FreeBSD__)
+#include <sys/cdefs.h> /* prerequisite */
+__FBSDID("$FreeBSD$");
+
+#include <sys/types.h>
+#include <sys/errno.h>
+#include <sys/param.h> /* defines used in kernel.h */
+#include <sys/kernel.h> /* types used in module initialization */
+#include <sys/conf.h> /* cdevsw struct, UID, GID */
+#include <sys/sockio.h>
+#include <sys/socketvar.h> /* struct socket */
+#include <sys/malloc.h>
+#include <sys/poll.h>
+#include <sys/rwlock.h>
+#include <sys/socket.h> /* sockaddrs */
+#include <sys/selinfo.h>
+#include <sys/sysctl.h>
+#include <net/if.h>
+#include <net/if_var.h>
+#include <net/bpf.h> /* BIOCIMMEDIATE */
+#include <machine/bus.h> /* bus_dmamap_* */
+#include <sys/endian.h>
+#include <sys/refcount.h>
+#include <sys/smp.h>
+
+
+#elif defined(linux)
+
+#include "bsd_glue.h"
+
+#elif defined(__APPLE__)
+
+#warning OSX support is only partial
+#include "osx_glue.h"
+
+#elif defined(_WIN32)
+#include "win_glue.h"
+
+#else
+
+#error Unsupported platform
+
+#endif /* unsupported */
+
+/*
+ * common headers
+ */
+
+#include <net/netmap.h>
+#include <dev/netmap/netmap_kern.h>
+#include <dev/netmap/netmap_mem2.h>
+
+#include <dev/netmap/netmap_bdg.h>
+
+const char*
+netmap_bdg_name(struct netmap_vp_adapter *vp)
+{
+ struct nm_bridge *b = vp->na_bdg;
+ if (b == NULL)
+ return NULL;
+ return b->bdg_basename;
+}
+
+
+#ifndef CONFIG_NET_NS
+/*
+ * XXX in principle nm_bridges could be created dynamically
+ * Right now we have a static array and deletions are protected
+ * by an exclusive lock.
+ */
+static struct nm_bridge *nm_bridges;
+#endif /* !CONFIG_NET_NS */
+
+
+static int
+nm_is_id_char(const char c)
+{
+ return (c >= 'a' && c <= 'z') ||
+ (c >= 'A' && c <= 'Z') ||
+ (c >= '0' && c <= '9') ||
+ (c == '_');
+}
+
+/* Validate the name of a VALE bridge port and return the
+ * position of the ":" character. */
+static int
+nm_vale_name_validate(const char *name)
+{
+ int colon_pos = -1;
+ int i;
+
+ if (!name || strlen(name) < strlen(NM_BDG_NAME)) {
+ return -1;
+ }
+
+ for (i = 0; i < NM_BDG_IFNAMSIZ && name[i]; i++) {
+ if (name[i] == ':') {
+ colon_pos = i;
+ break;
+ } else if (!nm_is_id_char(name[i])) {
+ return -1;
+ }
+ }
+
+ if (strlen(name) - colon_pos > IFNAMSIZ) {
+ /* interface name too long */
+ return -1;
+ }
+
+ return colon_pos;
+}
+
+/*
+ * locate a bridge among the existing ones.
+ * MUST BE CALLED WITH NMG_LOCK()
+ *
+ * a ':' in the name terminates the bridge name. Otherwise, just NM_NAME.
+ * We assume that this is called with a name of at least NM_NAME chars.
+ */
+struct nm_bridge *
+nm_find_bridge(const char *name, int create, struct netmap_bdg_ops *ops)
+{
+ int i, namelen;
+ struct nm_bridge *b = NULL, *bridges;
+ u_int num_bridges;
+
+ NMG_LOCK_ASSERT();
+
+ netmap_bns_getbridges(&bridges, &num_bridges);
+
+ namelen = nm_vale_name_validate(name);
+ if (namelen < 0) {
+ D("invalid bridge name %s", name ? name : NULL);
+ return NULL;
+ }
+
+ /* lookup the name, remember empty slot if there is one */
+ for (i = 0; i < num_bridges; i++) {
+ struct nm_bridge *x = bridges + i;
+
+ if ((x->bdg_flags & NM_BDG_ACTIVE) + x->bdg_active_ports == 0) {
+ if (create && b == NULL)
+ b = x; /* record empty slot */
+ } else if (x->bdg_namelen != namelen) {
+ continue;
+ } else if (strncmp(name, x->bdg_basename, namelen) == 0) {
+ ND("found '%.*s' at %d", namelen, name, i);
+ b = x;
+ break;
+ }
+ }
+ if (i == num_bridges && b) { /* name not found, can create entry */
+ /* initialize the bridge */
+ ND("create new bridge %s with ports %d", b->bdg_basename,
+ b->bdg_active_ports);
+ b->ht = nm_os_malloc(sizeof(struct nm_hash_ent) * NM_BDG_HASH);
+ if (b->ht == NULL) {
+ D("failed to allocate hash table");
+ return NULL;
+ }
+ strncpy(b->bdg_basename, name, namelen);
+ b->bdg_namelen = namelen;
+ b->bdg_active_ports = 0;
+ for (i = 0; i < NM_BDG_MAXPORTS; i++)
+ b->bdg_port_index[i] = i;
+ /* set the default function */
+ b->bdg_ops = ops;
+ b->private_data = b->ht;
+ b->bdg_flags = 0;
+ NM_BNS_GET(b);
+ }
+ return b;
+}
+
+
+int
+netmap_bdg_free(struct nm_bridge *b)
+{
+ if ((b->bdg_flags & NM_BDG_ACTIVE) + b->bdg_active_ports != 0) {
+ return EBUSY;
+ }
+
+ ND("marking bridge %s as free", b->bdg_basename);
+ nm_os_free(b->ht);
+ b->bdg_ops = NULL;
+ b->bdg_flags = 0;
+ NM_BNS_PUT(b);
+ return 0;
+}
+
+
+/* remove from bridge b the ports in slots hw and sw
+ * (sw can be -1 if not needed)
+ */
+void
+netmap_bdg_detach_common(struct nm_bridge *b, int hw, int sw)
+{
+ int s_hw = hw, s_sw = sw;
+ int i, lim =b->bdg_active_ports;
+ uint32_t *tmp = b->tmp_bdg_port_index;
+
+ /*
+ New algorithm:
+ make a copy of bdg_port_index;
+ lookup NA(ifp)->bdg_port and SWNA(ifp)->bdg_port
+ in the array of bdg_port_index, replacing them with
+ entries from the bottom of the array;
+ decrement bdg_active_ports;
+ acquire BDG_WLOCK() and copy back the array.
+ */
+
+ if (netmap_verbose)
+ D("detach %d and %d (lim %d)", hw, sw, lim);
+ /* make a copy of the list of active ports, update it,
+ * and then copy back within BDG_WLOCK().
+ */
+ memcpy(b->tmp_bdg_port_index, b->bdg_port_index, sizeof(b->tmp_bdg_port_index));
+ for (i = 0; (hw >= 0 || sw >= 0) && i < lim; ) {
+ if (hw >= 0 && tmp[i] == hw) {
+ ND("detach hw %d at %d", hw, i);
+ lim--; /* point to last active port */
+ tmp[i] = tmp[lim]; /* swap with i */
+ tmp[lim] = hw; /* now this is inactive */
+ hw = -1;
+ } else if (sw >= 0 && tmp[i] == sw) {
+ ND("detach sw %d at %d", sw, i);
+ lim--;
+ tmp[i] = tmp[lim];
+ tmp[lim] = sw;
+ sw = -1;
+ } else {
+ i++;
+ }
+ }
+ if (hw >= 0 || sw >= 0) {
+ D("XXX delete failed hw %d sw %d, should panic...", hw, sw);
+ }
+
+ BDG_WLOCK(b);
+ if (b->bdg_ops->dtor)
+ b->bdg_ops->dtor(b->bdg_ports[s_hw]);
+ b->bdg_ports[s_hw] = NULL;
+ if (s_sw >= 0) {
+ b->bdg_ports[s_sw] = NULL;
+ }
+ memcpy(b->bdg_port_index, b->tmp_bdg_port_index, sizeof(b->tmp_bdg_port_index));
+ b->bdg_active_ports = lim;
+ BDG_WUNLOCK(b);
+
+ ND("now %d active ports", lim);
+ netmap_bdg_free(b);
+}
+
+
+/* nm_bdg_ctl callback for VALE ports */
+int
+netmap_vp_bdg_ctl(struct nmreq_header *hdr, struct netmap_adapter *na)
+{
+ struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter *)na;
+ struct nm_bridge *b = vpna->na_bdg;
+
+ if (hdr->nr_reqtype == NETMAP_REQ_VALE_ATTACH) {
+ return 0; /* nothing to do */
+ }
+ if (b) {
+ netmap_set_all_rings(na, 0 /* disable */);
+ netmap_bdg_detach_common(b, vpna->bdg_port, -1);
+ vpna->na_bdg = NULL;
+ netmap_set_all_rings(na, 1 /* enable */);
+ }
+ /* We only took a reference for the attach operation. */
+ netmap_adapter_put(na);
+ return 0;
+}
+
+int
+netmap_default_bdg_attach(const char *name, struct netmap_adapter *na,
+ struct nm_bridge *b)
+{
+ return NM_NEED_BWRAP;
+}
+
+/* Try to get a reference to a netmap adapter attached to a VALE switch.
+ * If the adapter is found (or is created), this function returns 0, a
+ * non NULL pointer is returned into *na, and the caller holds a
+ * reference to the adapter.
+ * If an adapter is not found, then no reference is grabbed and the
+ * function returns an error code, or 0 if there is just a VALE prefix
+ * mismatch. Therefore the caller holds a reference when
+ * (*na != NULL && return == 0).
+ */
+int
+netmap_get_bdg_na(struct nmreq_header *hdr, struct netmap_adapter **na,
+ struct netmap_mem_d *nmd, int create, struct netmap_bdg_ops *ops)
+{
+ char *nr_name = hdr->nr_name;
+ const char *ifname;
+ struct ifnet *ifp = NULL;
+ int error = 0;
+ struct netmap_vp_adapter *vpna, *hostna = NULL;
+ struct nm_bridge *b;
+ uint32_t i, j;
+ uint32_t cand = NM_BDG_NOPORT, cand2 = NM_BDG_NOPORT;
+ int needed;
+
+ *na = NULL; /* default return value */
+
+ /* first try to see if this is a bridge port. */
+ NMG_LOCK_ASSERT();
+ if (strncmp(nr_name, ops->name, strlen(ops->name) - 1)) {
+ return 0; /* no error, but no VALE prefix */
+ }
+
+ b = nm_find_bridge(nr_name, create, ops);
+ if (b == NULL) {
+ ND("no bridges available for '%s'", nr_name);
+ return (create ? ENOMEM : ENXIO);
+ }
+ if (strlen(nr_name) < b->bdg_namelen) /* impossible */
+ panic("x");
+
+ /* Now we are sure that name starts with the bridge's name,
+ * lookup the port in the bridge. We need to scan the entire
+ * list. It is not important to hold a WLOCK on the bridge
+ * during the search because NMG_LOCK already guarantees
+ * that there are no other possible writers.
+ */
+
+ /* lookup in the local list of ports */
+ for (j = 0; j < b->bdg_active_ports; j++) {
+ i = b->bdg_port_index[j];
+ vpna = b->bdg_ports[i];
+ ND("checking %s", vpna->up.name);
+ if (!strcmp(vpna->up.name, nr_name)) {
+ netmap_adapter_get(&vpna->up);
+ ND("found existing if %s refs %d", nr_name)
+ *na = &vpna->up;
+ return 0;
+ }
+ }
+ /* not found, should we create it? */
+ if (!create)
+ return ENXIO;
+ /* yes we should, see if we have space to attach entries */
+ needed = 2; /* in some cases we only need 1 */
+ if (b->bdg_active_ports + needed >= NM_BDG_MAXPORTS) {
+ D("bridge full %d, cannot create new port", b->bdg_active_ports);
+ return ENOMEM;
+ }
+ /* record the next two ports available, but do not allocate yet */
+ cand = b->bdg_port_index[b->bdg_active_ports];
+ cand2 = b->bdg_port_index[b->bdg_active_ports + 1];
+ ND("+++ bridge %s port %s used %d avail %d %d",
+ b->bdg_basename, ifname, b->bdg_active_ports, cand, cand2);
+
+ /*
+ * try see if there is a matching NIC with this name
+ * (after the bridge's name)
+ */
+ ifname = nr_name + b->bdg_namelen + 1;
+ ifp = ifunit_ref(ifname);
+ if (!ifp) {
+ /* Create an ephemeral virtual port.
+ * This block contains all the ephemeral-specific logic.
+ */
+
+ if (hdr->nr_reqtype != NETMAP_REQ_REGISTER) {
+ error = EINVAL;
+ goto out;
+ }
+
+ /* bdg_netmap_attach creates a struct netmap_adapter */
+ error = b->bdg_ops->vp_create(hdr, NULL, nmd, &vpna);
+ if (error) {
+ D("error %d", error);
+ goto out;
+ }
+ /* shortcut - we can skip get_hw_na(),
+ * ownership check and nm_bdg_attach()
+ */
+
+ } else {
+ struct netmap_adapter *hw;
+
+ /* the vale:nic syntax is only valid for some commands */
+ switch (hdr->nr_reqtype) {
+ case NETMAP_REQ_VALE_ATTACH:
+ case NETMAP_REQ_VALE_DETACH:
+ case NETMAP_REQ_VALE_POLLING_ENABLE:
+ case NETMAP_REQ_VALE_POLLING_DISABLE:
+ break; /* ok */
+ default:
+ error = EINVAL;
+ goto out;
+ }
+
+ error = netmap_get_hw_na(ifp, nmd, &hw);
+ if (error || hw == NULL)
+ goto out;
+
+ /* host adapter might not be created */
+ error = hw->nm_bdg_attach(nr_name, hw, b);
+ if (error == NM_NEED_BWRAP) {
+ error = b->bdg_ops->bwrap_attach(nr_name, hw);
+ }
+ if (error)
+ goto out;
+ vpna = hw->na_vp;
+ hostna = hw->na_hostvp;
+ if (hdr->nr_reqtype == NETMAP_REQ_VALE_ATTACH) {
+ /* Check if we need to skip the host rings. */
+ struct nmreq_vale_attach *areq =
+ (struct nmreq_vale_attach *)(uintptr_t)hdr->nr_body;
+ if (areq->reg.nr_mode != NR_REG_NIC_SW) {
+ hostna = NULL;
+ }
+ }
+ }
+
+ BDG_WLOCK(b);
+ vpna->bdg_port = cand;
+ ND("NIC %p to bridge port %d", vpna, cand);
+ /* bind the port to the bridge (virtual ports are not active) */
+ b->bdg_ports[cand] = vpna;
+ vpna->na_bdg = b;
+ b->bdg_active_ports++;
+ if (hostna != NULL) {
+ /* also bind the host stack to the bridge */
+ b->bdg_ports[cand2] = hostna;
+ hostna->bdg_port = cand2;
+ hostna->na_bdg = b;
+ b->bdg_active_ports++;
+ ND("host %p to bridge port %d", hostna, cand2);
+ }
+ ND("if %s refs %d", ifname, vpna->up.na_refcount);
+ BDG_WUNLOCK(b);
+ *na = &vpna->up;
+ netmap_adapter_get(*na);
+
+out:
+ if (ifp)
+ if_rele(ifp);
+
+ return error;
+}
+
+/* Process NETMAP_REQ_VALE_ATTACH.
+ */
+int
+nm_bdg_ctl_attach(struct nmreq_header *hdr, void *auth_token)
+{
+ struct nmreq_vale_attach *req =
+ (struct nmreq_vale_attach *)(uintptr_t)hdr->nr_body;
+ struct netmap_vp_adapter * vpna;
+ struct netmap_adapter *na = NULL;
+ struct netmap_mem_d *nmd = NULL;
+ struct nm_bridge *b = NULL;
+ int error;
+
+ NMG_LOCK();
+ /* permission check for modified bridges */
+ b = nm_find_bridge(hdr->nr_name, 0 /* don't create */, NULL);
+ if (b && !nm_bdg_valid_auth_token(b, auth_token)) {
+ error = EACCES;
+ goto unlock_exit;
+ }
+
+ if (req->reg.nr_mem_id) {
+ nmd = netmap_mem_find(req->reg.nr_mem_id);
+ if (nmd == NULL) {
+ error = EINVAL;
+ goto unlock_exit;
+ }
+ }
+
+ /* check for existing one */
+ error = netmap_get_vale_na(hdr, &na, nmd, 0);
+ if (na) {
+ error = EBUSY;
+ goto unref_exit;
+ }
+ error = netmap_get_vale_na(hdr, &na,
+ nmd, 1 /* create if not exists */);
+ if (error) { /* no device */
+ goto unlock_exit;
+ }
+
+ if (na == NULL) { /* VALE prefix missing */
+ error = EINVAL;
+ goto unlock_exit;
+ }
+
+ if (NETMAP_OWNED_BY_ANY(na)) {
+ error = EBUSY;
+ goto unref_exit;
+ }
+
+ if (na->nm_bdg_ctl) {
+ /* nop for VALE ports. The bwrap needs to put the hwna
+ * in netmap mode (see netmap_bwrap_bdg_ctl)
+ */
+ error = na->nm_bdg_ctl(hdr, na);
+ if (error)
+ goto unref_exit;
+ ND("registered %s to netmap-mode", na->name);
+ }
+ vpna = (struct netmap_vp_adapter *)na;
+ req->port_index = vpna->bdg_port;
+ NMG_UNLOCK();
+ return 0;
+
+unref_exit:
+ netmap_adapter_put(na);
+unlock_exit:
+ NMG_UNLOCK();
+ return error;
+}
+
+static inline int
+nm_is_bwrap(struct netmap_adapter *na)
+{
+ return na->nm_register == netmap_bwrap_reg;
+}
+
+/* Process NETMAP_REQ_VALE_DETACH.
+ */
+int
+nm_bdg_ctl_detach(struct nmreq_header *hdr, void *auth_token)
+{
+ struct nmreq_vale_detach *nmreq_det = (void *)(uintptr_t)hdr->nr_body;
+ struct netmap_vp_adapter *vpna;
+ struct netmap_adapter *na;
+ struct nm_bridge *b = NULL;
+ int error;
+
+ NMG_LOCK();
+ /* permission check for modified bridges */
+ b = nm_find_bridge(hdr->nr_name, 0 /* don't create */, NULL);
+ if (b && !nm_bdg_valid_auth_token(b, auth_token)) {
+ error = EACCES;
+ goto unlock_exit;
+ }
+
+ error = netmap_get_vale_na(hdr, &na, NULL, 0 /* don't create */);
+ if (error) { /* no device, or another bridge or user owns the device */
+ goto unlock_exit;
+ }
+
+ if (na == NULL) { /* VALE prefix missing */
+ error = EINVAL;
+ goto unlock_exit;
+ } else if (nm_is_bwrap(na) &&
+ ((struct netmap_bwrap_adapter *)na)->na_polling_state) {
+ /* Don't detach a NIC with polling */
+ error = EBUSY;
+ goto unref_exit;
+ }
+
+ vpna = (struct netmap_vp_adapter *)na;
+ if (na->na_vp != vpna) {
+ /* trying to detach first attach of VALE persistent port attached
+ * to 2 bridges
+ */
+ error = EBUSY;
+ goto unref_exit;
+ }
+ nmreq_det->port_index = vpna->bdg_port;
+
+ if (na->nm_bdg_ctl) {
+ /* remove the port from bridge. The bwrap
+ * also needs to put the hwna in normal mode
+ */
+ error = na->nm_bdg_ctl(hdr, na);
+ }
+
+unref_exit:
+ netmap_adapter_put(na);
+unlock_exit:
+ NMG_UNLOCK();
+ return error;
+
+}
+
+struct nm_bdg_polling_state;
+struct
+nm_bdg_kthread {
+ struct nm_kctx *nmk;
+ u_int qfirst;
+ u_int qlast;
+ struct nm_bdg_polling_state *bps;
+};
+
+struct nm_bdg_polling_state {
+ bool configured;
+ bool stopped;
+ struct netmap_bwrap_adapter *bna;
+ uint32_t mode;
+ u_int qfirst;
+ u_int qlast;
+ u_int cpu_from;
+ u_int ncpus;
+ struct nm_bdg_kthread *kthreads;
+};
+
+static void
+netmap_bwrap_polling(void *data, int is_kthread)
+{
+ struct nm_bdg_kthread *nbk = data;
+ struct netmap_bwrap_adapter *bna;
+ u_int qfirst, qlast, i;
+ struct netmap_kring **kring0, *kring;
+
+ if (!nbk)
+ return;
+ qfirst = nbk->qfirst;
+ qlast = nbk->qlast;
+ bna = nbk->bps->bna;
+ kring0 = NMR(bna->hwna, NR_RX);
+
+ for (i = qfirst; i < qlast; i++) {
+ kring = kring0[i];
+ kring->nm_notify(kring, 0);
+ }
+}
+
+static int
+nm_bdg_create_kthreads(struct nm_bdg_polling_state *bps)
+{
+ struct nm_kctx_cfg kcfg;
+ int i, j;
+
+ bps->kthreads = nm_os_malloc(sizeof(struct nm_bdg_kthread) * bps->ncpus);
+ if (bps->kthreads == NULL)
+ return ENOMEM;
+
+ bzero(&kcfg, sizeof(kcfg));
+ kcfg.worker_fn = netmap_bwrap_polling;
+ kcfg.use_kthread = 1;
+ for (i = 0; i < bps->ncpus; i++) {
+ struct nm_bdg_kthread *t = bps->kthreads + i;
+ int all = (bps->ncpus == 1 &&
+ bps->mode == NETMAP_POLLING_MODE_SINGLE_CPU);
+ int affinity = bps->cpu_from + i;
+
+ t->bps = bps;
+ t->qfirst = all ? bps->qfirst /* must be 0 */: affinity;
+ t->qlast = all ? bps->qlast : t->qfirst + 1;
+ D("kthread %d a:%u qf:%u ql:%u", i, affinity, t->qfirst,
+ t->qlast);
+
+ kcfg.type = i;
+ kcfg.worker_private = t;
+ t->nmk = nm_os_kctx_create(&kcfg, NULL);
+ if (t->nmk == NULL) {
+ goto cleanup;
+ }
+ nm_os_kctx_worker_setaff(t->nmk, affinity);
+ }
+ return 0;
+
+cleanup:
+ for (j = 0; j < i; j++) {
+ struct nm_bdg_kthread *t = bps->kthreads + i;
+ nm_os_kctx_destroy(t->nmk);
+ }
+ nm_os_free(bps->kthreads);
+ return EFAULT;
+}
+
+/* A variant of ptnetmap_start_kthreads() */
+static int
+nm_bdg_polling_start_kthreads(struct nm_bdg_polling_state *bps)
+{
+ int error, i, j;
+
+ if (!bps) {
+ D("polling is not configured");
+ return EFAULT;
+ }
+ bps->stopped = false;
+
+ for (i = 0; i < bps->ncpus; i++) {
+ struct nm_bdg_kthread *t = bps->kthreads + i;
+ error = nm_os_kctx_worker_start(t->nmk);
+ if (error) {
+ D("error in nm_kthread_start()");
+ goto cleanup;
+ }
+ }
+ return 0;
+
+cleanup:
+ for (j = 0; j < i; j++) {
+ struct nm_bdg_kthread *t = bps->kthreads + i;
+ nm_os_kctx_worker_stop(t->nmk);
+ }
+ bps->stopped = true;
+ return error;
+}
+
+static void
+nm_bdg_polling_stop_delete_kthreads(struct nm_bdg_polling_state *bps)
+{
+ int i;
+
+ if (!bps)
+ return;
+
+ for (i = 0; i < bps->ncpus; i++) {
+ struct nm_bdg_kthread *t = bps->kthreads + i;
+ nm_os_kctx_worker_stop(t->nmk);
+ nm_os_kctx_destroy(t->nmk);
+ }
+ bps->stopped = true;
+}
+
+static int
+get_polling_cfg(struct nmreq_vale_polling *req, struct netmap_adapter *na,
+ struct nm_bdg_polling_state *bps)
+{
+ unsigned int avail_cpus, core_from;
+ unsigned int qfirst, qlast;
+ uint32_t i = req->nr_first_cpu_id;
+ uint32_t req_cpus = req->nr_num_polling_cpus;
+
+ avail_cpus = nm_os_ncpus();
+
+ if (req_cpus == 0) {
+ D("req_cpus must be > 0");
+ return EINVAL;
+ } else if (req_cpus >= avail_cpus) {
+ D("Cannot use all the CPUs in the system");
+ return EINVAL;
+ }
+
+ if (req->nr_mode == NETMAP_POLLING_MODE_MULTI_CPU) {
+ /* Use a separate core for each ring. If nr_num_polling_cpus>1
+ * more consecutive rings are polled.
+ * For example, if nr_first_cpu_id=2 and nr_num_polling_cpus=2,
+ * ring 2 and 3 are polled by core 2 and 3, respectively. */
+ if (i + req_cpus > nma_get_nrings(na, NR_RX)) {
+ D("Rings %u-%u not in range (have %d rings)",
+ i, i + req_cpus, nma_get_nrings(na, NR_RX));
+ return EINVAL;
+ }
+ qfirst = i;
+ qlast = qfirst + req_cpus;
+ core_from = qfirst;
+
+ } else if (req->nr_mode == NETMAP_POLLING_MODE_SINGLE_CPU) {
+ /* Poll all the rings using a core specified by nr_first_cpu_id.
+ * the number of cores must be 1. */
+ if (req_cpus != 1) {
+ D("ncpus must be 1 for NETMAP_POLLING_MODE_SINGLE_CPU "
+ "(was %d)", req_cpus);
+ return EINVAL;
+ }
+ qfirst = 0;
+ qlast = nma_get_nrings(na, NR_RX);
+ core_from = i;
+ } else {
+ D("Invalid polling mode");
+ return EINVAL;
+ }
+
+ bps->mode = req->nr_mode;
+ bps->qfirst = qfirst;
+ bps->qlast = qlast;
+ bps->cpu_from = core_from;
+ bps->ncpus = req_cpus;
+ D("%s qfirst %u qlast %u cpu_from %u ncpus %u",
+ req->nr_mode == NETMAP_POLLING_MODE_MULTI_CPU ?
+ "MULTI" : "SINGLE",
+ qfirst, qlast, core_from, req_cpus);
+ return 0;
+}
+
+static int
+nm_bdg_ctl_polling_start(struct nmreq_vale_polling *req, struct netmap_adapter *na)
+{
+ struct nm_bdg_polling_state *bps;
+ struct netmap_bwrap_adapter *bna;
+ int error;
+
+ bna = (struct netmap_bwrap_adapter *)na;
+ if (bna->na_polling_state) {
+ D("ERROR adapter already in polling mode");
+ return EFAULT;
+ }
+
+ bps = nm_os_malloc(sizeof(*bps));
+ if (!bps)
+ return ENOMEM;
+ bps->configured = false;
+ bps->stopped = true;
+
+ if (get_polling_cfg(req, na, bps)) {
+ nm_os_free(bps);
+ return EINVAL;
+ }
+
+ if (nm_bdg_create_kthreads(bps)) {
+ nm_os_free(bps);
+ return EFAULT;
+ }
+
+ bps->configured = true;
+ bna->na_polling_state = bps;
+ bps->bna = bna;
+
+ /* disable interrupts if possible */
+ nma_intr_enable(bna->hwna, 0);
+ /* start kthread now */
+ error = nm_bdg_polling_start_kthreads(bps);
+ if (error) {
+ D("ERROR nm_bdg_polling_start_kthread()");
+ nm_os_free(bps->kthreads);
+ nm_os_free(bps);
+ bna->na_polling_state = NULL;
+ nma_intr_enable(bna->hwna, 1);
+ }
+ return error;
+}
+
+/* Process NETMAP_BDG_POLLING_OFF: stop and delete the polling kthreads,
+ * release the polling state and re-enable NIC interrupts.
+ * Returns 0, or EFAULT if the adapter was not in polling mode.
+ * Called with NMG_LOCK held (see nm_bdg_polling()).
+ */
+static int
+nm_bdg_ctl_polling_stop(struct netmap_adapter *na)
+{
+	struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter *)na;
+	struct nm_bdg_polling_state *bps;
+
+	if (!bna->na_polling_state) {
+		D("ERROR adapter is not in polling mode");
+		return EFAULT;
+	}
+	bps = bna->na_polling_state;
+	nm_bdg_polling_stop_delete_kthreads(bna->na_polling_state);
+	bps->configured = false;
+	nm_os_free(bps);
+	bna->na_polling_state = NULL;
+	/* reenable interrupts */
+	nma_intr_enable(bna->hwna, 1);
+	return 0;
+}
+
+/* Entry point for NETMAP_BDG_POLLING_ON/OFF requests: look up the VALE
+ * port named in the request header and dispatch to the start/stop
+ * handler. Only bwrap ports (NICs attached to a switch) support polling.
+ * On POLLING_ON success an extra adapter reference is kept so the port
+ * cannot disappear while polled; it is dropped on POLLING_OFF.
+ */
+int
+nm_bdg_polling(struct nmreq_header *hdr)
+{
+	struct nmreq_vale_polling *req =
+		(struct nmreq_vale_polling *)(uintptr_t)hdr->nr_body;
+	struct netmap_adapter *na = NULL;
+	int error = 0;
+
+	NMG_LOCK();
+	error = netmap_get_vale_na(hdr, &na, NULL, /*create=*/0);
+	if (na && !error) {
+		if (!nm_is_bwrap(na)) {
+			error = EOPNOTSUPP;
+		} else if (hdr->nr_reqtype == NETMAP_BDG_POLLING_ON) {
+			error = nm_bdg_ctl_polling_start(req, na);
+			if (!error)
+				netmap_adapter_get(na);
+		} else {
+			error = nm_bdg_ctl_polling_stop(na);
+			if (!error)
+				netmap_adapter_put(na);
+		}
+		/* drop the lookup reference from netmap_get_vale_na() */
+		netmap_adapter_put(na);
+	} else if (!na && !error) {
+		/* Not VALE port. */
+		error = EINVAL;
+	}
+	NMG_UNLOCK();
+
+	return error;
+}
+
+/* Process NETMAP_REQ_VALE_LIST.
+ * Two modes, selected by whether hdr->nr_name is empty:
+ * - name given: return the bridge index and port index of that port;
+ * - name empty: return the first non-empty port starting from
+ *   (nr_bridge_idx, nr_port_idx), writing its name back into the header.
+ */
+int
+netmap_bdg_list(struct nmreq_header *hdr)
+{
+	struct nmreq_vale_list *req =
+		(struct nmreq_vale_list *)(uintptr_t)hdr->nr_body;
+	int namelen = strlen(hdr->nr_name);
+	struct nm_bridge *b, *bridges;
+	struct netmap_vp_adapter *vpna;
+	int error = 0, i, j;
+	u_int num_bridges;
+
+	netmap_bns_getbridges(&bridges, &num_bridges);
+
+	/* this is used to enumerate bridges and ports */
+	if (namelen) { /* look up indexes of bridge and port */
+		/* name must start with the "vale" prefix */
+		if (strncmp(hdr->nr_name, NM_BDG_NAME,
+			    strlen(NM_BDG_NAME))) {
+			return EINVAL;
+		}
+		NMG_LOCK();
+		b = nm_find_bridge(hdr->nr_name, 0 /* don't create */, NULL);
+		if (!b) {
+			NMG_UNLOCK();
+			return ENOENT;
+		}
+
+		req->nr_bridge_idx = b - bridges; /* bridge index */
+		req->nr_port_idx = NM_BDG_NOPORT;
+		for (j = 0; j < b->bdg_active_ports; j++) {
+			i = b->bdg_port_index[j];
+			vpna = b->bdg_ports[i];
+			if (vpna == NULL) {
+				D("This should not happen");
+				continue;
+			}
+			/* the former and the latter identify a
+			 * virtual port and a NIC, respectively
+			 */
+			if (!strcmp(vpna->up.name, hdr->nr_name)) {
+				req->nr_port_idx = i; /* port index */
+				break;
+			}
+		}
+		NMG_UNLOCK();
+	} else {
+		/* return the first non-empty entry starting from
+		 * bridge nr_arg1 and port nr_arg2.
+		 *
+		 * Users can detect the end of the same bridge by
+		 * seeing the new and old value of nr_arg1, and can
+		 * detect the end of all the bridge by error != 0
+		 */
+		i = req->nr_bridge_idx;
+		j = req->nr_port_idx;
+
+		NMG_LOCK();
+		for (error = ENOENT; i < NM_BRIDGES; i++) {
+			b = bridges + i;
+			for ( ; j < NM_BDG_MAXPORTS; j++) {
+				if (b->bdg_ports[j] == NULL)
+					continue;
+				vpna = b->bdg_ports[j];
+				/* write back the VALE switch name */
+				/* NOTE(review): strncpy does not NUL-terminate
+				 * when the name fills IFNAMSIZ -- confirm port
+				 * names are always strictly shorter. */
+				strncpy(hdr->nr_name, vpna->up.name,
+					(size_t)IFNAMSIZ);
+				error = 0;
+				goto out;
+			}
+			j = 0; /* following bridges scan from 0 */
+		}
+	out:
+		req->nr_bridge_idx = i;
+		req->nr_port_idx = j;
+		NMG_UNLOCK();
+	}
+
+	return error;
+}
+
+/* Called by external kernel modules (e.g., Openvswitch).
+ * to set configure/lookup/dtor functions of a VALE instance.
+ * Register callbacks to the given bridge. 'name' may be just
+ * bridge's name (including ':' if it is not just NM_BDG_NAME).
+ *
+ * Called without NMG_LOCK.
+ */
+
+/* Install (or, with bdg_ops == NULL, reset) the callback table and
+ * private data of an existing bridge, after validating the caller's
+ * authentication token. Returns 0, ENXIO if the bridge does not exist,
+ * or EACCES on a bad token. Takes NMG_LOCK and the bridge write lock.
+ */
+int
+netmap_bdg_regops(const char *name, struct netmap_bdg_ops *bdg_ops, void *private_data, void *auth_token)
+{
+	struct nm_bridge *b;
+	int error = 0;
+
+	NMG_LOCK();
+	b = nm_find_bridge(name, 0 /* don't create */, NULL);
+	if (!b) {
+		error = ENXIO;
+		goto unlock_regops;
+	}
+	if (!nm_bdg_valid_auth_token(b, auth_token)) {
+		error = EACCES;
+		goto unlock_regops;
+	}
+
+	BDG_WLOCK(b);
+	if (!bdg_ops) {
+		/* resetting the bridge: clear the forwarding table and
+		 * fall back to the default private data (the table itself) */
+		bzero(b->ht, sizeof(struct nm_hash_ent) * NM_BDG_HASH);
+		b->bdg_ops = NULL;
+		b->private_data = b->ht;
+	} else {
+		/* modifying the bridge */
+		b->private_data = private_data;
+		b->bdg_ops = bdg_ops;
+	}
+	BDG_WUNLOCK(b);
+
+unlock_regops:
+	NMG_UNLOCK();
+	return error;
+}
+
+
+/* Forward an nm_ifreq to the bridge's config callback, if any.
+ * Returns the callback's result, or EINVAL if the bridge does not
+ * exist or has no config callback installed. The callback is invoked
+ * under the bridge read lock but without NMG_LOCK.
+ */
+int
+netmap_bdg_config(struct nm_ifreq *nr)
+{
+	struct nm_bridge *b;
+	int error = EINVAL;
+
+	NMG_LOCK();
+	b = nm_find_bridge(nr->nifr_name, 0, NULL);
+	if (!b) {
+		NMG_UNLOCK();
+		return error;
+	}
+	NMG_UNLOCK();
+	/* Don't call config() with NMG_LOCK() held */
+	BDG_RLOCK(b);
+	/* bdg_ops may have been reset to NULL by netmap_bdg_regops() */
+	if (b->bdg_ops != NULL && b->bdg_ops->config != NULL)
+		error = b->bdg_ops->config(nr);
+	BDG_RUNLOCK(b);
+	return error;
+}
+
+
+/* nm_register callback for VALE ports.
+ * Turn netmap mode on/off for all rings with a pending mode change and
+ * toggle NAF_NETMAP_ON when the last/first file descriptor goes away or
+ * arrives. Always returns 0.
+ */
+int
+netmap_vp_reg(struct netmap_adapter *na, int onoff)
+{
+	struct netmap_vp_adapter *vpna =
+		(struct netmap_vp_adapter*)na;
+	enum txrx t;
+	int i;
+
+	/* persistent ports may be put in netmap mode
+	 * before being attached to a bridge
+	 */
+	if (vpna->na_bdg)
+		BDG_WLOCK(vpna->na_bdg);
+	if (onoff) {
+		for_rx_tx(t) {
+			for (i = 0; i < netmap_real_rings(na, t); i++) {
+				struct netmap_kring *kring = NMR(na, t)[i];
+
+				if (nm_kring_pending_on(kring))
+					kring->nr_mode = NKR_NETMAP_ON;
+			}
+		}
+		if (na->active_fds == 0)
+			na->na_flags |= NAF_NETMAP_ON;
+		/* XXX on FreeBSD, persistent VALE ports should also
+		 * toggle IFCAP_NETMAP in na->ifp (2014-03-16)
+		 */
+	} else {
+		if (na->active_fds == 0)
+			na->na_flags &= ~NAF_NETMAP_ON;
+		for_rx_tx(t) {
+			for (i = 0; i < netmap_real_rings(na, t); i++) {
+				struct netmap_kring *kring = NMR(na, t)[i];
+
+				if (nm_kring_pending_off(kring))
+					kring->nr_mode = NKR_NETMAP_OFF;
+			}
+		}
+	}
+	if (vpna->na_bdg)
+		BDG_WUNLOCK(vpna->na_bdg);
+	return 0;
+}
+
+
+/* rxsync code used by VALE ports nm_rxsync callback and also
+ * internally by the brwap
+ */
+/* Body of the VALE rx sync, to be run with the queue lock held.
+ * The switch has already placed packets in the kring, so the "import"
+ * phase is a no-op; the function only advances nr_hwcur past the slots
+ * released by userspace, sanity-checking each buffer index.
+ * Returns 0, or the result of netmap_ring_reinit() on a corrupt head.
+ */
+static int
+netmap_vp_rxsync_locked(struct netmap_kring *kring, int flags)
+{
+	struct netmap_adapter *na = kring->na;
+	struct netmap_ring *ring = kring->ring;
+	u_int nm_i, lim = kring->nkr_num_slots - 1;
+	u_int head = kring->rhead;
+	int n;
+
+	if (head > lim) {
+		D("ouch dangerous reset!!!");
+		n = netmap_ring_reinit(kring);
+		goto done;
+	}
+
+	/* First part, import newly received packets. */
+	/* actually nothing to do here, they are already in the kring */
+
+	/* Second part, skip past packets that userspace has released. */
+	nm_i = kring->nr_hwcur;
+	if (nm_i != head) {
+		/* consistency check, but nothing really important here */
+		for (n = 0; likely(nm_i != head); n++) {
+			struct netmap_slot *slot = &ring->slot[nm_i];
+			void *addr = NMB(na, slot);
+
+			/* NMB() returns the base buffer for out-of-range indices */
+			if (addr == NETMAP_BUF_BASE(kring->na)) { /* bad buf */
+				D("bad buffer index %d, ignore ?",
+					slot->buf_idx);
+			}
+			slot->flags &= ~NS_BUF_CHANGED;
+			nm_i = nm_next(nm_i, lim);
+		}
+		kring->nr_hwcur = head;
+	}
+
+	n = 0;
+done:
+	return n;
+}
+
+/*
+ * nm_rxsync callback for VALE ports
+ * user process reading from a VALE switch.
+ * Already protected against concurrent calls from userspace,
+ * but we must acquire the queue's lock to protect against
+ * writers on the same queue.
+ */
+/* Locked wrapper around netmap_vp_rxsync_locked(): serializes against
+ * writers (the switch) on the same queue. Returns its result. */
+int
+netmap_vp_rxsync(struct netmap_kring *kring, int flags)
+{
+	int n;
+
+	mtx_lock(&kring->q_lock);
+	n = netmap_vp_rxsync_locked(kring, flags);
+	mtx_unlock(&kring->q_lock);
+	return n;
+}
+
+/* Dispatch bwrap creation to the switch implementation's own
+ * bwrap_attach callback (e.g. the VALE one). */
+int
+netmap_bwrap_attach(const char *nr_name, struct netmap_adapter *hwna,
+		struct netmap_bdg_ops *ops)
+{
+	return ops->bwrap_attach(nr_name, hwna);
+}
+
+
+/* Bridge wrapper code (bwrap).
+ * This is used to connect a non-VALE-port netmap_adapter (hwna) to a
+ * VALE switch.
+ * The main task is to swap the meaning of tx and rx rings to match the
+ * expectations of the VALE switch code (see nm_bdg_flush).
+ *
+ * The bwrap works by interposing a netmap_bwrap_adapter between the
+ * rest of the system and the hwna. The netmap_bwrap_adapter looks like
+ * a netmap_vp_adapter to the rest the system, but, internally, it
+ * translates all callbacks to what the hwna expects.
+ *
+ * Note that we have to intercept callbacks coming from two sides:
+ *
+ * - callbacks coming from the netmap module are intercepted by
+ * passing around the netmap_bwrap_adapter instead of the hwna
+ *
+ * - callbacks coming from outside of the netmap module only know
+ * about the hwna. This, however, only happens in interrupt
+ * handlers, where only the hwna->nm_notify callback is called.
+ * What the bwrap does is to overwrite the hwna->nm_notify callback
+ * with its own netmap_bwrap_intr_notify.
+ * XXX This assumes that the hwna->nm_notify callback was the
+ * standard netmap_notify(), as it is the case for nic adapters.
+ * Any additional action performed by hwna->nm_notify will not be
+ * performed by netmap_bwrap_intr_notify.
+ *
+ * Additionally, the bwrap can optionally attach the host rings pair
+ * of the wrapped adapter to a different port of the switch.
+ */
+
+
+/* Destructor for a bwrap adapter: detach the port (and its host-stack
+ * companion, if attached) from the bridge, release the host memory
+ * allocator reference, undo the hwna cross-links set up at attach time,
+ * and drop the reference on the wrapped hardware adapter.
+ */
+static void
+netmap_bwrap_dtor(struct netmap_adapter *na)
+{
+	struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter*)na;
+	struct netmap_adapter *hwna = bna->hwna;
+	struct nm_bridge *b = bna->up.na_bdg,
+		*bh = bna->host.na_bdg;
+
+	if (bna->host.up.nm_mem)
+		netmap_mem_put(bna->host.up.nm_mem);
+
+	if (b) {
+		netmap_bdg_detach_common(b, bna->up.bdg_port,
+			    (bh ? bna->host.bdg_port : -1));
+	}
+
+	ND("na %p", na);
+	na->ifp = NULL;
+	bna->host.up.ifp = NULL;
+	/* restore the hwna state saved by netmap_bwrap_attach_common() */
+	hwna->na_vp = bna->saved_na_vp;
+	hwna->na_hostvp = NULL;
+	hwna->na_private = NULL;
+	hwna->na_flags &= ~NAF_BUSY;
+	netmap_adapter_put(hwna);
+
+}
+
+
+/*
+ * Intr callback for NICs connected to a bridge.
+ * Simply ignore tx interrupts (maybe we could try to recover space ?)
+ * and pass received packets from nic to the bridge.
+ *
+ * XXX TODO check locking: this is called from the interrupt
+ * handler so we should make sure that the interface is not
+ * disconnected while passing down an interrupt.
+ *
+ * Note, no user process can access this NIC or the host stack.
+ * The only part of the ring that is significant are the slots,
+ * and head/cur/tail are set from the kring as needed
+ * (part as a receive ring, part as a transmit ring).
+ *
+ * callback that overwrites the hwna notify callback.
+ * Packets come from the outside or from the host stack and are put on an
+ * hwna rx ring.
+ * The bridge wrapper then sends the packets through the bridge.
+ */
+/* Interrupt-side notify callback installed on the hwna rx rings while
+ * the NIC is attached to a switch (see netmap_bwrap_reg()).
+ * Sequence: rxsync the hw ring to fetch new packets, push them through
+ * the bridge via the paired bwrap tx kring, then rxsync again to
+ * release the buffers. Returns NM_IRQ_RESCHED if more packets arrived
+ * during the flush, NM_IRQ_COMPLETED otherwise, or an errno value.
+ */
+static int
+netmap_bwrap_intr_notify(struct netmap_kring *kring, int flags)
+{
+	struct netmap_adapter *na = kring->na;
+	struct netmap_bwrap_adapter *bna = na->na_private;
+	struct netmap_kring *bkring;
+	struct netmap_vp_adapter *vpna = &bna->up;
+	u_int ring_nr = kring->ring_id;
+	int ret = NM_IRQ_COMPLETED;
+	int error;
+
+	if (netmap_verbose)
+	    D("%s %s 0x%x", na->name, kring->name, flags);
+
+	/* the bwrap tx kring paired with this hw rx kring */
+	bkring = vpna->up.tx_rings[ring_nr];
+
+	/* make sure the ring is not disabled */
+	if (nm_kr_tryget(kring, 0 /* can't sleep */, NULL)) {
+		return EIO;
+	}
+
+	if (netmap_verbose)
+	    D("%s head %d cur %d tail %d",  na->name,
+		kring->rhead, kring->rcur, kring->rtail);
+
+	/* simulate a user wakeup on the rx ring
+	 * fetch packets that have arrived.
+	 */
+	error = kring->nm_sync(kring, 0);
+	if (error)
+		goto put_out;
+	if (kring->nr_hwcur == kring->nr_hwtail) {
+		if (netmap_verbose)
+			D("how strange, interrupt with no packets on %s",
+			    na->name);
+		goto put_out;
+	}
+
+	/* new packets are kring->rcur to kring->nr_hwtail, and the bkring
+	 * had hwcur == bkring->rhead. So advance bkring->rhead to kring->nr_hwtail
+	 * to push all packets out.
+	 */
+	bkring->rhead = bkring->rcur = kring->nr_hwtail;
+
+	bkring->nm_sync(bkring, flags);
+
+	/* mark all buffers as released on this ring */
+	kring->rhead = kring->rcur = kring->rtail = kring->nr_hwtail;
+	/* another call to actually release the buffers */
+	error = kring->nm_sync(kring, 0);
+
+	/* The second rxsync may have further advanced hwtail. If this happens,
+	 *  return NM_IRQ_RESCHED, otherwise just return NM_IRQ_COMPLETED. */
+	if (kring->rcur != kring->nr_hwtail) {
+		ret = NM_IRQ_RESCHED;
+	}
+put_out:
+	nm_kr_put(kring);
+
+	return error ? error : ret;
+}
+
+
+/* nm_register callback for bwrap.
+ * Propagates the register request to the wrapped hwna (with tx/rx
+ * swapped), synchronizes ring modes in both directions, and, on enter,
+ * hijacks the hwna rx-ring notify callbacks with
+ * netmap_bwrap_intr_notify() so NIC interrupts feed the switch.
+ * On exit everything is restored and ring ownership returns to the hwna.
+ */
+int
+netmap_bwrap_reg(struct netmap_adapter *na, int onoff)
+{
+	struct netmap_bwrap_adapter *bna =
+		(struct netmap_bwrap_adapter *)na;
+	struct netmap_adapter *hwna = bna->hwna;
+	struct netmap_vp_adapter *hostna = &bna->host;
+	int error, i;
+	enum txrx t;
+
+	ND("%s %s", na->name, onoff ? "on" : "off");
+
+	if (onoff) {
+		/* netmap_do_regif has been called on the bwrap na.
+		 * We need to pass the information about the
+		 * memory allocator down to the hwna before
+		 * putting it in netmap mode
+		 */
+		hwna->na_lut = na->na_lut;
+
+		if (hostna->na_bdg) {
+			/* if the host rings have been attached to switch,
+			 * we need to copy the memory allocator information
+			 * in the hostna also
+			 */
+			hostna->up.na_lut = na->na_lut;
+		}
+
+	}
+
+	/* pass down the pending ring state information */
+	for_rx_tx(t) {
+		for (i = 0; i < netmap_all_rings(na, t); i++) {
+			/* tx/rx are swapped between bwrap and hwna */
+			NMR(hwna, nm_txrx_swap(t))[i]->nr_pending_mode =
+				NMR(na, t)[i]->nr_pending_mode;
+		}
+	}
+
+	/* forward the request to the hwna */
+	error = hwna->nm_register(hwna, onoff);
+	if (error)
+		return error;
+
+	/* copy up the current ring state information */
+	for_rx_tx(t) {
+		for (i = 0; i < netmap_all_rings(na, t); i++) {
+			struct netmap_kring *kring = NMR(hwna, nm_txrx_swap(t))[i];
+			NMR(na, t)[i]->nr_mode = kring->nr_mode;
+		}
+	}
+
+	/* impersonate a netmap_vp_adapter */
+	netmap_vp_reg(na, onoff);
+	if (hostna->na_bdg)
+		netmap_vp_reg(&hostna->up, onoff);
+
+	if (onoff) {
+		u_int i;
+		/* intercept the hwna nm_nofify callback on the hw rings */
+		for (i = 0; i < hwna->num_rx_rings; i++) {
+			hwna->rx_rings[i]->save_notify = hwna->rx_rings[i]->nm_notify;
+			hwna->rx_rings[i]->nm_notify = netmap_bwrap_intr_notify;
+		}
+		i = hwna->num_rx_rings; /* for safety */
+		/* save the host ring notify unconditionally */
+		for (; i < netmap_real_rings(hwna, NR_RX); i++) {
+			hwna->rx_rings[i]->save_notify =
+				hwna->rx_rings[i]->nm_notify;
+			if (hostna->na_bdg) {
+				/* also intercept the host ring notify */
+				hwna->rx_rings[i]->nm_notify =
+					netmap_bwrap_intr_notify;
+				na->tx_rings[i]->nm_sync = na->nm_txsync;
+			}
+		}
+		if (na->active_fds == 0)
+			na->na_flags |= NAF_NETMAP_ON;
+	} else {
+		u_int i;
+
+		if (na->active_fds == 0)
+			na->na_flags &= ~NAF_NETMAP_ON;
+
+		/* reset all notify callbacks (including host ring) */
+		for (i = 0; i < netmap_all_rings(hwna, NR_RX); i++) {
+			hwna->rx_rings[i]->nm_notify =
+				hwna->rx_rings[i]->save_notify;
+			hwna->rx_rings[i]->save_notify = NULL;
+		}
+		hwna->na_lut.lut = NULL;
+		hwna->na_lut.plut = NULL;
+		hwna->na_lut.objtotal = 0;
+		hwna->na_lut.objsize = 0;
+
+		/* pass ownership of the netmap rings to the hwna */
+		for_rx_tx(t) {
+			for (i = 0; i < netmap_all_rings(na, t); i++) {
+				NMR(na, t)[i]->ring = NULL;
+			}
+		}
+		/* reset the number of host rings to default */
+		for_rx_tx(t) {
+			nma_set_host_nrings(hwna, t, 1);
+		}
+
+	}
+
+	return 0;
+}
+
+/* nm_config callback for bwrap */
+static int
+netmap_bwrap_config(struct netmap_adapter *na, struct nm_config_info *info)
+{
+ struct netmap_bwrap_adapter *bna =
+ (struct netmap_bwrap_adapter *)na;
+ struct netmap_adapter *hwna = bna->hwna;
+ int error;
+
+ /* Forward the request to the hwna. It may happen that nobody
+ * registered hwna yet, so netmap_mem_get_lut() may have not
+ * been called yet. */
+ error = netmap_mem_get_lut(hwna->nm_mem, &hwna->na_lut);
+ if (error)
+ return error;
+ netmap_update_config(hwna);
+ /* swap the results and propagate */
+ info->num_tx_rings = hwna->num_rx_rings;
+ info->num_tx_descs = hwna->num_rx_desc;
+ info->num_rx_rings = hwna->num_tx_rings;
+ info->num_rx_descs = hwna->num_tx_desc;
+ info->rx_buf_maxsize = hwna->rx_buf_maxsize;
+
+ return 0;
+}
+
+
+/* nm_krings_create callback for bwrap */
+/* Common part of the nm_krings_create callback for bwrap adapters.
+ * Creates the hwna krings and rings, takes a user reference on each
+ * hwna kring, and cross-links the bwrap netmap rings to the hwna ones
+ * with tx/rx swapped. If host rings are present, points the hostna
+ * krings back at the hostna. On failure the hwna krings are torn down.
+ */
+int
+netmap_bwrap_krings_create_common(struct netmap_adapter *na)
+{
+	struct netmap_bwrap_adapter *bna =
+		(struct netmap_bwrap_adapter *)na;
+	struct netmap_adapter *hwna = bna->hwna;
+	struct netmap_adapter *hostna = &bna->host.up;
+	int i, error = 0;
+	enum txrx t;
+
+	/* also create the hwna krings */
+	error = hwna->nm_krings_create(hwna);
+	if (error) {
+		return error;
+	}
+
+	/* increment the usage counter for all the hwna krings */
+	for_rx_tx(t) {
+		for (i = 0; i < netmap_all_rings(hwna, t); i++) {
+			NMR(hwna, t)[i]->users++;
+		}
+	}
+
+	/* now create the actual rings */
+	error = netmap_mem_rings_create(hwna);
+	if (error) {
+		goto err_dec_users;
+	}
+
+	/* cross-link the netmap rings
+	 * The original number of rings comes from hwna,
+	 * rx rings on one side equals tx rings on the other.
+	 */
+	for_rx_tx(t) {
+		enum txrx r = nm_txrx_swap(t); /* swap NR_TX <-> NR_RX */
+		for (i = 0; i < netmap_all_rings(hwna, r); i++) {
+			NMR(na, t)[i]->nkr_num_slots = NMR(hwna, r)[i]->nkr_num_slots;
+			NMR(na, t)[i]->ring = NMR(hwna, r)[i]->ring;
+		}
+	}
+
+	if (na->na_flags & NAF_HOST_RINGS) {
+		/* the hostna rings are the host rings of the bwrap.
+		 * The corresponding krings must point back to the
+		 * hostna
+		 */
+		hostna->tx_rings = &na->tx_rings[na->num_tx_rings];
+		hostna->rx_rings = &na->rx_rings[na->num_rx_rings];
+		for_rx_tx(t) {
+			for (i = 0; i < nma_get_nrings(hostna, t); i++) {
+				NMR(hostna, t)[i]->na = hostna;
+			}
+		}
+	}
+
+	return 0;
+
+err_dec_users:
+	/* roll back the user references taken above */
+	for_rx_tx(t) {
+		for (i = 0; i < netmap_all_rings(hwna, t); i++) {
+			NMR(hwna, t)[i]->users--;
+		}
+	}
+	hwna->nm_krings_delete(hwna);
+	return error;
+}
+
+
+/* Common part of the nm_krings_delete callback for bwrap adapters:
+ * drop the user references taken at creation time, then delete the
+ * hwna rings and krings. */
+void
+netmap_bwrap_krings_delete_common(struct netmap_adapter *na)
+{
+	struct netmap_bwrap_adapter *bna =
+		(struct netmap_bwrap_adapter *)na;
+	struct netmap_adapter *hwna = bna->hwna;
+	enum txrx t;
+	int i;
+
+	ND("%s", na->name);
+
+	/* decrement the usage counter for all the hwna krings */
+	for_rx_tx(t) {
+		for (i = 0; i < netmap_all_rings(hwna, t); i++) {
+			NMR(hwna, t)[i]->users--;
+		}
+	}
+
+	/* delete any netmap rings that are no longer needed */
+	netmap_mem_rings_delete(hwna);
+	hwna->nm_krings_delete(hwna);
+}
+
+
+/* notify method for the bridge-->hwna direction */
+/* nm_notify callback for the bridge-->hwna direction: push the packets
+ * that the switch queued on this bwrap rx kring out through the paired
+ * hwna tx kring, then reclaim the transmitted buffers.
+ * Returns NM_IRQ_COMPLETED on success or an errno value.
+ * Fix: the ND() traces referenced the undeclared identifiers 'ring' and
+ * 'hw_ring'; this only compiled because ND() discards its arguments and
+ * would break as soon as the trace is enabled. They now use the kring's
+ * own shadow head/cur/tail values.
+ */
+int
+netmap_bwrap_notify(struct netmap_kring *kring, int flags)
+{
+	struct netmap_adapter *na = kring->na;
+	struct netmap_bwrap_adapter *bna = na->na_private;
+	struct netmap_adapter *hwna = bna->hwna;
+	u_int ring_n = kring->ring_id;
+	u_int lim = kring->nkr_num_slots - 1;
+	struct netmap_kring *hw_kring;
+	int error;
+
+	ND("%s: na %s hwna %s",
+			(kring ? kring->name : "NULL!"),
+			(na ? na->name : "NULL!"),
+			(hwna ? hwna->name : "NULL!"));
+	hw_kring = hwna->tx_rings[ring_n];
+
+	if (nm_kr_tryget(hw_kring, 0, NULL)) {
+		return ENXIO;
+	}
+
+	/* first step: simulate a user wakeup on the rx ring */
+	netmap_vp_rxsync(kring, flags);
+	ND("%s[%d] PRE rx(c%3d t%3d l%3d) ring(h%3d c%3d t%3d) tx(c%3d ht%3d t%3d)",
+		na->name, ring_n,
+		kring->nr_hwcur, kring->nr_hwtail, kring->nkr_hwlease,
+		kring->rhead, kring->rcur, kring->rtail,
+		hw_kring->nr_hwcur, hw_kring->nr_hwtail, hw_kring->rtail);
+	/* second step: the new packets are sent on the tx ring
+	 * (which is actually the same ring)
+	 */
+	hw_kring->rhead = hw_kring->rcur = kring->nr_hwtail;
+	error = hw_kring->nm_sync(hw_kring, flags);
+	if (error)
+		goto put_out;
+
+	/* third step: now we are back the rx ring */
+	/* claim ownership on all hw owned bufs */
+	kring->rhead = kring->rcur = nm_next(hw_kring->nr_hwtail, lim); /* skip past reserved slot */
+
+	/* fourth step: the user goes to sleep again, causing another rxsync */
+	netmap_vp_rxsync(kring, flags);
+	ND("%s[%d] PST rx(c%3d t%3d l%3d) ring(h%3d c%3d t%3d) tx(c%3d ht%3d t%3d)",
+		na->name, ring_n,
+		kring->nr_hwcur, kring->nr_hwtail, kring->nkr_hwlease,
+		kring->rhead, kring->rcur, kring->rtail,
+		hw_kring->nr_hwcur, hw_kring->nr_hwtail, hw_kring->rtail);
+put_out:
+	nm_kr_put(hw_kring);
+
+	return error ? error : NM_IRQ_COMPLETED;
+}
+
+
+/* nm_bdg_ctl callback for the bwrap.
+ * Called on bridge-attach and detach, as an effect of vale-ctl -[ahd].
+ * On attach, it needs to provide a fake netmap_priv_d structure and
+ * perform a netmap_do_regif() on the bwrap. This will put both the
+ * bwrap and the hwna in netmap mode, with the netmap rings shared
+ * and cross linked. Moroever, it will start intercepting interrupts
+ * directed to hwna.
+ */
+/* nm_bdg_ctl callback for the bwrap (see the comment block above).
+ * ATTACH: validate the requested ring mode (all NIC rings, optionally
+ * plus host stack), then register the bwrap through a kernel-owned
+ * netmap_priv_d, putting both bwrap and hwna in netmap mode.
+ * DETACH: delete that priv and clear NAF_BUSY.
+ */
+static int
+netmap_bwrap_bdg_ctl(struct nmreq_header *hdr, struct netmap_adapter *na)
+{
+	struct netmap_priv_d *npriv;
+	struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter*)na;
+	int error = 0;
+
+	if (hdr->nr_reqtype == NETMAP_REQ_VALE_ATTACH) {
+		struct nmreq_vale_attach *req =
+			(struct nmreq_vale_attach *)(uintptr_t)hdr->nr_body;
+		if (req->reg.nr_ringid != 0 ||
+			(req->reg.nr_mode != NR_REG_ALL_NIC &&
+				req->reg.nr_mode != NR_REG_NIC_SW)) {
+			/* We only support attaching all the NIC rings
+			 * and/or the host stack. */
+			return EINVAL;
+		}
+		if (NETMAP_OWNED_BY_ANY(na)) {
+			return EBUSY;
+		}
+		if (bna->na_kpriv) {
+			/* nothing to do */
+			return 0;
+		}
+		npriv = netmap_priv_new();
+		if (npriv == NULL)
+			return ENOMEM;
+		npriv->np_ifp = na->ifp; /* let the priv destructor release the ref */
+		error = netmap_do_regif(npriv, na, req->reg.nr_mode,
+					req->reg.nr_ringid, req->reg.nr_flags);
+		if (error) {
+			netmap_priv_delete(npriv);
+			return error;
+		}
+		bna->na_kpriv = npriv;
+		/* mark the port as owned so NIOCREGIF callers are refused */
+		na->na_flags |= NAF_BUSY;
+	} else {
+		if (na->active_fds == 0) /* not registered */
+			return EINVAL;
+		netmap_priv_delete(bna->na_kpriv);
+		bna->na_kpriv = NULL;
+		na->na_flags &= ~NAF_BUSY;
+	}
+
+	return error;
+}
+
+/* attach a bridge wrapper to the 'real' device */
+int
+netmap_bwrap_attach_common(struct netmap_adapter *na,
+ struct netmap_adapter *hwna)
+{
+ struct netmap_bwrap_adapter *bna;
+ struct netmap_adapter *hostna = NULL;
+ int error = 0;
+ enum txrx t;
+
+ /* make sure the NIC is not already in use */
+ if (NETMAP_OWNED_BY_ANY(hwna)) {
+ D("NIC %s busy, cannot attach to bridge", hwna->name);
+ return EBUSY;
+ }
+
+ bna = (struct netmap_bwrap_adapter *)na;
+ /* make bwrap ifp point to the real ifp */
+ na->ifp = hwna->ifp;
+ if_ref(na->ifp);
+ na->na_private = bna;
+ /* fill the ring data for the bwrap adapter with rx/tx meanings
+ * swapped. The real cross-linking will be done during register,
+ * when all the krings will have been created.
+ */
+ for_rx_tx(t) {
+ enum txrx r = nm_txrx_swap(t); /* swap NR_TX <-> NR_RX */
+ nma_set_nrings(na, t, nma_get_nrings(hwna, r));
+ nma_set_ndesc(na, t, nma_get_ndesc(hwna, r));
+ }
+ na->nm_dtor = netmap_bwrap_dtor;
+ na->nm_config = netmap_bwrap_config;
+ na->nm_bdg_ctl = netmap_bwrap_bdg_ctl;
+ na->pdev = hwna->pdev;
+ na->nm_mem = netmap_mem_get(hwna->nm_mem);
+ na->virt_hdr_len = hwna->virt_hdr_len;
+ na->rx_buf_maxsize = hwna->rx_buf_maxsize;
+
+ bna->hwna = hwna;
+ netmap_adapter_get(hwna);
+ hwna->na_private = bna; /* weak reference */
+ bna->saved_na_vp = hwna->na_vp;
+ hwna->na_vp = &bna->up;
+ bna->up.up.na_vp = &(bna->up);
+
+ if (hwna->na_flags & NAF_HOST_RINGS) {
+ if (hwna->na_flags & NAF_SW_ONLY)
+ na->na_flags |= NAF_SW_ONLY;
+ na->na_flags |= NAF_HOST_RINGS;
+ hostna = &bna->host.up;
+
+ /* limit the number of host rings to that of hw */
+ nm_bound_var(&hostna->num_tx_rings, 1, 1,
+ nma_get_nrings(hwna, NR_TX), NULL);
+ nm_bound_var(&hostna->num_rx_rings, 1, 1,
+ nma_get_nrings(hwna, NR_RX), NULL);
+
+ snprintf(hostna->name, sizeof(hostna->name), "%s^", na->name);
+ hostna->ifp = hwna->ifp;
+ for_rx_tx(t) {
+ enum txrx r = nm_txrx_swap(t);
+ u_int nr = nma_get_nrings(hostna, t);
+
+ nma_set_nrings(hostna, t, nr);
+ nma_set_host_nrings(na, t, nr);
+ if (nma_get_host_nrings(hwna, t) < nr) {
+ nma_set_host_nrings(hwna, t, nr);
+ }
+ nma_set_ndesc(hostna, t, nma_get_ndesc(hwna, r));
+ }
+ // hostna->nm_txsync = netmap_bwrap_host_txsync;
+ // hostna->nm_rxsync = netmap_bwrap_host_rxsync;
+ hostna->nm_mem = netmap_mem_get(na->nm_mem);
+ hostna->na_private = bna;
+ hostna->na_vp = &bna->up;
+ na->na_hostvp = hwna->na_hostvp =
+ hostna->na_hostvp = &bna->host;
+ hostna->na_flags = NAF_BUSY; /* prevent NIOCREGIF */
+ hostna->rx_buf_maxsize = hwna->rx_buf_maxsize;
+ }
+
+ ND("%s<->%s txr %d txd %d rxr %d rxd %d",
+ na->name, ifp->if_xname,
+ na->num_tx_rings, na->num_tx_desc,
+ na->num_rx_rings, na->num_rx_desc);
+
+ error = netmap_attach_common(na);
+ if (error) {
+ goto err_put;
+ }
+ hwna->na_flags |= NAF_BUSY;
+ return 0;
+
+err_put:
+ hwna->na_vp = hwna->na_hostvp = NULL;
+ netmap_adapter_put(hwna);
+ return error;
+
+}
+
+/* Allocate an array of n bridge descriptors and initialize their
+ * rw locks. Returns the array, or NULL on allocation failure. */
+struct nm_bridge *
+netmap_init_bridges2(u_int n)
+{
+	int i;
+	struct nm_bridge *b;
+
+	b = nm_os_malloc(sizeof(struct nm_bridge) * n);
+	if (b == NULL)
+		return NULL;
+	for (i = 0; i < n; i++)
+		BDG_RWINIT(&b[i]);
+	return b;
+}
+
+/* Destroy the rw locks of n bridge descriptors and free the array.
+ * Safe to call with b == NULL. */
+void
+netmap_uninit_bridges2(struct nm_bridge *b, u_int n)
+{
+	int i;
+
+	if (b == NULL)
+		return;
+
+	for (i = 0; i < n; i++)
+		BDG_RWDESTROY(&b[i]);
+	nm_os_free(b);
+}
+
+/* Module-init hook: set up the global bridge array (or, with network
+ * namespaces, register the per-namespace handler). Returns 0 or ENOMEM. */
+int
+netmap_init_bridges(void)
+{
+#ifdef CONFIG_NET_NS
+	return netmap_bns_register();
+#else
+	nm_bridges = netmap_init_bridges2(NM_BRIDGES);
+	if (nm_bridges == NULL)
+		return ENOMEM;
+	return 0;
+#endif
+}
+
+/* Module-exit hook: tear down what netmap_init_bridges() set up. */
+void
+netmap_uninit_bridges(void)
+{
+#ifdef CONFIG_NET_NS
+	netmap_bns_unregister();
+#else
+	netmap_uninit_bridges2(nm_bridges, NM_BRIDGES);
+#endif
+}
diff --git a/sys/dev/netmap/netmap_bdg.h b/sys/dev/netmap/netmap_bdg.h
new file mode 100644
index 000000000000..c804a2b72008
--- /dev/null
+++ b/sys/dev/netmap/netmap_bdg.h
@@ -0,0 +1,155 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (C) 2013-2018 Universita` di Pisa
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+#ifndef _NET_NETMAP_BDG_H_
+#define _NET_NETMAP_BDG_H_
+
+#if defined(__FreeBSD__)
+#define BDG_RWLOCK_T struct rwlock // struct rwlock
+
+#define BDG_RWINIT(b) \
+ rw_init_flags(&(b)->bdg_lock, "bdg lock", RW_NOWITNESS)
+#define BDG_WLOCK(b) rw_wlock(&(b)->bdg_lock)
+#define BDG_WUNLOCK(b) rw_wunlock(&(b)->bdg_lock)
+#define BDG_RLOCK(b) rw_rlock(&(b)->bdg_lock)
+#define BDG_RTRYLOCK(b) rw_try_rlock(&(b)->bdg_lock)
+#define BDG_RUNLOCK(b) rw_runlock(&(b)->bdg_lock)
+#define BDG_RWDESTROY(b) rw_destroy(&(b)->bdg_lock)
+
+#endif /* __FreeBSD__ */
+
+/* XXX Should go away after fixing find_bridge() - Michio */
+#define NM_BDG_HASH 1024 /* forwarding table entries */
+
+/* XXX revise this */
+/* One entry of the per-bridge MAC learning/forwarding table. */
+struct nm_hash_ent {
+	uint64_t mac;	/* the top 2 bytes are the epoch */
+	uint64_t ports;	/* destination port(s) -- encoding defined by the lookup fn, TODO confirm */
+};
+
+/* Default size for the Maximum Frame Size. */
+#define NM_BDG_MFS_DEFAULT 1514
+
+/*
+ * nm_bridge is a descriptor for a VALE switch.
+ * Interfaces for a bridge are all in bdg_ports[].
+ * The array has fixed size, an empty entry does not terminate
+ * the search, but lookups only occur on attach/detach so we
+ * don't mind if they are slow.
+ *
+ * The bridge is non blocking on the transmit ports: excess
+ * packets are dropped if there is no room on the output port.
+ *
+ * bdg_lock protects accesses to the bdg_ports array.
+ * This is a rw lock (or equivalent).
+ */
+#define NM_BDG_IFNAMSIZ IFNAMSIZ
+struct nm_bridge {
+	/* XXX what is the proper alignment/layout ? */
+	BDG_RWLOCK_T	bdg_lock;	/* protects bdg_ports */
+	int		bdg_namelen;	/* length of bdg_basename */
+	uint32_t	bdg_active_ports; /* number of entries used in bdg_port_index */
+	char		bdg_basename[NM_BDG_IFNAMSIZ];
+
+	/* Indexes of active ports (up to active_ports)
+	 * and all other remaining ports.
+	 */
+	uint32_t	bdg_port_index[NM_BDG_MAXPORTS];
+	/* used by netmap_bdg_detach_common() */
+	uint32_t	tmp_bdg_port_index[NM_BDG_MAXPORTS];
+
+	struct netmap_vp_adapter *bdg_ports[NM_BDG_MAXPORTS];
+
+	/*
+	 * Programmable lookup functions to figure out the destination port.
+	 * It returns either of an index of the destination port,
+	 * NM_BDG_BROADCAST to broadcast this packet, or NM_BDG_NOPORT not to
+	 * forward this packet.  ring_nr is the source ring index, and the
+	 * function may overwrite this value to forward this packet to a
+	 * different ring index.
+	 * The function is set by netmap_bdg_regops().
+	 */
+	struct netmap_bdg_ops *bdg_ops;
+
+	/*
+	 * Contains the data structure used by the bdg_ops.lookup function.
+	 * By default points to *ht which is allocated on attach and used by the default lookup
+	 * otherwise will point to the data structure received by netmap_bdg_regops().
+	 */
+	void *private_data;
+	struct nm_hash_ent *ht;
+
+	/* Currently used to specify if the bridge is still in use while empty and
+	 * if it has been put in exclusive mode by an external module, see netmap_bdg_regops()
+	 * and netmap_bdg_create().
+	 */
+#define NM_BDG_ACTIVE		1
+#define NM_BDG_EXCLUSIVE	2
+	uint8_t			bdg_flags;
+
+
+#ifdef CONFIG_NET_NS
+	/* owning network namespace (Linux builds only) */
+	struct net *ns;
+#endif /* CONFIG_NET_NS */
+};
+
+/* The bridge's hash-table pointer doubles as its authentication token
+ * (compared by nm_bdg_valid_auth_token() below). */
+static inline void *
+nm_bdg_get_auth_token(struct nm_bridge *b)
+{
+	return b->ht;
+}
+
+/* bridge not in exclusive mode ==> always valid
+ * bridge in exclusive mode (created through netmap_bdg_create()) ==> check authentication token
+ */
+/* Returns nonzero when the caller may modify the bridge: always for a
+ * non-exclusive bridge, otherwise only with the matching token. */
+static inline int
+nm_bdg_valid_auth_token(struct nm_bridge *b, void *auth_token)
+{
+	return !(b->bdg_flags & NM_BDG_EXCLUSIVE) || b->ht == auth_token;
+}
+
+/* Prototypes shared between the bridge core and the switch backends.
+ * (A duplicate declaration of netmap_vp_reg() has been removed.) */
+int netmap_get_bdg_na(struct nmreq_header *hdr, struct netmap_adapter **na,
+	struct netmap_mem_d *nmd, int create, struct netmap_bdg_ops *ops);
+
+struct nm_bridge *nm_find_bridge(const char *name, int create, struct netmap_bdg_ops *ops);
+int netmap_bdg_free(struct nm_bridge *b);
+void netmap_bdg_detach_common(struct nm_bridge *b, int hw, int sw);
+int netmap_vp_bdg_ctl(struct nmreq_header *hdr, struct netmap_adapter *na);
+int netmap_vp_reg(struct netmap_adapter *na, int onoff);
+int netmap_bwrap_reg(struct netmap_adapter *, int onoff);
+int netmap_vp_rxsync(struct netmap_kring *kring, int flags);
+int netmap_bwrap_notify(struct netmap_kring *kring, int flags);
+int netmap_bwrap_attach_common(struct netmap_adapter *na,
+	struct netmap_adapter *hwna);
+int netmap_bwrap_krings_create_common(struct netmap_adapter *na);
+void netmap_bwrap_krings_delete_common(struct netmap_adapter *na);
+#define NM_NEED_BWRAP (-2)
+#endif /* _NET_NETMAP_BDG_H_ */
+
diff --git a/sys/dev/netmap/netmap_freebsd.c b/sys/dev/netmap/netmap_freebsd.c
index b2882768297b..5472cfef99bf 100644
--- a/sys/dev/netmap/netmap_freebsd.c
+++ b/sys/dev/netmap/netmap_freebsd.c
@@ -270,11 +270,17 @@ nm_os_send_up(struct ifnet *ifp, struct mbuf *m, struct mbuf *prev)
}
int
-nm_os_mbuf_has_offld(struct mbuf *m)
+nm_os_mbuf_has_csum_offld(struct mbuf *m)
{
return m->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP | CSUM_SCTP |
CSUM_TCP_IPV6 | CSUM_UDP_IPV6 |
- CSUM_SCTP_IPV6 | CSUM_TSO);
+ CSUM_SCTP_IPV6);
+}
+
+int
+nm_os_mbuf_has_seg_offld(struct mbuf *m)
+{
+ return m->m_pkthdr.csum_flags & CSUM_TSO;
}
static void
@@ -632,7 +638,7 @@ struct nm_os_extmem {
void
nm_os_extmem_delete(struct nm_os_extmem *e)
{
- D("freeing %jx bytes", (uintmax_t)e->size);
+ D("freeing %zx bytes", (size_t)e->size);
vm_map_remove(kernel_map, e->kva, e->kva + e->size);
nm_os_free(e);
}
@@ -701,7 +707,7 @@ nm_os_extmem_create(unsigned long p, struct nmreq_pools_info *pi, int *perror)
VMFS_OPTIMAL_SPACE, VM_PROT_READ | VM_PROT_WRITE,
VM_PROT_READ | VM_PROT_WRITE, 0);
if (rv != KERN_SUCCESS) {
- D("vm_map_find(%jx) failed", (uintmax_t)e->size);
+ D("vm_map_find(%zx) failed", (size_t)e->size);
goto out_rel;
}
rv = vm_map_wire(kernel_map, e->kva, e->kva + e->size,
@@ -1540,6 +1546,30 @@ out:
return error;
}
+void
+nm_os_onattach(struct ifnet *ifp)
+{
+}
+
+void
+nm_os_onenter(struct ifnet *ifp)
+{
+ struct netmap_adapter *na = NA(ifp);
+
+ na->if_transmit = ifp->if_transmit;
+ ifp->if_transmit = netmap_transmit;
+ ifp->if_capenable |= IFCAP_NETMAP;
+}
+
+void
+nm_os_onexit(struct ifnet *ifp)
+{
+ struct netmap_adapter *na = NA(ifp);
+
+ ifp->if_transmit = na->if_transmit;
+ ifp->if_capenable &= ~IFCAP_NETMAP;
+}
+
extern struct cdevsw netmap_cdevsw; /* XXX used in netmap.c, should go elsewhere */
struct cdevsw netmap_cdevsw = {
.d_version = D_VERSION,
diff --git a/sys/dev/netmap/netmap_generic.c b/sys/dev/netmap/netmap_generic.c
index a8da6cea06b0..e1e40f68b2dc 100644
--- a/sys/dev/netmap/netmap_generic.c
+++ b/sys/dev/netmap/netmap_generic.c
@@ -89,117 +89,6 @@ __FBSDID("$FreeBSD$");
#define MBUF_RXQ(m) ((m)->m_pkthdr.flowid)
#define smp_mb()
-/*
- * FreeBSD mbuf allocator/deallocator in emulation mode:
- */
-#if __FreeBSD_version < 1100000
-
-/*
- * For older versions of FreeBSD:
- *
- * We allocate EXT_PACKET mbuf+clusters, but need to set M_NOFREE
- * so that the destructor, if invoked, will not free the packet.
- * In principle we should set the destructor only on demand,
- * but since there might be a race we better do it on allocation.
- * As a consequence, we also need to set the destructor or we
- * would leak buffers.
- */
-
-/* mbuf destructor, also need to change the type to EXT_EXTREF,
- * add an M_NOFREE flag, and then clear the flag and
- * chain into uma_zfree(zone_pack, mf)
- * (or reinstall the buffer ?)
- */
-#define SET_MBUF_DESTRUCTOR(m, fn) do { \
- (m)->m_ext.ext_free = (void *)fn; \
- (m)->m_ext.ext_type = EXT_EXTREF; \
-} while (0)
-
-static int
-void_mbuf_dtor(struct mbuf *m, void *arg1, void *arg2)
-{
- /* restore original mbuf */
- m->m_ext.ext_buf = m->m_data = m->m_ext.ext_arg1;
- m->m_ext.ext_arg1 = NULL;
- m->m_ext.ext_type = EXT_PACKET;
- m->m_ext.ext_free = NULL;
- if (MBUF_REFCNT(m) == 0)
- SET_MBUF_REFCNT(m, 1);
- uma_zfree(zone_pack, m);
-
- return 0;
-}
-
-static inline struct mbuf *
-nm_os_get_mbuf(struct ifnet *ifp, int len)
-{
- struct mbuf *m;
-
- (void)ifp;
- m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
- if (m) {
- /* m_getcl() (mb_ctor_mbuf) has an assert that checks that
- * M_NOFREE flag is not specified as third argument,
- * so we have to set M_NOFREE after m_getcl(). */
- m->m_flags |= M_NOFREE;
- m->m_ext.ext_arg1 = m->m_ext.ext_buf; // XXX save
- m->m_ext.ext_free = (void *)void_mbuf_dtor;
- m->m_ext.ext_type = EXT_EXTREF;
- ND(5, "create m %p refcnt %d", m, MBUF_REFCNT(m));
- }
- return m;
-}
-
-#else /* __FreeBSD_version >= 1100000 */
-
-/*
- * Newer versions of FreeBSD, using a straightforward scheme.
- *
- * We allocate mbufs with m_gethdr(), since the mbuf header is needed
- * by the driver. We also attach a customly-provided external storage,
- * which in this case is a netmap buffer. When calling m_extadd(), however
- * we pass a NULL address, since the real address (and length) will be
- * filled in by nm_os_generic_xmit_frame() right before calling
- * if_transmit().
- *
- * The dtor function does nothing, however we need it since mb_free_ext()
- * has a KASSERT(), checking that the mbuf dtor function is not NULL.
- */
-
-#if __FreeBSD_version <= 1200050
-static void void_mbuf_dtor(struct mbuf *m, void *arg1, void *arg2) { }
-#else /* __FreeBSD_version >= 1200051 */
-/* The arg1 and arg2 pointers argument were removed by r324446, which
- * in included since version 1200051. */
-static void void_mbuf_dtor(struct mbuf *m) { }
-#endif /* __FreeBSD_version >= 1200051 */
-
-#define SET_MBUF_DESTRUCTOR(m, fn) do { \
- (m)->m_ext.ext_free = (fn != NULL) ? \
- (void *)fn : (void *)void_mbuf_dtor; \
-} while (0)
-
-static inline struct mbuf *
-nm_os_get_mbuf(struct ifnet *ifp, int len)
-{
- struct mbuf *m;
-
- (void)ifp;
- (void)len;
-
- m = m_gethdr(M_NOWAIT, MT_DATA);
- if (m == NULL) {
- return m;
- }
-
- m_extadd(m, NULL /* buf */, 0 /* size */, void_mbuf_dtor,
- NULL, NULL, 0, EXT_NET_DRV);
-
- return m;
-}
-
-#endif /* __FreeBSD_version >= 1100000 */
-
#elif defined _WIN32
#include "win_glue.h"
@@ -1161,7 +1050,7 @@ generic_netmap_dtor(struct netmap_adapter *na)
}
D("Native netmap adapter %p restored", prev_na);
}
- NM_ATTACH_NA(ifp, prev_na);
+ NM_RESTORE_NA(ifp, prev_na);
/*
* netmap_detach_common(), that it's called after this function,
* overrides WNA(ifp) if na->ifp is not NULL.
@@ -1202,7 +1091,7 @@ generic_netmap_attach(struct ifnet *ifp)
}
#endif
- if (NA(ifp) && !NM_NA_VALID(ifp)) {
+ if (NM_NA_CLASH(ifp)) {
/* If NA(ifp) is not null but there is no valid netmap
* adapter it means that someone else is using the same
* pointer (e.g. ax25_ptr on linux). This happens for
@@ -1230,6 +1119,7 @@ generic_netmap_attach(struct ifnet *ifp)
na->ifp = ifp;
na->num_tx_desc = num_tx_desc;
na->num_rx_desc = num_rx_desc;
+ na->rx_buf_maxsize = 32768;
na->nm_register = &generic_netmap_register;
na->nm_txsync = &generic_netmap_txsync;
na->nm_rxsync = &generic_netmap_rxsync;
@@ -1253,8 +1143,8 @@ generic_netmap_attach(struct ifnet *ifp)
return retval;
}
- gna->prev = NA(ifp); /* save old na */
- if (gna->prev != NULL) {
+ if (NM_NA_VALID(ifp)) {
+ gna->prev = NA(ifp); /* save old na */
netmap_adapter_get(gna->prev);
}
NM_ATTACH_NA(ifp, na);
diff --git a/sys/dev/netmap/netmap_kern.h b/sys/dev/netmap/netmap_kern.h
index 862f9a43c5f1..4abd16b6d17f 100644
--- a/sys/dev/netmap/netmap_kern.h
+++ b/sys/dev/netmap/netmap_kern.h
@@ -275,6 +275,7 @@ struct netmap_adapter;
struct nm_bdg_fwd;
struct nm_bridge;
struct netmap_priv_d;
+struct nm_bdg_args;
/* os-specific NM_SELINFO_T initialzation/destruction functions */
void nm_os_selinfo_init(NM_SELINFO_T *);
@@ -305,6 +306,12 @@ void *nm_os_realloc(void *, size_t new_size, size_t old_size);
void nm_os_free(void *);
void nm_os_vfree(void *);
+/* os specific attach/detach enter/exit-netmap-mode routines */
+void nm_os_onattach(struct ifnet *);
+void nm_os_ondetach(struct ifnet *);
+void nm_os_onenter(struct ifnet *);
+void nm_os_onexit(struct ifnet *);
+
/* passes a packet up to the host stack.
* If the packet is sent (or dropped) immediately it returns NULL,
* otherwise it links the packet to prev and returns m.
@@ -313,7 +320,8 @@ void nm_os_vfree(void *);
*/
void *nm_os_send_up(struct ifnet *, struct mbuf *m, struct mbuf *prev);
-int nm_os_mbuf_has_offld(struct mbuf *m);
+int nm_os_mbuf_has_seg_offld(struct mbuf *m);
+int nm_os_mbuf_has_csum_offld(struct mbuf *m);
#include "netmap_mbq.h"
@@ -507,11 +515,10 @@ struct netmap_kring {
struct netmap_kring *pipe; /* if this is a pipe ring,
* pointer to the other end
*/
+ uint32_t pipe_tail; /* hwtail updated by the other end */
#endif /* WITH_PIPES */
-#ifdef WITH_VALE
int (*save_notify)(struct netmap_kring *kring, int flags);
-#endif
#ifdef WITH_MONITOR
/* array of krings that are monitoring this kring */
@@ -634,6 +641,7 @@ struct netmap_lut {
};
struct netmap_vp_adapter; // forward
+struct nm_bridge;
/* Struct to be filled by nm_config callbacks. */
struct nm_config_info {
@@ -645,6 +653,14 @@ struct nm_config_info {
};
/*
+ * default type for the magic field.
+ * May be overridden in glue code.
+ */
+#ifndef NM_OS_MAGIC
+#define NM_OS_MAGIC uint32_t
+#endif /* !NM_OS_MAGIC */
+
+/*
* The "struct netmap_adapter" extends the "struct adapter"
* (or equivalent) device descriptor.
* It contains all base fields needed to support netmap operation.
@@ -660,7 +676,7 @@ struct netmap_adapter {
* always exists and is at least 32 bits) contains a magic
* value which we can use to detect that the interface is good.
*/
- uint32_t magic;
+ NM_OS_MAGIC magic;
uint32_t na_flags; /* enabled, and other flags */
#define NAF_SKIP_INTR 1 /* use the regular interrupt handler.
* useful during initialization
@@ -696,6 +712,8 @@ struct netmap_adapter {
u_int num_rx_rings; /* number of adapter receive rings */
u_int num_tx_rings; /* number of adapter transmit rings */
+ u_int num_host_rx_rings; /* number of host receive rings */
+ u_int num_host_tx_rings; /* number of host transmit rings */
u_int num_tx_desc; /* number of descriptor in each queue */
u_int num_rx_desc;
@@ -783,7 +801,6 @@ struct netmap_adapter {
int (*nm_config)(struct netmap_adapter *, struct nm_config_info *info);
int (*nm_krings_create)(struct netmap_adapter *);
void (*nm_krings_delete)(struct netmap_adapter *);
-#ifdef WITH_VALE
/*
* nm_bdg_attach() initializes the na_vp field to point
* to an adapter that can be attached to a VALE switch. If the
@@ -799,7 +816,8 @@ struct netmap_adapter {
* initializations
* Called with NMG_LOCK held.
*/
- int (*nm_bdg_attach)(const char *bdg_name, struct netmap_adapter *);
+ int (*nm_bdg_attach)(const char *bdg_name, struct netmap_adapter *,
+ struct nm_bridge *);
int (*nm_bdg_ctl)(struct nmreq_header *, struct netmap_adapter *);
/* adapter used to attach this adapter to a VALE switch (if any) */
@@ -807,7 +825,6 @@ struct netmap_adapter {
/* adapter used to attach the host rings of this adapter
* to a VALE switch (if any) */
struct netmap_vp_adapter *na_hostvp;
-#endif
/* standard refcount to control the lifetime of the adapter
* (it should be equal to the lifetime of the corresponding ifp)
@@ -843,6 +860,10 @@ struct netmap_adapter {
unsigned rx_buf_maxsize;
char name[NETMAP_REQ_IFNAMSIZ]; /* used at least by pipes */
+
+#ifdef WITH_MONITOR
+ unsigned long monitor_id; /* debugging */
+#endif
};
static __inline u_int
@@ -866,6 +887,12 @@ nma_get_nrings(struct netmap_adapter *na, enum txrx t)
return (t == NR_TX ? na->num_tx_rings : na->num_rx_rings);
}
+static __inline u_int
+nma_get_host_nrings(struct netmap_adapter *na, enum txrx t)
+{
+ return (t == NR_TX ? na->num_host_tx_rings : na->num_host_rx_rings);
+}
+
static __inline void
nma_set_nrings(struct netmap_adapter *na, enum txrx t, u_int v)
{
@@ -875,6 +902,15 @@ nma_set_nrings(struct netmap_adapter *na, enum txrx t, u_int v)
na->num_rx_rings = v;
}
+static __inline void
+nma_set_host_nrings(struct netmap_adapter *na, enum txrx t, u_int v)
+{
+ if (t == NR_TX)
+ na->num_host_tx_rings = v;
+ else
+ na->num_host_rx_rings = v;
+}
+
static __inline struct netmap_kring**
NMR(struct netmap_adapter *na, enum txrx t)
{
@@ -964,13 +1000,22 @@ struct netmap_generic_adapter { /* emulated device */
};
#endif /* WITH_GENERIC */
-static __inline int
+static __inline u_int
netmap_real_rings(struct netmap_adapter *na, enum txrx t)
{
- return nma_get_nrings(na, t) + !!(na->na_flags & NAF_HOST_RINGS);
+ return nma_get_nrings(na, t) +
+ !!(na->na_flags & NAF_HOST_RINGS) * nma_get_host_nrings(na, t);
}
-#ifdef WITH_VALE
+/* account for fake rings */
+static __inline u_int
+netmap_all_rings(struct netmap_adapter *na, enum txrx t)
+{
+ return max(nma_get_nrings(na, t) + 1, netmap_real_rings(na, t));
+}
+
+int netmap_default_bdg_attach(const char *name, struct netmap_adapter *na,
+ struct nm_bridge *);
struct nm_bdg_polling_state;
/*
* Bridge wrapper for non VALE ports attached to a VALE switch.
@@ -1038,12 +1083,12 @@ struct netmap_bwrap_adapter {
int nm_bdg_ctl_attach(struct nmreq_header *hdr, void *auth_token);
int nm_bdg_ctl_detach(struct nmreq_header *hdr, void *auth_token);
int nm_bdg_polling(struct nmreq_header *hdr);
-int netmap_bwrap_attach(const char *name, struct netmap_adapter *);
+int netmap_bdg_list(struct nmreq_header *hdr);
+
+#ifdef WITH_VALE
int netmap_vi_create(struct nmreq_header *hdr, int);
int nm_vi_create(struct nmreq_header *);
int nm_vi_destroy(const char *name);
-int netmap_bdg_list(struct nmreq_header *hdr);
-
#else /* !WITH_VALE */
#define netmap_vi_create(hdr, a) (EOPNOTSUPP)
#endif /* WITH_VALE */
@@ -1262,7 +1307,6 @@ const char *netmap_bdg_name(struct netmap_vp_adapter *);
#define netmap_ifp_to_vp(_ifp) NULL
#define netmap_ifp_to_host_vp(_ifp) NULL
#define netmap_bdg_idx(_vp) -1
-#define netmap_bdg_name(_vp) NULL
#endif /* WITH_VALE */
static inline int
@@ -1293,67 +1337,8 @@ nm_update_hostrings_mode(struct netmap_adapter *na)
na->rx_rings[na->num_rx_rings]->nr_pending_mode;
}
-/* set/clear native flags and if_transmit/netdev_ops */
-static inline void
-nm_set_native_flags(struct netmap_adapter *na)
-{
- struct ifnet *ifp = na->ifp;
-
- /* We do the setup for intercepting packets only if we are the
- * first user of this adapapter. */
- if (na->active_fds > 0) {
- return;
- }
-
- na->na_flags |= NAF_NETMAP_ON;
-#ifdef IFCAP_NETMAP /* or FreeBSD ? */
- ifp->if_capenable |= IFCAP_NETMAP;
-#endif
-#if defined (__FreeBSD__)
- na->if_transmit = ifp->if_transmit;
- ifp->if_transmit = netmap_transmit;
-#elif defined (_WIN32)
- (void)ifp; /* prevent a warning */
-#elif defined (linux)
- na->if_transmit = (void *)ifp->netdev_ops;
- ifp->netdev_ops = &((struct netmap_hw_adapter *)na)->nm_ndo;
- ((struct netmap_hw_adapter *)na)->save_ethtool = ifp->ethtool_ops;
- ifp->ethtool_ops = &((struct netmap_hw_adapter*)na)->nm_eto;
-#endif /* linux */
- nm_update_hostrings_mode(na);
-}
-
-static inline void
-nm_clear_native_flags(struct netmap_adapter *na)
-{
- struct ifnet *ifp = na->ifp;
-
- /* We undo the setup for intercepting packets only if we are the
- * last user of this adapapter. */
- if (na->active_fds > 0) {
- return;
- }
-
- nm_update_hostrings_mode(na);
-
-#if defined(__FreeBSD__)
- ifp->if_transmit = na->if_transmit;
-#elif defined(_WIN32)
- (void)ifp; /* prevent a warning */
-#else
- ifp->netdev_ops = (void *)na->if_transmit;
- ifp->ethtool_ops = ((struct netmap_hw_adapter*)na)->save_ethtool;
-#endif
- na->na_flags &= ~NAF_NETMAP_ON;
-#ifdef IFCAP_NETMAP /* or FreeBSD ? */
- ifp->if_capenable &= ~IFCAP_NETMAP;
-#endif
-}
-
-#ifdef linux
-int netmap_linux_config(struct netmap_adapter *na,
- struct nm_config_info *info);
-#endif /* linux */
+void nm_set_native_flags(struct netmap_adapter *);
+void nm_clear_native_flags(struct netmap_adapter *);
/*
* nm_*sync_prologue() functions are used in ioctl/poll and ptnetmap
@@ -1458,7 +1443,6 @@ int netmap_get_hw_na(struct ifnet *ifp,
struct netmap_mem_d *nmd, struct netmap_adapter **na);
-#ifdef WITH_VALE
/*
* The following bridge-related functions are used by other
* kernel modules.
@@ -1473,39 +1457,49 @@ typedef uint32_t (*bdg_lookup_fn_t)(struct nm_bdg_fwd *ft, uint8_t *ring_nr,
typedef int (*bdg_config_fn_t)(struct nm_ifreq *);
typedef void (*bdg_dtor_fn_t)(const struct netmap_vp_adapter *);
typedef void *(*bdg_update_private_data_fn_t)(void *private_data, void *callback_data, int *error);
+typedef int (*bdg_vp_create_fn_t)(struct nmreq_header *hdr,
+ struct ifnet *ifp, struct netmap_mem_d *nmd,
+ struct netmap_vp_adapter **ret);
+typedef int (*bdg_bwrap_attach_fn_t)(const char *nr_name, struct netmap_adapter *hwna);
struct netmap_bdg_ops {
bdg_lookup_fn_t lookup;
bdg_config_fn_t config;
bdg_dtor_fn_t dtor;
+ bdg_vp_create_fn_t vp_create;
+ bdg_bwrap_attach_fn_t bwrap_attach;
+ char name[IFNAMSIZ];
};
-
-uint32_t netmap_bdg_learning(struct nm_bdg_fwd *ft, uint8_t *dst_ring,
- struct netmap_vp_adapter *, void *private_data);
+int netmap_bwrap_attach(const char *name, struct netmap_adapter *, struct netmap_bdg_ops *);
+int netmap_bdg_regops(const char *name, struct netmap_bdg_ops *bdg_ops, void *private_data, void *auth_token);
#define NM_BRIDGES 8 /* number of bridges */
#define NM_BDG_MAXPORTS 254 /* up to 254 */
#define NM_BDG_BROADCAST NM_BDG_MAXPORTS
#define NM_BDG_NOPORT (NM_BDG_MAXPORTS+1)
-/* these are redefined in case of no VALE support */
-int netmap_get_bdg_na(struct nmreq_header *hdr, struct netmap_adapter **na,
- struct netmap_mem_d *nmd, int create);
struct nm_bridge *netmap_init_bridges2(u_int);
void netmap_uninit_bridges2(struct nm_bridge *, u_int);
int netmap_init_bridges(void);
void netmap_uninit_bridges(void);
-int netmap_bdg_regops(const char *name, struct netmap_bdg_ops *bdg_ops, void *private_data, void *auth_token);
int nm_bdg_update_private_data(const char *name, bdg_update_private_data_fn_t callback,
void *callback_data, void *auth_token);
int netmap_bdg_config(struct nm_ifreq *nifr);
-void *netmap_bdg_create(const char *bdg_name, int *return_status);
-int netmap_bdg_destroy(const char *bdg_name, void *auth_token);
+
+#ifdef WITH_VALE
+uint32_t netmap_bdg_learning(struct nm_bdg_fwd *ft, uint8_t *dst_ring,
+ struct netmap_vp_adapter *, void *private_data);
+
+/* these are redefined in case of no VALE support */
+int netmap_get_vale_na(struct nmreq_header *hdr, struct netmap_adapter **na,
+ struct netmap_mem_d *nmd, int create);
+void *netmap_vale_create(const char *bdg_name, int *return_status);
+int netmap_vale_destroy(const char *bdg_name, void *auth_token);
#else /* !WITH_VALE */
-#define netmap_get_bdg_na(_1, _2, _3, _4) 0
-#define netmap_init_bridges(_1) 0
-#define netmap_uninit_bridges()
-#define netmap_bdg_regops(_1, _2) EINVAL
+#define netmap_bdg_learning(_1, _2, _3, _4) 0
+#define netmap_get_vale_na(_1, _2, _3, _4) 0
+#define netmap_bdg_create(_1, _2) NULL
+#define netmap_bdg_destroy(_1, _2) 0
#endif /* !WITH_VALE */
#ifdef WITH_PIPES
@@ -1611,6 +1605,7 @@ enum { /* verbose flags */
extern int netmap_txsync_retry;
extern int netmap_flags;
+extern int netmap_generic_hwcsum;
extern int netmap_generic_mit;
extern int netmap_generic_ringsize;
extern int netmap_generic_rings;
@@ -1620,12 +1615,18 @@ extern int netmap_generic_txqdisc;
extern int ptnetmap_tx_workers;
/*
- * NA returns a pointer to the struct netmap adapter from the ifp,
- * WNA is used to write it.
+ * NA returns a pointer to the struct netmap adapter from the ifp.
+ * WNA is os-specific and must be defined in glue code.
*/
#define NA(_ifp) ((struct netmap_adapter *)WNA(_ifp))
/*
+ * we provide a default implementation of NM_ATTACH_NA/NM_DETACH_NA
+ * based on the WNA field.
+ * Glue code may override this by defining its own NM_ATTACH_NA
+ */
+#ifndef NM_ATTACH_NA
+/*
* On old versions of FreeBSD, NA(ifp) is a pspare. On linux we
* overload another pointer in the netdev.
*
@@ -1643,6 +1644,12 @@ extern int ptnetmap_tx_workers;
NA(ifp)->magic = \
((uint32_t)(uintptr_t)NA(ifp)) ^ NETMAP_MAGIC; \
} while(0)
+#define NM_RESTORE_NA(ifp, na) WNA(ifp) = na;
+
+#define NM_DETACH_NA(ifp) do { WNA(ifp) = NULL; } while (0)
+#define NM_NA_CLASH(ifp) (NA(ifp) && !NM_NA_VALID(ifp))
+#endif /* !NM_ATTACH_NA */
+
#define NM_IS_NATIVE(ifp) (NM_NA_VALID(ifp) && NA(ifp)->nm_dtor == netmap_hw_dtor)
@@ -1752,17 +1759,24 @@ netmap_unload_map(struct netmap_adapter *na,
}
}
+#ifdef NETMAP_LINUX_HAVE_DMASYNC
+static inline void
+netmap_sync_map_cpu(struct netmap_adapter *na,
+ bus_dma_tag_t tag, bus_dmamap_t map, u_int sz, enum txrx t)
+{
+ if (*map) {
+ dma_sync_single_for_cpu(na->pdev, *map, sz,
+ (t == NR_TX ? DMA_TO_DEVICE : DMA_FROM_DEVICE));
+ }
+}
+
static inline void
-netmap_sync_map(struct netmap_adapter *na,
+netmap_sync_map_dev(struct netmap_adapter *na,
bus_dma_tag_t tag, bus_dmamap_t map, u_int sz, enum txrx t)
{
if (*map) {
- if (t == NR_RX)
- dma_sync_single_for_cpu(na->pdev, *map, sz,
- DMA_FROM_DEVICE);
- else
- dma_sync_single_for_device(na->pdev, *map, sz,
- DMA_TO_DEVICE);
+ dma_sync_single_for_device(na->pdev, *map, sz,
+ (t == NR_TX ? DMA_TO_DEVICE : DMA_FROM_DEVICE));
}
}
@@ -1780,6 +1794,10 @@ netmap_reload_map(struct netmap_adapter *na,
*map = dma_map_single(na->pdev, buf, sz,
DMA_BIDIRECTIONAL);
}
+#else /* !NETMAP_LINUX_HAVE_DMASYNC */
+#define netmap_sync_map_cpu(na, tag, map, sz, t)
+#define netmap_sync_map_dev(na, tag, map, sz, t)
+#endif /* NETMAP_LINUX_HAVE_DMASYNC */
#endif /* linux */
@@ -2221,6 +2239,119 @@ void ptnet_nm_krings_delete(struct netmap_adapter *na);
void ptnet_nm_dtor(struct netmap_adapter *na);
#endif /* WITH_PTNETMAP_GUEST */
+#ifdef __FreeBSD__
+/*
+ * FreeBSD mbuf allocator/deallocator in emulation mode:
+ */
+#if __FreeBSD_version < 1100000
+
+/*
+ * For older versions of FreeBSD:
+ *
+ * We allocate EXT_PACKET mbuf+clusters, but need to set M_NOFREE
+ * so that the destructor, if invoked, will not free the packet.
+ * In principle we should set the destructor only on demand,
+ * but since there might be a race we better do it on allocation.
+ * As a consequence, we also need to set the destructor or we
+ * would leak buffers.
+ */
+
+/* mbuf destructor, also need to change the type to EXT_EXTREF,
+ * add an M_NOFREE flag, and then clear the flag and
+ * chain into uma_zfree(zone_pack, mf)
+ * (or reinstall the buffer ?)
+ */
+#define SET_MBUF_DESTRUCTOR(m, fn) do { \
+ (m)->m_ext.ext_free = (void *)fn; \
+ (m)->m_ext.ext_type = EXT_EXTREF; \
+} while (0)
+
+static int
+void_mbuf_dtor(struct mbuf *m, void *arg1, void *arg2)
+{
+ /* restore original mbuf */
+ m->m_ext.ext_buf = m->m_data = m->m_ext.ext_arg1;
+ m->m_ext.ext_arg1 = NULL;
+ m->m_ext.ext_type = EXT_PACKET;
+ m->m_ext.ext_free = NULL;
+ if (MBUF_REFCNT(m) == 0)
+ SET_MBUF_REFCNT(m, 1);
+ uma_zfree(zone_pack, m);
+
+ return 0;
+}
+
+static inline struct mbuf *
+nm_os_get_mbuf(struct ifnet *ifp, int len)
+{
+ struct mbuf *m;
+
+ (void)ifp;
+ m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
+ if (m) {
+ /* m_getcl() (mb_ctor_mbuf) has an assert that checks that
+ * M_NOFREE flag is not specified as third argument,
+ * so we have to set M_NOFREE after m_getcl(). */
+ m->m_flags |= M_NOFREE;
+ m->m_ext.ext_arg1 = m->m_ext.ext_buf; // XXX save
+ m->m_ext.ext_free = (void *)void_mbuf_dtor;
+ m->m_ext.ext_type = EXT_EXTREF;
+ ND(5, "create m %p refcnt %d", m, MBUF_REFCNT(m));
+ }
+ return m;
+}
+
+#else /* __FreeBSD_version >= 1100000 */
+
+/*
+ * Newer versions of FreeBSD, using a straightforward scheme.
+ *
+ * We allocate mbufs with m_gethdr(), since the mbuf header is needed
+ * by the driver. We also attach a customly-provided external storage,
+ * which in this case is a netmap buffer. When calling m_extadd(), however
+ * we pass a NULL address, since the real address (and length) will be
+ * filled in by nm_os_generic_xmit_frame() right before calling
+ * if_transmit().
+ *
+ * The dtor function does nothing, however we need it since mb_free_ext()
+ * has a KASSERT(), checking that the mbuf dtor function is not NULL.
+ */
+
+#if __FreeBSD_version <= 1200050
+static void void_mbuf_dtor(struct mbuf *m, void *arg1, void *arg2) { }
+#else /* __FreeBSD_version >= 1200051 */
+/* The arg1 and arg2 pointer arguments were removed by r324446, which
+ * is included since version 1200051. */
+static void void_mbuf_dtor(struct mbuf *m) { }
+#endif /* __FreeBSD_version >= 1200051 */
+
+#define SET_MBUF_DESTRUCTOR(m, fn) do { \
+ (m)->m_ext.ext_free = (fn != NULL) ? \
+ (void *)fn : (void *)void_mbuf_dtor; \
+} while (0)
+
+static inline struct mbuf *
+nm_os_get_mbuf(struct ifnet *ifp, int len)
+{
+ struct mbuf *m;
+
+ (void)ifp;
+ (void)len;
+
+ m = m_gethdr(M_NOWAIT, MT_DATA);
+ if (m == NULL) {
+ return m;
+ }
+
+ m_extadd(m, NULL /* buf */, 0 /* size */, void_mbuf_dtor,
+ NULL, NULL, 0, EXT_NET_DRV);
+
+ return m;
+}
+
+#endif /* __FreeBSD_version >= 1100000 */
+#endif /* __FreeBSD__ */
+
struct nmreq_option * nmreq_findoption(struct nmreq_option *, uint16_t);
int nmreq_checkduplicate(struct nmreq_option *);
diff --git a/sys/dev/netmap/netmap_mem2.c b/sys/dev/netmap/netmap_mem2.c
index edf01a636151..803f30891c6b 100644
--- a/sys/dev/netmap/netmap_mem2.c
+++ b/sys/dev/netmap/netmap_mem2.c
@@ -1845,7 +1845,7 @@ netmap_free_rings(struct netmap_adapter *na)
for_rx_tx(t) {
u_int i;
- for (i = 0; i < nma_get_nrings(na, t) + 1; i++) {
+ for (i = 0; i < netmap_all_rings(na, t); i++) {
struct netmap_kring *kring = NMR(na, t)[i];
struct netmap_ring *ring = kring->ring;
@@ -1884,7 +1884,7 @@ netmap_mem2_rings_create(struct netmap_adapter *na)
for_rx_tx(t) {
u_int i;
- for (i = 0; i <= nma_get_nrings(na, t); i++) {
+ for (i = 0; i < netmap_all_rings(na, t); i++) {
struct netmap_kring *kring = NMR(na, t)[i];
struct netmap_ring *ring = kring->ring;
u_int len, ndesc;
@@ -1922,7 +1922,7 @@ netmap_mem2_rings_create(struct netmap_adapter *na)
netmap_mem_bufsize(na->nm_mem);
ND("%s h %d c %d t %d", kring->name,
ring->head, ring->cur, ring->tail);
- ND("initializing slots for %s_ring", nm_txrx2str(txrx));
+ ND("initializing slots for %s_ring", nm_txrx2str(t));
if (!(kring->nr_kflags & NKR_FAKERING)) {
/* this is a real ring */
ND("allocating buffers for %s", kring->name);
@@ -1980,7 +1980,7 @@ netmap_mem2_if_new(struct netmap_adapter *na, struct netmap_priv_d *priv)
ntot = 0;
for_rx_tx(t) {
/* account for the (eventually fake) host rings */
- n[t] = nma_get_nrings(na, t) + 1;
+ n[t] = netmap_all_rings(na, t);
ntot += n[t];
}
/*
@@ -2654,14 +2654,14 @@ netmap_mem_pt_guest_rings_create(struct netmap_adapter *na)
/* point each kring to the corresponding backend ring */
nifp = (struct netmap_if *)((char *)ptnmd->nm_addr + ptif->nifp_offset);
- for (i = 0; i <= na->num_tx_rings; i++) {
+ for (i = 0; i < netmap_all_rings(na, NR_TX); i++) {
struct netmap_kring *kring = na->tx_rings[i];
if (kring->ring)
continue;
kring->ring = (struct netmap_ring *)
((char *)nifp + nifp->ring_ofs[i]);
}
- for (i = 0; i <= na->num_rx_rings; i++) {
+ for (i = 0; i < netmap_all_rings(na, NR_RX); i++) {
struct netmap_kring *kring = na->rx_rings[i];
if (kring->ring)
continue;
diff --git a/sys/dev/netmap/netmap_monitor.c b/sys/dev/netmap/netmap_monitor.c
index 8b118ced9ad5..7e88ae02f9ba 100644
--- a/sys/dev/netmap/netmap_monitor.c
+++ b/sys/dev/netmap/netmap_monitor.c
@@ -152,6 +152,12 @@ netmap_monitor_txsync(struct netmap_kring *kring, int flags)
static int
netmap_monitor_rxsync(struct netmap_kring *kring, int flags)
{
+ struct netmap_monitor_adapter *mna =
+ (struct netmap_monitor_adapter *)kring->na;
+ if (unlikely(mna->priv.np_na == NULL)) {
+ /* parent left netmap mode */
+ return EIO;
+ }
ND("%s %x", kring->name, flags);
kring->nr_hwcur = kring->rhead;
mb();
@@ -164,11 +170,20 @@ static int
netmap_monitor_krings_create(struct netmap_adapter *na)
{
int error = netmap_krings_create(na, 0);
+ enum txrx t;
+
if (error)
return error;
/* override the host rings callbacks */
- na->tx_rings[na->num_tx_rings]->nm_sync = netmap_monitor_txsync;
- na->rx_rings[na->num_rx_rings]->nm_sync = netmap_monitor_rxsync;
+ for_rx_tx(t) {
+ int i;
+ u_int first = nma_get_nrings(na, t);
+ for (i = 0; i < nma_get_host_nrings(na, t); i++) {
+ struct netmap_kring *kring = NMR(na, t)[first + i];
+ kring->nm_sync = t == NR_TX ? netmap_monitor_txsync :
+ netmap_monitor_rxsync;
+ }
+ }
return 0;
}
@@ -244,6 +259,48 @@ static int netmap_monitor_parent_txsync(struct netmap_kring *, int);
static int netmap_monitor_parent_rxsync(struct netmap_kring *, int);
static int netmap_monitor_parent_notify(struct netmap_kring *, int);
+static void
+nm_monitor_intercept_callbacks(struct netmap_kring *kring)
+{
+ ND("intercept callbacks on %s", kring->name);
+ kring->mon_sync = kring->nm_sync;
+ kring->mon_notify = kring->nm_notify;
+ if (kring->tx == NR_TX) {
+ kring->nm_sync = netmap_monitor_parent_txsync;
+ } else {
+ kring->nm_sync = netmap_monitor_parent_rxsync;
+ kring->nm_notify = netmap_monitor_parent_notify;
+ kring->mon_tail = kring->nr_hwtail;
+ }
+}
+
+static void
+nm_monitor_restore_callbacks(struct netmap_kring *kring)
+{
+ ND("restoring callbacks on %s", kring->name);
+ kring->nm_sync = kring->mon_sync;
+ kring->mon_sync = NULL;
+ if (kring->tx == NR_RX) {
+ kring->nm_notify = kring->mon_notify;
+ }
+ kring->mon_notify = NULL;
+}
+
+static struct netmap_kring *
+nm_zmon_list_head(struct netmap_kring *mkring, enum txrx t)
+{
+ struct netmap_adapter *na = mkring->na;
+ struct netmap_kring *kring = mkring;
+ struct netmap_zmon_list *z = &kring->zmon_list[t];
+ /* reach the head of the list */
+ while (nm_is_zmon(na) && z->prev != NULL) {
+ kring = z->prev;
+ na = kring->na;
+ z = &kring->zmon_list[t];
+ }
+ return nm_is_zmon(na) ? NULL : kring;
+}
+
/* add the monitor mkring to the list of monitors of kring.
* If this is the first monitor, intercept the callbacks
*/
@@ -254,51 +311,34 @@ netmap_monitor_add(struct netmap_kring *mkring, struct netmap_kring *kring, int
enum txrx t = kring->tx;
struct netmap_zmon_list *z = &kring->zmon_list[t];
struct netmap_zmon_list *mz = &mkring->zmon_list[t];
+ struct netmap_kring *ikring = kring;
/* a zero-copy monitor which is not the first in the list
* must monitor the previous monitor
*/
if (zmon && z->prev != NULL)
- kring = z->prev;
+ ikring = z->prev; /* tail of the list */
/* synchronize with concurrently running nm_sync()s */
nm_kr_stop(kring, NM_KR_LOCKED);
- if (nm_monitor_none(kring)) {
- /* this is the first monitor, intercept callbacks */
- ND("intercept callbacks on %s", kring->name);
- kring->mon_sync = kring->nm_sync;
- kring->mon_notify = kring->nm_notify;
- if (kring->tx == NR_TX) {
- kring->nm_sync = netmap_monitor_parent_txsync;
- } else {
- kring->nm_sync = netmap_monitor_parent_rxsync;
- kring->nm_notify = netmap_monitor_parent_notify;
- kring->mon_tail = kring->nr_hwtail;
- }
+ if (nm_monitor_none(ikring)) {
+ /* this is the first monitor, intercept the callbacks */
+ ND("%s: intercept callbacks on %s", mkring->name, ikring->name);
+ nm_monitor_intercept_callbacks(ikring);
}
if (zmon) {
/* append the zmon to the list */
- struct netmap_monitor_adapter *mna =
- (struct netmap_monitor_adapter *)mkring->na;
- struct netmap_adapter *pna;
-
- if (z->prev != NULL)
- z->prev->zmon_list[t].next = mkring;
- mz->prev = z->prev;
- z->prev = mkring;
- if (z->next == NULL)
- z->next = mkring;
-
- /* grap a reference to the previous netmap adapter
+ ikring->zmon_list[t].next = mkring;
+ z->prev = mkring; /* new tail */
+ mz->prev = ikring;
+ mz->next = NULL;
+ /* grab a reference to the previous netmap adapter
* in the chain (this may be the monitored port
* or another zero-copy monitor)
*/
- pna = kring->na;
- netmap_adapter_get(pna);
- netmap_adapter_put(mna->priv.np_na);
- mna->priv.np_na = pna;
+ netmap_adapter_get(ikring->na);
} else {
/* make sure the monitor array exists and is big enough */
error = nm_monitor_alloc(kring, kring->n_monitors + 1);
@@ -318,29 +358,50 @@ out:
* If this is the last monitor, restore the original callbacks
*/
static void
-netmap_monitor_del(struct netmap_kring *mkring, struct netmap_kring *kring)
+netmap_monitor_del(struct netmap_kring *mkring, struct netmap_kring *kring, enum txrx t)
{
- struct netmap_zmon_list *mz = &mkring->zmon_list[kring->tx];
int zmon = nm_is_zmon(mkring->na);
+ struct netmap_zmon_list *mz = &mkring->zmon_list[t];
+ struct netmap_kring *ikring = kring;
- if (zmon && mz->prev != NULL)
- kring = mz->prev;
+ if (zmon) {
+ /* get to the head of the list */
+ kring = nm_zmon_list_head(mkring, t);
+ ikring = mz->prev;
+ }
- /* synchronize with concurrently running nm_sync()s */
- nm_kr_stop(kring, NM_KR_LOCKED);
+ /* synchronize with concurrently running nm_sync()s
+ * if kring is NULL (orphaned list) the monitored port
+ * has exited netmap mode, so there is nothing to stop
+ */
+ if (kring != NULL)
+ nm_kr_stop(kring, NM_KR_LOCKED);
if (zmon) {
/* remove the monitor from the list */
- if (mz->prev != NULL)
- mz->prev->zmon_list[kring->tx].next = mz->next;
- else
- kring->zmon_list[kring->tx].next = mz->next;
if (mz->next != NULL) {
- mz->next->zmon_list[kring->tx].prev = mz->prev;
- } else {
- kring->zmon_list[kring->tx].prev = mz->prev;
+ mz->next->zmon_list[t].prev = mz->prev;
+ /* we also need to let the next monitor drop the
+ * reference to us and grab the reference to the
+ * previous ring owner, instead
+ */
+ if (mz->prev != NULL)
+ netmap_adapter_get(mz->prev->na);
+ netmap_adapter_put(mkring->na);
+ } else if (kring != NULL) {
+ /* in the monitored kring, prev is actually the
+ * pointer to the tail of the list
+ */
+ kring->zmon_list[t].prev =
+ (mz->prev != kring ? mz->prev : NULL);
+ }
+ if (mz->prev != NULL) {
+ netmap_adapter_put(mz->prev->na);
+ mz->prev->zmon_list[t].next = mz->next;
}
+ mz->prev = NULL;
+ mz->next = NULL;
} else {
/* this is a copy monitor */
uint32_t mon_pos = mkring->mon_pos[kring->tx];
@@ -356,21 +417,13 @@ netmap_monitor_del(struct netmap_kring *mkring, struct netmap_kring *kring)
}
}
- if (nm_monitor_none(kring)) {
+ if (ikring != NULL && nm_monitor_none(ikring)) {
/* this was the last monitor, restore the callbacks */
- ND("%s: restoring sync on %s: %p", mkring->name, kring->name,
- kring->mon_sync);
- kring->nm_sync = kring->mon_sync;
- kring->mon_sync = NULL;
- if (kring->tx == NR_RX) {
- ND("%s: restoring notify on %s: %p",
- mkring->name, kring->name, kring->mon_notify);
- kring->nm_notify = kring->mon_notify;
- kring->mon_notify = NULL;
- }
+ nm_monitor_restore_callbacks(ikring);
}
- nm_kr_start(kring);
+ if (kring != NULL)
+ nm_kr_start(kring);
}
@@ -389,9 +442,9 @@ netmap_monitor_stop(struct netmap_adapter *na)
for_rx_tx(t) {
u_int i;
- for (i = 0; i < nma_get_nrings(na, t) + 1; i++) {
+ for (i = 0; i < netmap_all_rings(na, t); i++) {
struct netmap_kring *kring = NMR(na, t)[i];
- struct netmap_kring *zkring;
+ struct netmap_zmon_list *z = &kring->zmon_list[t];
u_int j;
for (j = 0; j < kring->n_monitors; j++) {
@@ -404,29 +457,33 @@ netmap_monitor_stop(struct netmap_adapter *na)
netmap_adapter_put(mna->priv.np_na);
mna->priv.np_na = NULL;
}
+ kring->monitors[j] = NULL;
}
- zkring = kring->zmon_list[kring->tx].next;
- if (zkring != NULL) {
- struct netmap_monitor_adapter *next =
- (struct netmap_monitor_adapter *)zkring->na;
- struct netmap_monitor_adapter *this =
- (struct netmap_monitor_adapter *)na;
- struct netmap_adapter *pna = this->priv.np_na;
- /* let the next monitor forget about us */
- if (next->priv.np_na != NULL) {
- netmap_adapter_put(next->priv.np_na);
+ if (!nm_is_zmon(na)) {
+ /* we are the head of at most one list */
+ struct netmap_kring *zkring;
+ for (zkring = z->next; zkring != NULL;
+ zkring = zkring->zmon_list[t].next)
+ {
+ struct netmap_monitor_adapter *next =
+ (struct netmap_monitor_adapter *)zkring->na;
+ /* let the monitor forget about us */
+ netmap_adapter_put(next->priv.np_na); /* nop if null */
+ next->priv.np_na = NULL;
}
- if (pna != NULL && nm_is_zmon(na)) {
- /* we are a monitor ourselves and we may
- * need to pass down the reference to
- * the previous adapter in the chain
- */
- netmap_adapter_get(pna);
- next->priv.np_na = pna;
- continue;
- }
- next->priv.np_na = NULL;
+			/* orphan the zmon list */
+ if (z->next != NULL)
+ z->next->zmon_list[t].prev = NULL;
+ z->next = NULL;
+ z->prev = NULL;
+ }
+
+ if (!nm_monitor_none(kring)) {
+
+ kring->n_monitors = 0;
+ nm_monitor_dealloc(kring);
+ nm_monitor_restore_callbacks(kring);
}
}
}
@@ -455,7 +512,7 @@ netmap_monitor_reg_common(struct netmap_adapter *na, int onoff, int zmon)
return ENXIO;
}
for_rx_tx(t) {
- for (i = 0; i < nma_get_nrings(na, t) + 1; i++) {
+ for (i = 0; i < netmap_all_rings(na, t); i++) {
mkring = NMR(na, t)[i];
if (!nm_kring_pending_on(mkring))
continue;
@@ -477,7 +534,7 @@ netmap_monitor_reg_common(struct netmap_adapter *na, int onoff, int zmon)
if (na->active_fds == 0)
na->na_flags &= ~NAF_NETMAP_ON;
for_rx_tx(t) {
- for (i = 0; i < nma_get_nrings(na, t) + 1; i++) {
+ for (i = 0; i < netmap_all_rings(na, t); i++) {
mkring = NMR(na, t)[i];
if (!nm_kring_pending_off(mkring))
continue;
@@ -495,7 +552,7 @@ netmap_monitor_reg_common(struct netmap_adapter *na, int onoff, int zmon)
continue;
if (mna->flags & nm_txrx2flag(s)) {
kring = NMR(pna, s)[i];
- netmap_monitor_del(mkring, kring);
+ netmap_monitor_del(mkring, kring, s);
}
}
}
@@ -593,6 +650,7 @@ netmap_zmon_parent_sync(struct netmap_kring *kring, int flags, enum txrx tx)
ms->len = s->len;
s->len = tmp;
+ ms->flags = s->flags;
s->flags |= NS_BUF_CHANGED;
beg = nm_next(beg, lim);
@@ -710,6 +768,7 @@ netmap_monitor_parent_sync(struct netmap_kring *kring, u_int first_new, int new_
memcpy(dst, src, copy_len);
ms->len = copy_len;
+ ms->flags = s->flags;
sent++;
beg = nm_next(beg, lim);
@@ -836,7 +895,6 @@ netmap_get_monitor_na(struct nmreq_header *hdr, struct netmap_adapter **na,
struct ifnet *ifp = NULL;
int error;
int zcopy = (req->nr_flags & NR_ZCOPY_MON);
- char monsuff[10] = "";
if (zcopy) {
req->nr_flags |= (NR_MONITOR_TX | NR_MONITOR_RX);
@@ -890,14 +948,11 @@ netmap_get_monitor_na(struct nmreq_header *hdr, struct netmap_adapter **na,
D("ringid error");
goto free_out;
}
- if (mna->priv.np_qlast[NR_TX] - mna->priv.np_qfirst[NR_TX] == 1) {
- snprintf(monsuff, 10, "-%d", mna->priv.np_qfirst[NR_TX]);
- }
- snprintf(mna->up.name, sizeof(mna->up.name), "%s%s/%s%s%s", pna->name,
- monsuff,
+ snprintf(mna->up.name, sizeof(mna->up.name), "%s/%s%s%s#%lu", pna->name,
zcopy ? "z" : "",
(req->nr_flags & NR_MONITOR_RX) ? "r" : "",
- (req->nr_flags & NR_MONITOR_TX) ? "t" : "");
+ (req->nr_flags & NR_MONITOR_TX) ? "t" : "",
+ pna->monitor_id++);
/* the monitor supports the host rings iff the parent does */
mna->up.na_flags |= (pna->na_flags & NAF_HOST_RINGS);
diff --git a/sys/dev/netmap/netmap_pipe.c b/sys/dev/netmap/netmap_pipe.c
index f2ea3815f0e3..fbeee4e641b0 100644
--- a/sys/dev/netmap/netmap_pipe.c
+++ b/sys/dev/netmap/netmap_pipe.c
@@ -1,7 +1,7 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
*
- * Copyright (C) 2014-2016 Giuseppe Lettieri
+ * Copyright (C) 2014-2018 Giuseppe Lettieri
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -185,8 +185,9 @@ int
netmap_pipe_txsync(struct netmap_kring *txkring, int flags)
{
struct netmap_kring *rxkring = txkring->pipe;
- u_int k, lim = txkring->nkr_num_slots - 1;
+ u_int k, lim = txkring->nkr_num_slots - 1, nk;
int m; /* slots to transfer */
+ int complete; /* did we see a complete packet ? */
struct netmap_ring *txring = txkring->ring, *rxring = rxkring->ring;
ND("%p: %s %x -> %s", txkring, txkring->name, flags, rxkring->name);
@@ -194,6 +195,9 @@ netmap_pipe_txsync(struct netmap_kring *txkring, int flags)
txkring->nr_hwcur, txkring->nr_hwtail,
txkring->rcur, txkring->rhead, txkring->rtail);
+ /* update the hwtail */
+ txkring->nr_hwtail = txkring->pipe_tail;
+
m = txkring->rhead - txkring->nr_hwcur; /* new slots */
if (m < 0)
m += txkring->nkr_num_slots;
@@ -203,29 +207,29 @@ netmap_pipe_txsync(struct netmap_kring *txkring, int flags)
return 0;
}
- for (k = txkring->nr_hwcur; m; m--, k = nm_next(k, lim)) {
+ for (k = txkring->nr_hwcur, nk = lim + 1, complete = 0; m;
+ m--, k = nm_next(k, lim), nk = (complete ? k : nk)) {
struct netmap_slot *rs = &rxring->slot[k];
struct netmap_slot *ts = &txring->slot[k];
- rs->len = ts->len;
- rs->ptr = ts->ptr;
-
+ *rs = *ts;
if (ts->flags & NS_BUF_CHANGED) {
- rs->buf_idx = ts->buf_idx;
- rs->flags |= NS_BUF_CHANGED;
ts->flags &= ~NS_BUF_CHANGED;
}
+ complete = !(ts->flags & NS_MOREFRAG);
}
- mb(); /* make sure the slots are updated before publishing them */
- rxkring->nr_hwtail = k;
txkring->nr_hwcur = k;
ND(20, "TX after : hwcur %d hwtail %d cur %d head %d tail %d k %d",
txkring->nr_hwcur, txkring->nr_hwtail,
txkring->rcur, txkring->rhead, txkring->rtail, k);
- rxkring->nm_notify(rxkring, 0);
+ if (likely(nk <= lim)) {
+ mb(); /* make sure the slots are updated before publishing them */
+ rxkring->pipe_tail = nk; /* only publish complete packets */
+ rxkring->nm_notify(rxkring, 0);
+ }
return 0;
}
@@ -243,6 +247,9 @@ netmap_pipe_rxsync(struct netmap_kring *rxkring, int flags)
rxkring->nr_hwcur, rxkring->nr_hwtail,
rxkring->rcur, rxkring->rhead, rxkring->rtail);
+ /* update the hwtail */
+ rxkring->nr_hwtail = rxkring->pipe_tail;
+
m = rxkring->rhead - rxkring->nr_hwcur; /* released slots */
if (m < 0)
m += rxkring->nkr_num_slots;
@@ -264,7 +271,7 @@ netmap_pipe_rxsync(struct netmap_kring *rxkring, int flags)
}
mb(); /* make sure the slots are updated before publishing them */
- txkring->nr_hwtail = nm_prev(k, lim);
+ txkring->pipe_tail = nm_prev(k, lim);
rxkring->nr_hwcur = k;
ND(20, "RX after : hwcur %d hwtail %d cur %d head %d tail %d k %d",
@@ -346,14 +353,19 @@ netmap_pipe_krings_create(struct netmap_adapter *na)
if (error)
goto del_krings1;
- /* cross link the krings */
+ /* cross link the krings and initialize the pipe_tails */
for_rx_tx(t) {
enum txrx r = nm_txrx_swap(t); /* swap NR_TX <-> NR_RX */
for (i = 0; i < nma_get_nrings(na, t); i++) {
- NMR(na, t)[i]->pipe = NMR(ona, r)[i];
- NMR(ona, r)[i]->pipe = NMR(na, t)[i];
+ struct netmap_kring *k1 = NMR(na, t)[i],
+ *k2 = NMR(ona, r)[i];
+ k1->pipe = k2;
+ k2->pipe = k1;
/* mark all peer-adapter rings as fake */
- NMR(ona, r)[i]->nr_kflags |= NKR_FAKERING;
+ k2->nr_kflags |= NKR_FAKERING;
+ /* init tails */
+ k1->pipe_tail = k1->nr_hwtail;
+ k2->pipe_tail = k2->nr_hwtail;
}
}
@@ -436,6 +448,16 @@ netmap_pipe_reg(struct netmap_adapter *na, int onoff)
if (nm_kring_pending_on(kring)) {
struct netmap_kring *sring, *dring;
+ kring->nr_mode = NKR_NETMAP_ON;
+ if ((kring->nr_kflags & NKR_FAKERING) &&
+ (kring->pipe->nr_kflags & NKR_FAKERING)) {
+ /* this is a re-open of a pipe
+ * end-point kept alive by the other end.
+ * We need to leave everything as it is
+ */
+ continue;
+ }
+
/* copy the buffers from the non-fake ring */
if (kring->nr_kflags & NKR_FAKERING) {
sring = kring->pipe;
@@ -556,10 +578,10 @@ cleanup:
if (ring == NULL)
continue;
- if (kring->nr_hwtail == kring->nr_hwcur)
- ring->slot[kring->nr_hwtail].buf_idx = 0;
+ if (kring->tx == NR_RX)
+ ring->slot[kring->pipe_tail].buf_idx = 0;
- for (j = nm_next(kring->nr_hwtail, lim);
+ for (j = nm_next(kring->pipe_tail, lim);
j != kring->nr_hwcur;
j = nm_next(j, lim))
{
diff --git a/sys/dev/netmap/netmap_vale.c b/sys/dev/netmap/netmap_vale.c
index 94ffc2e8df7d..2c526753272c 100644
--- a/sys/dev/netmap/netmap_vale.c
+++ b/sys/dev/netmap/netmap_vale.c
@@ -27,37 +27,6 @@
*/
-/*
- * This module implements the VALE switch for netmap
-
---- VALE SWITCH ---
-
-NMG_LOCK() serializes all modifications to switches and ports.
-A switch cannot be deleted until all ports are gone.
-
-For each switch, an SX lock (RWlock on linux) protects
-deletion of ports. When configuring or deleting a new port, the
-lock is acquired in exclusive mode (after holding NMG_LOCK).
-When forwarding, the lock is acquired in shared mode (without NMG_LOCK).
-The lock is held throughout the entire forwarding cycle,
-during which the thread may incur in a page fault.
-Hence it is important that sleepable shared locks are used.
-
-On the rx ring, the per-port lock is grabbed initially to reserve
-a number of slot in the ring, then the lock is released,
-packets are copied from source to destination, and then
-the lock is acquired again and the receive ring is updated.
-(A similar thing is done on the tx ring for NIC and host stack
-ports attached to the switch)
-
- */
-
-/*
- * OS-specific code that is used only within this file.
- * Other OS-specific code that must be accessed by drivers
- * is present in netmap_kern.h
- */
-
#if defined(__FreeBSD__)
#include <sys/cdefs.h> /* prerequisite */
__FBSDID("$FreeBSD$");
@@ -81,18 +50,7 @@ __FBSDID("$FreeBSD$");
#include <machine/bus.h> /* bus_dmamap_* */
#include <sys/endian.h>
#include <sys/refcount.h>
-
-
-#define BDG_RWLOCK_T struct rwlock // struct rwlock
-
-#define BDG_RWINIT(b) \
- rw_init_flags(&(b)->bdg_lock, "bdg lock", RW_NOWITNESS)
-#define BDG_WLOCK(b) rw_wlock(&(b)->bdg_lock)
-#define BDG_WUNLOCK(b) rw_wunlock(&(b)->bdg_lock)
-#define BDG_RLOCK(b) rw_rlock(&(b)->bdg_lock)
-#define BDG_RTRYLOCK(b) rw_try_rlock(&(b)->bdg_lock)
-#define BDG_RUNLOCK(b) rw_runlock(&(b)->bdg_lock)
-#define BDG_RWDESTROY(b) rw_destroy(&(b)->bdg_lock)
+#include <sys/smp.h>
#elif defined(linux)
@@ -120,6 +78,7 @@ __FBSDID("$FreeBSD$");
#include <net/netmap.h>
#include <dev/netmap/netmap_kern.h>
#include <dev/netmap/netmap_mem2.h>
+#include <dev/netmap/netmap_bdg.h>
#ifdef WITH_VALE
@@ -143,15 +102,11 @@ __FBSDID("$FreeBSD$");
#define NM_BDG_MAXRINGS 16 /* XXX unclear how many. */
#define NM_BDG_MAXSLOTS 4096 /* XXX same as above */
#define NM_BRIDGE_RINGSIZE 1024 /* in the device */
-#define NM_BDG_HASH 1024 /* forwarding table entries */
#define NM_BDG_BATCH 1024 /* entries in the forwarding buffer */
-#define NM_MULTISEG 64 /* max size of a chain of bufs */
/* actual size of the tables */
-#define NM_BDG_BATCH_MAX (NM_BDG_BATCH + NM_MULTISEG)
+#define NM_BDG_BATCH_MAX (NM_BDG_BATCH + NETMAP_MAX_FRAGS)
/* NM_FT_NULL terminates a list of slots in the ft */
#define NM_FT_NULL NM_BDG_BATCH_MAX
-/* Default size for the Maximum Frame Size. */
-#define NM_BDG_MFS_DEFAULT 1514
/*
@@ -168,8 +123,9 @@ SYSEND;
static int netmap_vp_create(struct nmreq_header *hdr, struct ifnet *,
struct netmap_mem_d *nmd, struct netmap_vp_adapter **);
-static int netmap_vp_reg(struct netmap_adapter *na, int onoff);
-static int netmap_bwrap_reg(struct netmap_adapter *, int onoff);
+static int netmap_vp_bdg_attach(const char *, struct netmap_adapter *,
+ struct nm_bridge *);
+static int netmap_vale_bwrap_attach(const char *, struct netmap_adapter *);
/*
* For each output interface, nm_bdg_q is used to construct a list.
@@ -182,98 +138,16 @@ struct nm_bdg_q {
uint32_t bq_len; /* number of buffers */
};
-/* XXX revise this */
-struct nm_hash_ent {
- uint64_t mac; /* the top 2 bytes are the epoch */
- uint64_t ports;
-};
-
/* Holds the default callbacks */
-static struct netmap_bdg_ops default_bdg_ops = {netmap_bdg_learning, NULL, NULL};
-
-/*
- * nm_bridge is a descriptor for a VALE switch.
- * Interfaces for a bridge are all in bdg_ports[].
- * The array has fixed size, an empty entry does not terminate
- * the search, but lookups only occur on attach/detach so we
- * don't mind if they are slow.
- *
- * The bridge is non blocking on the transmit ports: excess
- * packets are dropped if there is no room on the output port.
- *
- * bdg_lock protects accesses to the bdg_ports array.
- * This is a rw lock (or equivalent).
- */
-#define NM_BDG_IFNAMSIZ IFNAMSIZ
-struct nm_bridge {
- /* XXX what is the proper alignment/layout ? */
- BDG_RWLOCK_T bdg_lock; /* protects bdg_ports */
- int bdg_namelen;
- uint32_t bdg_active_ports;
- char bdg_basename[NM_BDG_IFNAMSIZ];
-
- /* Indexes of active ports (up to active_ports)
- * and all other remaining ports.
- */
- uint32_t bdg_port_index[NM_BDG_MAXPORTS];
- /* used by netmap_bdg_detach_common() */
- uint32_t tmp_bdg_port_index[NM_BDG_MAXPORTS];
-
- struct netmap_vp_adapter *bdg_ports[NM_BDG_MAXPORTS];
-
- /*
- * Programmable lookup functions to figure out the destination port.
- * It returns either of an index of the destination port,
- * NM_BDG_BROADCAST to broadcast this packet, or NM_BDG_NOPORT not to
- * forward this packet. ring_nr is the source ring index, and the
- * function may overwrite this value to forward this packet to a
- * different ring index.
- * The function is set by netmap_bdg_regops().
- */
- struct netmap_bdg_ops *bdg_ops;
-
- /*
- * Contains the data structure used by the bdg_ops.lookup function.
- * By default points to *ht which is allocated on attach and used by the default lookup
- * otherwise will point to the data structure received by netmap_bdg_regops().
- */
- void *private_data;
- struct nm_hash_ent *ht;
-
- /* Currently used to specify if the bridge is still in use while empty and
- * if it has been put in exclusive mode by an external module, see netmap_bdg_regops()
- * and netmap_bdg_create().
- */
-#define NM_BDG_ACTIVE 1
-#define NM_BDG_EXCLUSIVE 2
- uint8_t bdg_flags;
-
-
-#ifdef CONFIG_NET_NS
- struct net *ns;
-#endif /* CONFIG_NET_NS */
+struct netmap_bdg_ops vale_bdg_ops = {
+ .lookup = netmap_bdg_learning,
+ .config = NULL,
+ .dtor = NULL,
+ .vp_create = netmap_vp_create,
+ .bwrap_attach = netmap_vale_bwrap_attach,
+ .name = NM_BDG_NAME,
};
-const char*
-netmap_bdg_name(struct netmap_vp_adapter *vp)
-{
- struct nm_bridge *b = vp->na_bdg;
- if (b == NULL)
- return NULL;
- return b->bdg_basename;
-}
-
-
-#ifndef CONFIG_NET_NS
-/*
- * XXX in principle nm_bridges could be created dynamically
- * Right now we have a static array and deletions are protected
- * by an exclusive lock.
- */
-static struct nm_bridge *nm_bridges;
-#endif /* !CONFIG_NET_NS */
-
-
/*
* this is a slightly optimized copy routine which rounds
* to multiple of 64 bytes and is often faster than dealing
@@ -304,107 +178,6 @@ pkt_copy(void *_src, void *_dst, int l)
}
-static int
-nm_is_id_char(const char c)
-{
- return (c >= 'a' && c <= 'z') ||
- (c >= 'A' && c <= 'Z') ||
- (c >= '0' && c <= '9') ||
- (c == '_');
-}
-
-/* Validate the name of a VALE bridge port and return the
- * position of the ":" character. */
-static int
-nm_vale_name_validate(const char *name)
-{
- int colon_pos = -1;
- int i;
-
- if (!name || strlen(name) < strlen(NM_BDG_NAME)) {
- return -1;
- }
-
- for (i = 0; i < NM_BDG_IFNAMSIZ && name[i]; i++) {
- if (name[i] == ':') {
- colon_pos = i;
- break;
- } else if (!nm_is_id_char(name[i])) {
- return -1;
- }
- }
-
- if (strlen(name) - colon_pos > IFNAMSIZ) {
- /* interface name too long */
- return -1;
- }
-
- return colon_pos;
-}
-
-/*
- * locate a bridge among the existing ones.
- * MUST BE CALLED WITH NMG_LOCK()
- *
- * a ':' in the name terminates the bridge name. Otherwise, just NM_NAME.
- * We assume that this is called with a name of at least NM_NAME chars.
- */
-static struct nm_bridge *
-nm_find_bridge(const char *name, int create)
-{
- int i, namelen;
- struct nm_bridge *b = NULL, *bridges;
- u_int num_bridges;
-
- NMG_LOCK_ASSERT();
-
- netmap_bns_getbridges(&bridges, &num_bridges);
-
- namelen = nm_vale_name_validate(name);
- if (namelen < 0) {
- D("invalid bridge name %s", name ? name : NULL);
- return NULL;
- }
-
- /* lookup the name, remember empty slot if there is one */
- for (i = 0; i < num_bridges; i++) {
- struct nm_bridge *x = bridges + i;
-
- if ((x->bdg_flags & NM_BDG_ACTIVE) + x->bdg_active_ports == 0) {
- if (create && b == NULL)
- b = x; /* record empty slot */
- } else if (x->bdg_namelen != namelen) {
- continue;
- } else if (strncmp(name, x->bdg_basename, namelen) == 0) {
- ND("found '%.*s' at %d", namelen, name, i);
- b = x;
- break;
- }
- }
- if (i == num_bridges && b) { /* name not found, can create entry */
- /* initialize the bridge */
- ND("create new bridge %s with ports %d", b->bdg_basename,
- b->bdg_active_ports);
- b->ht = nm_os_malloc(sizeof(struct nm_hash_ent) * NM_BDG_HASH);
- if (b->ht == NULL) {
- D("failed to allocate hash table");
- return NULL;
- }
- strncpy(b->bdg_basename, name, namelen);
- b->bdg_namelen = namelen;
- b->bdg_active_ports = 0;
- for (i = 0; i < NM_BDG_MAXPORTS; i++)
- b->bdg_port_index[i] = i;
- /* set the default function */
- b->bdg_ops = &default_bdg_ops;
- b->private_data = b->ht;
- b->bdg_flags = 0;
- NM_BNS_GET(b);
- }
- return b;
-}
-
-
/*
* Free the forwarding tables for rings attached to switch ports.
*/
@@ -464,99 +237,6 @@ nm_alloc_bdgfwd(struct netmap_adapter *na)
return 0;
}
-static int
-netmap_bdg_free(struct nm_bridge *b)
-{
- if ((b->bdg_flags & NM_BDG_ACTIVE) + b->bdg_active_ports != 0) {
- return EBUSY;
- }
-
- ND("marking bridge %s as free", b->bdg_basename);
- nm_os_free(b->ht);
- b->bdg_ops = NULL;
- b->bdg_flags = 0;
- NM_BNS_PUT(b);
- return 0;
-}
-
-
-/* remove from bridge b the ports in slots hw and sw
- * (sw can be -1 if not needed)
- */
-static void
-netmap_bdg_detach_common(struct nm_bridge *b, int hw, int sw)
-{
- int s_hw = hw, s_sw = sw;
- int i, lim =b->bdg_active_ports;
- uint32_t *tmp = b->tmp_bdg_port_index;
-
- /*
- New algorithm:
- make a copy of bdg_port_index;
- lookup NA(ifp)->bdg_port and SWNA(ifp)->bdg_port
- in the array of bdg_port_index, replacing them with
- entries from the bottom of the array;
- decrement bdg_active_ports;
- acquire BDG_WLOCK() and copy back the array.
- */
-
- if (netmap_verbose)
- D("detach %d and %d (lim %d)", hw, sw, lim);
- /* make a copy of the list of active ports, update it,
- * and then copy back within BDG_WLOCK().
- */
- memcpy(b->tmp_bdg_port_index, b->bdg_port_index, sizeof(b->tmp_bdg_port_index));
- for (i = 0; (hw >= 0 || sw >= 0) && i < lim; ) {
- if (hw >= 0 && tmp[i] == hw) {
- ND("detach hw %d at %d", hw, i);
- lim--; /* point to last active port */
- tmp[i] = tmp[lim]; /* swap with i */
- tmp[lim] = hw; /* now this is inactive */
- hw = -1;
- } else if (sw >= 0 && tmp[i] == sw) {
- ND("detach sw %d at %d", sw, i);
- lim--;
- tmp[i] = tmp[lim];
- tmp[lim] = sw;
- sw = -1;
- } else {
- i++;
- }
- }
- if (hw >= 0 || sw >= 0) {
- D("XXX delete failed hw %d sw %d, should panic...", hw, sw);
- }
-
- BDG_WLOCK(b);
- if (b->bdg_ops->dtor)
- b->bdg_ops->dtor(b->bdg_ports[s_hw]);
- b->bdg_ports[s_hw] = NULL;
- if (s_sw >= 0) {
- b->bdg_ports[s_sw] = NULL;
- }
- memcpy(b->bdg_port_index, b->tmp_bdg_port_index, sizeof(b->tmp_bdg_port_index));
- b->bdg_active_ports = lim;
- BDG_WUNLOCK(b);
-
- ND("now %d active ports", lim);
- netmap_bdg_free(b);
-}
-
-static inline void *
-nm_bdg_get_auth_token(struct nm_bridge *b)
-{
- return b->ht;
-}
-
-/* bridge not in exclusive mode ==> always valid
- * bridge in exclusive mode (created through netmap_bdg_create()) ==> check authentication token
- */
-static inline int
-nm_bdg_valid_auth_token(struct nm_bridge *b, void *auth_token)
-{
- return !(b->bdg_flags & NM_BDG_EXCLUSIVE) || b->ht == auth_token;
-}
-
/* Allows external modules to create bridges in exclusive mode,
* returns an authentication token that the external module will need
* to provide during nm_bdg_ctl_{attach, detach}(), netmap_bdg_regops(),
@@ -564,19 +244,19 @@ nm_bdg_valid_auth_token(struct nm_bridge *b, void *auth_token)
* Successfully executed if ret != NULL and *return_status == 0.
*/
void *
-netmap_bdg_create(const char *bdg_name, int *return_status)
+netmap_vale_create(const char *bdg_name, int *return_status)
{
struct nm_bridge *b = NULL;
void *ret = NULL;
NMG_LOCK();
- b = nm_find_bridge(bdg_name, 0 /* don't create */);
+ b = nm_find_bridge(bdg_name, 0 /* don't create */, NULL);
if (b) {
*return_status = EEXIST;
goto unlock_bdg_create;
}
- b = nm_find_bridge(bdg_name, 1 /* create */);
+ b = nm_find_bridge(bdg_name, 1 /* create */, &vale_bdg_ops);
if (!b) {
*return_status = ENOMEM;
goto unlock_bdg_create;
@@ -595,13 +275,13 @@ unlock_bdg_create:
* netmap_bdg_create(), the bridge must be empty.
*/
int
-netmap_bdg_destroy(const char *bdg_name, void *auth_token)
+netmap_vale_destroy(const char *bdg_name, void *auth_token)
{
struct nm_bridge *b = NULL;
int ret = 0;
NMG_LOCK();
- b = nm_find_bridge(bdg_name, 0 /* don't create */);
+ b = nm_find_bridge(bdg_name, 0 /* don't create */, NULL);
if (!b) {
ret = ENXIO;
goto unlock_bdg_free;
@@ -629,27 +309,6 @@ unlock_bdg_free:
-/* nm_bdg_ctl callback for VALE ports */
-static int
-netmap_vp_bdg_ctl(struct nmreq_header *hdr, struct netmap_adapter *na)
-{
- struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter *)na;
- struct nm_bridge *b = vpna->na_bdg;
-
- if (hdr->nr_reqtype == NETMAP_REQ_VALE_ATTACH) {
- return 0; /* nothing to do */
- }
- if (b) {
- netmap_set_all_rings(na, 0 /* disable */);
- netmap_bdg_detach_common(b, vpna->bdg_port, -1);
- vpna->na_bdg = NULL;
- netmap_set_all_rings(na, 1 /* enable */);
- }
- /* I have took reference just for attach */
- netmap_adapter_put(na);
- return 0;
-}
-
/* nm_dtor callback for ephemeral VALE ports */
static void
netmap_vp_dtor(struct netmap_adapter *na)
@@ -664,7 +323,7 @@ netmap_vp_dtor(struct netmap_adapter *na)
}
if (na->ifp != NULL && !nm_iszombie(na)) {
- WNA(na->ifp) = NULL;
+ NM_DETACH_NA(na->ifp);
if (vpna->autodelete) {
ND("releasing %s", na->ifp->if_xname);
NMG_UNLOCK();
@@ -674,895 +333,6 @@ netmap_vp_dtor(struct netmap_adapter *na)
}
}
-/* creates a persistent VALE port */
-int
-nm_vi_create(struct nmreq_header *hdr)
-{
- struct nmreq_vale_newif *req =
- (struct nmreq_vale_newif *)(uintptr_t)hdr->nr_body;
- int error = 0;
- /* Build a nmreq_register out of the nmreq_vale_newif,
- * so that we can call netmap_get_bdg_na(). */
- struct nmreq_register regreq;
- bzero(&regreq, sizeof(regreq));
- regreq.nr_tx_slots = req->nr_tx_slots;
- regreq.nr_rx_slots = req->nr_rx_slots;
- regreq.nr_tx_rings = req->nr_tx_rings;
- regreq.nr_rx_rings = req->nr_rx_rings;
- regreq.nr_mem_id = req->nr_mem_id;
- hdr->nr_reqtype = NETMAP_REQ_REGISTER;
- hdr->nr_body = (uintptr_t)&regreq;
- error = netmap_vi_create(hdr, 0 /* no autodelete */);
- hdr->nr_reqtype = NETMAP_REQ_VALE_NEWIF;
- hdr->nr_body = (uintptr_t)req;
- /* Write back to the original struct. */
- req->nr_tx_slots = regreq.nr_tx_slots;
- req->nr_rx_slots = regreq.nr_rx_slots;
- req->nr_tx_rings = regreq.nr_tx_rings;
- req->nr_rx_rings = regreq.nr_rx_rings;
- req->nr_mem_id = regreq.nr_mem_id;
- return error;
-}
-
-/* remove a persistent VALE port from the system */
-int
-nm_vi_destroy(const char *name)
-{
- struct ifnet *ifp;
- struct netmap_vp_adapter *vpna;
- int error;
-
- ifp = ifunit_ref(name);
- if (!ifp)
- return ENXIO;
- NMG_LOCK();
- /* make sure this is actually a VALE port */
- if (!NM_NA_VALID(ifp) || NA(ifp)->nm_register != netmap_vp_reg) {
- error = EINVAL;
- goto err;
- }
-
- vpna = (struct netmap_vp_adapter *)NA(ifp);
-
- /* we can only destroy ports that were created via NETMAP_BDG_NEWIF */
- if (vpna->autodelete) {
- error = EINVAL;
- goto err;
- }
-
- /* also make sure that nobody is using the inferface */
- if (NETMAP_OWNED_BY_ANY(&vpna->up) ||
- vpna->up.na_refcount > 1 /* any ref besides the one in nm_vi_create()? */) {
- error = EBUSY;
- goto err;
- }
-
- NMG_UNLOCK();
-
- D("destroying a persistent vale interface %s", ifp->if_xname);
- /* Linux requires all the references are released
- * before unregister
- */
- netmap_detach(ifp);
- if_rele(ifp);
- nm_os_vi_detach(ifp);
- return 0;
-
-err:
- NMG_UNLOCK();
- if_rele(ifp);
- return error;
-}
-
-static int
-nm_update_info(struct nmreq_register *req, struct netmap_adapter *na)
-{
- req->nr_rx_rings = na->num_rx_rings;
- req->nr_tx_rings = na->num_tx_rings;
- req->nr_rx_slots = na->num_rx_desc;
- req->nr_tx_slots = na->num_tx_desc;
- return netmap_mem_get_info(na->nm_mem, &req->nr_memsize, NULL,
- &req->nr_mem_id);
-}
-
-/*
- * Create a virtual interface registered to the system.
- * The interface will be attached to a bridge later.
- */
-int
-netmap_vi_create(struct nmreq_header *hdr, int autodelete)
-{
- struct nmreq_register *req = (struct nmreq_register *)(uintptr_t)hdr->nr_body;
- struct ifnet *ifp;
- struct netmap_vp_adapter *vpna;
- struct netmap_mem_d *nmd = NULL;
- int error;
-
- if (hdr->nr_reqtype != NETMAP_REQ_REGISTER) {
- return EINVAL;
- }
-
- /* don't include VALE prefix */
- if (!strncmp(hdr->nr_name, NM_BDG_NAME, strlen(NM_BDG_NAME)))
- return EINVAL;
- if (strlen(hdr->nr_name) >= IFNAMSIZ) {
- return EINVAL;
- }
- ifp = ifunit_ref(hdr->nr_name);
- if (ifp) { /* already exist, cannot create new one */
- error = EEXIST;
- NMG_LOCK();
- if (NM_NA_VALID(ifp)) {
- int update_err = nm_update_info(req, NA(ifp));
- if (update_err)
- error = update_err;
- }
- NMG_UNLOCK();
- if_rele(ifp);
- return error;
- }
- error = nm_os_vi_persist(hdr->nr_name, &ifp);
- if (error)
- return error;
-
- NMG_LOCK();
- if (req->nr_mem_id) {
- nmd = netmap_mem_find(req->nr_mem_id);
- if (nmd == NULL) {
- error = EINVAL;
- goto err_1;
- }
- }
- /* netmap_vp_create creates a struct netmap_vp_adapter */
- error = netmap_vp_create(hdr, ifp, nmd, &vpna);
- if (error) {
- D("error %d", error);
- goto err_1;
- }
- /* persist-specific routines */
- vpna->up.nm_bdg_ctl = netmap_vp_bdg_ctl;
- if (!autodelete) {
- netmap_adapter_get(&vpna->up);
- } else {
- vpna->autodelete = 1;
- }
- NM_ATTACH_NA(ifp, &vpna->up);
- /* return the updated info */
- error = nm_update_info(req, &vpna->up);
- if (error) {
- goto err_2;
- }
- ND("returning nr_mem_id %d", req->nr_mem_id);
- if (nmd)
- netmap_mem_put(nmd);
- NMG_UNLOCK();
- ND("created %s", ifp->if_xname);
- return 0;
-
-err_2:
- netmap_detach(ifp);
-err_1:
- if (nmd)
- netmap_mem_put(nmd);
- NMG_UNLOCK();
- nm_os_vi_detach(ifp);
-
- return error;
-}
-
-/* Try to get a reference to a netmap adapter attached to a VALE switch.
- * If the adapter is found (or is created), this function returns 0, a
- * non NULL pointer is returned into *na, and the caller holds a
- * reference to the adapter.
- * If an adapter is not found, then no reference is grabbed and the
- * function returns an error code, or 0 if there is just a VALE prefix
- * mismatch. Therefore the caller holds a reference when
- * (*na != NULL && return == 0).
- */
-int
-netmap_get_bdg_na(struct nmreq_header *hdr, struct netmap_adapter **na,
- struct netmap_mem_d *nmd, int create)
-{
- char *nr_name = hdr->nr_name;
- const char *ifname;
- struct ifnet *ifp = NULL;
- int error = 0;
- struct netmap_vp_adapter *vpna, *hostna = NULL;
- struct nm_bridge *b;
- uint32_t i, j;
- uint32_t cand = NM_BDG_NOPORT, cand2 = NM_BDG_NOPORT;
- int needed;
-
- *na = NULL; /* default return value */
-
- /* first try to see if this is a bridge port. */
- NMG_LOCK_ASSERT();
- if (strncmp(nr_name, NM_BDG_NAME, sizeof(NM_BDG_NAME) - 1)) {
- return 0; /* no error, but no VALE prefix */
- }
-
- b = nm_find_bridge(nr_name, create);
- if (b == NULL) {
- ND("no bridges available for '%s'", nr_name);
- return (create ? ENOMEM : ENXIO);
- }
- if (strlen(nr_name) < b->bdg_namelen) /* impossible */
- panic("x");
-
- /* Now we are sure that name starts with the bridge's name,
- * lookup the port in the bridge. We need to scan the entire
- * list. It is not important to hold a WLOCK on the bridge
- * during the search because NMG_LOCK already guarantees
- * that there are no other possible writers.
- */
-
- /* lookup in the local list of ports */
- for (j = 0; j < b->bdg_active_ports; j++) {
- i = b->bdg_port_index[j];
- vpna = b->bdg_ports[i];
- ND("checking %s", vpna->up.name);
- if (!strcmp(vpna->up.name, nr_name)) {
- netmap_adapter_get(&vpna->up);
- ND("found existing if %s refs %d", nr_name)
- *na = &vpna->up;
- return 0;
- }
- }
- /* not found, should we create it? */
- if (!create)
- return ENXIO;
- /* yes we should, see if we have space to attach entries */
- needed = 2; /* in some cases we only need 1 */
- if (b->bdg_active_ports + needed >= NM_BDG_MAXPORTS) {
- D("bridge full %d, cannot create new port", b->bdg_active_ports);
- return ENOMEM;
- }
- /* record the next two ports available, but do not allocate yet */
- cand = b->bdg_port_index[b->bdg_active_ports];
- cand2 = b->bdg_port_index[b->bdg_active_ports + 1];
- ND("+++ bridge %s port %s used %d avail %d %d",
- b->bdg_basename, ifname, b->bdg_active_ports, cand, cand2);
-
- /*
- * try see if there is a matching NIC with this name
- * (after the bridge's name)
- */
- ifname = nr_name + b->bdg_namelen + 1;
- ifp = ifunit_ref(ifname);
- if (!ifp) {
- /* Create an ephemeral virtual port.
- * This block contains all the ephemeral-specific logic.
- */
-
- if (hdr->nr_reqtype != NETMAP_REQ_REGISTER) {
- error = EINVAL;
- goto out;
- }
-
- /* bdg_netmap_attach creates a struct netmap_adapter */
- error = netmap_vp_create(hdr, NULL, nmd, &vpna);
- if (error) {
- D("error %d", error);
- goto out;
- }
- /* shortcut - we can skip get_hw_na(),
- * ownership check and nm_bdg_attach()
- */
-
- } else {
- struct netmap_adapter *hw;
-
- /* the vale:nic syntax is only valid for some commands */
- switch (hdr->nr_reqtype) {
- case NETMAP_REQ_VALE_ATTACH:
- case NETMAP_REQ_VALE_DETACH:
- case NETMAP_REQ_VALE_POLLING_ENABLE:
- case NETMAP_REQ_VALE_POLLING_DISABLE:
- break; /* ok */
- default:
- error = EINVAL;
- goto out;
- }
-
- error = netmap_get_hw_na(ifp, nmd, &hw);
- if (error || hw == NULL)
- goto out;
-
- /* host adapter might not be created */
- error = hw->nm_bdg_attach(nr_name, hw);
- if (error)
- goto out;
- vpna = hw->na_vp;
- hostna = hw->na_hostvp;
- if (hdr->nr_reqtype == NETMAP_REQ_VALE_ATTACH) {
- /* Check if we need to skip the host rings. */
- struct nmreq_vale_attach *areq =
- (struct nmreq_vale_attach *)(uintptr_t)hdr->nr_body;
- if (areq->reg.nr_mode != NR_REG_NIC_SW) {
- hostna = NULL;
- }
- }
- }
-
- BDG_WLOCK(b);
- vpna->bdg_port = cand;
- ND("NIC %p to bridge port %d", vpna, cand);
- /* bind the port to the bridge (virtual ports are not active) */
- b->bdg_ports[cand] = vpna;
- vpna->na_bdg = b;
- b->bdg_active_ports++;
- if (hostna != NULL) {
- /* also bind the host stack to the bridge */
- b->bdg_ports[cand2] = hostna;
- hostna->bdg_port = cand2;
- hostna->na_bdg = b;
- b->bdg_active_ports++;
- ND("host %p to bridge port %d", hostna, cand2);
- }
- ND("if %s refs %d", ifname, vpna->up.na_refcount);
- BDG_WUNLOCK(b);
- *na = &vpna->up;
- netmap_adapter_get(*na);
-
-out:
- if (ifp)
- if_rele(ifp);
-
- return error;
-}
-
-/* Process NETMAP_REQ_VALE_ATTACH.
- */
-int
-nm_bdg_ctl_attach(struct nmreq_header *hdr, void *auth_token)
-{
- struct nmreq_vale_attach *req =
- (struct nmreq_vale_attach *)(uintptr_t)hdr->nr_body;
- struct netmap_vp_adapter * vpna;
- struct netmap_adapter *na;
- struct netmap_mem_d *nmd = NULL;
- struct nm_bridge *b = NULL;
- int error;
-
- NMG_LOCK();
- /* permission check for modified bridges */
- b = nm_find_bridge(hdr->nr_name, 0 /* don't create */);
- if (b && !nm_bdg_valid_auth_token(b, auth_token)) {
- error = EACCES;
- goto unlock_exit;
- }
-
- if (req->reg.nr_mem_id) {
- nmd = netmap_mem_find(req->reg.nr_mem_id);
- if (nmd == NULL) {
- error = EINVAL;
- goto unlock_exit;
- }
- }
-
- /* check for existing one */
- error = netmap_get_bdg_na(hdr, &na, nmd, 0);
- if (!error) {
- error = EBUSY;
- goto unref_exit;
- }
- error = netmap_get_bdg_na(hdr, &na,
- nmd, 1 /* create if not exists */);
- if (error) { /* no device */
- goto unlock_exit;
- }
-
- if (na == NULL) { /* VALE prefix missing */
- error = EINVAL;
- goto unlock_exit;
- }
-
- if (NETMAP_OWNED_BY_ANY(na)) {
- error = EBUSY;
- goto unref_exit;
- }
-
- if (na->nm_bdg_ctl) {
- /* nop for VALE ports. The bwrap needs to put the hwna
- * in netmap mode (see netmap_bwrap_bdg_ctl)
- */
- error = na->nm_bdg_ctl(hdr, na);
- if (error)
- goto unref_exit;
- ND("registered %s to netmap-mode", na->name);
- }
- vpna = (struct netmap_vp_adapter *)na;
- req->port_index = vpna->bdg_port;
- NMG_UNLOCK();
- return 0;
-
-unref_exit:
- netmap_adapter_put(na);
-unlock_exit:
- NMG_UNLOCK();
- return error;
-}
-
-static inline int
-nm_is_bwrap(struct netmap_adapter *na)
-{
- return na->nm_register == netmap_bwrap_reg;
-}
-
-/* Process NETMAP_REQ_VALE_DETACH.
- */
-int
-nm_bdg_ctl_detach(struct nmreq_header *hdr, void *auth_token)
-{
- struct nmreq_vale_detach *nmreq_det = (void *)(uintptr_t)hdr->nr_body;
- struct netmap_vp_adapter *vpna;
- struct netmap_adapter *na;
- struct nm_bridge *b = NULL;
- int error;
-
- NMG_LOCK();
- /* permission check for modified bridges */
- b = nm_find_bridge(hdr->nr_name, 0 /* don't create */);
- if (b && !nm_bdg_valid_auth_token(b, auth_token)) {
- error = EACCES;
- goto unlock_exit;
- }
-
- error = netmap_get_bdg_na(hdr, &na, NULL, 0 /* don't create */);
- if (error) { /* no device, or another bridge or user owns the device */
- goto unlock_exit;
- }
-
- if (na == NULL) { /* VALE prefix missing */
- error = EINVAL;
- goto unlock_exit;
- } else if (nm_is_bwrap(na) &&
- ((struct netmap_bwrap_adapter *)na)->na_polling_state) {
- /* Don't detach a NIC with polling */
- error = EBUSY;
- goto unref_exit;
- }
-
- vpna = (struct netmap_vp_adapter *)na;
- if (na->na_vp != vpna) {
- /* trying to detach first attach of VALE persistent port attached
- * to 2 bridges
- */
- error = EBUSY;
- goto unref_exit;
- }
- nmreq_det->port_index = vpna->bdg_port;
-
- if (na->nm_bdg_ctl) {
- /* remove the port from bridge. The bwrap
- * also needs to put the hwna in normal mode
- */
- error = na->nm_bdg_ctl(hdr, na);
- }
-
-unref_exit:
- netmap_adapter_put(na);
-unlock_exit:
- NMG_UNLOCK();
- return error;
-
-}
-
-struct nm_bdg_polling_state;
-struct
-nm_bdg_kthread {
- struct nm_kctx *nmk;
- u_int qfirst;
- u_int qlast;
- struct nm_bdg_polling_state *bps;
-};
-
-struct nm_bdg_polling_state {
- bool configured;
- bool stopped;
- struct netmap_bwrap_adapter *bna;
- uint32_t mode;
- u_int qfirst;
- u_int qlast;
- u_int cpu_from;
- u_int ncpus;
- struct nm_bdg_kthread *kthreads;
-};
-
-static void
-netmap_bwrap_polling(void *data, int is_kthread)
-{
- struct nm_bdg_kthread *nbk = data;
- struct netmap_bwrap_adapter *bna;
- u_int qfirst, qlast, i;
- struct netmap_kring **kring0, *kring;
-
- if (!nbk)
- return;
- qfirst = nbk->qfirst;
- qlast = nbk->qlast;
- bna = nbk->bps->bna;
- kring0 = NMR(bna->hwna, NR_RX);
-
- for (i = qfirst; i < qlast; i++) {
- kring = kring0[i];
- kring->nm_notify(kring, 0);
- }
-}
-
-static int
-nm_bdg_create_kthreads(struct nm_bdg_polling_state *bps)
-{
- struct nm_kctx_cfg kcfg;
- int i, j;
-
- bps->kthreads = nm_os_malloc(sizeof(struct nm_bdg_kthread) * bps->ncpus);
- if (bps->kthreads == NULL)
- return ENOMEM;
-
- bzero(&kcfg, sizeof(kcfg));
- kcfg.worker_fn = netmap_bwrap_polling;
- kcfg.use_kthread = 1;
- for (i = 0; i < bps->ncpus; i++) {
- struct nm_bdg_kthread *t = bps->kthreads + i;
- int all = (bps->ncpus == 1 &&
- bps->mode == NETMAP_POLLING_MODE_SINGLE_CPU);
- int affinity = bps->cpu_from + i;
-
- t->bps = bps;
- t->qfirst = all ? bps->qfirst /* must be 0 */: affinity;
- t->qlast = all ? bps->qlast : t->qfirst + 1;
- D("kthread %d a:%u qf:%u ql:%u", i, affinity, t->qfirst,
- t->qlast);
-
- kcfg.type = i;
- kcfg.worker_private = t;
- t->nmk = nm_os_kctx_create(&kcfg, NULL);
- if (t->nmk == NULL) {
- goto cleanup;
- }
- nm_os_kctx_worker_setaff(t->nmk, affinity);
- }
- return 0;
-
-cleanup:
- for (j = 0; j < i; j++) {
- struct nm_bdg_kthread *t = bps->kthreads + i;
- nm_os_kctx_destroy(t->nmk);
- }
- nm_os_free(bps->kthreads);
- return EFAULT;
-}
-
-/* A variant of ptnetmap_start_kthreads() */
-static int
-nm_bdg_polling_start_kthreads(struct nm_bdg_polling_state *bps)
-{
- int error, i, j;
-
- if (!bps) {
- D("polling is not configured");
- return EFAULT;
- }
- bps->stopped = false;
-
- for (i = 0; i < bps->ncpus; i++) {
- struct nm_bdg_kthread *t = bps->kthreads + i;
- error = nm_os_kctx_worker_start(t->nmk);
- if (error) {
- D("error in nm_kthread_start()");
- goto cleanup;
- }
- }
- return 0;
-
-cleanup:
- for (j = 0; j < i; j++) {
- struct nm_bdg_kthread *t = bps->kthreads + i;
- nm_os_kctx_worker_stop(t->nmk);
- }
- bps->stopped = true;
- return error;
-}
-
-static void
-nm_bdg_polling_stop_delete_kthreads(struct nm_bdg_polling_state *bps)
-{
- int i;
-
- if (!bps)
- return;
-
- for (i = 0; i < bps->ncpus; i++) {
- struct nm_bdg_kthread *t = bps->kthreads + i;
- nm_os_kctx_worker_stop(t->nmk);
- nm_os_kctx_destroy(t->nmk);
- }
- bps->stopped = true;
-}
-
-static int
-get_polling_cfg(struct nmreq_vale_polling *req, struct netmap_adapter *na,
- struct nm_bdg_polling_state *bps)
-{
- unsigned int avail_cpus, core_from;
- unsigned int qfirst, qlast;
- uint32_t i = req->nr_first_cpu_id;
- uint32_t req_cpus = req->nr_num_polling_cpus;
-
- avail_cpus = nm_os_ncpus();
-
- if (req_cpus == 0) {
- D("req_cpus must be > 0");
- return EINVAL;
- } else if (req_cpus >= avail_cpus) {
- D("Cannot use all the CPUs in the system");
- return EINVAL;
- }
-
- if (req->nr_mode == NETMAP_POLLING_MODE_MULTI_CPU) {
- /* Use a separate core for each ring. If nr_num_polling_cpus>1
- * more consecutive rings are polled.
- * For example, if nr_first_cpu_id=2 and nr_num_polling_cpus=2,
- * ring 2 and 3 are polled by core 2 and 3, respectively. */
- if (i + req_cpus > nma_get_nrings(na, NR_RX)) {
- D("Rings %u-%u not in range (have %d rings)",
- i, i + req_cpus, nma_get_nrings(na, NR_RX));
- return EINVAL;
- }
- qfirst = i;
- qlast = qfirst + req_cpus;
- core_from = qfirst;
-
- } else if (req->nr_mode == NETMAP_POLLING_MODE_SINGLE_CPU) {
- /* Poll all the rings using a core specified by nr_first_cpu_id.
- * the number of cores must be 1. */
- if (req_cpus != 1) {
- D("ncpus must be 1 for NETMAP_POLLING_MODE_SINGLE_CPU "
- "(was %d)", req_cpus);
- return EINVAL;
- }
- qfirst = 0;
- qlast = nma_get_nrings(na, NR_RX);
- core_from = i;
- } else {
- D("Invalid polling mode");
- return EINVAL;
- }
-
- bps->mode = req->nr_mode;
- bps->qfirst = qfirst;
- bps->qlast = qlast;
- bps->cpu_from = core_from;
- bps->ncpus = req_cpus;
- D("%s qfirst %u qlast %u cpu_from %u ncpus %u",
- req->nr_mode == NETMAP_POLLING_MODE_MULTI_CPU ?
- "MULTI" : "SINGLE",
- qfirst, qlast, core_from, req_cpus);
- return 0;
-}
-
-static int
-nm_bdg_ctl_polling_start(struct nmreq_vale_polling *req, struct netmap_adapter *na)
-{
- struct nm_bdg_polling_state *bps;
- struct netmap_bwrap_adapter *bna;
- int error;
-
- bna = (struct netmap_bwrap_adapter *)na;
- if (bna->na_polling_state) {
- D("ERROR adapter already in polling mode");
- return EFAULT;
- }
-
- bps = nm_os_malloc(sizeof(*bps));
- if (!bps)
- return ENOMEM;
- bps->configured = false;
- bps->stopped = true;
-
- if (get_polling_cfg(req, na, bps)) {
- nm_os_free(bps);
- return EINVAL;
- }
-
- if (nm_bdg_create_kthreads(bps)) {
- nm_os_free(bps);
- return EFAULT;
- }
-
- bps->configured = true;
- bna->na_polling_state = bps;
- bps->bna = bna;
-
- /* disable interrupts if possible */
- nma_intr_enable(bna->hwna, 0);
- /* start kthread now */
- error = nm_bdg_polling_start_kthreads(bps);
- if (error) {
- D("ERROR nm_bdg_polling_start_kthread()");
- nm_os_free(bps->kthreads);
- nm_os_free(bps);
- bna->na_polling_state = NULL;
- nma_intr_enable(bna->hwna, 1);
- }
- return error;
-}
-
-static int
-nm_bdg_ctl_polling_stop(struct netmap_adapter *na)
-{
- struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter *)na;
- struct nm_bdg_polling_state *bps;
-
- if (!bna->na_polling_state) {
- D("ERROR adapter is not in polling mode");
- return EFAULT;
- }
- bps = bna->na_polling_state;
- nm_bdg_polling_stop_delete_kthreads(bna->na_polling_state);
- bps->configured = false;
- nm_os_free(bps);
- bna->na_polling_state = NULL;
- /* reenable interrupts */
- nma_intr_enable(bna->hwna, 1);
- return 0;
-}
-
-int
-nm_bdg_polling(struct nmreq_header *hdr)
-{
- struct nmreq_vale_polling *req =
- (struct nmreq_vale_polling *)(uintptr_t)hdr->nr_body;
- struct netmap_adapter *na = NULL;
- int error = 0;
-
- NMG_LOCK();
- error = netmap_get_bdg_na(hdr, &na, NULL, /*create=*/0);
- if (na && !error) {
- if (!nm_is_bwrap(na)) {
- error = EOPNOTSUPP;
- } else if (hdr->nr_reqtype == NETMAP_BDG_POLLING_ON) {
- error = nm_bdg_ctl_polling_start(req, na);
- if (!error)
- netmap_adapter_get(na);
- } else {
- error = nm_bdg_ctl_polling_stop(na);
- if (!error)
- netmap_adapter_put(na);
- }
- netmap_adapter_put(na);
- } else if (!na && !error) {
- /* Not VALE port. */
- error = EINVAL;
- }
- NMG_UNLOCK();
-
- return error;
-}
-
-/* Process NETMAP_REQ_VALE_LIST. */
-int
-netmap_bdg_list(struct nmreq_header *hdr)
-{
- struct nmreq_vale_list *req =
- (struct nmreq_vale_list *)(uintptr_t)hdr->nr_body;
- int namelen = strlen(hdr->nr_name);
- struct nm_bridge *b, *bridges;
- struct netmap_vp_adapter *vpna;
- int error = 0, i, j;
- u_int num_bridges;
-
- netmap_bns_getbridges(&bridges, &num_bridges);
-
- /* this is used to enumerate bridges and ports */
- if (namelen) { /* look up indexes of bridge and port */
- if (strncmp(hdr->nr_name, NM_BDG_NAME,
- strlen(NM_BDG_NAME))) {
- return EINVAL;
- }
- NMG_LOCK();
- b = nm_find_bridge(hdr->nr_name, 0 /* don't create */);
- if (!b) {
- NMG_UNLOCK();
- return ENOENT;
- }
-
- req->nr_bridge_idx = b - bridges; /* bridge index */
- req->nr_port_idx = NM_BDG_NOPORT;
- for (j = 0; j < b->bdg_active_ports; j++) {
- i = b->bdg_port_index[j];
- vpna = b->bdg_ports[i];
- if (vpna == NULL) {
- D("This should not happen");
- continue;
- }
- /* the former and the latter identify a
- * virtual port and a NIC, respectively
- */
- if (!strcmp(vpna->up.name, hdr->nr_name)) {
- req->nr_port_idx = i; /* port index */
- break;
- }
- }
- NMG_UNLOCK();
- } else {
- /* return the first non-empty entry starting from
- * bridge nr_arg1 and port nr_arg2.
- *
- * Users can detect the end of the same bridge by
- * seeing the new and old value of nr_arg1, and can
- * detect the end of all the bridge by error != 0
- */
- i = req->nr_bridge_idx;
- j = req->nr_port_idx;
-
- NMG_LOCK();
- for (error = ENOENT; i < NM_BRIDGES; i++) {
- b = bridges + i;
- for ( ; j < NM_BDG_MAXPORTS; j++) {
- if (b->bdg_ports[j] == NULL)
- continue;
- vpna = b->bdg_ports[j];
- /* write back the VALE switch name */
- strncpy(hdr->nr_name, vpna->up.name,
- (size_t)IFNAMSIZ);
- error = 0;
- goto out;
- }
- j = 0; /* following bridges scan from 0 */
- }
- out:
- req->nr_bridge_idx = i;
- req->nr_port_idx = j;
- NMG_UNLOCK();
- }
-
- return error;
-}
-
-/* Called by external kernel modules (e.g., Openvswitch).
- * to set configure/lookup/dtor functions of a VALE instance.
- * Register callbacks to the given bridge. 'name' may be just
- * bridge's name (including ':' if it is not just NM_BDG_NAME).
- *
- * Called without NMG_LOCK.
- */
-
-int
-netmap_bdg_regops(const char *name, struct netmap_bdg_ops *bdg_ops, void *private_data, void *auth_token)
-{
- struct nm_bridge *b;
- int error = 0;
-
- NMG_LOCK();
- b = nm_find_bridge(name, 0 /* don't create */);
- if (!b) {
- error = ENXIO;
- goto unlock_regops;
- }
- if (!nm_bdg_valid_auth_token(b, auth_token)) {
- error = EACCES;
- goto unlock_regops;
- }
-
- BDG_WLOCK(b);
- if (!bdg_ops) {
- /* resetting the bridge */
- bzero(b->ht, sizeof(struct nm_hash_ent) * NM_BDG_HASH);
- b->bdg_ops = &default_bdg_ops;
- b->private_data = b->ht;
- } else {
- /* modifying the bridge */
- b->private_data = private_data;
- b->bdg_ops = bdg_ops;
- }
- BDG_WUNLOCK(b);
-
-unlock_regops:
- NMG_UNLOCK();
- return error;
-}
/* Called by external kernel modules (e.g., Openvswitch).
* to modify the private data previously given to regops().
@@ -1579,7 +349,7 @@ nm_bdg_update_private_data(const char *name, bdg_update_private_data_fn_t callba
int error = 0;
NMG_LOCK();
- b = nm_find_bridge(name, 0 /* don't create */);
+ b = nm_find_bridge(name, 0 /* don't create */, NULL);
if (!b) {
error = EINVAL;
goto unlock_update_priv;
@@ -1598,27 +368,6 @@ unlock_update_priv:
return error;
}
-int
-netmap_bdg_config(struct nm_ifreq *nr)
-{
- struct nm_bridge *b;
- int error = EINVAL;
-
- NMG_LOCK();
- b = nm_find_bridge(nr->nifr_name, 0);
- if (!b) {
- NMG_UNLOCK();
- return error;
- }
- NMG_UNLOCK();
- /* Don't call config() with NMG_LOCK() held */
- BDG_RLOCK(b);
- if (b->bdg_ops->config != NULL)
- error = b->bdg_ops->config(nr);
- BDG_RUNLOCK(b);
- return error;
-}
-
/* nm_krings_create callback for VALE ports.
* Calls the standard netmap_krings_create, then adds leases on rx
@@ -1798,52 +547,6 @@ nm_bridge_rthash(const uint8_t *addr)
#undef mix
-/* nm_register callback for VALE ports */
-static int
-netmap_vp_reg(struct netmap_adapter *na, int onoff)
-{
- struct netmap_vp_adapter *vpna =
- (struct netmap_vp_adapter*)na;
- enum txrx t;
- int i;
-
- /* persistent ports may be put in netmap mode
- * before being attached to a bridge
- */
- if (vpna->na_bdg)
- BDG_WLOCK(vpna->na_bdg);
- if (onoff) {
- for_rx_tx(t) {
- for (i = 0; i < netmap_real_rings(na, t); i++) {
- struct netmap_kring *kring = NMR(na, t)[i];
-
- if (nm_kring_pending_on(kring))
- kring->nr_mode = NKR_NETMAP_ON;
- }
- }
- if (na->active_fds == 0)
- na->na_flags |= NAF_NETMAP_ON;
- /* XXX on FreeBSD, persistent VALE ports should also
- * toggle IFCAP_NETMAP in na->ifp (2014-03-16)
- */
- } else {
- if (na->active_fds == 0)
- na->na_flags &= ~NAF_NETMAP_ON;
- for_rx_tx(t) {
- for (i = 0; i < netmap_real_rings(na, t); i++) {
- struct netmap_kring *kring = NMR(na, t)[i];
-
- if (nm_kring_pending_off(kring))
- kring->nr_mode = NKR_NETMAP_OFF;
- }
- }
- }
- if (vpna->na_bdg)
- BDG_WUNLOCK(vpna->na_bdg);
- return 0;
-}
-
-
/*
* Lookup function for a learning bridge.
* Update the hash table with the source address,
@@ -2361,86 +1064,6 @@ done:
}
-/* rxsync code used by VALE ports nm_rxsync callback and also
- * internally by the brwap
- */
-static int
-netmap_vp_rxsync_locked(struct netmap_kring *kring, int flags)
-{
- struct netmap_adapter *na = kring->na;
- struct netmap_ring *ring = kring->ring;
- u_int nm_i, lim = kring->nkr_num_slots - 1;
- u_int head = kring->rhead;
- int n;
-
- if (head > lim) {
- D("ouch dangerous reset!!!");
- n = netmap_ring_reinit(kring);
- goto done;
- }
-
- /* First part, import newly received packets. */
- /* actually nothing to do here, they are already in the kring */
-
- /* Second part, skip past packets that userspace has released. */
- nm_i = kring->nr_hwcur;
- if (nm_i != head) {
- /* consistency check, but nothing really important here */
- for (n = 0; likely(nm_i != head); n++) {
- struct netmap_slot *slot = &ring->slot[nm_i];
- void *addr = NMB(na, slot);
-
- if (addr == NETMAP_BUF_BASE(kring->na)) { /* bad buf */
- D("bad buffer index %d, ignore ?",
- slot->buf_idx);
- }
- slot->flags &= ~NS_BUF_CHANGED;
- nm_i = nm_next(nm_i, lim);
- }
- kring->nr_hwcur = head;
- }
-
- n = 0;
-done:
- return n;
-}
-
-/*
- * nm_rxsync callback for VALE ports
- * user process reading from a VALE switch.
- * Already protected against concurrent calls from userspace,
- * but we must acquire the queue's lock to protect against
- * writers on the same queue.
- */
-static int
-netmap_vp_rxsync(struct netmap_kring *kring, int flags)
-{
- int n;
-
- mtx_lock(&kring->q_lock);
- n = netmap_vp_rxsync_locked(kring, flags);
- mtx_unlock(&kring->q_lock);
- return n;
-}
-
-
-/* nm_bdg_attach callback for VALE ports
- * The na_vp port is this same netmap_adapter. There is no host port.
- */
-static int
-netmap_vp_bdg_attach(const char *name, struct netmap_adapter *na)
-{
- struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter *)na;
-
- if (vpna->na_bdg) {
- return netmap_bwrap_attach(name, na);
- }
- na->na_vp = vpna;
- strncpy(na->name, name, sizeof(na->name));
- na->na_hostvp = NULL;
- return 0;
-}
-
/* create a netmap_vp_adapter that describes a VALE port.
* Only persistent VALE ports have a non-null ifp.
*/
@@ -2536,635 +1159,270 @@ err:
return error;
}
-/* Bridge wrapper code (bwrap).
- * This is used to connect a non-VALE-port netmap_adapter (hwna) to a
- * VALE switch.
- * The main task is to swap the meaning of tx and rx rings to match the
- * expectations of the VALE switch code (see nm_bdg_flush).
- *
- * The bwrap works by interposing a netmap_bwrap_adapter between the
- * rest of the system and the hwna. The netmap_bwrap_adapter looks like
- * a netmap_vp_adapter to the rest the system, but, internally, it
- * translates all callbacks to what the hwna expects.
- *
- * Note that we have to intercept callbacks coming from two sides:
- *
- * - callbacks coming from the netmap module are intercepted by
- * passing around the netmap_bwrap_adapter instead of the hwna
- *
- * - callbacks coming from outside of the netmap module only know
- * about the hwna. This, however, only happens in interrupt
- * handlers, where only the hwna->nm_notify callback is called.
- * What the bwrap does is to overwrite the hwna->nm_notify callback
- * with its own netmap_bwrap_intr_notify.
- * XXX This assumes that the hwna->nm_notify callback was the
- * standard netmap_notify(), as it is the case for nic adapters.
- * Any additional action performed by hwna->nm_notify will not be
- * performed by netmap_bwrap_intr_notify.
- *
- * Additionally, the bwrap can optionally attach the host rings pair
- * of the wrapped adapter to a different port of the switch.
- */
-
-
-static void
-netmap_bwrap_dtor(struct netmap_adapter *na)
-{
- struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter*)na;
- struct netmap_adapter *hwna = bna->hwna;
- struct nm_bridge *b = bna->up.na_bdg,
- *bh = bna->host.na_bdg;
-
- if (bna->host.up.nm_mem)
- netmap_mem_put(bna->host.up.nm_mem);
-
- if (b) {
- netmap_bdg_detach_common(b, bna->up.bdg_port,
- (bh ? bna->host.bdg_port : -1));
- }
-
- ND("na %p", na);
- na->ifp = NULL;
- bna->host.up.ifp = NULL;
- hwna->na_vp = bna->saved_na_vp;
- hwna->na_hostvp = NULL;
- hwna->na_private = NULL;
- hwna->na_flags &= ~NAF_BUSY;
- netmap_adapter_put(hwna);
-
-}
-
-
-/*
- * Intr callback for NICs connected to a bridge.
- * Simply ignore tx interrupts (maybe we could try to recover space ?)
- * and pass received packets from nic to the bridge.
- *
- * XXX TODO check locking: this is called from the interrupt
- * handler so we should make sure that the interface is not
- * disconnected while passing down an interrupt.
- *
- * Note, no user process can access this NIC or the host stack.
- * The only part of the ring that is significant are the slots,
- * and head/cur/tail are set from the kring as needed
- * (part as a receive ring, part as a transmit ring).
- *
- * callback that overwrites the hwna notify callback.
- * Packets come from the outside or from the host stack and are put on an
- * hwna rx ring.
- * The bridge wrapper then sends the packets through the bridge.
+/* nm_bdg_attach callback for VALE ports
+ * The na_vp port is this same netmap_adapter. There is no host port.
*/
static int
-netmap_bwrap_intr_notify(struct netmap_kring *kring, int flags)
-{
- struct netmap_adapter *na = kring->na;
- struct netmap_bwrap_adapter *bna = na->na_private;
- struct netmap_kring *bkring;
- struct netmap_vp_adapter *vpna = &bna->up;
- u_int ring_nr = kring->ring_id;
- int ret = NM_IRQ_COMPLETED;
- int error;
-
- if (netmap_verbose)
- D("%s %s 0x%x", na->name, kring->name, flags);
-
- bkring = vpna->up.tx_rings[ring_nr];
-
- /* make sure the ring is not disabled */
- if (nm_kr_tryget(kring, 0 /* can't sleep */, NULL)) {
- return EIO;
- }
-
- if (netmap_verbose)
- D("%s head %d cur %d tail %d", na->name,
- kring->rhead, kring->rcur, kring->rtail);
-
- /* simulate a user wakeup on the rx ring
- * fetch packets that have arrived.
- */
- error = kring->nm_sync(kring, 0);
- if (error)
- goto put_out;
- if (kring->nr_hwcur == kring->nr_hwtail) {
- if (netmap_verbose)
- D("how strange, interrupt with no packets on %s",
- na->name);
- goto put_out;
- }
-
- /* new packets are kring->rcur to kring->nr_hwtail, and the bkring
- * had hwcur == bkring->rhead. So advance bkring->rhead to kring->nr_hwtail
- * to push all packets out.
- */
- bkring->rhead = bkring->rcur = kring->nr_hwtail;
-
- netmap_vp_txsync(bkring, flags);
-
- /* mark all buffers as released on this ring */
- kring->rhead = kring->rcur = kring->rtail = kring->nr_hwtail;
- /* another call to actually release the buffers */
- error = kring->nm_sync(kring, 0);
-
- /* The second rxsync may have further advanced hwtail. If this happens,
- * return NM_IRQ_RESCHED, otherwise just return NM_IRQ_COMPLETED. */
- if (kring->rcur != kring->nr_hwtail) {
- ret = NM_IRQ_RESCHED;
- }
-put_out:
- nm_kr_put(kring);
-
- return error ? error : ret;
-}
-
-
-/* nm_register callback for bwrap */
-static int
-netmap_bwrap_reg(struct netmap_adapter *na, int onoff)
+netmap_vp_bdg_attach(const char *name, struct netmap_adapter *na,
+ struct nm_bridge *b)
{
- struct netmap_bwrap_adapter *bna =
- (struct netmap_bwrap_adapter *)na;
- struct netmap_adapter *hwna = bna->hwna;
- struct netmap_vp_adapter *hostna = &bna->host;
- int error, i;
- enum txrx t;
-
- ND("%s %s", na->name, onoff ? "on" : "off");
-
- if (onoff) {
- /* netmap_do_regif has been called on the bwrap na.
- * We need to pass the information about the
- * memory allocator down to the hwna before
- * putting it in netmap mode
- */
- hwna->na_lut = na->na_lut;
-
- if (hostna->na_bdg) {
- /* if the host rings have been attached to switch,
- * we need to copy the memory allocator information
- * in the hostna also
- */
- hostna->up.na_lut = na->na_lut;
- }
-
- }
-
- /* pass down the pending ring state information */
- for_rx_tx(t) {
- for (i = 0; i < nma_get_nrings(na, t) + 1; i++)
- NMR(hwna, t)[i]->nr_pending_mode =
- NMR(na, t)[i]->nr_pending_mode;
- }
-
- /* forward the request to the hwna */
- error = hwna->nm_register(hwna, onoff);
- if (error)
- return error;
+ struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter *)na;
- /* copy up the current ring state information */
- for_rx_tx(t) {
- for (i = 0; i < nma_get_nrings(na, t) + 1; i++) {
- struct netmap_kring *kring = NMR(hwna, t)[i];
- NMR(na, t)[i]->nr_mode = kring->nr_mode;
- }
+ if (b->bdg_ops != &vale_bdg_ops) {
+ return NM_NEED_BWRAP;
}
-
- /* impersonate a netmap_vp_adapter */
- netmap_vp_reg(na, onoff);
- if (hostna->na_bdg)
- netmap_vp_reg(&hostna->up, onoff);
-
- if (onoff) {
- u_int i;
- /* intercept the hwna nm_nofify callback on the hw rings */
- for (i = 0; i < hwna->num_rx_rings; i++) {
- hwna->rx_rings[i]->save_notify = hwna->rx_rings[i]->nm_notify;
- hwna->rx_rings[i]->nm_notify = netmap_bwrap_intr_notify;
- }
- i = hwna->num_rx_rings; /* for safety */
- /* save the host ring notify unconditionally */
- hwna->rx_rings[i]->save_notify = hwna->rx_rings[i]->nm_notify;
- if (hostna->na_bdg) {
- /* also intercept the host ring notify */
- hwna->rx_rings[i]->nm_notify = netmap_bwrap_intr_notify;
- }
- if (na->active_fds == 0)
- na->na_flags |= NAF_NETMAP_ON;
- } else {
- u_int i;
-
- if (na->active_fds == 0)
- na->na_flags &= ~NAF_NETMAP_ON;
-
- /* reset all notify callbacks (including host ring) */
- for (i = 0; i <= hwna->num_rx_rings; i++) {
- hwna->rx_rings[i]->nm_notify = hwna->rx_rings[i]->save_notify;
- hwna->rx_rings[i]->save_notify = NULL;
- }
- hwna->na_lut.lut = NULL;
- hwna->na_lut.plut = NULL;
- hwna->na_lut.objtotal = 0;
- hwna->na_lut.objsize = 0;
-
- /* pass ownership of the netmap rings to the hwna */
- for_rx_tx(t) {
- for (i = 0; i < nma_get_nrings(na, t) + 1; i++) {
- NMR(na, t)[i]->ring = NULL;
- }
- }
-
+ if (vpna->na_bdg) {
+ return NM_NEED_BWRAP;
}
-
+ na->na_vp = vpna;
+ strncpy(na->name, name, sizeof(na->name));
+ na->na_hostvp = NULL;
return 0;
}
-/* nm_config callback for bwrap */
static int
-netmap_bwrap_config(struct netmap_adapter *na, struct nm_config_info *info)
+netmap_vale_bwrap_krings_create(struct netmap_adapter *na)
{
- struct netmap_bwrap_adapter *bna =
- (struct netmap_bwrap_adapter *)na;
- struct netmap_adapter *hwna = bna->hwna;
int error;
- /* Forward the request to the hwna. It may happen that nobody
- * registered hwna yet, so netmap_mem_get_lut() may have not
- * been called yet. */
- error = netmap_mem_get_lut(hwna->nm_mem, &hwna->na_lut);
- if (error)
- return error;
- netmap_update_config(hwna);
- /* swap the results and propagate */
- info->num_tx_rings = hwna->num_rx_rings;
- info->num_tx_descs = hwna->num_rx_desc;
- info->num_rx_rings = hwna->num_tx_rings;
- info->num_rx_descs = hwna->num_tx_desc;
- info->rx_buf_maxsize = hwna->rx_buf_maxsize;
-
- return 0;
-}
-
-
-/* nm_krings_create callback for bwrap */
-static int
-netmap_bwrap_krings_create(struct netmap_adapter *na)
-{
- struct netmap_bwrap_adapter *bna =
- (struct netmap_bwrap_adapter *)na;
- struct netmap_adapter *hwna = bna->hwna;
- struct netmap_adapter *hostna = &bna->host.up;
- int i, error = 0;
- enum txrx t;
-
- ND("%s", na->name);
-
/* impersonate a netmap_vp_adapter */
error = netmap_vp_krings_create(na);
if (error)
return error;
-
- /* also create the hwna krings */
- error = hwna->nm_krings_create(hwna);
- if (error) {
- goto err_del_vp_rings;
- }
-
- /* increment the usage counter for all the hwna krings */
- for_rx_tx(t) {
- for (i = 0; i < nma_get_nrings(hwna, t) + 1; i++) {
- NMR(hwna, t)[i]->users++;
- }
- }
-
- /* now create the actual rings */
- error = netmap_mem_rings_create(hwna);
+ error = netmap_bwrap_krings_create_common(na);
if (error) {
- goto err_dec_users;
- }
-
- /* cross-link the netmap rings
- * The original number of rings comes from hwna,
- * rx rings on one side equals tx rings on the other.
- */
- for_rx_tx(t) {
- enum txrx r = nm_txrx_swap(t); /* swap NR_TX <-> NR_RX */
- for (i = 0; i < nma_get_nrings(hwna, r) + 1; i++) {
- NMR(na, t)[i]->nkr_num_slots = NMR(hwna, r)[i]->nkr_num_slots;
- NMR(na, t)[i]->ring = NMR(hwna, r)[i]->ring;
- }
- }
-
- if (na->na_flags & NAF_HOST_RINGS) {
- /* the hostna rings are the host rings of the bwrap.
- * The corresponding krings must point back to the
- * hostna
- */
- hostna->tx_rings = &na->tx_rings[na->num_tx_rings];
- hostna->tx_rings[0]->na = hostna;
- hostna->rx_rings = &na->rx_rings[na->num_rx_rings];
- hostna->rx_rings[0]->na = hostna;
- }
-
- return 0;
-
-err_dec_users:
- for_rx_tx(t) {
- NMR(hwna, t)[i]->users--;
+ netmap_vp_krings_delete(na);
}
- hwna->nm_krings_delete(hwna);
-err_del_vp_rings:
- netmap_vp_krings_delete(na);
-
return error;
}
-
static void
-netmap_bwrap_krings_delete(struct netmap_adapter *na)
+netmap_vale_bwrap_krings_delete(struct netmap_adapter *na)
{
- struct netmap_bwrap_adapter *bna =
- (struct netmap_bwrap_adapter *)na;
- struct netmap_adapter *hwna = bna->hwna;
- enum txrx t;
- int i;
-
- ND("%s", na->name);
-
- /* decrement the usage counter for all the hwna krings */
- for_rx_tx(t) {
- for (i = 0; i < nma_get_nrings(hwna, t) + 1; i++) {
- NMR(hwna, t)[i]->users--;
- }
- }
-
- /* delete any netmap rings that are no longer needed */
- netmap_mem_rings_delete(hwna);
- hwna->nm_krings_delete(hwna);
+ netmap_bwrap_krings_delete_common(na);
netmap_vp_krings_delete(na);
}
-
-/* notify method for the bridge-->hwna direction */
static int
-netmap_bwrap_notify(struct netmap_kring *kring, int flags)
-{
- struct netmap_adapter *na = kring->na;
- struct netmap_bwrap_adapter *bna = na->na_private;
- struct netmap_adapter *hwna = bna->hwna;
- u_int ring_n = kring->ring_id;
- u_int lim = kring->nkr_num_slots - 1;
- struct netmap_kring *hw_kring;
- int error;
-
- ND("%s: na %s hwna %s",
- (kring ? kring->name : "NULL!"),
- (na ? na->name : "NULL!"),
- (hwna ? hwna->name : "NULL!"));
- hw_kring = hwna->tx_rings[ring_n];
-
- if (nm_kr_tryget(hw_kring, 0, NULL)) {
- return ENXIO;
- }
-
- /* first step: simulate a user wakeup on the rx ring */
- netmap_vp_rxsync(kring, flags);
- ND("%s[%d] PRE rx(c%3d t%3d l%3d) ring(h%3d c%3d t%3d) tx(c%3d ht%3d t%3d)",
- na->name, ring_n,
- kring->nr_hwcur, kring->nr_hwtail, kring->nkr_hwlease,
- ring->head, ring->cur, ring->tail,
- hw_kring->nr_hwcur, hw_kring->nr_hwtail, hw_ring->rtail);
- /* second step: the new packets are sent on the tx ring
- * (which is actually the same ring)
- */
- hw_kring->rhead = hw_kring->rcur = kring->nr_hwtail;
- error = hw_kring->nm_sync(hw_kring, flags);
- if (error)
- goto put_out;
-
- /* third step: now we are back the rx ring */
- /* claim ownership on all hw owned bufs */
- kring->rhead = kring->rcur = nm_next(hw_kring->nr_hwtail, lim); /* skip past reserved slot */
-
- /* fourth step: the user goes to sleep again, causing another rxsync */
- netmap_vp_rxsync(kring, flags);
- ND("%s[%d] PST rx(c%3d t%3d l%3d) ring(h%3d c%3d t%3d) tx(c%3d ht%3d t%3d)",
- na->name, ring_n,
- kring->nr_hwcur, kring->nr_hwtail, kring->nkr_hwlease,
- ring->head, ring->cur, ring->tail,
- hw_kring->nr_hwcur, hw_kring->nr_hwtail, hw_kring->rtail);
-put_out:
- nm_kr_put(hw_kring);
-
- return error ? error : NM_IRQ_COMPLETED;
-}
-
-
-/* nm_bdg_ctl callback for the bwrap.
- * Called on bridge-attach and detach, as an effect of vale-ctl -[ahd].
- * On attach, it needs to provide a fake netmap_priv_d structure and
- * perform a netmap_do_regif() on the bwrap. This will put both the
- * bwrap and the hwna in netmap mode, with the netmap rings shared
- * and cross linked. Moroever, it will start intercepting interrupts
- * directed to hwna.
- */
-static int
-netmap_bwrap_bdg_ctl(struct nmreq_header *hdr, struct netmap_adapter *na)
-{
- struct netmap_priv_d *npriv;
- struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter*)na;
- int error = 0;
-
- if (hdr->nr_reqtype == NETMAP_REQ_VALE_ATTACH) {
- struct nmreq_vale_attach *req =
- (struct nmreq_vale_attach *)(uintptr_t)hdr->nr_body;
- if (req->reg.nr_ringid != 0 ||
- (req->reg.nr_mode != NR_REG_ALL_NIC &&
- req->reg.nr_mode != NR_REG_NIC_SW)) {
- /* We only support attaching all the NIC rings
- * and/or the host stack. */
- return EINVAL;
- }
- if (NETMAP_OWNED_BY_ANY(na)) {
- return EBUSY;
- }
- if (bna->na_kpriv) {
- /* nothing to do */
- return 0;
- }
- npriv = netmap_priv_new();
- if (npriv == NULL)
- return ENOMEM;
- npriv->np_ifp = na->ifp; /* let the priv destructor release the ref */
- error = netmap_do_regif(npriv, na, req->reg.nr_mode,
- req->reg.nr_ringid, req->reg.nr_flags);
- if (error) {
- netmap_priv_delete(npriv);
- return error;
- }
- bna->na_kpriv = npriv;
- na->na_flags |= NAF_BUSY;
- } else {
- if (na->active_fds == 0) /* not registered */
- return EINVAL;
- netmap_priv_delete(bna->na_kpriv);
- bna->na_kpriv = NULL;
- na->na_flags &= ~NAF_BUSY;
- }
-
- return error;
-}
-
-/* attach a bridge wrapper to the 'real' device */
-int
-netmap_bwrap_attach(const char *nr_name, struct netmap_adapter *hwna)
+netmap_vale_bwrap_attach(const char *nr_name, struct netmap_adapter *hwna)
{
struct netmap_bwrap_adapter *bna;
struct netmap_adapter *na = NULL;
struct netmap_adapter *hostna = NULL;
- int error = 0;
- enum txrx t;
-
- /* make sure the NIC is not already in use */
- if (NETMAP_OWNED_BY_ANY(hwna)) {
- D("NIC %s busy, cannot attach to bridge", hwna->name);
- return EBUSY;
- }
+ int error;
bna = nm_os_malloc(sizeof(*bna));
if (bna == NULL) {
return ENOMEM;
}
-
na = &bna->up.up;
- /* make bwrap ifp point to the real ifp */
- na->ifp = hwna->ifp;
- if_ref(na->ifp);
- na->na_private = bna;
strncpy(na->name, nr_name, sizeof(na->name));
- /* fill the ring data for the bwrap adapter with rx/tx meanings
- * swapped. The real cross-linking will be done during register,
- * when all the krings will have been created.
- */
- for_rx_tx(t) {
- enum txrx r = nm_txrx_swap(t); /* swap NR_TX <-> NR_RX */
- nma_set_nrings(na, t, nma_get_nrings(hwna, r));
- nma_set_ndesc(na, t, nma_get_ndesc(hwna, r));
- }
- na->nm_dtor = netmap_bwrap_dtor;
na->nm_register = netmap_bwrap_reg;
- // na->nm_txsync = netmap_bwrap_txsync;
+ na->nm_txsync = netmap_vp_txsync;
// na->nm_rxsync = netmap_bwrap_rxsync;
- na->nm_config = netmap_bwrap_config;
- na->nm_krings_create = netmap_bwrap_krings_create;
- na->nm_krings_delete = netmap_bwrap_krings_delete;
+ na->nm_krings_create = netmap_vale_bwrap_krings_create;
+ na->nm_krings_delete = netmap_vale_bwrap_krings_delete;
na->nm_notify = netmap_bwrap_notify;
- na->nm_bdg_ctl = netmap_bwrap_bdg_ctl;
- na->pdev = hwna->pdev;
- na->nm_mem = netmap_mem_get(hwna->nm_mem);
- na->virt_hdr_len = hwna->virt_hdr_len;
- na->rx_buf_maxsize = hwna->rx_buf_maxsize;
bna->up.retry = 1; /* XXX maybe this should depend on the hwna */
/* Set the mfs, needed on the VALE mismatch datapath. */
bna->up.mfs = NM_BDG_MFS_DEFAULT;
- bna->hwna = hwna;
- netmap_adapter_get(hwna);
- hwna->na_private = bna; /* weak reference */
- bna->saved_na_vp = hwna->na_vp;
- hwna->na_vp = &bna->up;
- bna->up.up.na_vp = &(bna->up);
-
if (hwna->na_flags & NAF_HOST_RINGS) {
- if (hwna->na_flags & NAF_SW_ONLY)
- na->na_flags |= NAF_SW_ONLY;
- na->na_flags |= NAF_HOST_RINGS;
hostna = &bna->host.up;
- snprintf(hostna->name, sizeof(hostna->name), "%s^", nr_name);
- hostna->ifp = hwna->ifp;
- for_rx_tx(t) {
- enum txrx r = nm_txrx_swap(t);
- nma_set_nrings(hostna, t, 1);
- nma_set_ndesc(hostna, t, nma_get_ndesc(hwna, r));
- }
- // hostna->nm_txsync = netmap_bwrap_host_txsync;
- // hostna->nm_rxsync = netmap_bwrap_host_rxsync;
hostna->nm_notify = netmap_bwrap_notify;
- hostna->nm_mem = netmap_mem_get(na->nm_mem);
- hostna->na_private = bna;
- hostna->na_vp = &bna->up;
- na->na_hostvp = hwna->na_hostvp =
- hostna->na_hostvp = &bna->host;
- hostna->na_flags = NAF_BUSY; /* prevent NIOCREGIF */
- hostna->rx_buf_maxsize = hwna->rx_buf_maxsize;
bna->host.mfs = NM_BDG_MFS_DEFAULT;
}
- ND("%s<->%s txr %d txd %d rxr %d rxd %d",
- na->name, ifp->if_xname,
- na->num_tx_rings, na->num_tx_desc,
- na->num_rx_rings, na->num_rx_desc);
-
- error = netmap_attach_common(na);
+ error = netmap_bwrap_attach_common(na, hwna);
if (error) {
- goto err_free;
+ nm_os_free(bna);
}
- hwna->na_flags |= NAF_BUSY;
- return 0;
-
-err_free:
- hwna->na_vp = hwna->na_hostvp = NULL;
- netmap_adapter_put(hwna);
- nm_os_free(bna);
return error;
-
}
-struct nm_bridge *
-netmap_init_bridges2(u_int n)
+int
+netmap_get_vale_na(struct nmreq_header *hdr, struct netmap_adapter **na,
+ struct netmap_mem_d *nmd, int create)
{
- int i;
- struct nm_bridge *b;
+ return netmap_get_bdg_na(hdr, na, nmd, create, &vale_bdg_ops);
+}
+
- b = nm_os_malloc(sizeof(struct nm_bridge) * n);
- if (b == NULL)
- return NULL;
- for (i = 0; i < n; i++)
- BDG_RWINIT(&b[i]);
- return b;
+/* creates a persistent VALE port */
+int
+nm_vi_create(struct nmreq_header *hdr)
+{
+ struct nmreq_vale_newif *req =
+ (struct nmreq_vale_newif *)(uintptr_t)hdr->nr_body;
+ int error = 0;
+ /* Build a nmreq_register out of the nmreq_vale_newif,
+	 * so that we can call netmap_vi_create(). */
+ struct nmreq_register regreq;
+ bzero(&regreq, sizeof(regreq));
+ regreq.nr_tx_slots = req->nr_tx_slots;
+ regreq.nr_rx_slots = req->nr_rx_slots;
+ regreq.nr_tx_rings = req->nr_tx_rings;
+ regreq.nr_rx_rings = req->nr_rx_rings;
+ regreq.nr_mem_id = req->nr_mem_id;
+ hdr->nr_reqtype = NETMAP_REQ_REGISTER;
+ hdr->nr_body = (uintptr_t)&regreq;
+ error = netmap_vi_create(hdr, 0 /* no autodelete */);
+ hdr->nr_reqtype = NETMAP_REQ_VALE_NEWIF;
+ hdr->nr_body = (uintptr_t)req;
+ /* Write back to the original struct. */
+ req->nr_tx_slots = regreq.nr_tx_slots;
+ req->nr_rx_slots = regreq.nr_rx_slots;
+ req->nr_tx_rings = regreq.nr_tx_rings;
+ req->nr_rx_rings = regreq.nr_rx_rings;
+ req->nr_mem_id = regreq.nr_mem_id;
+ return error;
}
-void
-netmap_uninit_bridges2(struct nm_bridge *b, u_int n)
+/* remove a persistent VALE port from the system */
+int
+nm_vi_destroy(const char *name)
{
- int i;
+ struct ifnet *ifp;
+ struct netmap_vp_adapter *vpna;
+ int error;
- if (b == NULL)
- return;
+ ifp = ifunit_ref(name);
+ if (!ifp)
+ return ENXIO;
+ NMG_LOCK();
+ /* make sure this is actually a VALE port */
+ if (!NM_NA_VALID(ifp) || NA(ifp)->nm_register != netmap_vp_reg) {
+ error = EINVAL;
+ goto err;
+ }
+
+ vpna = (struct netmap_vp_adapter *)NA(ifp);
+
+ /* we can only destroy ports that were created via NETMAP_BDG_NEWIF */
+ if (vpna->autodelete) {
+ error = EINVAL;
+ goto err;
+ }
+
+	/* also make sure that nobody is using the interface */
+ if (NETMAP_OWNED_BY_ANY(&vpna->up) ||
+ vpna->up.na_refcount > 1 /* any ref besides the one in nm_vi_create()? */) {
+ error = EBUSY;
+ goto err;
+ }
+
+ NMG_UNLOCK();
+
+ D("destroying a persistent vale interface %s", ifp->if_xname);
+	/* Linux requires all the references to be released
+	 * before unregistering
+	 */
+ netmap_detach(ifp);
+ if_rele(ifp);
+ nm_os_vi_detach(ifp);
+ return 0;
- for (i = 0; i < n; i++)
- BDG_RWDESTROY(&b[i]);
- nm_os_free(b);
+err:
+ NMG_UNLOCK();
+ if_rele(ifp);
+ return error;
}
-int
-netmap_init_bridges(void)
+static int
+nm_update_info(struct nmreq_register *req, struct netmap_adapter *na)
{
-#ifdef CONFIG_NET_NS
- return netmap_bns_register();
-#else
- nm_bridges = netmap_init_bridges2(NM_BRIDGES);
- if (nm_bridges == NULL)
- return ENOMEM;
- return 0;
-#endif
+ req->nr_rx_rings = na->num_rx_rings;
+ req->nr_tx_rings = na->num_tx_rings;
+ req->nr_rx_slots = na->num_rx_desc;
+ req->nr_tx_slots = na->num_tx_desc;
+ return netmap_mem_get_info(na->nm_mem, &req->nr_memsize, NULL,
+ &req->nr_mem_id);
}
-void
-netmap_uninit_bridges(void)
+
+/*
+ * Create a virtual interface registered to the system.
+ * The interface will be attached to a bridge later.
+ */
+int
+netmap_vi_create(struct nmreq_header *hdr, int autodelete)
{
-#ifdef CONFIG_NET_NS
- netmap_bns_unregister();
-#else
- netmap_uninit_bridges2(nm_bridges, NM_BRIDGES);
-#endif
+ struct nmreq_register *req = (struct nmreq_register *)(uintptr_t)hdr->nr_body;
+ struct ifnet *ifp;
+ struct netmap_vp_adapter *vpna;
+ struct netmap_mem_d *nmd = NULL;
+ int error;
+
+ if (hdr->nr_reqtype != NETMAP_REQ_REGISTER) {
+ return EINVAL;
+ }
+
+ /* don't include VALE prefix */
+ if (!strncmp(hdr->nr_name, NM_BDG_NAME, strlen(NM_BDG_NAME)))
+ return EINVAL;
+ if (strlen(hdr->nr_name) >= IFNAMSIZ) {
+ return EINVAL;
+ }
+ ifp = ifunit_ref(hdr->nr_name);
+ if (ifp) { /* already exist, cannot create new one */
+ error = EEXIST;
+ NMG_LOCK();
+ if (NM_NA_VALID(ifp)) {
+ int update_err = nm_update_info(req, NA(ifp));
+ if (update_err)
+ error = update_err;
+ }
+ NMG_UNLOCK();
+ if_rele(ifp);
+ return error;
+ }
+ error = nm_os_vi_persist(hdr->nr_name, &ifp);
+ if (error)
+ return error;
+
+ NMG_LOCK();
+ if (req->nr_mem_id) {
+ nmd = netmap_mem_find(req->nr_mem_id);
+ if (nmd == NULL) {
+ error = EINVAL;
+ goto err_1;
+ }
+ }
+ /* netmap_vp_create creates a struct netmap_vp_adapter */
+ error = netmap_vp_create(hdr, ifp, nmd, &vpna);
+ if (error) {
+ D("error %d", error);
+ goto err_1;
+ }
+ /* persist-specific routines */
+ vpna->up.nm_bdg_ctl = netmap_vp_bdg_ctl;
+ if (!autodelete) {
+ netmap_adapter_get(&vpna->up);
+ } else {
+ vpna->autodelete = 1;
+ }
+ NM_ATTACH_NA(ifp, &vpna->up);
+ /* return the updated info */
+ error = nm_update_info(req, &vpna->up);
+ if (error) {
+ goto err_2;
+ }
+ ND("returning nr_mem_id %d", req->nr_mem_id);
+ if (nmd)
+ netmap_mem_put(nmd);
+ NMG_UNLOCK();
+ ND("created %s", ifp->if_xname);
+ return 0;
+
+err_2:
+ netmap_detach(ifp);
+err_1:
+ if (nmd)
+ netmap_mem_put(nmd);
+ NMG_UNLOCK();
+ nm_os_vi_detach(ifp);
+
+ return error;
}
+
#endif /* WITH_VALE */
diff --git a/sys/net/netmap.h b/sys/net/netmap.h
index 525b7f05a5e1..5c756526fb34 100644
--- a/sys/net/netmap.h
+++ b/sys/net/netmap.h
@@ -237,6 +237,8 @@ struct netmap_slot {
* are the number of fragments.
*/
+#define NETMAP_MAX_FRAGS 64 /* max number of fragments */
+
/*
* struct netmap_ring
diff --git a/sys/net/netmap_user.h b/sys/net/netmap_user.h
index a314ca02b81c..a86bcb6b6e3e 100644
--- a/sys/net/netmap_user.h
+++ b/sys/net/netmap_user.h
@@ -1029,20 +1029,35 @@ nm_inject(struct nm_desc *d, const void *buf, size_t size)
for (c = 0; c < n ; c++, ri++) {
/* compute current ring to use */
struct netmap_ring *ring;
- uint32_t i, idx;
+ uint32_t i, j, idx;
+ size_t rem;
if (ri > d->last_tx_ring)
ri = d->first_tx_ring;
ring = NETMAP_TXRING(d->nifp, ri);
- if (nm_ring_empty(ring)) {
- continue;
+ rem = size;
+ j = ring->cur;
+ while (rem > ring->nr_buf_size && j != ring->tail) {
+ rem -= ring->nr_buf_size;
+ j = nm_ring_next(ring, j);
}
+ if (j == ring->tail && rem > 0)
+ continue;
i = ring->cur;
+ while (i != j) {
+ idx = ring->slot[i].buf_idx;
+ ring->slot[i].len = ring->nr_buf_size;
+ ring->slot[i].flags = NS_MOREFRAG;
+ nm_pkt_copy(buf, NETMAP_BUF(ring, idx), ring->nr_buf_size);
+ i = nm_ring_next(ring, i);
+ buf = (char *)buf + ring->nr_buf_size;
+ }
idx = ring->slot[i].buf_idx;
- ring->slot[i].len = size;
- nm_pkt_copy(buf, NETMAP_BUF(ring, idx), size);
- d->cur_tx_ring = ri;
+ ring->slot[i].len = rem;
+ ring->slot[i].flags = 0;
+ nm_pkt_copy(buf, NETMAP_BUF(ring, idx), rem);
ring->head = ring->cur = nm_ring_next(ring, i);
+ d->cur_tx_ring = ri;
return size;
}
return 0; /* fail */