aboutsummaryrefslogtreecommitdiff
path: root/sys/dev/netmap/netmap_bdg.c
diff options
context:
space:
mode:
authorVincenzo Maffione <vmaffione@FreeBSD.org>2018-10-23 08:55:16 +0000
committerVincenzo Maffione <vmaffione@FreeBSD.org>2018-10-23 08:55:16 +0000
commit2a7db7a63de8af153b9a626780dd15ca26ae3596 (patch)
tree37c944c0992d8c89dea1efe83fa7d85ae3aabd0f /sys/dev/netmap/netmap_bdg.c
parent60b905ae2f3a31efd6809c790b5a02f574489716 (diff)
Notes
Diffstat (limited to 'sys/dev/netmap/netmap_bdg.c')
-rw-r--r--sys/dev/netmap/netmap_bdg.c1827
1 files changed, 1827 insertions, 0 deletions
diff --git a/sys/dev/netmap/netmap_bdg.c b/sys/dev/netmap/netmap_bdg.c
new file mode 100644
index 0000000000000..dd64b805cbf18
--- /dev/null
+++ b/sys/dev/netmap/netmap_bdg.c
@@ -0,0 +1,1827 @@
+/*
+ * Copyright (C) 2013-2016 Universita` di Pisa
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+
+/*
+ * This module implements the VALE switch for netmap
+
+--- VALE SWITCH ---
+
+NMG_LOCK() serializes all modifications to switches and ports.
+A switch cannot be deleted until all ports are gone.
+
+For each switch, an SX lock (RWlock on linux) protects
+deletion of ports. When configuring or deleting a new port, the
+lock is acquired in exclusive mode (after holding NMG_LOCK).
+When forwarding, the lock is acquired in shared mode (without NMG_LOCK).
+The lock is held throughout the entire forwarding cycle,
+during which the thread may incur a page fault.
+Hence it is important that sleepable shared locks are used.
+
+On the rx ring, the per-port lock is grabbed initially to reserve
+a number of slots in the ring, then the lock is released,
+packets are copied from source to destination, and then
+the lock is acquired again and the receive ring is updated.
+(A similar thing is done on the tx ring for NIC and host stack
+ports attached to the switch)
+
+ */
+
+/*
+ * OS-specific code that is used only within this file.
+ * Other OS-specific code that must be accessed by drivers
+ * is present in netmap_kern.h
+ */
+
+#if defined(__FreeBSD__)
+#include <sys/cdefs.h> /* prerequisite */
+__FBSDID("$FreeBSD$");
+
+#include <sys/types.h>
+#include <sys/errno.h>
+#include <sys/param.h> /* defines used in kernel.h */
+#include <sys/kernel.h> /* types used in module initialization */
+#include <sys/conf.h> /* cdevsw struct, UID, GID */
+#include <sys/sockio.h>
+#include <sys/socketvar.h> /* struct socket */
+#include <sys/malloc.h>
+#include <sys/poll.h>
+#include <sys/rwlock.h>
+#include <sys/socket.h> /* sockaddrs */
+#include <sys/selinfo.h>
+#include <sys/sysctl.h>
+#include <net/if.h>
+#include <net/if_var.h>
+#include <net/bpf.h> /* BIOCIMMEDIATE */
+#include <machine/bus.h> /* bus_dmamap_* */
+#include <sys/endian.h>
+#include <sys/refcount.h>
+#include <sys/smp.h>
+
+
+#elif defined(linux)
+
+#include "bsd_glue.h"
+
+#elif defined(__APPLE__)
+
+#warning OSX support is only partial
+#include "osx_glue.h"
+
+#elif defined(_WIN32)
+#include "win_glue.h"
+
+#else
+
+#error Unsupported platform
+
+#endif /* unsupported */
+
+/*
+ * common headers
+ */
+
+#include <net/netmap.h>
+#include <dev/netmap/netmap_kern.h>
+#include <dev/netmap/netmap_mem2.h>
+
+#include <dev/netmap/netmap_bdg.h>
+
+/*
+ * Return the base name of the bridge that port 'vp' is attached to,
+ * or NULL when the port is not attached to any bridge.
+ */
+const char*
+netmap_bdg_name(struct netmap_vp_adapter *vp)
+{
+	struct nm_bridge *bridge = vp->na_bdg;
+
+	return (bridge != NULL) ? bridge->bdg_basename : NULL;
+}
+
+
+#ifndef CONFIG_NET_NS
+/*
+ * XXX in principle nm_bridges could be created dynamically
+ * Right now we have a static array and deletions are protected
+ * by an exclusive lock.
+ *
+ * This declaration is compiled only when CONFIG_NET_NS is not defined.
+ * NOTE(review): the text above says "static array", but the object
+ * declared here is a pointer; where it is allocated is not visible in
+ * this part of the file.
+ */
+static struct nm_bridge *nm_bridges;
+#endif /* !CONFIG_NET_NS */
+
+
+/* Return 1 if 'c' is an ASCII letter, an ASCII digit or '_', else 0. */
+static int
+nm_is_id_char(const char c)
+{
+	if (c == '_')
+		return 1;
+	if (c >= '0' && c <= '9')
+		return 1;
+	if (c >= 'a' && c <= 'z')
+		return 1;
+	return (c >= 'A' && c <= 'Z');
+}
+
+/*
+ * Validate the name of a VALE bridge port.
+ *
+ * Returns the position of the first ':' in 'name', or -1 when the name
+ * is rejected. A name is rejected when it is NULL, when it is shorter
+ * than NM_BDG_NAME, when a character preceding the ':' is not an
+ * identifier character (see nm_is_id_char()), when no ':' is found
+ * within the first NM_BDG_IFNAMSIZ characters, or when the part of the
+ * name starting at the ':' is longer than IFNAMSIZ characters.
+ */
+static int
+nm_vale_name_validate(const char *name)
+{
+	int pos;
+
+	if (name == NULL || strlen(name) < strlen(NM_BDG_NAME))
+		return -1;
+
+	/* Scan the bridge part of the name, stopping at the first ':'. */
+	for (pos = 0; pos < NM_BDG_IFNAMSIZ && name[pos] != '\0'; pos++) {
+		if (name[pos] == ':')
+			break;
+		if (!nm_is_id_char(name[pos]))
+			return -1;
+	}
+
+	/* The scan ended without meeting a ':'. */
+	if (pos >= NM_BDG_IFNAMSIZ || name[pos] != ':')
+		return -1;
+
+	/* ':' plus what follows it must fit in an interface name. */
+	if (strlen(name) - (size_t)pos > IFNAMSIZ)
+		return -1;
+
+	return pos;
+}
+
+/*
+ * Locate a bridge among the existing ones, optionally creating it.
+ * MUST BE CALLED WITH NMG_LOCK()
+ *
+ * The bridge name is the part of 'name' that precedes the first ':'
+ * (its length is computed by nm_vale_name_validate()).
+ *
+ * name:   port name; must pass nm_vale_name_validate().
+ * create: when non-zero and no bridge matches, the first unused slot
+ *         (if any) is initialized as a new bridge.
+ * ops:    callbacks stored in a newly created bridge.
+ *
+ * Returns the matching (or newly initialized) bridge. Returns NULL when
+ * the name is invalid, when nothing matches and either 'create' is zero
+ * or there is no unused slot, or when the hash table allocation fails.
+ */
+struct nm_bridge *
+nm_find_bridge(const char *name, int create, struct netmap_bdg_ops *ops)
+{
+	int i, namelen;
+	struct nm_bridge *b = NULL, *bridges;
+	u_int num_bridges;
+
+	NMG_LOCK_ASSERT();
+
+	netmap_bns_getbridges(&bridges, &num_bridges);
+
+	namelen = nm_vale_name_validate(name);
+	if (namelen < 0) {
+		/* never hand a NULL pointer to a %s conversion */
+		D("invalid bridge name %s", name ? name : "(null)");
+		return NULL;
+	}
+
+	/* lookup the name, remember empty slot if there is one */
+	for (i = 0; i < num_bridges; i++) {
+		struct nm_bridge *x = bridges + i;
+
+		if ((x->bdg_flags & NM_BDG_ACTIVE) + x->bdg_active_ports == 0) {
+			if (create && b == NULL)
+				b = x;	/* record empty slot */
+		} else if (x->bdg_namelen != namelen) {
+			continue;
+		} else if (strncmp(name, x->bdg_basename, namelen) == 0) {
+			ND("found '%.*s' at %d", namelen, name, i);
+			b = x;
+			break;
+		}
+	}
+	if (i == num_bridges && b) { /* name not found, can create entry */
+		/* initialize the bridge */
+		ND("create new bridge %s with ports %d", b->bdg_basename,
+			b->bdg_active_ports);
+		b->ht = nm_os_malloc(sizeof(struct nm_hash_ent) * NM_BDG_HASH);
+		if (b->ht == NULL) {
+			D("failed to allocate hash table");
+			return NULL;
+		}
+		strncpy(b->bdg_basename, name, namelen);
+		/*
+		 * strncpy() copies exactly 'namelen' bytes here and adds no
+		 * terminator, so a reused slot would keep the tail of a
+		 * longer previous name. Terminate explicitly, because
+		 * netmap_bdg_name() hands this buffer out as a C string.
+		 * NOTE(review): assumes bdg_basename is a char array, so
+		 * that sizeof() yields its capacity - confirm in
+		 * netmap_bdg.h.
+		 */
+		if ((size_t)namelen < sizeof(b->bdg_basename))
+			b->bdg_basename[namelen] = '\0';
+		b->bdg_namelen = namelen;
+		b->bdg_active_ports = 0;
+		for (i = 0; i < NM_BDG_MAXPORTS; i++)
+			b->bdg_port_index[i] = i;
+		/* set the default function */
+		b->bdg_ops = ops;
+		b->private_data = b->ht;
+		b->bdg_flags = 0;
+		NM_BNS_GET(b);
+	}
+	return b;
+}
+
+
+/*
+ * Release the resources of bridge 'b' and mark its slot as unused.
+ *
+ * Returns EBUSY, without touching the bridge, when NM_BDG_ACTIVE is
+ * set in its flags or it still has active ports; returns 0 once the
+ * hash table has been freed and the ops and flags have been cleared.
+ */
+int
+netmap_bdg_free(struct nm_bridge *b)
+{
+	if ((b->bdg_flags & NM_BDG_ACTIVE) + b->bdg_active_ports == 0) {
+		ND("marking bridge %s as free", b->bdg_basename);
+		nm_os_free(b->ht);
+		b->bdg_ops = NULL;
+		b->bdg_flags = 0;
+		NM_BNS_PUT(b);
+		return 0;
+	}
+
+	return EBUSY;
+}
+
+
+/* remove from bridge b the ports in slots hw and sw
+ * (sw can be -1 if not needed)
+ *
+ * The list of active ports is edited on a scratch copy
+ * (b->tmp_bdg_port_index) and copied back under BDG_WLOCK(), together
+ * with the update of bdg_ports[] and bdg_active_ports. The bridge
+ * destructor callback, if any, is invoked on the hw port while the
+ * lock is held. Finally netmap_bdg_free() is called; it does nothing
+ * (EBUSY) while the bridge still has active ports.
+ */
+void
+netmap_bdg_detach_common(struct nm_bridge *b, int hw, int sw)
+{
+	int s_hw = hw, s_sw = sw;
+	int i, lim = b->bdg_active_ports;
+	uint32_t *tmp = b->tmp_bdg_port_index;
+
+	/*
+	New algorithm:
+	make a copy of bdg_port_index;
+	lookup NA(ifp)->bdg_port and SWNA(ifp)->bdg_port
+	in the array of bdg_port_index, replacing them with
+	entries from the bottom of the array;
+	decrement bdg_active_ports;
+	acquire BDG_WLOCK() and copy back the array.
+	 */
+
+	if (netmap_verbose)
+		D("detach %d and %d (lim %d)", hw, sw, lim);
+	/* make a copy of the list of active ports, update it,
+	 * and then copy back within BDG_WLOCK().
+	 */
+	memcpy(b->tmp_bdg_port_index, b->bdg_port_index, sizeof(b->tmp_bdg_port_index));
+	for (i = 0; (hw >= 0 || sw >= 0) && i < lim; ) {
+		if (hw >= 0 && tmp[i] == hw) {
+			ND("detach hw %d at %d", hw, i);
+			lim--; /* point to last active port */
+			tmp[i] = tmp[lim]; /* swap with i */
+			tmp[lim] = hw;	/* now this is inactive */
+			hw = -1;
+		} else if (sw >= 0 && tmp[i] == sw) {
+			ND("detach sw %d at %d", sw, i);
+			lim--;
+			tmp[i] = tmp[lim];
+			tmp[lim] = sw;
+			sw = -1;
+		} else {
+			i++;
+		}
+	}
+	if (hw >= 0 || sw >= 0) {
+		D("XXX delete failed hw %d sw %d, should panic...", hw, sw);
+	}
+
+	BDG_WLOCK(b);
+	/* bdg_ops is NULL after netmap_bdg_regops() has reset the bridge */
+	if (b->bdg_ops != NULL && b->bdg_ops->dtor)
+		b->bdg_ops->dtor(b->bdg_ports[s_hw]);
+	b->bdg_ports[s_hw] = NULL;
+	if (s_sw >= 0) {
+		b->bdg_ports[s_sw] = NULL;
+	}
+	memcpy(b->bdg_port_index, b->tmp_bdg_port_index, sizeof(b->tmp_bdg_port_index));
+	b->bdg_active_ports = lim;
+	BDG_WUNLOCK(b);
+
+	ND("now %d active ports", lim);
+	netmap_bdg_free(b);
+}
+
+
+/*
+ * nm_bdg_ctl callback for VALE ports.
+ *
+ * An attach request needs no work here. For any other request the port
+ * is removed from its bridge (if it has one), with all rings disabled
+ * for the duration of the removal, and the adapter reference taken at
+ * attach time is dropped. Always returns 0.
+ */
+int
+netmap_vp_bdg_ctl(struct nmreq_header *hdr, struct netmap_adapter *na)
+{
+	struct netmap_vp_adapter *vpna;
+	struct nm_bridge *bridge;
+
+	/* nothing to do */
+	if (hdr->nr_reqtype == NETMAP_REQ_VALE_ATTACH)
+		return 0;
+
+	vpna = (struct netmap_vp_adapter *)na;
+	bridge = vpna->na_bdg;
+	if (bridge != NULL) {
+		netmap_set_all_rings(na, 0 /* disable */);
+		netmap_bdg_detach_common(bridge, vpna->bdg_port, -1);
+		vpna->na_bdg = NULL;
+		netmap_set_all_rings(na, 1 /* enable */);
+	}
+
+	/* release the reference that was taken just for the attach */
+	netmap_adapter_put(na);
+
+	return 0;
+}
+
+/*
+ * Default nm_bdg_attach callback: performs no work and unconditionally
+ * returns NM_NEED_BWRAP. All three arguments are ignored.
+ */
+int
+netmap_default_bdg_attach(const char *name, struct netmap_adapter *na,
+		struct nm_bridge *b)
+{
+	return (NM_NEED_BWRAP);
+}
+
+/* Try to get a reference to a netmap adapter attached to a VALE switch.
+ * If the adapter is found (or is created), this function returns 0, a
+ * non NULL pointer is returned into *na, and the caller holds a
+ * reference to the adapter.
+ * If an adapter is not found, then no reference is grabbed and the
+ * function returns an error code, or 0 if there is just a VALE prefix
+ * mismatch. Therefore the caller holds a reference when
+ * (*na != NULL && return == 0).
+ *
+ * hdr:    request header; hdr->nr_name is the port name and
+ *         hdr->nr_reqtype selects which kinds of port are acceptable.
+ * nmd:    memory allocator passed on when a port has to be created.
+ * create: when zero, only already attached ports are looked up.
+ * ops:    bridge callbacks; ops->name is the prefix compared against
+ *         the port name.
+ *
+ * Must be called with NMG_LOCK held.
+ */
+int
+netmap_get_bdg_na(struct nmreq_header *hdr, struct netmap_adapter **na,
+	struct netmap_mem_d *nmd, int create, struct netmap_bdg_ops *ops)
+{
+	char *nr_name = hdr->nr_name;
+	const char *ifname;
+	struct ifnet *ifp = NULL;
+	int error = 0;
+	struct netmap_vp_adapter *vpna, *hostna = NULL;
+	struct nm_bridge *b;
+	uint32_t i, j;
+	uint32_t cand = NM_BDG_NOPORT, cand2 = NM_BDG_NOPORT;
+	int needed;
+
+	*na = NULL;	/* default return value */
+
+	/* first try to see if this is a bridge port. */
+	NMG_LOCK_ASSERT();
+	if (strncmp(nr_name, ops->name, strlen(ops->name) - 1)) {
+		return 0;	/* no error, but no VALE prefix */
+	}
+
+	b = nm_find_bridge(nr_name, create, ops);
+	if (b == NULL) {
+		ND("no bridges available for '%s'", nr_name);
+		return (create ? ENOMEM : ENXIO);
+	}
+	if (strlen(nr_name) < b->bdg_namelen) /* impossible */
+		panic("x");
+
+	/* Now we are sure that name starts with the bridge's name,
+	 * lookup the port in the bridge. We need to scan the entire
+	 * list. It is not important to hold a WLOCK on the bridge
+	 * during the search because NMG_LOCK already guarantees
+	 * that there are no other possible writers.
+	 */
+
+	/* lookup in the local list of ports */
+	for (j = 0; j < b->bdg_active_ports; j++) {
+		i = b->bdg_port_index[j];
+		vpna = b->bdg_ports[i];
+		ND("checking %s", vpna->up.name);
+		if (!strcmp(vpna->up.name, nr_name)) {
+			netmap_adapter_get(&vpna->up);
+			ND("found existing if %s refs %d", nr_name,
+				vpna->up.na_refcount);
+			*na = &vpna->up;
+			return 0;
+		}
+	}
+	/* not found, should we create it? */
+	if (!create)
+		return ENXIO;
+	/* yes we should, see if we have space to attach entries */
+	needed = 2; /* in some cases we only need 1 */
+	if (b->bdg_active_ports + needed >= NM_BDG_MAXPORTS) {
+		D("bridge full %d, cannot create new port", b->bdg_active_ports);
+		return ENOMEM;
+	}
+	/* record the next two ports available, but do not allocate yet */
+	cand = b->bdg_port_index[b->bdg_active_ports];
+	cand2 = b->bdg_port_index[b->bdg_active_ports + 1];
+
+	/*
+	 * try see if there is a matching NIC with this name
+	 * (after the bridge's name)
+	 */
+	/* set ifname before the debug statement below reads it */
+	ifname = nr_name + b->bdg_namelen + 1;
+	ND("+++ bridge %s port %s used %d avail %d %d",
+		b->bdg_basename, ifname, b->bdg_active_ports, cand, cand2);
+	ifp = ifunit_ref(ifname);
+	if (!ifp) {
+		/* Create an ephemeral virtual port.
+		 * This block contains all the ephemeral-specific logic.
+		 */
+
+		if (hdr->nr_reqtype != NETMAP_REQ_REGISTER) {
+			error = EINVAL;
+			goto out;
+		}
+
+		/* bdg_netmap_attach creates a struct netmap_adapter */
+		error = b->bdg_ops->vp_create(hdr, NULL, nmd, &vpna);
+		if (error) {
+			D("error %d", error);
+			goto out;
+		}
+		/* shortcut - we can skip get_hw_na(),
+		 * ownership check and nm_bdg_attach()
+		 */
+
+	} else {
+		struct netmap_adapter *hw;
+
+		/* the vale:nic syntax is only valid for some commands */
+		switch (hdr->nr_reqtype) {
+		case NETMAP_REQ_VALE_ATTACH:
+		case NETMAP_REQ_VALE_DETACH:
+		case NETMAP_REQ_VALE_POLLING_ENABLE:
+		case NETMAP_REQ_VALE_POLLING_DISABLE:
+			break; /* ok */
+		default:
+			error = EINVAL;
+			goto out;
+		}
+
+		error = netmap_get_hw_na(ifp, nmd, &hw);
+		if (error || hw == NULL)
+			goto out;
+
+		/* host adapter might not be created */
+		error = hw->nm_bdg_attach(nr_name, hw, b);
+		if (error == NM_NEED_BWRAP) {
+			error = b->bdg_ops->bwrap_attach(nr_name, hw);
+		}
+		if (error)
+			goto out;
+		vpna = hw->na_vp;
+		hostna = hw->na_hostvp;
+		if (hdr->nr_reqtype == NETMAP_REQ_VALE_ATTACH) {
+			/* Check if we need to skip the host rings. */
+			struct nmreq_vale_attach *areq =
+				(struct nmreq_vale_attach *)(uintptr_t)hdr->nr_body;
+			if (areq->reg.nr_mode != NR_REG_NIC_SW) {
+				hostna = NULL;
+			}
+		}
+	}
+
+	BDG_WLOCK(b);
+	vpna->bdg_port = cand;
+	ND("NIC %p to bridge port %d", vpna, cand);
+	/* bind the port to the bridge (virtual ports are not active) */
+	b->bdg_ports[cand] = vpna;
+	vpna->na_bdg = b;
+	b->bdg_active_ports++;
+	if (hostna != NULL) {
+		/* also bind the host stack to the bridge */
+		b->bdg_ports[cand2] = hostna;
+		hostna->bdg_port = cand2;
+		hostna->na_bdg = b;
+		b->bdg_active_ports++;
+		ND("host %p to bridge port %d", hostna, cand2);
+	}
+	ND("if %s refs %d", ifname, vpna->up.na_refcount);
+	BDG_WUNLOCK(b);
+	*na = &vpna->up;
+	netmap_adapter_get(*na);
+
+out:
+	/* the interface reference obtained from ifunit_ref() is only
+	 * needed for the duration of this call */
+	if (ifp)
+		if_rele(ifp);
+
+	return error;
+}
+
+/*
+ * Process NETMAP_REQ_VALE_ATTACH.
+ *
+ * Runs entirely under NMG_LOCK. On success the bridge port index is
+ * written to req->port_index and the adapter reference obtained from
+ * netmap_get_vale_na() is kept. On failure the reference (if any) is
+ * dropped and an errno value is returned:
+ *   EACCES  the bridge exists and 'auth_token' is not valid for it;
+ *   EINVAL  unknown memory id, or the name lacks the VALE prefix;
+ *   EBUSY   the port is already attached or the adapter is owned;
+ *   other   whatever netmap_get_vale_na() or nm_bdg_ctl returned.
+ */
+int
+nm_bdg_ctl_attach(struct nmreq_header *hdr, void *auth_token)
+{
+	struct nmreq_vale_attach *req =
+		(struct nmreq_vale_attach *)(uintptr_t)hdr->nr_body;
+	struct netmap_adapter *na = NULL;
+	struct netmap_mem_d *nmd = NULL;
+	struct nm_bridge *b;
+	int error;
+
+	NMG_LOCK();
+
+	/* permission check for modified bridges */
+	b = nm_find_bridge(hdr->nr_name, 0 /* don't create */, NULL);
+	if (b != NULL && !nm_bdg_valid_auth_token(b, auth_token)) {
+		error = EACCES;
+		goto out_unlock;
+	}
+
+	if (req->reg.nr_mem_id != 0) {
+		nmd = netmap_mem_find(req->reg.nr_mem_id);
+		if (nmd == NULL) {
+			error = EINVAL;
+			goto out_unlock;
+		}
+	}
+
+	/* the port must not be attached already */
+	error = netmap_get_vale_na(hdr, &na, nmd, 0);
+	if (na != NULL) {
+		error = EBUSY;
+		goto out_put;
+	}
+
+	error = netmap_get_vale_na(hdr, &na,
+				nmd, 1 /* create if not exists */);
+	if (error != 0)		/* no device */
+		goto out_unlock;
+	if (na == NULL) {	/* VALE prefix missing */
+		error = EINVAL;
+		goto out_unlock;
+	}
+	if (NETMAP_OWNED_BY_ANY(na)) {
+		error = EBUSY;
+		goto out_put;
+	}
+
+	if (na->nm_bdg_ctl != NULL) {
+		/* nop for VALE ports. The bwrap needs to put the hwna
+		 * in netmap mode (see netmap_bwrap_bdg_ctl)
+		 */
+		error = na->nm_bdg_ctl(hdr, na);
+		if (error != 0)
+			goto out_put;
+		ND("registered %s to netmap-mode", na->name);
+	}
+
+	req->port_index = ((struct netmap_vp_adapter *)na)->bdg_port;
+	NMG_UNLOCK();
+	return 0;
+
+out_put:
+	netmap_adapter_put(na);
+out_unlock:
+	NMG_UNLOCK();
+	return error;
+}
+
+/*
+ * Return 1 when the nm_register callback of 'na' is netmap_bwrap_reg,
+ * which is how this file recognizes a bridge wrapper; 0 otherwise.
+ */
+static inline int
+nm_is_bwrap(struct netmap_adapter *na)
+{
+	return (na->nm_register == netmap_bwrap_reg) ? 1 : 0;
+}
+
+/*
+ * Process NETMAP_REQ_VALE_DETACH.
+ *
+ * Runs entirely under NMG_LOCK. The reference obtained from
+ * netmap_get_vale_na() is always dropped before returning. Returns 0 or
+ * an errno value:
+ *   EACCES  the bridge exists and 'auth_token' is not valid for it;
+ *   EINVAL  the name lacks the VALE prefix;
+ *   EBUSY   the port is a bwrap with polling enabled, or na->na_vp
+ *           does not point back to this adapter;
+ *   other   whatever netmap_get_vale_na() or nm_bdg_ctl returned.
+ */
+int
+nm_bdg_ctl_detach(struct nmreq_header *hdr, void *auth_token)
+{
+	struct nmreq_vale_detach *nmreq_det = (void *)(uintptr_t)hdr->nr_body;
+	struct netmap_vp_adapter *vpna;
+	struct netmap_adapter *na;
+	struct nm_bridge *b;
+	int error;
+
+	NMG_LOCK();
+
+	/* permission check for modified bridges */
+	b = nm_find_bridge(hdr->nr_name, 0 /* don't create */, NULL);
+	if (b != NULL && !nm_bdg_valid_auth_token(b, auth_token)) {
+		error = EACCES;
+		goto out_unlock;
+	}
+
+	error = netmap_get_vale_na(hdr, &na, NULL, 0 /* don't create */);
+	if (error != 0)	/* no device, or another bridge or user owns the device */
+		goto out_unlock;
+	if (na == NULL) {	/* VALE prefix missing */
+		error = EINVAL;
+		goto out_unlock;
+	}
+
+	vpna = (struct netmap_vp_adapter *)na;
+	if (nm_is_bwrap(na) &&
+	    ((struct netmap_bwrap_adapter *)na)->na_polling_state) {
+		/* Don't detach a NIC with polling */
+		error = EBUSY;
+	} else if (na->na_vp != vpna) {
+		/* trying to detach first attach of VALE persistent port
+		 * attached to 2 bridges
+		 */
+		error = EBUSY;
+	} else {
+		nmreq_det->port_index = vpna->bdg_port;
+		if (na->nm_bdg_ctl != NULL) {
+			/* remove the port from bridge. The bwrap
+			 * also needs to put the hwna in normal mode
+			 */
+			error = na->nm_bdg_ctl(hdr, na);
+		}
+	}
+
+	netmap_adapter_put(na);
+out_unlock:
+	NMG_UNLOCK();
+	return error;
+}
+
+struct nm_bdg_polling_state; /* forward declaration, needed by nm_bdg_kthread.bps */
+struct
+nm_bdg_kthread { /* state of one polling worker */
+ struct nm_kctx *nmk; /* context returned by nm_os_kctx_create() */
+ u_int qfirst; /* first rx ring notified by netmap_bwrap_polling() */
+ u_int qlast; /* one past the last rx ring notified */
+ struct nm_bdg_polling_state *bps; /* polling state this worker belongs to */
+};
+
+struct nm_bdg_polling_state { /* polling configuration of one bwrap adapter */
+ bool configured; /* set once the kthreads have been created */
+ bool stopped; /* true while the workers are not running */
+ struct netmap_bwrap_adapter *bna; /* adapter whose hwna rx rings are polled */
+ uint32_t mode; /* copy of the request's nr_mode (NETMAP_POLLING_MODE_*) */
+ u_int qfirst; /* ring range computed by get_polling_cfg() */
+ u_int qlast;
+ u_int cpu_from; /* affinity of worker i is cpu_from + i */
+ u_int ncpus; /* number of entries in kthreads[] */
+ struct nm_bdg_kthread *kthreads; /* array allocated by nm_bdg_create_kthreads() */
+};
+
+/*
+ * Worker function of the polling kthreads (installed as worker_fn by
+ * nm_bdg_create_kthreads()).
+ *
+ * 'data' is the struct nm_bdg_kthread of the worker; a NULL pointer is
+ * ignored. The nm_notify callback is invoked, with flags 0, on every
+ * hwna rx kring in the range [qfirst, qlast). 'is_kthread' is unused.
+ */
+static void
+netmap_bwrap_polling(void *data, int is_kthread)
+{
+	struct nm_bdg_kthread *nbk = data;
+	struct netmap_kring **rx_krings;
+	u_int ring, first, last;
+
+	if (nbk == NULL)
+		return;
+
+	first = nbk->qfirst;
+	last = nbk->qlast;
+	rx_krings = NMR(nbk->bps->bna->hwna, NR_RX);
+
+	for (ring = first; ring < last; ring++) {
+		struct netmap_kring *kring = rx_krings[ring];
+
+		kring->nm_notify(kring, 0);
+	}
+}
+
+/*
+ * Allocate bps->kthreads and create one kernel context per polling CPU.
+ *
+ * Worker i gets affinity bps->cpu_from + i. With a single worker in
+ * NETMAP_POLLING_MODE_SINGLE_CPU it covers rings [bps->qfirst,
+ * bps->qlast); otherwise each worker covers the single ring whose index
+ * equals its affinity.
+ *
+ * Returns 0 on success, ENOMEM if the array cannot be allocated, or
+ * EFAULT if a context cannot be created; in the latter case the
+ * contexts created so far are destroyed and the array is freed.
+ */
+static int
+nm_bdg_create_kthreads(struct nm_bdg_polling_state *bps)
+{
+	struct nm_kctx_cfg kcfg;
+	int i, j;
+
+	bps->kthreads = nm_os_malloc(sizeof(struct nm_bdg_kthread) * bps->ncpus);
+	if (bps->kthreads == NULL)
+		return ENOMEM;
+
+	bzero(&kcfg, sizeof(kcfg));
+	kcfg.worker_fn = netmap_bwrap_polling;
+	kcfg.use_kthread = 1;
+	for (i = 0; i < bps->ncpus; i++) {
+		struct nm_bdg_kthread *t = bps->kthreads + i;
+		int all = (bps->ncpus == 1 &&
+			bps->mode == NETMAP_POLLING_MODE_SINGLE_CPU);
+		int affinity = bps->cpu_from + i;
+
+		t->bps = bps;
+		t->qfirst = all ? bps->qfirst /* must be 0 */: affinity;
+		t->qlast = all ? bps->qlast : t->qfirst + 1;
+		D("kthread %d a:%u qf:%u ql:%u", i, affinity, t->qfirst,
+			t->qlast);
+
+		kcfg.type = i;
+		kcfg.worker_private = t;
+		t->nmk = nm_os_kctx_create(&kcfg, NULL);
+		if (t->nmk == NULL) {
+			goto cleanup;
+		}
+		nm_os_kctx_worker_setaff(t->nmk, affinity);
+	}
+	return 0;
+
+cleanup:
+	/* destroy the contexts created before the failing one: entries
+	 * 0 .. i-1, hence the index must be j and not i */
+	for (j = 0; j < i; j++) {
+		struct nm_bdg_kthread *t = bps->kthreads + j;
+		nm_os_kctx_destroy(t->nmk);
+	}
+	nm_os_free(bps->kthreads);
+	bps->kthreads = NULL;	/* do not leave a dangling pointer */
+	return EFAULT;
+}
+
+/* A variant of ptnetmap_start_kthreads()
+ *
+ * Start the worker of every context in bps->kthreads.
+ * Returns 0 on success. Returns EFAULT if 'bps' is NULL. If a worker
+ * fails to start, the workers started before it are stopped again,
+ * bps->stopped is set and the error from nm_os_kctx_worker_start() is
+ * returned.
+ */
+static int
+nm_bdg_polling_start_kthreads(struct nm_bdg_polling_state *bps)
+{
+	int error, i, j;
+
+	if (!bps) {
+		D("polling is not configured");
+		return EFAULT;
+	}
+	bps->stopped = false;
+
+	for (i = 0; i < bps->ncpus; i++) {
+		struct nm_bdg_kthread *t = bps->kthreads + i;
+		error = nm_os_kctx_worker_start(t->nmk);
+		if (error) {
+			D("error in nm_kthread_start()");
+			goto cleanup;
+		}
+	}
+	return 0;
+
+cleanup:
+	/* roll back the workers that did start: entries 0 .. i-1,
+	 * hence the index must be j and not i */
+	for (j = 0; j < i; j++) {
+		struct nm_bdg_kthread *t = bps->kthreads + j;
+		nm_os_kctx_worker_stop(t->nmk);
+	}
+	bps->stopped = true;
+	return error;
+}
+
+/*
+ * Stop and destroy every kernel context listed in bps->kthreads, then
+ * mark the polling state as stopped. A NULL 'bps' is ignored.
+ * The kthreads array itself is not freed here.
+ */
+static void
+nm_bdg_polling_stop_delete_kthreads(struct nm_bdg_polling_state *bps)
+{
+	int k;
+
+	if (bps == NULL)
+		return;
+
+	for (k = 0; k < bps->ncpus; k++) {
+		struct nm_kctx *ctx = bps->kthreads[k].nmk;
+
+		nm_os_kctx_worker_stop(ctx);
+		nm_os_kctx_destroy(ctx);
+	}
+	bps->stopped = true;
+}
+
+/*
+ * Validate a polling request and translate it into 'bps'.
+ *
+ * req: user request; nr_first_cpu_id, nr_num_polling_cpus and nr_mode
+ *      are read.
+ * na:  adapter whose number of rx rings bounds the ring range.
+ * bps: on success receives mode, qfirst, qlast, cpu_from and ncpus.
+ *
+ * Returns 0 on success, or EINVAL when the number of CPUs is zero or
+ * not smaller than the number of CPUs in the system, when the mode is
+ * unknown, when the requested rings are out of range (multi-CPU mode),
+ * or when more than one CPU is requested in single-CPU mode.
+ */
+static int
+get_polling_cfg(struct nmreq_vale_polling *req, struct netmap_adapter *na,
+		struct nm_bdg_polling_state *bps)
+{
+	unsigned int avail_cpus, core_from;
+	unsigned int qfirst, qlast;
+	uint32_t i = req->nr_first_cpu_id;
+	uint32_t req_cpus = req->nr_num_polling_cpus;
+
+	avail_cpus = nm_os_ncpus();
+
+	if (req_cpus == 0) {
+		D("req_cpus must be > 0");
+		return EINVAL;
+	} else if (req_cpus >= avail_cpus) {
+		D("Cannot use all the CPUs in the system");
+		return EINVAL;
+	}
+
+	if (req->nr_mode == NETMAP_POLLING_MODE_MULTI_CPU) {
+		/* Use a separate core for each ring. If nr_num_polling_cpus>1
+		 * more consecutive rings are polled.
+		 * For example, if nr_first_cpu_id=2 and nr_num_polling_cpus=2,
+		 * ring 2 and 3 are polled by core 2 and 3, respectively. */
+		unsigned int nrings = nma_get_nrings(na, NR_RX);
+
+		/* Same test as "i + req_cpus > nrings", written so that a
+		 * huge nr_first_cpu_id cannot wrap the 32-bit sum around
+		 * and slip through the check. */
+		if (i > nrings || req_cpus > nrings - i) {
+			D("Rings %u-%u not in range (have %d rings)",
+				i, i + req_cpus, nma_get_nrings(na, NR_RX));
+			return EINVAL;
+		}
+		qfirst = i;
+		qlast = qfirst + req_cpus;
+		core_from = qfirst;
+
+	} else if (req->nr_mode == NETMAP_POLLING_MODE_SINGLE_CPU) {
+		/* Poll all the rings using a core specified by nr_first_cpu_id.
+		 * the number of cores must be 1. */
+		if (req_cpus != 1) {
+			D("ncpus must be 1 for NETMAP_POLLING_MODE_SINGLE_CPU "
+				"(was %d)", req_cpus);
+			return EINVAL;
+		}
+		qfirst = 0;
+		qlast = nma_get_nrings(na, NR_RX);
+		core_from = i;
+	} else {
+		D("Invalid polling mode");
+		return EINVAL;
+	}
+
+	bps->mode = req->nr_mode;
+	bps->qfirst = qfirst;
+	bps->qlast = qlast;
+	bps->cpu_from = core_from;
+	bps->ncpus = req_cpus;
+	D("%s qfirst %u qlast %u cpu_from %u ncpus %u",
+		req->nr_mode == NETMAP_POLLING_MODE_MULTI_CPU ?
+		"MULTI" : "SINGLE",
+		qfirst, qlast, core_from, req_cpus);
+	return 0;
+}
+
+/*
+ * Put the bwrap adapter 'na' in polling mode as described by 'req'.
+ *
+ * Allocates the polling state, creates the polling kthreads, disables
+ * the interrupts of the wrapped hardware adapter and starts the
+ * workers. On any failure everything set up so far is undone.
+ *
+ * Returns 0 on success; EFAULT if the adapter is already polling or
+ * the kthreads cannot be created; ENOMEM if the state cannot be
+ * allocated; EINVAL if the request is rejected by get_polling_cfg();
+ * otherwise the error returned by nm_bdg_polling_start_kthreads().
+ */
+static int
+nm_bdg_ctl_polling_start(struct nmreq_vale_polling *req, struct netmap_adapter *na)
+{
+	struct nm_bdg_polling_state *bps;
+	struct netmap_bwrap_adapter *bna;
+	int error;
+
+	bna = (struct netmap_bwrap_adapter *)na;
+	if (bna->na_polling_state) {
+		D("ERROR adapter already in polling mode");
+		return EFAULT;
+	}
+
+	bps = nm_os_malloc(sizeof(*bps));
+	if (!bps)
+		return ENOMEM;
+	bps->configured = false;
+	bps->stopped = true;
+
+	if (get_polling_cfg(req, na, bps)) {
+		nm_os_free(bps);
+		return EINVAL;
+	}
+
+	if (nm_bdg_create_kthreads(bps)) {
+		nm_os_free(bps);
+		return EFAULT;
+	}
+
+	bps->configured = true;
+	bna->na_polling_state = bps;
+	bps->bna = bna;
+
+	/* disable interrupts if possible */
+	nma_intr_enable(bna->hwna, 0);
+	/* start kthread now */
+	error = nm_bdg_polling_start_kthreads(bps);
+	if (error) {
+		D("ERROR nm_bdg_polling_start_kthread()");
+		/*
+		 * The contexts created by nm_bdg_create_kthreads() are only
+		 * reachable through bps->kthreads, so they must be torn down
+		 * before the array is freed.
+		 * NOTE(review): this relies on nm_os_kctx_worker_stop()
+		 * tolerating a context whose worker is not running - confirm
+		 * against the OS-specific implementation.
+		 */
+		nm_bdg_polling_stop_delete_kthreads(bps);
+		nm_os_free(bps->kthreads);
+		nm_os_free(bps);
+		bna->na_polling_state = NULL;
+		nma_intr_enable(bna->hwna, 1);
+	}
+	return error;
+}
+
+/*
+ * Take the bwrap adapter 'na' out of polling mode: stop and destroy
+ * the polling kthreads, release the polling state and re-enable the
+ * interrupts of the wrapped hardware adapter.
+ *
+ * Returns 0 on success, or EFAULT if the adapter is not polling.
+ */
+static int
+nm_bdg_ctl_polling_stop(struct netmap_adapter *na)
+{
+	struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter *)na;
+	struct nm_bdg_polling_state *bps;
+
+	if (!bna->na_polling_state) {
+		D("ERROR adapter is not in polling mode");
+		return EFAULT;
+	}
+	bps = bna->na_polling_state;
+	nm_bdg_polling_stop_delete_kthreads(bna->na_polling_state);
+	bps->configured = false;
+	/* the kthread array was allocated by nm_bdg_create_kthreads() and
+	 * is not released by the call above: free it before its owner */
+	nm_os_free(bps->kthreads);
+	nm_os_free(bps);
+	bna->na_polling_state = NULL;
+	/* reenable interrupts */
+	nma_intr_enable(bna->hwna, 1);
+	return 0;
+}
+
+/*
+ * Process the requests that enable or disable polling on a VALE port.
+ *
+ * The port named in 'hdr' is looked up (never created) under NMG_LOCK.
+ * Enabling polling takes an additional reference on the adapter, which
+ * is dropped again when polling is disabled; the reference obtained by
+ * the lookup itself is always released before returning.
+ *
+ * Returns 0 on success, EOPNOTSUPP if the port is not a bwrap, EINVAL
+ * if the name is not a VALE port name, or the error reported by the
+ * lookup or by the start/stop helpers.
+ */
+int
+nm_bdg_polling(struct nmreq_header *hdr)
+{
+	struct nmreq_vale_polling *req =
+		(struct nmreq_vale_polling *)(uintptr_t)hdr->nr_body;
+	struct netmap_adapter *na = NULL;
+	int error = 0;
+
+	NMG_LOCK();
+	error = netmap_get_vale_na(hdr, &na, NULL, /*create=*/0);
+	if (na && !error) {
+		if (!nm_is_bwrap(na)) {
+			error = EOPNOTSUPP;
+		} else if (hdr->nr_reqtype == NETMAP_REQ_VALE_POLLING_ENABLE) {
+			/* nr_reqtype carries NETMAP_REQ_* values (see the
+			 * switch in netmap_get_bdg_na()), so it is compared
+			 * with the request type and not with the legacy
+			 * NETMAP_BDG_POLLING_ON command */
+			error = nm_bdg_ctl_polling_start(req, na);
+			if (!error)
+				netmap_adapter_get(na);
+		} else {
+			error = nm_bdg_ctl_polling_stop(na);
+			if (!error)
+				netmap_adapter_put(na);
+		}
+		netmap_adapter_put(na);
+	} else if (!na && !error) {
+		/* Not VALE port. */
+		error = EINVAL;
+	}
+	NMG_UNLOCK();
+
+	return error;
+}
+
+/* Process NETMAP_REQ_VALE_LIST.
+ *
+ * Two modes, selected by hdr->nr_name:
+ * - non-empty name: look up the bridge and the port with that name and
+ *   write their indexes to req->nr_bridge_idx and req->nr_port_idx
+ *   (the latter is NM_BDG_NOPORT if the port is not found);
+ * - empty name: starting from the bridge and port indexes found in the
+ *   request, find the next port in use, write its name to hdr->nr_name
+ *   and its indexes back to the request.
+ *
+ * Returns 0 on success, EINVAL if the name does not start with
+ * NM_BDG_NAME or the starting indexes are negative, ENOENT if the
+ * bridge does not exist or no further port is found.
+ */
+int
+netmap_bdg_list(struct nmreq_header *hdr)
+{
+	struct nmreq_vale_list *req =
+		(struct nmreq_vale_list *)(uintptr_t)hdr->nr_body;
+	int namelen = strlen(hdr->nr_name);
+	struct nm_bridge *b, *bridges;
+	struct netmap_vp_adapter *vpna;
+	int error = 0, i, j;
+	u_int num_bridges;
+
+	netmap_bns_getbridges(&bridges, &num_bridges);
+
+	/* this is used to enumerate bridges and ports */
+	if (namelen) { /* look up indexes of bridge and port */
+		if (strncmp(hdr->nr_name, NM_BDG_NAME,
+					strlen(NM_BDG_NAME))) {
+			return EINVAL;
+		}
+		NMG_LOCK();
+		b = nm_find_bridge(hdr->nr_name, 0 /* don't create */, NULL);
+		if (!b) {
+			NMG_UNLOCK();
+			return ENOENT;
+		}
+
+		req->nr_bridge_idx = b - bridges; /* bridge index */
+		req->nr_port_idx = NM_BDG_NOPORT;
+		for (j = 0; j < b->bdg_active_ports; j++) {
+			i = b->bdg_port_index[j];
+			vpna = b->bdg_ports[i];
+			if (vpna == NULL) {
+				D("This should not happen");
+				continue;
+			}
+			/* the former and the latter identify a
+			 * virtual port and a NIC, respectively
+			 */
+			if (!strcmp(vpna->up.name, hdr->nr_name)) {
+				req->nr_port_idx = i; /* port index */
+				break;
+			}
+		}
+		NMG_UNLOCK();
+	} else {
+		/* return the first non-empty entry starting from
+		 * bridge nr_arg1 and port nr_arg2.
+		 *
+		 * Users can detect the end of the same bridge by
+		 * seeing the new and old value of nr_arg1, and can
+		 * detect the end of all the bridge by error != 0
+		 */
+		i = req->nr_bridge_idx;
+		j = req->nr_port_idx;
+		/*
+		 * The indexes come from the request and are used below as
+		 * array indexes through signed ints: the loops only check
+		 * the upper bounds, so reject negative values here.
+		 * NOTE(review): a negative value is presumably only
+		 * possible if the request fields are wider unsigned types
+		 * - confirm against struct nmreq_vale_list.
+		 */
+		if (i < 0 || j < 0)
+			return EINVAL;
+
+		NMG_LOCK();
+		for (error = ENOENT; i < NM_BRIDGES; i++) {
+			b = bridges + i;
+			for ( ; j < NM_BDG_MAXPORTS; j++) {
+				if (b->bdg_ports[j] == NULL)
+					continue;
+				vpna = b->bdg_ports[j];
+				/* write back the VALE switch name */
+				strncpy(hdr->nr_name, vpna->up.name,
+					(size_t)IFNAMSIZ);
+				error = 0;
+				goto out;
+			}
+			j = 0; /* following bridges scan from 0 */
+		}
+	out:
+		req->nr_bridge_idx = i;
+		req->nr_port_idx = j;
+		NMG_UNLOCK();
+	}
+
+	return error;
+}
+
+/*
+ * Called by external kernel modules (e.g., Openvswitch)
+ * to set the configure/lookup/dtor functions of a VALE instance.
+ * 'name' may be just the bridge's name (including ':' if it is not
+ * just NM_BDG_NAME).
+ *
+ * A non-NULL 'bdg_ops' is installed together with 'private_data'.
+ * A NULL 'bdg_ops' resets the bridge: the hash table is zeroed, the
+ * ops are cleared and private_data points to the hash table again.
+ *
+ * Returns 0 on success, ENXIO if the bridge does not exist, EACCES if
+ * 'auth_token' is not valid for it.
+ *
+ * Called without NMG_LOCK.
+ */
+
+int
+netmap_bdg_regops(const char *name, struct netmap_bdg_ops *bdg_ops, void *private_data, void *auth_token)
+{
+	struct nm_bridge *b;
+	int error = 0;
+
+	NMG_LOCK();
+	b = nm_find_bridge(name, 0 /* don't create */, NULL);
+	if (b == NULL) {
+		error = ENXIO;
+	} else if (!nm_bdg_valid_auth_token(b, auth_token)) {
+		error = EACCES;
+	} else {
+		BDG_WLOCK(b);
+		if (bdg_ops != NULL) {
+			/* modifying the bridge */
+			b->private_data = private_data;
+			b->bdg_ops = bdg_ops;
+		} else {
+			/* resetting the bridge */
+			bzero(b->ht, sizeof(struct nm_hash_ent) * NM_BDG_HASH);
+			b->bdg_ops = NULL;
+			b->private_data = b->ht;
+		}
+		BDG_WUNLOCK(b);
+	}
+	NMG_UNLOCK();
+
+	return error;
+}
+
+
+/*
+ * Forward a configuration request to the config callback of the bridge
+ * named in nr->nifr_name.
+ *
+ * The bridge is looked up under NMG_LOCK, but the callback is invoked
+ * with only the bridge lock held in shared mode.
+ *
+ * Returns the value returned by the callback, or EINVAL if the bridge
+ * does not exist or has no config callback.
+ */
+int
+netmap_bdg_config(struct nm_ifreq *nr)
+{
+	struct nm_bridge *b;
+	int error = EINVAL;
+
+	NMG_LOCK();
+	b = nm_find_bridge(nr->nifr_name, 0, NULL);
+	if (!b) {
+		NMG_UNLOCK();
+		return error;
+	}
+	NMG_UNLOCK();
+	/* Don't call config() with NMG_LOCK() held */
+	BDG_RLOCK(b);
+	/* bdg_ops is NULL after netmap_bdg_regops() has reset the bridge */
+	if (b->bdg_ops != NULL && b->bdg_ops->config != NULL)
+		error = b->bdg_ops->config(nr);
+	BDG_RUNLOCK(b);
+	return error;
+}
+
+
+/*
+ * nm_register callback for VALE ports.
+ *
+ * When 'onoff' is non-zero every kring with a pending "on" request is
+ * switched to NKR_NETMAP_ON, and NAF_NETMAP_ON is set if the adapter
+ * has no active file descriptors. Otherwise NAF_NETMAP_ON is cleared
+ * under the same condition and every kring with a pending "off"
+ * request is switched to NKR_NETMAP_OFF.
+ * If the port is attached to a bridge, the bridge lock is held in
+ * exclusive mode for the whole update. Always returns 0.
+ */
+int
+netmap_vp_reg(struct netmap_adapter *na, int onoff)
+{
+	struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter *)na;
+	enum txrx dir;
+	int r;
+
+	/* persistent ports may be put in netmap mode
+	 * before being attached to a bridge
+	 */
+	if (vpna->na_bdg != NULL)
+		BDG_WLOCK(vpna->na_bdg);
+
+	if (!onoff) {
+		if (na->active_fds == 0)
+			na->na_flags &= ~NAF_NETMAP_ON;
+		for_rx_tx(dir) {
+			for (r = 0; r < netmap_real_rings(na, dir); r++) {
+				struct netmap_kring *kr = NMR(na, dir)[r];
+
+				if (nm_kring_pending_off(kr))
+					kr->nr_mode = NKR_NETMAP_OFF;
+			}
+		}
+	} else {
+		for_rx_tx(dir) {
+			for (r = 0; r < netmap_real_rings(na, dir); r++) {
+				struct netmap_kring *kr = NMR(na, dir)[r];
+
+				if (nm_kring_pending_on(kr))
+					kr->nr_mode = NKR_NETMAP_ON;
+			}
+		}
+		if (na->active_fds == 0)
+			na->na_flags |= NAF_NETMAP_ON;
+		/* XXX on FreeBSD, persistent VALE ports should also
+		 * toggle IFCAP_NETMAP in na->ifp (2014-03-16)
+		 */
+	}
+
+	if (vpna->na_bdg != NULL)
+		BDG_WUNLOCK(vpna->na_bdg);
+
+	return 0;
+}
+
+
+/*
+ * rxsync code used by the nm_rxsync callback of VALE ports and also
+ * internally by the bwrap.
+ *
+ * If kring->rhead is beyond the last slot the ring is reinitialized and
+ * the result of netmap_ring_reinit() is returned. Otherwise the slots
+ * between nr_hwcur and rhead get NS_BUF_CHANGED cleared, nr_hwcur is
+ * advanced to rhead and 0 is returned. 'flags' is unused.
+ */
+static int
+netmap_vp_rxsync_locked(struct netmap_kring *kring, int flags)
+{
+	struct netmap_adapter *na = kring->na;
+	struct netmap_ring *ring = kring->ring;
+	const u_int lim = kring->nkr_num_slots - 1;
+	const u_int head = kring->rhead;
+	u_int cur;
+
+	if (head > lim) {
+		D("ouch dangerous reset!!!");
+		return netmap_ring_reinit(kring);
+	}
+
+	/* First part, import newly received packets. */
+	/* actually nothing to do here, they are already in the kring */
+
+	/* Second part, skip past packets that userspace has released. */
+	cur = kring->nr_hwcur;
+	if (cur == head)
+		return 0;
+
+	/* consistency check, but nothing really important here */
+	while (likely(cur != head)) {
+		struct netmap_slot *slot = &ring->slot[cur];
+		void *addr = NMB(na, slot);
+
+		if (addr == NETMAP_BUF_BASE(kring->na)) { /* bad buf */
+			D("bad buffer index %d, ignore ?",
+				slot->buf_idx);
+		}
+		slot->flags &= ~NS_BUF_CHANGED;
+		cur = nm_next(cur, lim);
+	}
+	kring->nr_hwcur = head;
+
+	return 0;
+}
+
+/*
+ * nm_rxsync callback for VALE ports
+ * (a user process reading from a VALE switch).
+ * Concurrent calls from userspace are already excluded, but the
+ * queue's lock must be taken to protect against writers on the
+ * same queue.
+ *
+ * Returns the value returned by netmap_vp_rxsync_locked().
+ */
+int
+netmap_vp_rxsync(struct netmap_kring *kring, int flags)
+{
+	int ret;
+
+	mtx_lock(&kring->q_lock);
+	ret = netmap_vp_rxsync_locked(kring, flags);
+	mtx_unlock(&kring->q_lock);
+
+	return ret;
+}
+
+/*
+ * Attach a bridge wrapper to 'hwna' by delegating to the bwrap_attach
+ * callback found in 'ops'; returns whatever the callback returns.
+ */
+int
+netmap_bwrap_attach(const char *nr_name, struct netmap_adapter *hwna,
+		struct netmap_bdg_ops *ops)
+{
+	int ret = ops->bwrap_attach(nr_name, hwna);
+
+	return ret;
+}
+
+
+/* Bridge wrapper code (bwrap).
+ * This is used to connect a non-VALE-port netmap_adapter (hwna) to a
+ * VALE switch.
+ * The main task is to swap the meaning of tx and rx rings to match the
+ * expectations of the VALE switch code (see nm_bdg_flush).
+ *
+ * The bwrap works by interposing a netmap_bwrap_adapter between the
+ * rest of the system and the hwna. The netmap_bwrap_adapter looks like
+ * a netmap_vp_adapter to the rest of the system, but, internally, it
+ * translates all callbacks to what the hwna expects.
+ *
+ * Note that we have to intercept callbacks coming from two sides:
+ *
+ * - callbacks coming from the netmap module are intercepted by
+ * passing around the netmap_bwrap_adapter instead of the hwna
+ *
+ * - callbacks coming from outside of the netmap module only know
+ * about the hwna. This, however, only happens in interrupt
+ * handlers, where only the hwna->nm_notify callback is called.
+ * What the bwrap does is to overwrite the hwna->nm_notify callback
+ * with its own netmap_bwrap_intr_notify.
+ * XXX This assumes that the hwna->nm_notify callback was the
+ * standard netmap_notify(), as it is the case for nic adapters.
+ * Any additional action performed by hwna->nm_notify will not be
+ * performed by netmap_bwrap_intr_notify.
+ *
+ * Additionally, the bwrap can optionally attach the host rings pair
+ * of the wrapped adapter to a different port of the switch.
+ */
+
+
+/*
+ * Destructor for a bwrap adapter (installed as nm_dtor by
+ * netmap_bwrap_attach_common()).
+ *
+ * Drops the allocator reference held by the host adapter, detaches the
+ * bwrap port (and the host port, if it was attached) from the bridge,
+ * clears the ifp pointers of the bwrap, undoes the changes made to the
+ * wrapped hwna and finally releases the reference on it.
+ */
+static void
+netmap_bwrap_dtor(struct netmap_adapter *na)
+{
+	struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter*)na;
+	struct netmap_adapter *hwna = bna->hwna;
+	struct nm_bridge *bdg = bna->up.na_bdg;
+	struct nm_bridge *host_bdg = bna->host.na_bdg;
+
+	if (bna->host.up.nm_mem) {
+		netmap_mem_put(bna->host.up.nm_mem);
+	}
+
+	/* -1 as host port means that the host rings were not attached */
+	if (bdg) {
+		netmap_bdg_detach_common(bdg, bna->up.bdg_port,
+			(host_bdg ? bna->host.bdg_port : -1));
+	}
+
+	ND("na %p", na);
+
+	/* the bwrap and its host adapter no longer refer to the ifp */
+	na->ifp = NULL;
+	bna->host.up.ifp = NULL;
+
+	/* give the hwna back in the state it had before the attach */
+	hwna->na_vp = bna->saved_na_vp;
+	hwna->na_hostvp = NULL;
+	hwna->na_private = NULL;
+	hwna->na_flags &= ~NAF_BUSY;
+
+	netmap_adapter_put(hwna);
+}
+
+
+/*
+ * Intr callback for NICs connected to a bridge.
+ * Simply ignore tx interrupts (maybe we could try to recover space ?)
+ * and pass received packets from nic to the bridge.
+ *
+ * XXX TODO check locking: this is called from the interrupt
+ * handler so we should make sure that the interface is not
+ * disconnected while passing down an interrupt.
+ *
+ * Note, no user process can access this NIC or the host stack.
+ * The only part of the ring that is significant are the slots,
+ * and head/cur/tail are set from the kring as needed
+ * (part as a receive ring, part as a transmit ring).
+ *
+ * This is the callback that overwrites the hwna notify callback.
+ * Packets come from the outside or from the host stack and are put on an
+ * hwna rx ring.
+ * The bridge wrapper then sends the packets through the bridge.
+ *
+ * The sequence is: nm_sync on the hwna ring to fetch the new packets,
+ * nm_sync on the bwrap tx ring with the same ring_id to push them into
+ * the bridge, then a second nm_sync on the hwna ring to release the
+ * buffers.
+ *
+ * kring is the hwna ring being notified; flags is only used for the
+ * verbose log and for the nm_sync call on the bwrap tx ring.
+ *
+ * Returns EIO if nm_kr_tryget() fails on kring, the error code of a
+ * failed nm_sync on kring, NM_IRQ_RESCHED if rcur differs from nr_hwtail
+ * after the second nm_sync on kring, and NM_IRQ_COMPLETED otherwise
+ * (including the case in which no new packets were found).
+ */
+static int
+netmap_bwrap_intr_notify(struct netmap_kring *kring, int flags)
+{
+ struct netmap_adapter *na = kring->na;
+ struct netmap_bwrap_adapter *bna = na->na_private;
+ struct netmap_kring *bkring;
+ struct netmap_vp_adapter *vpna = &bna->up;
+ u_int ring_nr = kring->ring_id;
+ int ret = NM_IRQ_COMPLETED;
+ int error;
+
+ if (netmap_verbose)
+ D("%s %s 0x%x", na->name, kring->name, flags);
+
+ bkring = vpna->up.tx_rings[ring_nr];
+
+ /* make sure the ring is not disabled; a successful tryget is
+ * balanced by the nm_kr_put() at put_out */
+ if (nm_kr_tryget(kring, 0 /* can't sleep */, NULL)) {
+ return EIO;
+ }
+
+ if (netmap_verbose)
+ D("%s head %d cur %d tail %d", na->name,
+ kring->rhead, kring->rcur, kring->rtail);
+
+ /* simulate a user wakeup on the rx ring
+ * fetch packets that have arrived.
+ */
+ error = kring->nm_sync(kring, 0);
+ if (error)
+ goto put_out;
+ if (kring->nr_hwcur == kring->nr_hwtail) {
+ if (netmap_verbose)
+ D("how strange, interrupt with no packets on %s",
+ na->name);
+ goto put_out;
+ }
+
+ /* new packets are kring->rcur to kring->nr_hwtail, and the bkring
+ * had hwcur == bkring->rhead. So advance bkring->rhead to kring->nr_hwtail
+ * to push all packets out.
+ */
+ bkring->rhead = bkring->rcur = kring->nr_hwtail;
+
+ bkring->nm_sync(bkring, flags); /* NOTE(review): return value is not checked */
+
+ /* mark all buffers as released on this ring */
+ kring->rhead = kring->rcur = kring->rtail = kring->nr_hwtail;
+ /* another call to actually release the buffers */
+ error = kring->nm_sync(kring, 0);
+
+ /* The second rxsync may have further advanced hwtail. If this happens,
+ * return NM_IRQ_RESCHED, otherwise just return NM_IRQ_COMPLETED. */
+ if (kring->rcur != kring->nr_hwtail) {
+ ret = NM_IRQ_RESCHED;
+ }
+put_out:
+ nm_kr_put(kring);
+
+ return error ? error : ret;
+}
+
+
+/*
+ * nm_register callback for bwrap.
+ *
+ * When onoff is non-zero the lookup table of the bwrap is copied into
+ * the hwna (and into the host adapter, if that is attached to a bridge)
+ * before anything else. In both directions the pending ring modes are
+ * then copied down to the hwna (with tx and rx swapped), the request is
+ * forwarded to hwna->nm_register(), the resulting ring modes are copied
+ * back up and netmap_vp_reg() is called on the bwrap and, if attached,
+ * on the host adapter.
+ * Finally the nm_notify callbacks of the hwna rx rings are intercepted
+ * (onoff != 0) or restored (onoff == 0).
+ *
+ * Returns the error from hwna->nm_register() if it fails, 0 otherwise.
+ */
+int
+netmap_bwrap_reg(struct netmap_adapter *na, int onoff)
+{
+	struct netmap_bwrap_adapter *bna =
+		(struct netmap_bwrap_adapter *)na;
+	struct netmap_adapter *hwna = bna->hwna;
+	struct netmap_vp_adapter *hostna = &bna->host;
+	int error;
+	/* a single unsigned index for the whole function: ring counts
+	 * are unsigned, and the nested blocks used to shadow a signed
+	 * 'i' with their own 'u_int i' */
+	u_int i;
+	enum txrx t;
+
+	ND("%s %s", na->name, onoff ? "on" : "off");
+
+	if (onoff) {
+		/* netmap_do_regif has been called on the bwrap na.
+		 * We need to pass the information about the
+		 * memory allocator down to the hwna before
+		 * putting it in netmap mode
+		 */
+		hwna->na_lut = na->na_lut;
+
+		if (hostna->na_bdg) {
+			/* if the host rings have been attached to switch,
+			 * we need to copy the memory allocator information
+			 * in the hostna also
+			 */
+			hostna->up.na_lut = na->na_lut;
+		}
+	}
+
+	/* pass down the pending ring state information */
+	for_rx_tx(t) {
+		for (i = 0; i < netmap_all_rings(na, t); i++) {
+			NMR(hwna, nm_txrx_swap(t))[i]->nr_pending_mode =
+				NMR(na, t)[i]->nr_pending_mode;
+		}
+	}
+
+	/* forward the request to the hwna */
+	error = hwna->nm_register(hwna, onoff);
+	if (error)
+		return error;
+
+	/* copy up the current ring state information */
+	for_rx_tx(t) {
+		for (i = 0; i < netmap_all_rings(na, t); i++) {
+			struct netmap_kring *kring = NMR(hwna, nm_txrx_swap(t))[i];
+			NMR(na, t)[i]->nr_mode = kring->nr_mode;
+		}
+	}
+
+	/* impersonate a netmap_vp_adapter */
+	netmap_vp_reg(na, onoff);
+	if (hostna->na_bdg)
+		netmap_vp_reg(&hostna->up, onoff);
+
+	if (onoff) {
+		/* intercept the hwna nm_notify callback on the hw rings */
+		for (i = 0; i < hwna->num_rx_rings; i++) {
+			hwna->rx_rings[i]->save_notify = hwna->rx_rings[i]->nm_notify;
+			hwna->rx_rings[i]->nm_notify = netmap_bwrap_intr_notify;
+		}
+		i = hwna->num_rx_rings; /* for safety */
+		/* save the host ring notify unconditionally */
+		for (; i < netmap_real_rings(hwna, NR_RX); i++) {
+			hwna->rx_rings[i]->save_notify =
+				hwna->rx_rings[i]->nm_notify;
+			if (hostna->na_bdg) {
+				/* also intercept the host ring notify */
+				hwna->rx_rings[i]->nm_notify =
+					netmap_bwrap_intr_notify;
+				na->tx_rings[i]->nm_sync = na->nm_txsync;
+			}
+		}
+		if (na->active_fds == 0)
+			na->na_flags |= NAF_NETMAP_ON;
+	} else {
+		if (na->active_fds == 0)
+			na->na_flags &= ~NAF_NETMAP_ON;
+
+		/* reset all notify callbacks (including host ring) */
+		for (i = 0; i < netmap_all_rings(hwna, NR_RX); i++) {
+			hwna->rx_rings[i]->nm_notify =
+				hwna->rx_rings[i]->save_notify;
+			hwna->rx_rings[i]->save_notify = NULL;
+		}
+		/* the hwna must no longer use the lookup table of the bwrap */
+		hwna->na_lut.lut = NULL;
+		hwna->na_lut.plut = NULL;
+		hwna->na_lut.objtotal = 0;
+		hwna->na_lut.objsize = 0;
+
+		/* pass ownership of the netmap rings to the hwna */
+		for_rx_tx(t) {
+			for (i = 0; i < netmap_all_rings(na, t); i++) {
+				NMR(na, t)[i]->ring = NULL;
+			}
+		}
+		/* reset the number of host rings to default */
+		for_rx_tx(t) {
+			nma_set_host_nrings(hwna, t, 1);
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * nm_config callback for bwrap.
+ *
+ * The configuration is obtained from the wrapped hwna and reported in
+ * info with the tx and rx sides exchanged.
+ *
+ * Returns the error from netmap_mem_get_lut() if that fails, 0 otherwise.
+ */
+static int
+netmap_bwrap_config(struct netmap_adapter *na, struct nm_config_info *info)
+{
+	struct netmap_adapter *hwna =
+		((struct netmap_bwrap_adapter *)na)->hwna;
+	int err;
+
+	/* Nobody may have registered the hwna so far, in which case
+	 * netmap_mem_get_lut() has not been called on it yet: do it
+	 * here, before asking the hwna to refresh its configuration. */
+	err = netmap_mem_get_lut(hwna->nm_mem, &hwna->na_lut);
+	if (err != 0)
+		return err;
+
+	netmap_update_config(hwna);
+
+	/* the rx side of the hwna is the tx side of the bwrap ... */
+	info->num_tx_rings = hwna->num_rx_rings;
+	info->num_tx_descs = hwna->num_rx_desc;
+	/* ... and the other way around */
+	info->num_rx_rings = hwna->num_tx_rings;
+	info->num_rx_descs = hwna->num_tx_desc;
+
+	info->rx_buf_maxsize = hwna->rx_buf_maxsize;
+
+	return 0;
+}
+
+
+/*
+ * nm_krings_create callback for bwrap (common part).
+ *
+ * Creates the krings and the netmap rings of the wrapped hwna, takes a
+ * user reference on every hwna kring and makes each bwrap kring share
+ * the netmap ring of the hwna kring of the opposite direction.
+ * If the bwrap has host rings, the krings past the hardware ones are
+ * handed over to the host adapter.
+ *
+ * Returns 0 on success, or the error reported by the hwna
+ * nm_krings_create method or by netmap_mem_rings_create().
+ */
+int
+netmap_bwrap_krings_create_common(struct netmap_adapter *na)
+{
+	struct netmap_bwrap_adapter *bna =
+		(struct netmap_bwrap_adapter *)na;
+	struct netmap_adapter *hwna = bna->hwna;
+	struct netmap_adapter *hostna = &bna->host.up;
+	enum txrx t;
+	int error;
+	int i;
+
+	/* the hwna krings must exist too */
+	error = hwna->nm_krings_create(hwna);
+	if (error)
+		return error;
+
+	/* take a user reference on each of the hwna krings */
+	for_rx_tx(t) {
+		for (i = 0; i < netmap_all_rings(hwna, t); i++)
+			NMR(hwna, t)[i]->users++;
+	}
+
+	/* with the references in place, create the netmap rings */
+	error = netmap_mem_rings_create(hwna);
+	if (error)
+		goto undo_users;
+
+	/* Cross-link the netmap rings: the number of rings is dictated
+	 * by the hwna, and the rx rings of one side are the tx rings of
+	 * the other side.
+	 */
+	for_rx_tx(t) {
+		enum txrx peer = nm_txrx_swap(t); /* NR_TX <-> NR_RX */
+
+		for (i = 0; i < netmap_all_rings(hwna, peer); i++) {
+			struct netmap_kring *hwk = NMR(hwna, peer)[i];
+			struct netmap_kring *k = NMR(na, t)[i];
+
+			k->nkr_num_slots = hwk->nkr_num_slots;
+			k->ring = hwk->ring;
+		}
+	}
+
+	if (na->na_flags & NAF_HOST_RINGS) {
+		/* The host rings of the bwrap are the rings of the
+		 * hostna, so the matching krings have to point back
+		 * to the hostna.
+		 */
+		hostna->tx_rings = &na->tx_rings[na->num_tx_rings];
+		hostna->rx_rings = &na->rx_rings[na->num_rx_rings];
+		for_rx_tx(t) {
+			for (i = 0; i < nma_get_nrings(hostna, t); i++)
+				NMR(hostna, t)[i]->na = hostna;
+		}
+	}
+
+	return 0;
+
+undo_users:
+	/* give back the user references and the hwna krings */
+	for_rx_tx(t) {
+		for (i = 0; i < netmap_all_rings(hwna, t); i++)
+			NMR(hwna, t)[i]->users--;
+	}
+	hwna->nm_krings_delete(hwna);
+	return error;
+}
+
+
+/*
+ * Common part of the krings delete operation for bwrap: releases the
+ * user references held on the hwna krings, then asks the allocator and
+ * the hwna to delete the netmap rings and the krings.
+ */
+void
+netmap_bwrap_krings_delete_common(struct netmap_adapter *na)
+{
+	struct netmap_adapter *hwna =
+		((struct netmap_bwrap_adapter *)na)->hwna;
+	enum txrx t;
+	int i;
+
+	ND("%s", na->name);
+
+	/* drop the user reference held on each hwna kring */
+	for_rx_tx(t) {
+		for (i = 0; i < netmap_all_rings(hwna, t); i++)
+			NMR(hwna, t)[i]->users--;
+	}
+
+	/* netmap rings that are not needed any more can now go away,
+	 * followed by the krings of the hwna */
+	netmap_mem_rings_delete(hwna);
+	hwna->nm_krings_delete(hwna);
+}
+
+
+/*
+ * Notify method for the bridge-->hwna direction.
+ *
+ * kring is a bwrap ring; the packets found on it are sent out through
+ * the hwna tx ring with the same ring_id, which shares the same netmap
+ * ring. flags is passed on to all the sync calls.
+ *
+ * Returns ENXIO if nm_kr_tryget() fails on the hwna tx ring, the error
+ * from the nm_sync of the hwna tx ring if that fails, and
+ * NM_IRQ_COMPLETED otherwise. The return values of the two
+ * netmap_vp_rxsync() calls are not checked.
+ */
+int
+netmap_bwrap_notify(struct netmap_kring *kring, int flags)
+{
+	struct netmap_adapter *na = kring->na;
+	struct netmap_bwrap_adapter *bna = na->na_private;
+	struct netmap_adapter *hwna = bna->hwna;
+	u_int ring_n = kring->ring_id;
+	u_int lim = kring->nkr_num_slots - 1;
+	struct netmap_kring *hw_kring;
+	int error;
+
+	/* kring, na and hwna have all been dereferenced above, so
+	 * there is no point in NULL-checking them here */
+	ND("%s: na %s hwna %s", kring->name, na->name, hwna->name);
+	hw_kring = hwna->tx_rings[ring_n];
+
+	if (nm_kr_tryget(hw_kring, 0, NULL)) {
+		return ENXIO;
+	}
+
+	/* first step: simulate a user wakeup on the rx ring */
+	netmap_vp_rxsync(kring, flags);
+	/* the debug messages only use names that exist in this function,
+	 * so that they still compile when ND() is turned into a real
+	 * print */
+	ND("%s[%d] PRE rx(c%3d t%3d l%3d) ring(h%3d c%3d t%3d) tx(c%3d ht%3d t%3d)",
+		na->name, ring_n,
+		kring->nr_hwcur, kring->nr_hwtail, kring->nkr_hwlease,
+		kring->rhead, kring->rcur, kring->rtail,
+		hw_kring->nr_hwcur, hw_kring->nr_hwtail, hw_kring->rtail);
+	/* second step: the new packets are sent on the tx ring
+	 * (which is actually the same ring)
+	 */
+	hw_kring->rhead = hw_kring->rcur = kring->nr_hwtail;
+	error = hw_kring->nm_sync(hw_kring, flags);
+	if (error)
+		goto put_out;
+
+	/* third step: now we are back the rx ring */
+	/* claim ownership on all hw owned bufs */
+	kring->rhead = kring->rcur = nm_next(hw_kring->nr_hwtail, lim); /* skip past reserved slot */
+
+	/* fourth step: the user goes to sleep again, causing another rxsync */
+	netmap_vp_rxsync(kring, flags);
+	ND("%s[%d] PST rx(c%3d t%3d l%3d) ring(h%3d c%3d t%3d) tx(c%3d ht%3d t%3d)",
+		na->name, ring_n,
+		kring->nr_hwcur, kring->nr_hwtail, kring->nkr_hwlease,
+		kring->rhead, kring->rcur, kring->rtail,
+		hw_kring->nr_hwcur, hw_kring->nr_hwtail, hw_kring->rtail);
+put_out:
+	nm_kr_put(hw_kring);
+
+	return error ? error : NM_IRQ_COMPLETED;
+}
+
+
+/*
+ * nm_bdg_ctl callback for the bwrap.
+ * Called on bridge-attach and detach, as an effect of vale-ctl -[ahd].
+ *
+ * A NETMAP_REQ_VALE_ATTACH request creates a private netmap_priv_d
+ * structure and performs a netmap_do_regif() on the bwrap. This puts
+ * both the bwrap and the hwna in netmap mode, with the netmap rings
+ * shared and cross linked. Moreover, it starts the interception of the
+ * interrupts directed to hwna.
+ * Any other request type is handled as a detach, which deletes that
+ * private structure.
+ *
+ * Returns 0 on success (also when an attach finds the bwrap already
+ * registered through its own private structure), EINVAL for an
+ * unsupported attach request or a detach without active users, EBUSY
+ * if the adapter is owned by somebody else, ENOMEM if the private
+ * structure cannot be allocated, or the error from netmap_do_regif().
+ */
+static int
+netmap_bwrap_bdg_ctl(struct nmreq_header *hdr, struct netmap_adapter *na)
+{
+	struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter*)na;
+	struct nmreq_vale_attach *req;
+	struct netmap_priv_d *npriv;
+	int error;
+
+	if (hdr->nr_reqtype != NETMAP_REQ_VALE_ATTACH) {
+		/* detach */
+		if (na->active_fds == 0) /* not registered */
+			return EINVAL;
+		netmap_priv_delete(bna->na_kpriv);
+		bna->na_kpriv = NULL;
+		na->na_flags &= ~NAF_BUSY;
+		return 0;
+	}
+
+	/* attach */
+	req = (struct nmreq_vale_attach *)(uintptr_t)hdr->nr_body;
+
+	/* We only support attaching all the NIC rings
+	 * and/or the host stack. */
+	if (req->reg.nr_ringid != 0)
+		return EINVAL;
+	if (req->reg.nr_mode != NR_REG_ALL_NIC &&
+	    req->reg.nr_mode != NR_REG_NIC_SW)
+		return EINVAL;
+
+	if (NETMAP_OWNED_BY_ANY(na))
+		return EBUSY;
+
+	if (bna->na_kpriv) {
+		/* nothing to do */
+		return 0;
+	}
+
+	npriv = netmap_priv_new();
+	if (npriv == NULL)
+		return ENOMEM;
+	npriv->np_ifp = na->ifp; /* let the priv destructor release the ref */
+
+	error = netmap_do_regif(npriv, na, req->reg.nr_mode,
+		req->reg.nr_ringid, req->reg.nr_flags);
+	if (error) {
+		netmap_priv_delete(npriv);
+		return error;
+	}
+
+	bna->na_kpriv = npriv;
+	na->na_flags |= NAF_BUSY;
+
+	return 0;
+}
+
+/*
+ * Attach a bridge wrapper to the 'real' device.
+ *
+ * na is the (already allocated) bwrap adapter, hwna the adapter to be
+ * wrapped. The bwrap is set up with the ring parameters of hwna, with
+ * tx and rx swapped, shares the ifp and the memory allocator of hwna
+ * and takes a reference on it. If hwna has host rings, the host
+ * adapter embedded in the bwrap is initialized as well.
+ *
+ * Returns 0 on success, EBUSY if hwna is already in use, or the error
+ * from netmap_attach_common(). In the latter case the na_vp,
+ * na_hostvp and na_private fields of hwna are reset in the same way
+ * netmap_bwrap_dtor() does, and the reference on hwna is released.
+ *
+ * NOTE(review): on the error path the reference taken with if_ref()
+ * and the ones taken with netmap_mem_get() are not released here;
+ * confirm that the callers take care of them.
+ */
+int
+netmap_bwrap_attach_common(struct netmap_adapter *na,
+		struct netmap_adapter *hwna)
+{
+	struct netmap_bwrap_adapter *bna;
+	struct netmap_adapter *hostna = NULL;
+	int error = 0;
+	enum txrx t;
+
+	/* make sure the NIC is not already in use */
+	if (NETMAP_OWNED_BY_ANY(hwna)) {
+		D("NIC %s busy, cannot attach to bridge", hwna->name);
+		return EBUSY;
+	}
+
+	bna = (struct netmap_bwrap_adapter *)na;
+	/* make bwrap ifp point to the real ifp */
+	na->ifp = hwna->ifp;
+	if_ref(na->ifp);
+	na->na_private = bna;
+	/* fill the ring data for the bwrap adapter with rx/tx meanings
+	 * swapped. The real cross-linking will be done during register,
+	 * when all the krings will have been created.
+	 */
+	for_rx_tx(t) {
+		enum txrx r = nm_txrx_swap(t); /* swap NR_TX <-> NR_RX */
+		nma_set_nrings(na, t, nma_get_nrings(hwna, r));
+		nma_set_ndesc(na, t, nma_get_ndesc(hwna, r));
+	}
+	na->nm_dtor = netmap_bwrap_dtor;
+	na->nm_config = netmap_bwrap_config;
+	na->nm_bdg_ctl = netmap_bwrap_bdg_ctl;
+	na->pdev = hwna->pdev;
+	na->nm_mem = netmap_mem_get(hwna->nm_mem);
+	na->virt_hdr_len = hwna->virt_hdr_len;
+	na->rx_buf_maxsize = hwna->rx_buf_maxsize;
+
+	bna->hwna = hwna;
+	netmap_adapter_get(hwna);
+	hwna->na_private = bna; /* weak reference */
+	/* remember the previous na_vp: it is put back by the destructor
+	 * and by the error path below */
+	bna->saved_na_vp = hwna->na_vp;
+	hwna->na_vp = &bna->up;
+	bna->up.up.na_vp = &(bna->up);
+
+	if (hwna->na_flags & NAF_HOST_RINGS) {
+		if (hwna->na_flags & NAF_SW_ONLY)
+			na->na_flags |= NAF_SW_ONLY;
+		na->na_flags |= NAF_HOST_RINGS;
+		hostna = &bna->host.up;
+
+		/* limit the number of host rings to that of hw */
+		nm_bound_var(&hostna->num_tx_rings, 1, 1,
+			nma_get_nrings(hwna, NR_TX), NULL);
+		nm_bound_var(&hostna->num_rx_rings, 1, 1,
+			nma_get_nrings(hwna, NR_RX), NULL);
+
+		snprintf(hostna->name, sizeof(hostna->name), "%s^", na->name);
+		hostna->ifp = hwna->ifp;
+		for_rx_tx(t) {
+			enum txrx r = nm_txrx_swap(t);
+			u_int nr = nma_get_nrings(hostna, t);
+
+			nma_set_nrings(hostna, t, nr);
+			nma_set_host_nrings(na, t, nr);
+			if (nma_get_host_nrings(hwna, t) < nr) {
+				nma_set_host_nrings(hwna, t, nr);
+			}
+			nma_set_ndesc(hostna, t, nma_get_ndesc(hwna, r));
+		}
+		// hostna->nm_txsync = netmap_bwrap_host_txsync;
+		// hostna->nm_rxsync = netmap_bwrap_host_rxsync;
+		hostna->nm_mem = netmap_mem_get(na->nm_mem);
+		hostna->na_private = bna;
+		hostna->na_vp = &bna->up;
+		na->na_hostvp = hwna->na_hostvp =
+			hostna->na_hostvp = &bna->host;
+		hostna->na_flags = NAF_BUSY; /* prevent NIOCREGIF */
+		hostna->rx_buf_maxsize = hwna->rx_buf_maxsize;
+	}
+
+	/* only names visible in this function are used, so that the
+	 * message still compiles when ND() is turned into a real print */
+	ND("%s<->%s txr %d txd %d rxr %d rxd %d",
+		na->name, hwna->name,
+		na->num_tx_rings, na->num_tx_desc,
+		na->num_rx_rings, na->num_rx_desc);
+
+	error = netmap_attach_common(na);
+	if (error) {
+		goto err_put;
+	}
+	hwna->na_flags |= NAF_BUSY;
+	return 0;
+
+err_put:
+	/* undo the changes made to hwna, as netmap_bwrap_dtor() does:
+	 * put back the saved na_vp rather than clearing it, and do not
+	 * leave na_private pointing to a bwrap that failed to attach */
+	hwna->na_vp = bna->saved_na_vp;
+	hwna->na_hostvp = NULL;
+	hwna->na_private = NULL;
+	netmap_adapter_put(hwna);
+	return error;
+}
+
+/*
+ * Allocate an array of n bridges and initialize the lock of each of
+ * them with BDG_RWINIT().
+ *
+ * Returns the new array, or NULL if the allocation fails.
+ */
+struct nm_bridge *
+netmap_init_bridges2(u_int n)
+{
+	u_int i;	/* unsigned, like the bound it is compared with */
+	struct nm_bridge *b;
+
+	b = nm_os_malloc(sizeof(struct nm_bridge) * n);
+	if (b == NULL)
+		return NULL;
+	for (i = 0; i < n; i++)
+		BDG_RWINIT(&b[i]);
+	return b;
+}
+
+/*
+ * Destroy the lock of each of the n bridges in the array b with
+ * BDG_RWDESTROY(), then free the array. A NULL b is silently ignored.
+ */
+void
+netmap_uninit_bridges2(struct nm_bridge *b, u_int n)
+{
+	u_int i;	/* unsigned, like the bound it is compared with */
+
+	if (b == NULL)
+		return;
+
+	for (i = 0; i < n; i++)
+		BDG_RWDESTROY(&b[i]);
+	nm_os_free(b);
+}
+
+/*
+ * Set up the bridges at initialization time.
+ *
+ * Without CONFIG_NET_NS a single array of NM_BRIDGES bridges is
+ * allocated and stored in nm_bridges, and ENOMEM is returned if the
+ * allocation fails (0 otherwise). With CONFIG_NET_NS the result of
+ * netmap_bns_register() is returned instead.
+ */
+int
+netmap_init_bridges(void)
+{
+#ifndef CONFIG_NET_NS
+	nm_bridges = netmap_init_bridges2(NM_BRIDGES);
+
+	return (nm_bridges == NULL) ? ENOMEM : 0;
+#else
+	return netmap_bns_register();
+#endif
+}
+
+/*
+ * Counterpart of netmap_init_bridges(): without CONFIG_NET_NS the
+ * nm_bridges array is torn down, with CONFIG_NET_NS
+ * netmap_bns_unregister() is called.
+ */
+void
+netmap_uninit_bridges(void)
+{
+#ifndef CONFIG_NET_NS
+	netmap_uninit_bridges2(nm_bridges, NM_BRIDGES);
+#else
+	netmap_bns_unregister();
+#endif
+}