diff options
| author | Vincenzo Maffione <vmaffione@FreeBSD.org> | 2018-10-23 08:55:16 +0000 |
|---|---|---|
| committer | Vincenzo Maffione <vmaffione@FreeBSD.org> | 2018-10-23 08:55:16 +0000 |
| commit | 2a7db7a63de8af153b9a626780dd15ca26ae3596 (patch) | |
| tree | 37c944c0992d8c89dea1efe83fa7d85ae3aabd0f /sys/dev/netmap/netmap_bdg.c | |
| parent | 60b905ae2f3a31efd6809c790b5a02f574489716 (diff) | |
Notes
Diffstat (limited to 'sys/dev/netmap/netmap_bdg.c')
| -rw-r--r-- | sys/dev/netmap/netmap_bdg.c | 1827 |
1 files changed, 1827 insertions, 0 deletions
diff --git a/sys/dev/netmap/netmap_bdg.c b/sys/dev/netmap/netmap_bdg.c new file mode 100644 index 0000000000000..dd64b805cbf18 --- /dev/null +++ b/sys/dev/netmap/netmap_bdg.c @@ -0,0 +1,1827 @@ +/* + * Copyright (C) 2013-2016 Universita` di Pisa + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + + +/* + * This module implements the VALE switch for netmap + +--- VALE SWITCH --- + +NMG_LOCK() serializes all modifications to switches and ports. +A switch cannot be deleted until all ports are gone. + +For each switch, an SX lock (RWlock on linux) protects +deletion of ports. When configuring or deleting a new port, the +lock is acquired in exclusive mode (after holding NMG_LOCK). +When forwarding, the lock is acquired in shared mode (without NMG_LOCK). +The lock is held throughout the entire forwarding cycle, +during which the thread may incur in a page fault. +Hence it is important that sleepable shared locks are used. + +On the rx ring, the per-port lock is grabbed initially to reserve +a number of slot in the ring, then the lock is released, +packets are copied from source to destination, and then +the lock is acquired again and the receive ring is updated. +(A similar thing is done on the tx ring for NIC and host stack +ports attached to the switch) + + */ + +/* + * OS-specific code that is used only within this file. + * Other OS-specific code that must be accessed by drivers + * is present in netmap_kern.h + */ + +#if defined(__FreeBSD__) +#include <sys/cdefs.h> /* prerequisite */ +__FBSDID("$FreeBSD$"); + +#include <sys/types.h> +#include <sys/errno.h> +#include <sys/param.h> /* defines used in kernel.h */ +#include <sys/kernel.h> /* types used in module initialization */ +#include <sys/conf.h> /* cdevsw struct, UID, GID */ +#include <sys/sockio.h> +#include <sys/socketvar.h> /* struct socket */ +#include <sys/malloc.h> +#include <sys/poll.h> +#include <sys/rwlock.h> +#include <sys/socket.h> /* sockaddrs */ +#include <sys/selinfo.h> +#include <sys/sysctl.h> +#include <net/if.h> +#include <net/if_var.h> +#include <net/bpf.h> /* BIOCIMMEDIATE */ +#include <machine/bus.h> /* bus_dmamap_* */ +#include <sys/endian.h> +#include <sys/refcount.h> +#include <sys/smp.h> + + +#elif defined(linux) + +#include "bsd_glue.h" + +#elif defined(__APPLE__) + +#warning OSX support is only partial +#include "osx_glue.h" + +#elif defined(_WIN32) +#include "win_glue.h" + +#else + +#error Unsupported platform + +#endif /* unsupported */ + +/* + * common headers + */ + +#include <net/netmap.h> +#include <dev/netmap/netmap_kern.h> +#include <dev/netmap/netmap_mem2.h> + +#include <dev/netmap/netmap_bdg.h> + +const char* +netmap_bdg_name(struct netmap_vp_adapter *vp) +{ + struct nm_bridge *b = vp->na_bdg; + if (b == NULL) + return NULL; + return b->bdg_basename; +} + + +#ifndef CONFIG_NET_NS +/* + * XXX in principle nm_bridges could be created dynamically + * Right now we have a static array and deletions are protected + * by an exclusive lock. + */ +static struct nm_bridge *nm_bridges; +#endif /* !CONFIG_NET_NS */ + + +static int +nm_is_id_char(const char c) +{ + return (c >= 'a' && c <= 'z') || + (c >= 'A' && c <= 'Z') || + (c >= '0' && c <= '9') || + (c == '_'); +} + +/* Validate the name of a VALE bridge port and return the + * position of the ":" character. */ +static int +nm_vale_name_validate(const char *name) +{ + int colon_pos = -1; + int i; + + if (!name || strlen(name) < strlen(NM_BDG_NAME)) { + return -1; + } + + for (i = 0; i < NM_BDG_IFNAMSIZ && name[i]; i++) { + if (name[i] == ':') { + colon_pos = i; + break; + } else if (!nm_is_id_char(name[i])) { + return -1; + } + } + + if (strlen(name) - colon_pos > IFNAMSIZ) { + /* interface name too long */ + return -1; + } + + return colon_pos; +} + +/* + * locate a bridge among the existing ones. + * MUST BE CALLED WITH NMG_LOCK() + * + * a ':' in the name terminates the bridge name. Otherwise, just NM_NAME. + * We assume that this is called with a name of at least NM_NAME chars. + */ +struct nm_bridge * +nm_find_bridge(const char *name, int create, struct netmap_bdg_ops *ops) +{ + int i, namelen; + struct nm_bridge *b = NULL, *bridges; + u_int num_bridges; + + NMG_LOCK_ASSERT(); + + netmap_bns_getbridges(&bridges, &num_bridges); + + namelen = nm_vale_name_validate(name); + if (namelen < 0) { + D("invalid bridge name %s", name ? name : NULL); + return NULL; + } + + /* lookup the name, remember empty slot if there is one */ + for (i = 0; i < num_bridges; i++) { + struct nm_bridge *x = bridges + i; + + if ((x->bdg_flags & NM_BDG_ACTIVE) + x->bdg_active_ports == 0) { + if (create && b == NULL) + b = x; /* record empty slot */ + } else if (x->bdg_namelen != namelen) { + continue; + } else if (strncmp(name, x->bdg_basename, namelen) == 0) { + ND("found '%.*s' at %d", namelen, name, i); + b = x; + break; + } + } + if (i == num_bridges && b) { /* name not found, can create entry */ + /* initialize the bridge */ + ND("create new bridge %s with ports %d", b->bdg_basename, + b->bdg_active_ports); + b->ht = nm_os_malloc(sizeof(struct nm_hash_ent) * NM_BDG_HASH); + if (b->ht == NULL) { + D("failed to allocate hash table"); + return NULL; + } + strncpy(b->bdg_basename, name, namelen); + b->bdg_namelen = namelen; + b->bdg_active_ports = 0; + for (i = 0; i < NM_BDG_MAXPORTS; i++) + b->bdg_port_index[i] = i; + /* set the default function */ + b->bdg_ops = ops; + b->private_data = b->ht; + b->bdg_flags = 0; + NM_BNS_GET(b); + } + return b; +} + + +int +netmap_bdg_free(struct nm_bridge *b) +{ + if ((b->bdg_flags & NM_BDG_ACTIVE) + b->bdg_active_ports != 0) { + return EBUSY; + } + + ND("marking bridge %s as free", b->bdg_basename); + nm_os_free(b->ht); + b->bdg_ops = NULL; + b->bdg_flags = 0; + NM_BNS_PUT(b); + return 0; +} + + +/* remove from bridge b the ports in slots hw and sw + * (sw can be -1 if not needed) + */ +void +netmap_bdg_detach_common(struct nm_bridge *b, int hw, int sw) +{ + int s_hw = hw, s_sw = sw; + int i, lim =b->bdg_active_ports; + uint32_t *tmp = b->tmp_bdg_port_index; + + /* + New algorithm: + make a copy of bdg_port_index; + lookup NA(ifp)->bdg_port and SWNA(ifp)->bdg_port + in the array of bdg_port_index, replacing them with + entries from the bottom of the array; + decrement bdg_active_ports; + acquire BDG_WLOCK() and copy back the array. + */ + + if (netmap_verbose) + D("detach %d and %d (lim %d)", hw, sw, lim); + /* make a copy of the list of active ports, update it, + * and then copy back within BDG_WLOCK(). + */ + memcpy(b->tmp_bdg_port_index, b->bdg_port_index, sizeof(b->tmp_bdg_port_index)); + for (i = 0; (hw >= 0 || sw >= 0) && i < lim; ) { + if (hw >= 0 && tmp[i] == hw) { + ND("detach hw %d at %d", hw, i); + lim--; /* point to last active port */ + tmp[i] = tmp[lim]; /* swap with i */ + tmp[lim] = hw; /* now this is inactive */ + hw = -1; + } else if (sw >= 0 && tmp[i] == sw) { + ND("detach sw %d at %d", sw, i); + lim--; + tmp[i] = tmp[lim]; + tmp[lim] = sw; + sw = -1; + } else { + i++; + } + } + if (hw >= 0 || sw >= 0) { + D("XXX delete failed hw %d sw %d, should panic...", hw, sw); + } + + BDG_WLOCK(b); + if (b->bdg_ops->dtor) + b->bdg_ops->dtor(b->bdg_ports[s_hw]); + b->bdg_ports[s_hw] = NULL; + if (s_sw >= 0) { + b->bdg_ports[s_sw] = NULL; + } + memcpy(b->bdg_port_index, b->tmp_bdg_port_index, sizeof(b->tmp_bdg_port_index)); + b->bdg_active_ports = lim; + BDG_WUNLOCK(b); + + ND("now %d active ports", lim); + netmap_bdg_free(b); +} + + +/* nm_bdg_ctl callback for VALE ports */ +int +netmap_vp_bdg_ctl(struct nmreq_header *hdr, struct netmap_adapter *na) +{ + struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter *)na; + struct nm_bridge *b = vpna->na_bdg; + + if (hdr->nr_reqtype == NETMAP_REQ_VALE_ATTACH) { + return 0; /* nothing to do */ + } + if (b) { + netmap_set_all_rings(na, 0 /* disable */); + netmap_bdg_detach_common(b, vpna->bdg_port, -1); + vpna->na_bdg = NULL; + netmap_set_all_rings(na, 1 /* enable */); + } + /* I have took reference just for attach */ + netmap_adapter_put(na); + return 0; +} + +int +netmap_default_bdg_attach(const char *name, struct netmap_adapter *na, + struct nm_bridge *b) +{ + return NM_NEED_BWRAP; +} + +/* Try to get a reference to a netmap adapter attached to a VALE switch. + * If the adapter is found (or is created), this function returns 0, a + * non NULL pointer is returned into *na, and the caller holds a + * reference to the adapter. + * If an adapter is not found, then no reference is grabbed and the + * function returns an error code, or 0 if there is just a VALE prefix + * mismatch. Therefore the caller holds a reference when + * (*na != NULL && return == 0). + */ +int +netmap_get_bdg_na(struct nmreq_header *hdr, struct netmap_adapter **na, + struct netmap_mem_d *nmd, int create, struct netmap_bdg_ops *ops) +{ + char *nr_name = hdr->nr_name; + const char *ifname; + struct ifnet *ifp = NULL; + int error = 0; + struct netmap_vp_adapter *vpna, *hostna = NULL; + struct nm_bridge *b; + uint32_t i, j; + uint32_t cand = NM_BDG_NOPORT, cand2 = NM_BDG_NOPORT; + int needed; + + *na = NULL; /* default return value */ + + /* first try to see if this is a bridge port. */ + NMG_LOCK_ASSERT(); + if (strncmp(nr_name, ops->name, strlen(ops->name) - 1)) { + return 0; /* no error, but no VALE prefix */ + } + + b = nm_find_bridge(nr_name, create, ops); + if (b == NULL) { + ND("no bridges available for '%s'", nr_name); + return (create ? ENOMEM : ENXIO); + } + if (strlen(nr_name) < b->bdg_namelen) /* impossible */ + panic("x"); + + /* Now we are sure that name starts with the bridge's name, + * lookup the port in the bridge. We need to scan the entire + * list. It is not important to hold a WLOCK on the bridge + * during the search because NMG_LOCK already guarantees + * that there are no other possible writers. + */ + + /* lookup in the local list of ports */ + for (j = 0; j < b->bdg_active_ports; j++) { + i = b->bdg_port_index[j]; + vpna = b->bdg_ports[i]; + ND("checking %s", vpna->up.name); + if (!strcmp(vpna->up.name, nr_name)) { + netmap_adapter_get(&vpna->up); + ND("found existing if %s refs %d", nr_name) + *na = &vpna->up; + return 0; + } + } + /* not found, should we create it? */ + if (!create) + return ENXIO; + /* yes we should, see if we have space to attach entries */ + needed = 2; /* in some cases we only need 1 */ + if (b->bdg_active_ports + needed >= NM_BDG_MAXPORTS) { + D("bridge full %d, cannot create new port", b->bdg_active_ports); + return ENOMEM; + } + /* record the next two ports available, but do not allocate yet */ + cand = b->bdg_port_index[b->bdg_active_ports]; + cand2 = b->bdg_port_index[b->bdg_active_ports + 1]; + ND("+++ bridge %s port %s used %d avail %d %d", + b->bdg_basename, ifname, b->bdg_active_ports, cand, cand2); + + /* + * try see if there is a matching NIC with this name + * (after the bridge's name) + */ + ifname = nr_name + b->bdg_namelen + 1; + ifp = ifunit_ref(ifname); + if (!ifp) { + /* Create an ephemeral virtual port. + * This block contains all the ephemeral-specific logic. + */ + + if (hdr->nr_reqtype != NETMAP_REQ_REGISTER) { + error = EINVAL; + goto out; + } + + /* bdg_netmap_attach creates a struct netmap_adapter */ + error = b->bdg_ops->vp_create(hdr, NULL, nmd, &vpna); + if (error) { + D("error %d", error); + goto out; + } + /* shortcut - we can skip get_hw_na(), + * ownership check and nm_bdg_attach() + */ + + } else { + struct netmap_adapter *hw; + + /* the vale:nic syntax is only valid for some commands */ + switch (hdr->nr_reqtype) { + case NETMAP_REQ_VALE_ATTACH: + case NETMAP_REQ_VALE_DETACH: + case NETMAP_REQ_VALE_POLLING_ENABLE: + case NETMAP_REQ_VALE_POLLING_DISABLE: + break; /* ok */ + default: + error = EINVAL; + goto out; + } + + error = netmap_get_hw_na(ifp, nmd, &hw); + if (error || hw == NULL) + goto out; + + /* host adapter might not be created */ + error = hw->nm_bdg_attach(nr_name, hw, b); + if (error == NM_NEED_BWRAP) { + error = b->bdg_ops->bwrap_attach(nr_name, hw); + } + if (error) + goto out; + vpna = hw->na_vp; + hostna = hw->na_hostvp; + if (hdr->nr_reqtype == NETMAP_REQ_VALE_ATTACH) { + /* Check if we need to skip the host rings. */ + struct nmreq_vale_attach *areq = + (struct nmreq_vale_attach *)(uintptr_t)hdr->nr_body; + if (areq->reg.nr_mode != NR_REG_NIC_SW) { + hostna = NULL; + } + } + } + + BDG_WLOCK(b); + vpna->bdg_port = cand; + ND("NIC %p to bridge port %d", vpna, cand); + /* bind the port to the bridge (virtual ports are not active) */ + b->bdg_ports[cand] = vpna; + vpna->na_bdg = b; + b->bdg_active_ports++; + if (hostna != NULL) { + /* also bind the host stack to the bridge */ + b->bdg_ports[cand2] = hostna; + hostna->bdg_port = cand2; + hostna->na_bdg = b; + b->bdg_active_ports++; + ND("host %p to bridge port %d", hostna, cand2); + } + ND("if %s refs %d", ifname, vpna->up.na_refcount); + BDG_WUNLOCK(b); + *na = &vpna->up; + netmap_adapter_get(*na); + +out: + if (ifp) + if_rele(ifp); + + return error; +} + +/* Process NETMAP_REQ_VALE_ATTACH. + */ +int +nm_bdg_ctl_attach(struct nmreq_header *hdr, void *auth_token) +{ + struct nmreq_vale_attach *req = + (struct nmreq_vale_attach *)(uintptr_t)hdr->nr_body; + struct netmap_vp_adapter * vpna; + struct netmap_adapter *na = NULL; + struct netmap_mem_d *nmd = NULL; + struct nm_bridge *b = NULL; + int error; + + NMG_LOCK(); + /* permission check for modified bridges */ + b = nm_find_bridge(hdr->nr_name, 0 /* don't create */, NULL); + if (b && !nm_bdg_valid_auth_token(b, auth_token)) { + error = EACCES; + goto unlock_exit; + } + + if (req->reg.nr_mem_id) { + nmd = netmap_mem_find(req->reg.nr_mem_id); + if (nmd == NULL) { + error = EINVAL; + goto unlock_exit; + } + } + + /* check for existing one */ + error = netmap_get_vale_na(hdr, &na, nmd, 0); + if (na) { + error = EBUSY; + goto unref_exit; + } + error = netmap_get_vale_na(hdr, &na, + nmd, 1 /* create if not exists */); + if (error) { /* no device */ + goto unlock_exit; + } + + if (na == NULL) { /* VALE prefix missing */ + error = EINVAL; + goto unlock_exit; + } + + if (NETMAP_OWNED_BY_ANY(na)) { + error = EBUSY; + goto unref_exit; + } + + if (na->nm_bdg_ctl) { + /* nop for VALE ports. The bwrap needs to put the hwna + * in netmap mode (see netmap_bwrap_bdg_ctl) + */ + error = na->nm_bdg_ctl(hdr, na); + if (error) + goto unref_exit; + ND("registered %s to netmap-mode", na->name); + } + vpna = (struct netmap_vp_adapter *)na; + req->port_index = vpna->bdg_port; + NMG_UNLOCK(); + return 0; + +unref_exit: + netmap_adapter_put(na); +unlock_exit: + NMG_UNLOCK(); + return error; +} + +static inline int +nm_is_bwrap(struct netmap_adapter *na) +{ + return na->nm_register == netmap_bwrap_reg; +} + +/* Process NETMAP_REQ_VALE_DETACH. + */ +int +nm_bdg_ctl_detach(struct nmreq_header *hdr, void *auth_token) +{ + struct nmreq_vale_detach *nmreq_det = (void *)(uintptr_t)hdr->nr_body; + struct netmap_vp_adapter *vpna; + struct netmap_adapter *na; + struct nm_bridge *b = NULL; + int error; + + NMG_LOCK(); + /* permission check for modified bridges */ + b = nm_find_bridge(hdr->nr_name, 0 /* don't create */, NULL); + if (b && !nm_bdg_valid_auth_token(b, auth_token)) { + error = EACCES; + goto unlock_exit; + } + + error = netmap_get_vale_na(hdr, &na, NULL, 0 /* don't create */); + if (error) { /* no device, or another bridge or user owns the device */ + goto unlock_exit; + } + + if (na == NULL) { /* VALE prefix missing */ + error = EINVAL; + goto unlock_exit; + } else if (nm_is_bwrap(na) && + ((struct netmap_bwrap_adapter *)na)->na_polling_state) { + /* Don't detach a NIC with polling */ + error = EBUSY; + goto unref_exit; + } + + vpna = (struct netmap_vp_adapter *)na; + if (na->na_vp != vpna) { + /* trying to detach first attach of VALE persistent port attached + * to 2 bridges + */ + error = EBUSY; + goto unref_exit; + } + nmreq_det->port_index = vpna->bdg_port; + + if (na->nm_bdg_ctl) { + /* remove the port from bridge. The bwrap + * also needs to put the hwna in normal mode + */ + error = na->nm_bdg_ctl(hdr, na); + } + +unref_exit: + netmap_adapter_put(na); +unlock_exit: + NMG_UNLOCK(); + return error; + +} + +struct nm_bdg_polling_state; +struct +nm_bdg_kthread { + struct nm_kctx *nmk; + u_int qfirst; + u_int qlast; + struct nm_bdg_polling_state *bps; +}; + +struct nm_bdg_polling_state { + bool configured; + bool stopped; + struct netmap_bwrap_adapter *bna; + uint32_t mode; + u_int qfirst; + u_int qlast; + u_int cpu_from; + u_int ncpus; + struct nm_bdg_kthread *kthreads; +}; + +static void +netmap_bwrap_polling(void *data, int is_kthread) +{ + struct nm_bdg_kthread *nbk = data; + struct netmap_bwrap_adapter *bna; + u_int qfirst, qlast, i; + struct netmap_kring **kring0, *kring; + + if (!nbk) + return; + qfirst = nbk->qfirst; + qlast = nbk->qlast; + bna = nbk->bps->bna; + kring0 = NMR(bna->hwna, NR_RX); + + for (i = qfirst; i < qlast; i++) { + kring = kring0[i]; + kring->nm_notify(kring, 0); + } +} + +static int +nm_bdg_create_kthreads(struct nm_bdg_polling_state *bps) +{ + struct nm_kctx_cfg kcfg; + int i, j; + + bps->kthreads = nm_os_malloc(sizeof(struct nm_bdg_kthread) * bps->ncpus); + if (bps->kthreads == NULL) + return ENOMEM; + + bzero(&kcfg, sizeof(kcfg)); + kcfg.worker_fn = netmap_bwrap_polling; + kcfg.use_kthread = 1; + for (i = 0; i < bps->ncpus; i++) { + struct nm_bdg_kthread *t = bps->kthreads + i; + int all = (bps->ncpus == 1 && + bps->mode == NETMAP_POLLING_MODE_SINGLE_CPU); + int affinity = bps->cpu_from + i; + + t->bps = bps; + t->qfirst = all ? bps->qfirst /* must be 0 */: affinity; + t->qlast = all ? bps->qlast : t->qfirst + 1; + D("kthread %d a:%u qf:%u ql:%u", i, affinity, t->qfirst, + t->qlast); + + kcfg.type = i; + kcfg.worker_private = t; + t->nmk = nm_os_kctx_create(&kcfg, NULL); + if (t->nmk == NULL) { + goto cleanup; + } + nm_os_kctx_worker_setaff(t->nmk, affinity); + } + return 0; + +cleanup: + for (j = 0; j < i; j++) { + struct nm_bdg_kthread *t = bps->kthreads + i; + nm_os_kctx_destroy(t->nmk); + } + nm_os_free(bps->kthreads); + return EFAULT; +} + +/* A variant of ptnetmap_start_kthreads() */ +static int +nm_bdg_polling_start_kthreads(struct nm_bdg_polling_state *bps) +{ + int error, i, j; + + if (!bps) { + D("polling is not configured"); + return EFAULT; + } + bps->stopped = false; + + for (i = 0; i < bps->ncpus; i++) { + struct nm_bdg_kthread *t = bps->kthreads + i; + error = nm_os_kctx_worker_start(t->nmk); + if (error) { + D("error in nm_kthread_start()"); + goto cleanup; + } + } + return 0; + +cleanup: + for (j = 0; j < i; j++) { + struct nm_bdg_kthread *t = bps->kthreads + i; + nm_os_kctx_worker_stop(t->nmk); + } + bps->stopped = true; + return error; +} + +static void +nm_bdg_polling_stop_delete_kthreads(struct nm_bdg_polling_state *bps) +{ + int i; + + if (!bps) + return; + + for (i = 0; i < bps->ncpus; i++) { + struct nm_bdg_kthread *t = bps->kthreads + i; + nm_os_kctx_worker_stop(t->nmk); + nm_os_kctx_destroy(t->nmk); + } + bps->stopped = true; +} + +static int +get_polling_cfg(struct nmreq_vale_polling *req, struct netmap_adapter *na, + struct nm_bdg_polling_state *bps) +{ + unsigned int avail_cpus, core_from; + unsigned int qfirst, qlast; + uint32_t i = req->nr_first_cpu_id; + uint32_t req_cpus = req->nr_num_polling_cpus; + + avail_cpus = nm_os_ncpus(); + + if (req_cpus == 0) { + D("req_cpus must be > 0"); + return EINVAL; + } else if (req_cpus >= avail_cpus) { + D("Cannot use all the CPUs in the system"); + return EINVAL; + } + + if (req->nr_mode == NETMAP_POLLING_MODE_MULTI_CPU) { + /* Use a separate core for each ring. If nr_num_polling_cpus>1 + * more consecutive rings are polled. + * For example, if nr_first_cpu_id=2 and nr_num_polling_cpus=2, + * ring 2 and 3 are polled by core 2 and 3, respectively. */ + if (i + req_cpus > nma_get_nrings(na, NR_RX)) { + D("Rings %u-%u not in range (have %d rings)", + i, i + req_cpus, nma_get_nrings(na, NR_RX)); + return EINVAL; + } + qfirst = i; + qlast = qfirst + req_cpus; + core_from = qfirst; + + } else if (req->nr_mode == NETMAP_POLLING_MODE_SINGLE_CPU) { + /* Poll all the rings using a core specified by nr_first_cpu_id. + * the number of cores must be 1. */ + if (req_cpus != 1) { + D("ncpus must be 1 for NETMAP_POLLING_MODE_SINGLE_CPU " + "(was %d)", req_cpus); + return EINVAL; + } + qfirst = 0; + qlast = nma_get_nrings(na, NR_RX); + core_from = i; + } else { + D("Invalid polling mode"); + return EINVAL; + } + + bps->mode = req->nr_mode; + bps->qfirst = qfirst; + bps->qlast = qlast; + bps->cpu_from = core_from; + bps->ncpus = req_cpus; + D("%s qfirst %u qlast %u cpu_from %u ncpus %u", + req->nr_mode == NETMAP_POLLING_MODE_MULTI_CPU ? + "MULTI" : "SINGLE", + qfirst, qlast, core_from, req_cpus); + return 0; +} + +static int +nm_bdg_ctl_polling_start(struct nmreq_vale_polling *req, struct netmap_adapter *na) +{ + struct nm_bdg_polling_state *bps; + struct netmap_bwrap_adapter *bna; + int error; + + bna = (struct netmap_bwrap_adapter *)na; + if (bna->na_polling_state) { + D("ERROR adapter already in polling mode"); + return EFAULT; + } + + bps = nm_os_malloc(sizeof(*bps)); + if (!bps) + return ENOMEM; + bps->configured = false; + bps->stopped = true; + + if (get_polling_cfg(req, na, bps)) { + nm_os_free(bps); + return EINVAL; + } + + if (nm_bdg_create_kthreads(bps)) { + nm_os_free(bps); + return EFAULT; + } + + bps->configured = true; + bna->na_polling_state = bps; + bps->bna = bna; + + /* disable interrupts if possible */ + nma_intr_enable(bna->hwna, 0); + /* start kthread now */ + error = nm_bdg_polling_start_kthreads(bps); + if (error) { + D("ERROR nm_bdg_polling_start_kthread()"); + nm_os_free(bps->kthreads); + nm_os_free(bps); + bna->na_polling_state = NULL; + nma_intr_enable(bna->hwna, 1); + } + return error; +} + +static int +nm_bdg_ctl_polling_stop(struct netmap_adapter *na) +{ + struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter *)na; + struct nm_bdg_polling_state *bps; + + if (!bna->na_polling_state) { + D("ERROR adapter is not in polling mode"); + return EFAULT; + } + bps = bna->na_polling_state; + nm_bdg_polling_stop_delete_kthreads(bna->na_polling_state); + bps->configured = false; + nm_os_free(bps); + bna->na_polling_state = NULL; + /* reenable interrupts */ + nma_intr_enable(bna->hwna, 1); + return 0; +} + +int +nm_bdg_polling(struct nmreq_header *hdr) +{ + struct nmreq_vale_polling *req = + (struct nmreq_vale_polling *)(uintptr_t)hdr->nr_body; + struct netmap_adapter *na = NULL; + int error = 0; + + NMG_LOCK(); + error = netmap_get_vale_na(hdr, &na, NULL, /*create=*/0); + if (na && !error) { + if (!nm_is_bwrap(na)) { + error = EOPNOTSUPP; + } else if (hdr->nr_reqtype == NETMAP_BDG_POLLING_ON) { + error = nm_bdg_ctl_polling_start(req, na); + if (!error) + netmap_adapter_get(na); + } else { + error = nm_bdg_ctl_polling_stop(na); + if (!error) + netmap_adapter_put(na); + } + netmap_adapter_put(na); + } else if (!na && !error) { + /* Not VALE port. */ + error = EINVAL; + } + NMG_UNLOCK(); + + return error; +} + +/* Process NETMAP_REQ_VALE_LIST. */ +int +netmap_bdg_list(struct nmreq_header *hdr) +{ + struct nmreq_vale_list *req = + (struct nmreq_vale_list *)(uintptr_t)hdr->nr_body; + int namelen = strlen(hdr->nr_name); + struct nm_bridge *b, *bridges; + struct netmap_vp_adapter *vpna; + int error = 0, i, j; + u_int num_bridges; + + netmap_bns_getbridges(&bridges, &num_bridges); + + /* this is used to enumerate bridges and ports */ + if (namelen) { /* look up indexes of bridge and port */ + if (strncmp(hdr->nr_name, NM_BDG_NAME, + strlen(NM_BDG_NAME))) { + return EINVAL; + } + NMG_LOCK(); + b = nm_find_bridge(hdr->nr_name, 0 /* don't create */, NULL); + if (!b) { + NMG_UNLOCK(); + return ENOENT; + } + + req->nr_bridge_idx = b - bridges; /* bridge index */ + req->nr_port_idx = NM_BDG_NOPORT; + for (j = 0; j < b->bdg_active_ports; j++) { + i = b->bdg_port_index[j]; + vpna = b->bdg_ports[i]; + if (vpna == NULL) { + D("This should not happen"); + continue; + } + /* the former and the latter identify a + * virtual port and a NIC, respectively + */ + if (!strcmp(vpna->up.name, hdr->nr_name)) { + req->nr_port_idx = i; /* port index */ + break; + } + } + NMG_UNLOCK(); + } else { + /* return the first non-empty entry starting from + * bridge nr_arg1 and port nr_arg2. + * + * Users can detect the end of the same bridge by + * seeing the new and old value of nr_arg1, and can + * detect the end of all the bridge by error != 0 + */ + i = req->nr_bridge_idx; + j = req->nr_port_idx; + + NMG_LOCK(); + for (error = ENOENT; i < NM_BRIDGES; i++) { + b = bridges + i; + for ( ; j < NM_BDG_MAXPORTS; j++) { + if (b->bdg_ports[j] == NULL) + continue; + vpna = b->bdg_ports[j]; + /* write back the VALE switch name */ + strncpy(hdr->nr_name, vpna->up.name, + (size_t)IFNAMSIZ); + error = 0; + goto out; + } + j = 0; /* following bridges scan from 0 */ + } + out: + req->nr_bridge_idx = i; + req->nr_port_idx = j; + NMG_UNLOCK(); + } + + return error; +} + +/* Called by external kernel modules (e.g., Openvswitch). + * to set configure/lookup/dtor functions of a VALE instance. + * Register callbacks to the given bridge. 'name' may be just + * bridge's name (including ':' if it is not just NM_BDG_NAME). + * + * Called without NMG_LOCK. + */ + +int +netmap_bdg_regops(const char *name, struct netmap_bdg_ops *bdg_ops, void *private_data, void *auth_token) +{ + struct nm_bridge *b; + int error = 0; + + NMG_LOCK(); + b = nm_find_bridge(name, 0 /* don't create */, NULL); + if (!b) { + error = ENXIO; + goto unlock_regops; + } + if (!nm_bdg_valid_auth_token(b, auth_token)) { + error = EACCES; + goto unlock_regops; + } + + BDG_WLOCK(b); + if (!bdg_ops) { + /* resetting the bridge */ + bzero(b->ht, sizeof(struct nm_hash_ent) * NM_BDG_HASH); + b->bdg_ops = NULL; + b->private_data = b->ht; + } else { + /* modifying the bridge */ + b->private_data = private_data; + b->bdg_ops = bdg_ops; + } + BDG_WUNLOCK(b); + +unlock_regops: + NMG_UNLOCK(); + return error; +} + + +int +netmap_bdg_config(struct nm_ifreq *nr) +{ + struct nm_bridge *b; + int error = EINVAL; + + NMG_LOCK(); + b = nm_find_bridge(nr->nifr_name, 0, NULL); + if (!b) { + NMG_UNLOCK(); + return error; + } + NMG_UNLOCK(); + /* Don't call config() with NMG_LOCK() held */ + BDG_RLOCK(b); + if (b->bdg_ops->config != NULL) + error = b->bdg_ops->config(nr); + BDG_RUNLOCK(b); + return error; +} + + +/* nm_register callback for VALE ports */ +int +netmap_vp_reg(struct netmap_adapter *na, int onoff) +{ + struct netmap_vp_adapter *vpna = + (struct netmap_vp_adapter*)na; + enum txrx t; + int i; + + /* persistent ports may be put in netmap mode + * before being attached to a bridge + */ + if (vpna->na_bdg) + BDG_WLOCK(vpna->na_bdg); + if (onoff) { + for_rx_tx(t) { + for (i = 0; i < netmap_real_rings(na, t); i++) { + struct netmap_kring *kring = NMR(na, t)[i]; + + if (nm_kring_pending_on(kring)) + kring->nr_mode = NKR_NETMAP_ON; + } + } + if (na->active_fds == 0) + na->na_flags |= NAF_NETMAP_ON; + /* XXX on FreeBSD, persistent VALE ports should also + * toggle IFCAP_NETMAP in na->ifp (2014-03-16) + */ + } else { + if (na->active_fds == 0) + na->na_flags &= ~NAF_NETMAP_ON; + for_rx_tx(t) { + for (i = 0; i < netmap_real_rings(na, t); i++) { + struct netmap_kring *kring = NMR(na, t)[i]; + + if (nm_kring_pending_off(kring)) + kring->nr_mode = NKR_NETMAP_OFF; + } + } + } + if (vpna->na_bdg) + BDG_WUNLOCK(vpna->na_bdg); + return 0; +} + + +/* rxsync code used by VALE ports nm_rxsync callback and also + * internally by the brwap + */ +static int +netmap_vp_rxsync_locked(struct netmap_kring *kring, int flags) +{ + struct netmap_adapter *na = kring->na; + struct netmap_ring *ring = kring->ring; + u_int nm_i, lim = kring->nkr_num_slots - 1; + u_int head = kring->rhead; + int n; + + if (head > lim) { + D("ouch dangerous reset!!!"); + n = netmap_ring_reinit(kring); + goto done; + } + + /* First part, import newly received packets. */ + /* actually nothing to do here, they are already in the kring */ + + /* Second part, skip past packets that userspace has released. */ + nm_i = kring->nr_hwcur; + if (nm_i != head) { + /* consistency check, but nothing really important here */ + for (n = 0; likely(nm_i != head); n++) { + struct netmap_slot *slot = &ring->slot[nm_i]; + void *addr = NMB(na, slot); + + if (addr == NETMAP_BUF_BASE(kring->na)) { /* bad buf */ + D("bad buffer index %d, ignore ?", + slot->buf_idx); + } + slot->flags &= ~NS_BUF_CHANGED; + nm_i = nm_next(nm_i, lim); + } + kring->nr_hwcur = head; + } + + n = 0; +done: + return n; +} + +/* + * nm_rxsync callback for VALE ports + * user process reading from a VALE switch. + * Already protected against concurrent calls from userspace, + * but we must acquire the queue's lock to protect against + * writers on the same queue. + */ +int +netmap_vp_rxsync(struct netmap_kring *kring, int flags) +{ + int n; + + mtx_lock(&kring->q_lock); + n = netmap_vp_rxsync_locked(kring, flags); + mtx_unlock(&kring->q_lock); + return n; +} + +int +netmap_bwrap_attach(const char *nr_name, struct netmap_adapter *hwna, + struct netmap_bdg_ops *ops) +{ + return ops->bwrap_attach(nr_name, hwna); +} + + +/* Bridge wrapper code (bwrap). + * This is used to connect a non-VALE-port netmap_adapter (hwna) to a + * VALE switch. + * The main task is to swap the meaning of tx and rx rings to match the + * expectations of the VALE switch code (see nm_bdg_flush). + * + * The bwrap works by interposing a netmap_bwrap_adapter between the + * rest of the system and the hwna. The netmap_bwrap_adapter looks like + * a netmap_vp_adapter to the rest the system, but, internally, it + * translates all callbacks to what the hwna expects. + * + * Note that we have to intercept callbacks coming from two sides: + * + * - callbacks coming from the netmap module are intercepted by + * passing around the netmap_bwrap_adapter instead of the hwna + * + * - callbacks coming from outside of the netmap module only know + * about the hwna. This, however, only happens in interrupt + * handlers, where only the hwna->nm_notify callback is called. + * What the bwrap does is to overwrite the hwna->nm_notify callback + * with its own netmap_bwrap_intr_notify. + * XXX This assumes that the hwna->nm_notify callback was the + * standard netmap_notify(), as it is the case for nic adapters. + * Any additional action performed by hwna->nm_notify will not be + * performed by netmap_bwrap_intr_notify. + * + * Additionally, the bwrap can optionally attach the host rings pair + * of the wrapped adapter to a different port of the switch. + */ + + +static void +netmap_bwrap_dtor(struct netmap_adapter *na) +{ + struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter*)na; + struct netmap_adapter *hwna = bna->hwna; + struct nm_bridge *b = bna->up.na_bdg, + *bh = bna->host.na_bdg; + + if (bna->host.up.nm_mem) + netmap_mem_put(bna->host.up.nm_mem); + + if (b) { + netmap_bdg_detach_common(b, bna->up.bdg_port, + (bh ? bna->host.bdg_port : -1)); + } + + ND("na %p", na); + na->ifp = NULL; + bna->host.up.ifp = NULL; + hwna->na_vp = bna->saved_na_vp; + hwna->na_hostvp = NULL; + hwna->na_private = NULL; + hwna->na_flags &= ~NAF_BUSY; + netmap_adapter_put(hwna); + +} + + +/* + * Intr callback for NICs connected to a bridge. + * Simply ignore tx interrupts (maybe we could try to recover space ?) + * and pass received packets from nic to the bridge. + * + * XXX TODO check locking: this is called from the interrupt + * handler so we should make sure that the interface is not + * disconnected while passing down an interrupt. + * + * Note, no user process can access this NIC or the host stack. + * The only part of the ring that is significant are the slots, + * and head/cur/tail are set from the kring as needed + * (part as a receive ring, part as a transmit ring). + * + * callback that overwrites the hwna notify callback. + * Packets come from the outside or from the host stack and are put on an + * hwna rx ring. + * The bridge wrapper then sends the packets through the bridge. + */ +static int +netmap_bwrap_intr_notify(struct netmap_kring *kring, int flags) +{ + struct netmap_adapter *na = kring->na; + struct netmap_bwrap_adapter *bna = na->na_private; + struct netmap_kring *bkring; + struct netmap_vp_adapter *vpna = &bna->up; + u_int ring_nr = kring->ring_id; + int ret = NM_IRQ_COMPLETED; + int error; + + if (netmap_verbose) + D("%s %s 0x%x", na->name, kring->name, flags); + + bkring = vpna->up.tx_rings[ring_nr]; + + /* make sure the ring is not disabled */ + if (nm_kr_tryget(kring, 0 /* can't sleep */, NULL)) { + return EIO; + } + + if (netmap_verbose) + D("%s head %d cur %d tail %d", na->name, + kring->rhead, kring->rcur, kring->rtail); + + /* simulate a user wakeup on the rx ring + * fetch packets that have arrived. + */ + error = kring->nm_sync(kring, 0); + if (error) + goto put_out; + if (kring->nr_hwcur == kring->nr_hwtail) { + if (netmap_verbose) + D("how strange, interrupt with no packets on %s", + na->name); + goto put_out; + } + + /* new packets are kring->rcur to kring->nr_hwtail, and the bkring + * had hwcur == bkring->rhead. So advance bkring->rhead to kring->nr_hwtail + * to push all packets out. + */ + bkring->rhead = bkring->rcur = kring->nr_hwtail; + + bkring->nm_sync(bkring, flags); + + /* mark all buffers as released on this ring */ + kring->rhead = kring->rcur = kring->rtail = kring->nr_hwtail; + /* another call to actually release the buffers */ + error = kring->nm_sync(kring, 0); + + /* The second rxsync may have further advanced hwtail. If this happens, + * return NM_IRQ_RESCHED, otherwise just return NM_IRQ_COMPLETED. */ + if (kring->rcur != kring->nr_hwtail) { + ret = NM_IRQ_RESCHED; + } +put_out: + nm_kr_put(kring); + + return error ? error : ret; +} + + +/* nm_register callback for bwrap */ +int +netmap_bwrap_reg(struct netmap_adapter *na, int onoff) +{ + struct netmap_bwrap_adapter *bna = + (struct netmap_bwrap_adapter *)na; + struct netmap_adapter *hwna = bna->hwna; + struct netmap_vp_adapter *hostna = &bna->host; + int error, i; + enum txrx t; + + ND("%s %s", na->name, onoff ? "on" : "off"); + + if (onoff) { + /* netmap_do_regif has been called on the bwrap na. + * We need to pass the information about the + * memory allocator down to the hwna before + * putting it in netmap mode + */ + hwna->na_lut = na->na_lut; + + if (hostna->na_bdg) { + /* if the host rings have been attached to switch, + * we need to copy the memory allocator information + * in the hostna also + */ + hostna->up.na_lut = na->na_lut; + } + + } + + /* pass down the pending ring state information */ + for_rx_tx(t) { + for (i = 0; i < netmap_all_rings(na, t); i++) { + NMR(hwna, nm_txrx_swap(t))[i]->nr_pending_mode = + NMR(na, t)[i]->nr_pending_mode; + } + } + + /* forward the request to the hwna */ + error = hwna->nm_register(hwna, onoff); + if (error) + return error; + + /* copy up the current ring state information */ + for_rx_tx(t) { + for (i = 0; i < netmap_all_rings(na, t); i++) { + struct netmap_kring *kring = NMR(hwna, nm_txrx_swap(t))[i]; + NMR(na, t)[i]->nr_mode = kring->nr_mode; + } + } + + /* impersonate a netmap_vp_adapter */ + netmap_vp_reg(na, onoff); + if (hostna->na_bdg) + netmap_vp_reg(&hostna->up, onoff); + + if (onoff) { + u_int i; + /* intercept the hwna nm_nofify callback on the hw rings */ + for (i = 0; i < hwna->num_rx_rings; i++) { + hwna->rx_rings[i]->save_notify = hwna->rx_rings[i]->nm_notify; + hwna->rx_rings[i]->nm_notify = netmap_bwrap_intr_notify; + } + i = hwna->num_rx_rings; /* for safety */ + /* save the host ring notify unconditionally */ + for (; i < netmap_real_rings(hwna, NR_RX); i++) { + hwna->rx_rings[i]->save_notify = + hwna->rx_rings[i]->nm_notify; + if (hostna->na_bdg) { + /* also intercept the host ring notify */ + hwna->rx_rings[i]->nm_notify = + netmap_bwrap_intr_notify; + na->tx_rings[i]->nm_sync = na->nm_txsync; + } + } + if (na->active_fds == 0) + na->na_flags |= NAF_NETMAP_ON; + } else { + u_int i; + + if (na->active_fds == 0) + na->na_flags &= ~NAF_NETMAP_ON; + + /* reset all notify callbacks (including host ring) */ + for (i = 0; i < netmap_all_rings(hwna, NR_RX); i++) { + hwna->rx_rings[i]->nm_notify = + hwna->rx_rings[i]->save_notify; + hwna->rx_rings[i]->save_notify = NULL; + } + hwna->na_lut.lut = NULL; + hwna->na_lut.plut = NULL; + hwna->na_lut.objtotal = 0; + hwna->na_lut.objsize = 0; + + /* pass ownership of the netmap rings to the hwna */ + for_rx_tx(t) { + for (i = 0; i < netmap_all_rings(na, t); i++) { + NMR(na, t)[i]->ring = NULL; + } + } + /* reset the number of host rings to default */ + for_rx_tx(t) { + nma_set_host_nrings(hwna, t, 1); + } + + } + + return 0; +} + +/* nm_config callback for bwrap */ +static int +netmap_bwrap_config(struct netmap_adapter *na, struct nm_config_info *info) +{ + struct netmap_bwrap_adapter *bna = + (struct netmap_bwrap_adapter *)na; + struct netmap_adapter *hwna = bna->hwna; + int error; + + /* Forward the request to the hwna. It may happen that nobody + * registered hwna yet, so netmap_mem_get_lut() may have not + * been called yet. */ + error = netmap_mem_get_lut(hwna->nm_mem, &hwna->na_lut); + if (error) + return error; + netmap_update_config(hwna); + /* swap the results and propagate */ + info->num_tx_rings = hwna->num_rx_rings; + info->num_tx_descs = hwna->num_rx_desc; + info->num_rx_rings = hwna->num_tx_rings; + info->num_rx_descs = hwna->num_tx_desc; + info->rx_buf_maxsize = hwna->rx_buf_maxsize; + + return 0; +} + + +/* nm_krings_create callback for bwrap */ +int +netmap_bwrap_krings_create_common(struct netmap_adapter *na) +{ + struct netmap_bwrap_adapter *bna = + (struct netmap_bwrap_adapter *)na; + struct netmap_adapter *hwna = bna->hwna; + struct netmap_adapter *hostna = &bna->host.up; + int i, error = 0; + enum txrx t; + + /* also create the hwna krings */ + error = hwna->nm_krings_create(hwna); + if (error) { + return error; + } + + /* increment the usage counter for all the hwna krings */ + for_rx_tx(t) { + for (i = 0; i < netmap_all_rings(hwna, t); i++) { + NMR(hwna, t)[i]->users++; + } + } + + /* now create the actual rings */ + error = netmap_mem_rings_create(hwna); + if (error) { + goto err_dec_users; + } + + /* cross-link the netmap rings + * The original number of rings comes from hwna, + * rx rings on one side equals tx rings on the other. + */ + for_rx_tx(t) { + enum txrx r = nm_txrx_swap(t); /* swap NR_TX <-> NR_RX */ + for (i = 0; i < netmap_all_rings(hwna, r); i++) { + NMR(na, t)[i]->nkr_num_slots = NMR(hwna, r)[i]->nkr_num_slots; + NMR(na, t)[i]->ring = NMR(hwna, r)[i]->ring; + } + } + + if (na->na_flags & NAF_HOST_RINGS) { + /* the hostna rings are the host rings of the bwrap. + * The corresponding krings must point back to the + * hostna + */ + hostna->tx_rings = &na->tx_rings[na->num_tx_rings]; + hostna->rx_rings = &na->rx_rings[na->num_rx_rings]; + for_rx_tx(t) { + for (i = 0; i < nma_get_nrings(hostna, t); i++) { + NMR(hostna, t)[i]->na = hostna; + } + } + } + + return 0; + +err_dec_users: + for_rx_tx(t) { + for (i = 0; i < netmap_all_rings(hwna, t); i++) { + NMR(hwna, t)[i]->users--; + } + } + hwna->nm_krings_delete(hwna); + return error; +} + + +void +netmap_bwrap_krings_delete_common(struct netmap_adapter *na) +{ + struct netmap_bwrap_adapter *bna = + (struct netmap_bwrap_adapter *)na; + struct netmap_adapter *hwna = bna->hwna; + enum txrx t; + int i; + + ND("%s", na->name); + + /* decrement the usage counter for all the hwna krings */ + for_rx_tx(t) { + for (i = 0; i < netmap_all_rings(hwna, t); i++) { + NMR(hwna, t)[i]->users--; + } + } + + /* delete any netmap rings that are no longer needed */ + netmap_mem_rings_delete(hwna); + hwna->nm_krings_delete(hwna); +} + + +/* notify method for the bridge-->hwna direction */ +int +netmap_bwrap_notify(struct netmap_kring *kring, int flags) +{ + struct netmap_adapter *na = kring->na; + struct netmap_bwrap_adapter *bna = na->na_private; + struct netmap_adapter *hwna = bna->hwna; + u_int ring_n = kring->ring_id; + u_int lim = kring->nkr_num_slots - 1; + struct netmap_kring *hw_kring; + int error; + + ND("%s: na %s hwna %s", + (kring ? kring->name : "NULL!"), + (na ? na->name : "NULL!"), + (hwna ? hwna->name : "NULL!")); + hw_kring = hwna->tx_rings[ring_n]; + + if (nm_kr_tryget(hw_kring, 0, NULL)) { + return ENXIO; + } + + /* first step: simulate a user wakeup on the rx ring */ + netmap_vp_rxsync(kring, flags); + ND("%s[%d] PRE rx(c%3d t%3d l%3d) ring(h%3d c%3d t%3d) tx(c%3d ht%3d t%3d)", + na->name, ring_n, + kring->nr_hwcur, kring->nr_hwtail, kring->nkr_hwlease, + ring->head, ring->cur, ring->tail, + hw_kring->nr_hwcur, hw_kring->nr_hwtail, hw_ring->rtail); + /* second step: the new packets are sent on the tx ring + * (which is actually the same ring) + */ + hw_kring->rhead = hw_kring->rcur = kring->nr_hwtail; + error = hw_kring->nm_sync(hw_kring, flags); + if (error) + goto put_out; + + /* third step: now we are back the rx ring */ + /* claim ownership on all hw owned bufs */ + kring->rhead = kring->rcur = nm_next(hw_kring->nr_hwtail, lim); /* skip past reserved slot */ + + /* fourth step: the user goes to sleep again, causing another rxsync */ + netmap_vp_rxsync(kring, flags); + ND("%s[%d] PST rx(c%3d t%3d l%3d) ring(h%3d c%3d t%3d) tx(c%3d ht%3d t%3d)", + na->name, ring_n, + kring->nr_hwcur, kring->nr_hwtail, kring->nkr_hwlease, + ring->head, ring->cur, ring->tail, + hw_kring->nr_hwcur, hw_kring->nr_hwtail, hw_kring->rtail); +put_out: + nm_kr_put(hw_kring); + + return error ? error : NM_IRQ_COMPLETED; +} + + +/* nm_bdg_ctl callback for the bwrap. + * Called on bridge-attach and detach, as an effect of vale-ctl -[ahd]. + * On attach, it needs to provide a fake netmap_priv_d structure and + * perform a netmap_do_regif() on the bwrap. This will put both the + * bwrap and the hwna in netmap mode, with the netmap rings shared + * and cross linked. Moroever, it will start intercepting interrupts + * directed to hwna. + */ +static int +netmap_bwrap_bdg_ctl(struct nmreq_header *hdr, struct netmap_adapter *na) +{ + struct netmap_priv_d *npriv; + struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter*)na; + int error = 0; + + if (hdr->nr_reqtype == NETMAP_REQ_VALE_ATTACH) { + struct nmreq_vale_attach *req = + (struct nmreq_vale_attach *)(uintptr_t)hdr->nr_body; + if (req->reg.nr_ringid != 0 || + (req->reg.nr_mode != NR_REG_ALL_NIC && + req->reg.nr_mode != NR_REG_NIC_SW)) { + /* We only support attaching all the NIC rings + * and/or the host stack. */ + return EINVAL; + } + if (NETMAP_OWNED_BY_ANY(na)) { + return EBUSY; + } + if (bna->na_kpriv) { + /* nothing to do */ + return 0; + } + npriv = netmap_priv_new(); + if (npriv == NULL) + return ENOMEM; + npriv->np_ifp = na->ifp; /* let the priv destructor release the ref */ + error = netmap_do_regif(npriv, na, req->reg.nr_mode, + req->reg.nr_ringid, req->reg.nr_flags); + if (error) { + netmap_priv_delete(npriv); + return error; + } + bna->na_kpriv = npriv; + na->na_flags |= NAF_BUSY; + } else { + if (na->active_fds == 0) /* not registered */ + return EINVAL; + netmap_priv_delete(bna->na_kpriv); + bna->na_kpriv = NULL; + na->na_flags &= ~NAF_BUSY; + } + + return error; +} + +/* attach a bridge wrapper to the 'real' device */ +int +netmap_bwrap_attach_common(struct netmap_adapter *na, + struct netmap_adapter *hwna) +{ + struct netmap_bwrap_adapter *bna; + struct netmap_adapter *hostna = NULL; + int error = 0; + enum txrx t; + + /* make sure the NIC is not already in use */ + if (NETMAP_OWNED_BY_ANY(hwna)) { + D("NIC %s busy, cannot attach to bridge", hwna->name); + return EBUSY; + } + + bna = (struct netmap_bwrap_adapter *)na; + /* make bwrap ifp point to the real ifp */ + na->ifp = hwna->ifp; + if_ref(na->ifp); + na->na_private = bna; + /* fill the ring data for the bwrap adapter with rx/tx meanings + * swapped. The real cross-linking will be done during register, + * when all the krings will have been created. + */ + for_rx_tx(t) { + enum txrx r = nm_txrx_swap(t); /* swap NR_TX <-> NR_RX */ + nma_set_nrings(na, t, nma_get_nrings(hwna, r)); + nma_set_ndesc(na, t, nma_get_ndesc(hwna, r)); + } + na->nm_dtor = netmap_bwrap_dtor; + na->nm_config = netmap_bwrap_config; + na->nm_bdg_ctl = netmap_bwrap_bdg_ctl; + na->pdev = hwna->pdev; + na->nm_mem = netmap_mem_get(hwna->nm_mem); + na->virt_hdr_len = hwna->virt_hdr_len; + na->rx_buf_maxsize = hwna->rx_buf_maxsize; + + bna->hwna = hwna; + netmap_adapter_get(hwna); + hwna->na_private = bna; /* weak reference */ + bna->saved_na_vp = hwna->na_vp; + hwna->na_vp = &bna->up; + bna->up.up.na_vp = &(bna->up); + + if (hwna->na_flags & NAF_HOST_RINGS) { + if (hwna->na_flags & NAF_SW_ONLY) + na->na_flags |= NAF_SW_ONLY; + na->na_flags |= NAF_HOST_RINGS; + hostna = &bna->host.up; + + /* limit the number of host rings to that of hw */ + nm_bound_var(&hostna->num_tx_rings, 1, 1, + nma_get_nrings(hwna, NR_TX), NULL); + nm_bound_var(&hostna->num_rx_rings, 1, 1, + nma_get_nrings(hwna, NR_RX), NULL); + + snprintf(hostna->name, sizeof(hostna->name), "%s^", na->name); + hostna->ifp = hwna->ifp; + for_rx_tx(t) { + enum txrx r = nm_txrx_swap(t); + u_int nr = nma_get_nrings(hostna, t); + + nma_set_nrings(hostna, t, nr); + nma_set_host_nrings(na, t, nr); + if (nma_get_host_nrings(hwna, t) < nr) { + nma_set_host_nrings(hwna, t, nr); + } + nma_set_ndesc(hostna, t, nma_get_ndesc(hwna, r)); + } + // hostna->nm_txsync = netmap_bwrap_host_txsync; + // hostna->nm_rxsync = netmap_bwrap_host_rxsync; + hostna->nm_mem = netmap_mem_get(na->nm_mem); + hostna->na_private = bna; + hostna->na_vp = &bna->up; + na->na_hostvp = hwna->na_hostvp = + hostna->na_hostvp = &bna->host; + hostna->na_flags = NAF_BUSY; /* prevent NIOCREGIF */ + hostna->rx_buf_maxsize = hwna->rx_buf_maxsize; + } + + ND("%s<->%s txr %d txd %d rxr %d rxd %d", + na->name, ifp->if_xname, + na->num_tx_rings, na->num_tx_desc, + na->num_rx_rings, na->num_rx_desc); + + error = netmap_attach_common(na); + if (error) { + goto err_put; + } + hwna->na_flags |= NAF_BUSY; + return 0; + +err_put: + hwna->na_vp = hwna->na_hostvp = NULL; + netmap_adapter_put(hwna); + return error; + +} + +struct nm_bridge * +netmap_init_bridges2(u_int n) +{ + int i; + struct nm_bridge *b; + + b = nm_os_malloc(sizeof(struct nm_bridge) * n); + if (b == NULL) + return NULL; + for (i = 0; i < n; i++) + BDG_RWINIT(&b[i]); + return b; +} + +void +netmap_uninit_bridges2(struct nm_bridge *b, u_int n) +{ + int i; + + if (b == NULL) + return; + + for (i = 0; i < n; i++) + BDG_RWDESTROY(&b[i]); + nm_os_free(b); +} + +int +netmap_init_bridges(void) +{ +#ifdef CONFIG_NET_NS + return netmap_bns_register(); +#else + nm_bridges = netmap_init_bridges2(NM_BRIDGES); + if (nm_bridges == NULL) + return ENOMEM; + return 0; +#endif +} + +void +netmap_uninit_bridges(void) +{ +#ifdef CONFIG_NET_NS + netmap_bns_unregister(); +#else + netmap_uninit_bridges2(nm_bridges, NM_BRIDGES); +#endif +} |
