aboutsummaryrefslogtreecommitdiff
path: root/sys/dev/netmap/netmap_vale.c
diff options
context:
space:
mode:
authorLuigi Rizzo <luigi@FreeBSD.org>2016-10-16 14:13:32 +0000
committerLuigi Rizzo <luigi@FreeBSD.org>2016-10-16 14:13:32 +0000
commit37e3a6d349581b4dd0aebf24be7b1b159a698dcf (patch)
tree0e61deea141c9733af511b0485cf1fd0f2dd17ed /sys/dev/netmap/netmap_vale.c
parent63f6b1a75a8e6e33e4f9d65571c6a221444d3b05 (diff)
Notes
Diffstat (limited to 'sys/dev/netmap/netmap_vale.c')
-rw-r--r--sys/dev/netmap/netmap_vale.c665
1 files changed, 508 insertions, 157 deletions
diff --git a/sys/dev/netmap/netmap_vale.c b/sys/dev/netmap/netmap_vale.c
index ddd7334a8378..2d2c807681d2 100644
--- a/sys/dev/netmap/netmap_vale.c
+++ b/sys/dev/netmap/netmap_vale.c
@@ -1,5 +1,6 @@
/*
- * Copyright (C) 2013-2014 Universita` di Pisa. All rights reserved.
+ * Copyright (C) 2013-2016 Universita` di Pisa
+ * All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -101,6 +102,9 @@ __FBSDID("$FreeBSD$");
#warning OSX support is only partial
#include "osx_glue.h"
+#elif defined(_WIN32)
+#include "win_glue.h"
+
#else
#error Unsupported platform
@@ -119,7 +123,7 @@ __FBSDID("$FreeBSD$");
/*
* system parameters (most of them in netmap_kern.h)
- * NM_NAME prefix for switch port names, default "vale"
+ * NM_BDG_NAME prefix for switch port names, default "vale"
* NM_BDG_MAXPORTS number of ports
* NM_BRIDGES max number of switches in the system.
* XXX should become a sysctl or tunable
@@ -144,7 +148,6 @@ __FBSDID("$FreeBSD$");
#define NM_BDG_BATCH_MAX (NM_BDG_BATCH + NM_MULTISEG)
/* NM_FT_NULL terminates a list of slots in the ft */
#define NM_FT_NULL NM_BDG_BATCH_MAX
-#define NM_BRIDGES 8 /* number of bridges */
/*
@@ -152,14 +155,15 @@ __FBSDID("$FreeBSD$");
* used in the bridge. The actual value may be larger as the
* last packet in the block may overflow the size.
*/
-int bridge_batch = NM_BDG_BATCH; /* bridge batch size */
+static int bridge_batch = NM_BDG_BATCH; /* bridge batch size */
+SYSBEGIN(vars_vale);
SYSCTL_DECL(_dev_netmap);
SYSCTL_INT(_dev_netmap, OID_AUTO, bridge_batch, CTLFLAG_RW, &bridge_batch, 0 , "");
-
+SYSEND;
static int netmap_vp_create(struct nmreq *, struct ifnet *, struct netmap_vp_adapter **);
static int netmap_vp_reg(struct netmap_adapter *na, int onoff);
-static int netmap_bwrap_register(struct netmap_adapter *, int onoff);
+static int netmap_bwrap_reg(struct netmap_adapter *, int onoff);
/*
* For each output interface, nm_bdg_q is used to construct a list.
@@ -213,7 +217,7 @@ struct nm_bridge {
* forward this packet. ring_nr is the source ring index, and the
* function may overwrite this value to forward this packet to a
* different ring index.
- * This function must be set by netmap_bdgctl().
+ * This function must be set by netmap_bdg_ctl().
*/
struct netmap_bdg_ops bdg_ops;
@@ -244,7 +248,7 @@ netmap_bdg_name(struct netmap_vp_adapter *vp)
* Right now we have a static array and deletions are protected
* by an exclusive lock.
*/
-struct nm_bridge *nm_bridges;
+static struct nm_bridge *nm_bridges;
#endif /* !CONFIG_NET_NS */
@@ -278,6 +282,45 @@ pkt_copy(void *_src, void *_dst, int l)
}
+static int
+nm_is_id_char(const char c)
+{
+ return (c >= 'a' && c <= 'z') ||
+ (c >= 'A' && c <= 'Z') ||
+ (c >= '0' && c <= '9') ||
+ (c == '_');
+}
+
+/* Validate the name of a VALE bridge port and return the
+ * position of the ":" character. */
+static int
+nm_vale_name_validate(const char *name)
+{
+ int colon_pos = -1;
+ int i;
+
+ if (!name || strlen(name) < strlen(NM_BDG_NAME)) {
+ return -1;
+ }
+
+ for (i = 0; name[i]; i++) {
+ if (name[i] == ':') {
+ if (colon_pos != -1) {
+ return -1;
+ }
+ colon_pos = i;
+ } else if (!nm_is_id_char(name[i])) {
+ return -1;
+ }
+ }
+
+ if (i >= IFNAMSIZ) {
+ return -1;
+ }
+
+ return colon_pos;
+}
+
/*
* locate a bridge among the existing ones.
* MUST BE CALLED WITH NMG_LOCK()
@@ -288,7 +331,7 @@ pkt_copy(void *_src, void *_dst, int l)
static struct nm_bridge *
nm_find_bridge(const char *name, int create)
{
- int i, l, namelen;
+ int i, namelen;
struct nm_bridge *b = NULL, *bridges;
u_int num_bridges;
@@ -296,21 +339,11 @@ nm_find_bridge(const char *name, int create)
netmap_bns_getbridges(&bridges, &num_bridges);
- namelen = strlen(NM_NAME); /* base length */
- l = name ? strlen(name) : 0; /* actual length */
- if (l < namelen) {
+ namelen = nm_vale_name_validate(name);
+ if (namelen < 0) {
D("invalid bridge name %s", name ? name : NULL);
return NULL;
}
- for (i = namelen + 1; i < l; i++) {
- if (name[i] == ':') {
- namelen = i;
- break;
- }
- }
- if (namelen >= IFNAMSIZ)
- namelen = IFNAMSIZ;
- ND("--- prefix is '%.*s' ---", namelen, name);
/* lookup the name, remember empty slot if there is one */
for (i = 0; i < num_bridges; i++) {
@@ -479,6 +512,7 @@ netmap_vp_bdg_ctl(struct netmap_adapter *na, struct nmreq *nmr, int attach)
struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter *)na;
struct nm_bridge *b = vpna->na_bdg;
+ (void)nmr; // XXX merge ?
if (attach)
return 0; /* nothing to do */
if (b) {
@@ -518,7 +552,7 @@ nm_vi_destroy(const char *name)
return ENXIO;
NMG_LOCK();
/* make sure this is actually a VALE port */
- if (!NETMAP_CAPABLE(ifp) || NA(ifp)->nm_register != netmap_vp_reg) {
+ if (!NM_NA_VALID(ifp) || NA(ifp)->nm_register != netmap_vp_reg) {
error = EINVAL;
goto err;
}
@@ -535,7 +569,7 @@ nm_vi_destroy(const char *name)
*/
if_rele(ifp);
netmap_detach(ifp);
- nm_vi_detach(ifp);
+ nm_os_vi_detach(ifp);
return 0;
err:
@@ -556,14 +590,14 @@ nm_vi_create(struct nmreq *nmr)
int error;
/* don't include VALE prefix */
- if (!strncmp(nmr->nr_name, NM_NAME, strlen(NM_NAME)))
+ if (!strncmp(nmr->nr_name, NM_BDG_NAME, strlen(NM_BDG_NAME)))
return EINVAL;
ifp = ifunit_ref(nmr->nr_name);
if (ifp) { /* already exist, cannot create new one */
if_rele(ifp);
return EEXIST;
}
- error = nm_vi_persist(nmr->nr_name, &ifp);
+ error = nm_os_vi_persist(nmr->nr_name, &ifp);
if (error)
return error;
@@ -572,12 +606,13 @@ nm_vi_create(struct nmreq *nmr)
error = netmap_vp_create(nmr, ifp, &vpna);
if (error) {
D("error %d", error);
- nm_vi_detach(ifp);
+ nm_os_vi_detach(ifp);
return error;
}
/* persist-specific routines */
vpna->up.nm_bdg_ctl = netmap_vp_bdg_ctl;
netmap_adapter_get(&vpna->up);
+ NM_ATTACH_NA(ifp, &vpna->up);
NMG_UNLOCK();
D("created %s", ifp->if_xname);
return 0;
@@ -608,7 +643,7 @@ netmap_get_bdg_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
/* first try to see if this is a bridge port. */
NMG_LOCK_ASSERT();
- if (strncmp(nr_name, NM_NAME, sizeof(NM_NAME) - 1)) {
+ if (strncmp(nr_name, NM_BDG_NAME, sizeof(NM_BDG_NAME) - 1)) {
return 0; /* no error, but no VALE prefix */
}
@@ -693,7 +728,6 @@ netmap_get_bdg_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
goto out;
vpna = hw->na_vp;
hostna = hw->na_hostvp;
- if_rele(ifp);
if (nmr->nr_arg1 != NETMAP_BDG_HOST)
hostna = NULL;
}
@@ -768,6 +802,11 @@ unlock_exit:
return error;
}
+static inline int
+nm_is_bwrap(struct netmap_adapter *na)
+{
+ return na->nm_register == netmap_bwrap_reg;
+}
/* process NETMAP_BDG_DETACH */
static int
@@ -785,8 +824,13 @@ nm_bdg_ctl_detach(struct nmreq *nmr)
if (na == NULL) { /* VALE prefix missing */
error = EINVAL;
goto unlock_exit;
+ } else if (nm_is_bwrap(na) &&
+ ((struct netmap_bwrap_adapter *)na)->na_polling_state) {
+ /* Don't detach a NIC with polling */
+ error = EBUSY;
+ netmap_adapter_put(na);
+ goto unlock_exit;
}
-
if (na->nm_bdg_ctl) {
/* remove the port from bridge. The bwrap
* also needs to put the hwna in normal mode
@@ -801,6 +845,267 @@ unlock_exit:
}
+struct nm_bdg_polling_state;
+struct
+nm_bdg_kthread {
+ struct nm_kthread *nmk;
+ u_int qfirst;
+ u_int qlast;
+ struct nm_bdg_polling_state *bps;
+};
+
+struct nm_bdg_polling_state {
+ bool configured;
+ bool stopped;
+ struct netmap_bwrap_adapter *bna;
+ u_int reg;
+ u_int qfirst;
+ u_int qlast;
+ u_int cpu_from;
+ u_int ncpus;
+ struct nm_bdg_kthread *kthreads;
+};
+
+static void
+netmap_bwrap_polling(void *data)
+{
+ struct nm_bdg_kthread *nbk = data;
+ struct netmap_bwrap_adapter *bna;
+ u_int qfirst, qlast, i;
+ struct netmap_kring *kring0, *kring;
+
+ if (!nbk)
+ return;
+ qfirst = nbk->qfirst;
+ qlast = nbk->qlast;
+ bna = nbk->bps->bna;
+ kring0 = NMR(bna->hwna, NR_RX);
+
+ for (i = qfirst; i < qlast; i++) {
+ kring = kring0 + i;
+ kring->nm_notify(kring, 0);
+ }
+}
+
+static int
+nm_bdg_create_kthreads(struct nm_bdg_polling_state *bps)
+{
+ struct nm_kthread_cfg kcfg;
+ int i, j;
+
+ bps->kthreads = malloc(sizeof(struct nm_bdg_kthread) * bps->ncpus,
+ M_DEVBUF, M_NOWAIT | M_ZERO);
+ if (bps->kthreads == NULL)
+ return ENOMEM;
+
+ bzero(&kcfg, sizeof(kcfg));
+ kcfg.worker_fn = netmap_bwrap_polling;
+ for (i = 0; i < bps->ncpus; i++) {
+ struct nm_bdg_kthread *t = bps->kthreads + i;
+ int all = (bps->ncpus == 1 && bps->reg == NR_REG_ALL_NIC);
+ int affinity = bps->cpu_from + i;
+
+ t->bps = bps;
+ t->qfirst = all ? bps->qfirst /* must be 0 */: affinity;
+ t->qlast = all ? bps->qlast : t->qfirst + 1;
+ D("kthread %d a:%u qf:%u ql:%u", i, affinity, t->qfirst,
+ t->qlast);
+
+ kcfg.type = i;
+ kcfg.worker_private = t;
+ t->nmk = nm_os_kthread_create(&kcfg);
+ if (t->nmk == NULL) {
+ goto cleanup;
+ }
+ nm_os_kthread_set_affinity(t->nmk, affinity);
+ }
+ return 0;
+
+cleanup:
+ for (j = 0; j < i; j++) {
+ struct nm_bdg_kthread *t = bps->kthreads + i;
+ nm_os_kthread_delete(t->nmk);
+ }
+ free(bps->kthreads, M_DEVBUF);
+ return EFAULT;
+}
+
+/* a version of ptnetmap_start_kthreads() */
+static int
+nm_bdg_polling_start_kthreads(struct nm_bdg_polling_state *bps)
+{
+ int error, i, j;
+
+ if (!bps) {
+ D("polling is not configured");
+ return EFAULT;
+ }
+ bps->stopped = false;
+
+ for (i = 0; i < bps->ncpus; i++) {
+ struct nm_bdg_kthread *t = bps->kthreads + i;
+ error = nm_os_kthread_start(t->nmk);
+ if (error) {
+ D("error in nm_kthread_start()");
+ goto cleanup;
+ }
+ }
+ return 0;
+
+cleanup:
+ for (j = 0; j < i; j++) {
+ struct nm_bdg_kthread *t = bps->kthreads + i;
+ nm_os_kthread_stop(t->nmk);
+ }
+ bps->stopped = true;
+ return error;
+}
+
+static void
+nm_bdg_polling_stop_delete_kthreads(struct nm_bdg_polling_state *bps)
+{
+ int i;
+
+ if (!bps)
+ return;
+
+ for (i = 0; i < bps->ncpus; i++) {
+ struct nm_bdg_kthread *t = bps->kthreads + i;
+ nm_os_kthread_stop(t->nmk);
+ nm_os_kthread_delete(t->nmk);
+ }
+ bps->stopped = true;
+}
+
+static int
+get_polling_cfg(struct nmreq *nmr, struct netmap_adapter *na,
+ struct nm_bdg_polling_state *bps)
+{
+ int req_cpus, avail_cpus, core_from;
+ u_int reg, i, qfirst, qlast;
+
+ avail_cpus = nm_os_ncpus();
+ req_cpus = nmr->nr_arg1;
+
+ if (req_cpus == 0) {
+ D("req_cpus must be > 0");
+ return EINVAL;
+ } else if (req_cpus >= avail_cpus) {
+ D("for safety, we need at least one core left in the system");
+ return EINVAL;
+ }
+ reg = nmr->nr_flags & NR_REG_MASK;
+ i = nmr->nr_ringid & NETMAP_RING_MASK;
+ /*
+ * ONE_NIC: dedicate one core to one ring. If multiple cores
+ * are specified, consecutive rings are also polled.
+ * For example, if ringid=2 and 2 cores are given,
+ * ring 2 and 3 are polled by core 2 and 3, respectively.
+ * ALL_NIC: poll all the rings using a core specified by ringid.
+ * the number of cores must be 1.
+ */
+ if (reg == NR_REG_ONE_NIC) {
+ if (i + req_cpus > nma_get_nrings(na, NR_RX)) {
+ D("only %d rings exist (ring %u-%u is given)",
+ nma_get_nrings(na, NR_RX), i, i+req_cpus);
+ return EINVAL;
+ }
+ qfirst = i;
+ qlast = qfirst + req_cpus;
+ core_from = qfirst;
+ } else if (reg == NR_REG_ALL_NIC) {
+ if (req_cpus != 1) {
+ D("ncpus must be 1 not %d for REG_ALL_NIC", req_cpus);
+ return EINVAL;
+ }
+ qfirst = 0;
+ qlast = nma_get_nrings(na, NR_RX);
+ core_from = i;
+ } else {
+ D("reg must be ALL_NIC or ONE_NIC");
+ return EINVAL;
+ }
+
+ bps->reg = reg;
+ bps->qfirst = qfirst;
+ bps->qlast = qlast;
+ bps->cpu_from = core_from;
+ bps->ncpus = req_cpus;
+ D("%s qfirst %u qlast %u cpu_from %u ncpus %u",
+ reg == NR_REG_ALL_NIC ? "REG_ALL_NIC" : "REG_ONE_NIC",
+ qfirst, qlast, core_from, req_cpus);
+ return 0;
+}
+
+static int
+nm_bdg_ctl_polling_start(struct nmreq *nmr, struct netmap_adapter *na)
+{
+ struct nm_bdg_polling_state *bps;
+ struct netmap_bwrap_adapter *bna;
+ int error;
+
+ bna = (struct netmap_bwrap_adapter *)na;
+ if (bna->na_polling_state) {
+ D("ERROR adapter already in polling mode");
+ return EFAULT;
+ }
+
+ bps = malloc(sizeof(*bps), M_DEVBUF, M_NOWAIT | M_ZERO);
+ if (!bps)
+ return ENOMEM;
+ bps->configured = false;
+ bps->stopped = true;
+
+ if (get_polling_cfg(nmr, na, bps)) {
+ free(bps, M_DEVBUF);
+ return EINVAL;
+ }
+
+ if (nm_bdg_create_kthreads(bps)) {
+ free(bps, M_DEVBUF);
+ return EFAULT;
+ }
+
+ bps->configured = true;
+ bna->na_polling_state = bps;
+ bps->bna = bna;
+
+ /* disable interrupt if possible */
+ if (bna->hwna->nm_intr)
+ bna->hwna->nm_intr(bna->hwna, 0);
+ /* start kthread now */
+ error = nm_bdg_polling_start_kthreads(bps);
+ if (error) {
+ D("ERROR nm_bdg_polling_start_kthread()");
+ free(bps->kthreads, M_DEVBUF);
+ free(bps, M_DEVBUF);
+ bna->na_polling_state = NULL;
+ if (bna->hwna->nm_intr)
+ bna->hwna->nm_intr(bna->hwna, 1);
+ }
+ return error;
+}
+
+static int
+nm_bdg_ctl_polling_stop(struct nmreq *nmr, struct netmap_adapter *na)
+{
+ struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter *)na;
+ struct nm_bdg_polling_state *bps;
+
+ if (!bna->na_polling_state) {
+ D("ERROR adapter is not in polling mode");
+ return EFAULT;
+ }
+ bps = bna->na_polling_state;
+ nm_bdg_polling_stop_delete_kthreads(bna->na_polling_state);
+ bps->configured = false;
+ free(bps, M_DEVBUF);
+ bna->na_polling_state = NULL;
+ /* reenable interrupt */
+ if (bna->hwna->nm_intr)
+ bna->hwna->nm_intr(bna->hwna, 1);
+ return 0;
+}
/* Called by either user's context (netmap_ioctl())
* or external kernel modules (e.g., Openvswitch).
@@ -843,7 +1148,7 @@ netmap_bdg_ctl(struct nmreq *nmr, struct netmap_bdg_ops *bdg_ops)
case NETMAP_BDG_LIST:
/* this is used to enumerate bridges and ports */
if (namelen) { /* look up indexes of bridge and port */
- if (strncmp(name, NM_NAME, strlen(NM_NAME))) {
+ if (strncmp(name, NM_BDG_NAME, strlen(NM_BDG_NAME))) {
error = EINVAL;
break;
}
@@ -855,7 +1160,9 @@ netmap_bdg_ctl(struct nmreq *nmr, struct netmap_bdg_ops *bdg_ops)
break;
}
- error = ENOENT;
+ error = 0;
+ nmr->nr_arg1 = b - bridges; /* bridge index */
+ nmr->nr_arg2 = NM_BDG_NOPORT;
for (j = 0; j < b->bdg_active_ports; j++) {
i = b->bdg_port_index[j];
vpna = b->bdg_ports[i];
@@ -867,10 +1174,7 @@ netmap_bdg_ctl(struct nmreq *nmr, struct netmap_bdg_ops *bdg_ops)
* virtual port and a NIC, respectively
*/
if (!strcmp(vpna->up.name, name)) {
- /* bridge index */
- nmr->nr_arg1 = b - bridges;
nmr->nr_arg2 = i; /* port index */
- error = 0;
break;
}
}
@@ -937,10 +1241,34 @@ netmap_bdg_ctl(struct nmreq *nmr, struct netmap_bdg_ops *bdg_ops)
error = netmap_get_bdg_na(nmr, &na, 0);
if (na && !error) {
vpna = (struct netmap_vp_adapter *)na;
- vpna->virt_hdr_len = nmr->nr_arg1;
- if (vpna->virt_hdr_len)
+ na->virt_hdr_len = nmr->nr_arg1;
+ if (na->virt_hdr_len) {
vpna->mfs = NETMAP_BUF_SIZE(na);
- D("Using vnet_hdr_len %d for %p", vpna->virt_hdr_len, vpna);
+ }
+ D("Using vnet_hdr_len %d for %p", na->virt_hdr_len, na);
+ netmap_adapter_put(na);
+ } else if (!na) {
+ error = ENXIO;
+ }
+ NMG_UNLOCK();
+ break;
+
+ case NETMAP_BDG_POLLING_ON:
+ case NETMAP_BDG_POLLING_OFF:
+ NMG_LOCK();
+ error = netmap_get_bdg_na(nmr, &na, 0);
+ if (na && !error) {
+ if (!nm_is_bwrap(na)) {
+ error = EOPNOTSUPP;
+ } else if (cmd == NETMAP_BDG_POLLING_ON) {
+ error = nm_bdg_ctl_polling_start(nmr, na);
+ if (!error)
+ netmap_adapter_get(na);
+ } else {
+ error = nm_bdg_ctl_polling_stop(nmr, na);
+ if (!error)
+ netmap_adapter_put(na);
+ }
netmap_adapter_put(na);
}
NMG_UNLOCK();
@@ -1097,10 +1425,12 @@ nm_bdg_preflush(struct netmap_kring *kring, u_int end)
ft_i = nm_bdg_flush(ft, ft_i, na, ring_nr);
}
if (frags > 1) {
- D("truncate incomplete fragment at %d (%d frags)", ft_i, frags);
- // ft_i > 0, ft[ft_i-1].flags has NS_MOREFRAG
- ft[ft_i - 1].ft_frags &= ~NS_MOREFRAG;
- ft[ft_i - frags].ft_frags = frags - 1;
+ /* Here ft_i > 0, ft[ft_i-1].flags has NS_MOREFRAG, and we
+ * have to fix frags count. */
+ frags--;
+ ft[ft_i - 1].ft_flags &= ~NS_MOREFRAG;
+ ft[ft_i - frags].ft_frags = frags;
+ D("Truncate incomplete fragment at %d (%d frags)", ft_i, frags);
}
if (ft_i)
ft_i = nm_bdg_flush(ft, ft_i, na, ring_nr);
@@ -1157,6 +1487,8 @@ netmap_vp_reg(struct netmap_adapter *na, int onoff)
{
struct netmap_vp_adapter *vpna =
(struct netmap_vp_adapter*)na;
+ enum txrx t;
+ int i;
/* persistent ports may be put in netmap mode
* before being attached to a bridge
@@ -1164,12 +1496,30 @@ netmap_vp_reg(struct netmap_adapter *na, int onoff)
if (vpna->na_bdg)
BDG_WLOCK(vpna->na_bdg);
if (onoff) {
- na->na_flags |= NAF_NETMAP_ON;
+ for_rx_tx(t) {
+ for (i = 0; i < nma_get_nrings(na, t) + 1; i++) {
+ struct netmap_kring *kring = &NMR(na, t)[i];
+
+ if (nm_kring_pending_on(kring))
+ kring->nr_mode = NKR_NETMAP_ON;
+ }
+ }
+ if (na->active_fds == 0)
+ na->na_flags |= NAF_NETMAP_ON;
/* XXX on FreeBSD, persistent VALE ports should also
* toggle IFCAP_NETMAP in na->ifp (2014-03-16)
*/
} else {
- na->na_flags &= ~NAF_NETMAP_ON;
+ if (na->active_fds == 0)
+ na->na_flags &= ~NAF_NETMAP_ON;
+ for_rx_tx(t) {
+ for (i = 0; i < nma_get_nrings(na, t) + 1; i++) {
+ struct netmap_kring *kring = &NMR(na, t)[i];
+
+ if (nm_kring_pending_off(kring))
+ kring->nr_mode = NKR_NETMAP_OFF;
+ }
+ }
}
if (vpna->na_bdg)
BDG_WUNLOCK(vpna->na_bdg);
@@ -1193,13 +1543,14 @@ netmap_bdg_learning(struct nm_bdg_fwd *ft, uint8_t *dst_ring,
uint32_t sh, dh;
u_int dst, mysrc = na->bdg_port;
uint64_t smac, dmac;
+ uint8_t indbuf[12];
/* safety check, unfortunately we have many cases */
- if (buf_len >= 14 + na->virt_hdr_len) {
+ if (buf_len >= 14 + na->up.virt_hdr_len) {
/* virthdr + mac_hdr in the same slot */
- buf += na->virt_hdr_len;
- buf_len -= na->virt_hdr_len;
- } else if (buf_len == na->virt_hdr_len && ft->ft_flags & NS_MOREFRAG) {
+ buf += na->up.virt_hdr_len;
+ buf_len -= na->up.virt_hdr_len;
+ } else if (buf_len == na->up.virt_hdr_len && ft->ft_flags & NS_MOREFRAG) {
/* only header in first fragment */
ft++;
buf = ft->ft_buf;
@@ -1208,6 +1559,14 @@ netmap_bdg_learning(struct nm_bdg_fwd *ft, uint8_t *dst_ring,
RD(5, "invalid buf format, length %d", buf_len);
return NM_BDG_NOPORT;
}
+
+ if (ft->ft_flags & NS_INDIRECT) {
+ if (copyin(buf, indbuf, sizeof(indbuf))) {
+ return NM_BDG_NOPORT;
+ }
+ buf = indbuf;
+ }
+
dmac = le64toh(*(uint64_t *)(buf)) & 0xffffffffffff;
smac = le64toh(*(uint64_t *)(buf + 4));
smac >>= 16;
@@ -1321,7 +1680,7 @@ nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n, struct netmap_vp_adapter *na,
struct nm_bdg_q *dst_ents, *brddst;
uint16_t num_dsts = 0, *dsts;
struct nm_bridge *b = na->na_bdg;
- u_int i, j, me = na->bdg_port;
+ u_int i, me = na->bdg_port;
/*
* The work area (pointed by ft) is followed by an array of
@@ -1341,7 +1700,7 @@ nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n, struct netmap_vp_adapter *na,
ND("slot %d frags %d", i, ft[i].ft_frags);
/* Drop the packet if the virtio-net header is not into the first
fragment nor at the very beginning of the second. */
- if (unlikely(na->virt_hdr_len > ft[i].ft_len))
+ if (unlikely(na->up.virt_hdr_len > ft[i].ft_len))
continue;
dst_port = b->bdg_ops.lookup(&ft[i], &dst_ring, na);
if (netmap_verbose > 255)
@@ -1382,6 +1741,7 @@ nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n, struct netmap_vp_adapter *na,
*/
brddst = dst_ents + NM_BDG_BROADCAST * NM_BDG_MAXRINGS;
if (brddst->bq_head != NM_FT_NULL) {
+ u_int j;
for (j = 0; likely(j < b->bdg_active_ports); j++) {
uint16_t d_i;
i = b->bdg_port_index[j];
@@ -1441,8 +1801,9 @@ nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n, struct netmap_vp_adapter *na,
*/
needed = d->bq_len + brddst->bq_len;
- if (unlikely(dst_na->virt_hdr_len != na->virt_hdr_len)) {
- RD(3, "virt_hdr_mismatch, src %d dst %d", na->virt_hdr_len, dst_na->virt_hdr_len);
+ if (unlikely(dst_na->up.virt_hdr_len != na->up.virt_hdr_len)) {
+ RD(3, "virt_hdr_mismatch, src %d dst %d", na->up.virt_hdr_len,
+ dst_na->up.virt_hdr_len);
/* There is a virtio-net header/offloadings mismatch between
* source and destination. The slower mismatch datapath will
* be used to cope with all the mismatches.
@@ -1803,7 +2164,6 @@ netmap_vp_create(struct nmreq *nmr, struct ifnet *ifp, struct netmap_vp_adapter
nm_bound_var(&nmr->nr_arg3, 0, 0,
128*NM_BDG_MAXSLOTS, NULL);
na->num_rx_desc = nmr->nr_rx_slots;
- vpna->virt_hdr_len = 0;
vpna->mfs = 1514;
vpna->last_smac = ~0llu;
/*if (vpna->mfs > netmap_buf_size) TODO netmap_buf_size is zero??
@@ -1880,19 +2240,17 @@ netmap_bwrap_dtor(struct netmap_adapter *na)
{
struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter*)na;
struct netmap_adapter *hwna = bna->hwna;
+ struct nm_bridge *b = bna->up.na_bdg,
+ *bh = bna->host.na_bdg;
+
+ if (b) {
+ netmap_bdg_detach_common(b, bna->up.bdg_port,
+ (bh ? bna->host.bdg_port : -1));
+ }
ND("na %p", na);
- /* drop reference to hwna->ifp.
- * If we don't do this, netmap_detach_common(na)
- * will think it has set NA(na->ifp) to NULL
- */
na->ifp = NULL;
- /* for safety, also drop the possible reference
- * in the hostna
- */
bna->host.up.ifp = NULL;
-
- hwna->nm_mem = bna->save_nmd;
hwna->na_private = NULL;
hwna->na_vp = hwna->na_hostvp = NULL;
hwna->na_flags &= ~NAF_BUSY;
@@ -1916,7 +2274,8 @@ netmap_bwrap_dtor(struct netmap_adapter *na)
* (part as a receive ring, part as a transmit ring).
*
* callback that overwrites the hwna notify callback.
- * Packets come from the outside or from the host stack and are put on an hwna rx ring.
+ * Packets come from the outside or from the host stack and are put on an
+ * hwna rx ring.
* The bridge wrapper then sends the packets through the bridge.
*/
static int
@@ -1927,19 +2286,18 @@ netmap_bwrap_intr_notify(struct netmap_kring *kring, int flags)
struct netmap_kring *bkring;
struct netmap_vp_adapter *vpna = &bna->up;
u_int ring_nr = kring->ring_id;
- int error = 0;
+ int ret = NM_IRQ_COMPLETED;
+ int error;
if (netmap_verbose)
D("%s %s 0x%x", na->name, kring->name, flags);
- if (!nm_netmap_on(na))
- return 0;
-
bkring = &vpna->up.tx_rings[ring_nr];
/* make sure the ring is not disabled */
- if (nm_kr_tryget(kring))
- return 0;
+ if (nm_kr_tryget(kring, 0 /* can't sleep */, NULL)) {
+ return EIO;
+ }
if (netmap_verbose)
D("%s head %d cur %d tail %d", na->name,
@@ -1951,9 +2309,10 @@ netmap_bwrap_intr_notify(struct netmap_kring *kring, int flags)
error = kring->nm_sync(kring, 0);
if (error)
goto put_out;
- if (kring->nr_hwcur == kring->nr_hwtail && netmap_verbose) {
- D("how strange, interrupt with no packets on %s",
- na->name);
+ if (kring->nr_hwcur == kring->nr_hwtail) {
+ if (netmap_verbose)
+ D("how strange, interrupt with no packets on %s",
+ na->name);
goto put_out;
}
@@ -1970,28 +2329,32 @@ netmap_bwrap_intr_notify(struct netmap_kring *kring, int flags)
/* another call to actually release the buffers */
error = kring->nm_sync(kring, 0);
+ /* The second rxsync may have further advanced hwtail. If this happens,
+ * return NM_IRQ_RESCHED, otherwise just return NM_IRQ_COMPLETED. */
+ if (kring->rcur != kring->nr_hwtail) {
+ ret = NM_IRQ_RESCHED;
+ }
put_out:
nm_kr_put(kring);
- return error;
+
+ return error ? error : ret;
}
/* nm_register callback for bwrap */
static int
-netmap_bwrap_register(struct netmap_adapter *na, int onoff)
+netmap_bwrap_reg(struct netmap_adapter *na, int onoff)
{
struct netmap_bwrap_adapter *bna =
(struct netmap_bwrap_adapter *)na;
struct netmap_adapter *hwna = bna->hwna;
struct netmap_vp_adapter *hostna = &bna->host;
- int error;
+ int error, i;
enum txrx t;
ND("%s %s", na->name, onoff ? "on" : "off");
if (onoff) {
- int i;
-
/* netmap_do_regif has been called on the bwrap na.
* We need to pass the information about the
* memory allocator down to the hwna before
@@ -2010,16 +2373,32 @@ netmap_bwrap_register(struct netmap_adapter *na, int onoff)
/* cross-link the netmap rings
* The original number of rings comes from hwna,
* rx rings on one side equals tx rings on the other.
- * We need to do this now, after the initialization
- * of the kring->ring pointers
*/
for_rx_tx(t) {
- enum txrx r= nm_txrx_swap(t); /* swap NR_TX <-> NR_RX */
- for (i = 0; i < nma_get_nrings(na, r) + 1; i++) {
- NMR(hwna, t)[i].nkr_num_slots = NMR(na, r)[i].nkr_num_slots;
- NMR(hwna, t)[i].ring = NMR(na, r)[i].ring;
+ enum txrx r = nm_txrx_swap(t); /* swap NR_TX <-> NR_RX */
+ for (i = 0; i < nma_get_nrings(hwna, r) + 1; i++) {
+ NMR(hwna, r)[i].ring = NMR(na, t)[i].ring;
}
}
+
+ if (na->na_flags & NAF_HOST_RINGS) {
+ struct netmap_adapter *hna = &hostna->up;
+ /* the hostna rings are the host rings of the bwrap.
+ * The corresponding krings must point back to the
+ * hostna
+ */
+ hna->tx_rings = &na->tx_rings[na->num_tx_rings];
+ hna->tx_rings[0].na = hna;
+ hna->rx_rings = &na->rx_rings[na->num_rx_rings];
+ hna->rx_rings[0].na = hna;
+ }
+ }
+
+ /* pass down the pending ring state information */
+ for_rx_tx(t) {
+ for (i = 0; i < nma_get_nrings(na, t) + 1; i++)
+ NMR(hwna, t)[i].nr_pending_mode =
+ NMR(na, t)[i].nr_pending_mode;
}
/* forward the request to the hwna */
@@ -2027,6 +2406,13 @@ netmap_bwrap_register(struct netmap_adapter *na, int onoff)
if (error)
return error;
+ /* copy up the current ring state information */
+ for_rx_tx(t) {
+ for (i = 0; i < nma_get_nrings(na, t) + 1; i++)
+ NMR(na, t)[i].nr_mode =
+ NMR(hwna, t)[i].nr_mode;
+ }
+
/* impersonate a netmap_vp_adapter */
netmap_vp_reg(na, onoff);
if (hostna->na_bdg)
@@ -2046,8 +2432,14 @@ netmap_bwrap_register(struct netmap_adapter *na, int onoff)
/* also intercept the host ring notify */
hwna->rx_rings[i].nm_notify = netmap_bwrap_intr_notify;
}
+ if (na->active_fds == 0)
+ na->na_flags |= NAF_NETMAP_ON;
} else {
u_int i;
+
+ if (na->active_fds == 0)
+ na->na_flags &= ~NAF_NETMAP_ON;
+
/* reset all notify callbacks (including host ring) */
for (i = 0; i <= hwna->num_rx_rings; i++) {
hwna->rx_rings[i].nm_notify = hwna->rx_rings[i].save_notify;
@@ -2089,8 +2481,8 @@ netmap_bwrap_krings_create(struct netmap_adapter *na)
struct netmap_bwrap_adapter *bna =
(struct netmap_bwrap_adapter *)na;
struct netmap_adapter *hwna = bna->hwna;
- struct netmap_adapter *hostna = &bna->host.up;
- int error;
+ int i, error = 0;
+ enum txrx t;
ND("%s", na->name);
@@ -2102,26 +2494,23 @@ netmap_bwrap_krings_create(struct netmap_adapter *na)
/* also create the hwna krings */
error = hwna->nm_krings_create(hwna);
if (error) {
- netmap_vp_krings_delete(na);
- return error;
+ goto err_del_vp_rings;
}
- /* the connection between the bwrap krings and the hwna krings
- * will be perfomed later, in the nm_register callback, since
- * now the kring->ring pointers have not been initialized yet
- */
- if (na->na_flags & NAF_HOST_RINGS) {
- /* the hostna rings are the host rings of the bwrap.
- * The corresponding krings must point back to the
- * hostna
- */
- hostna->tx_rings = &na->tx_rings[na->num_tx_rings];
- hostna->tx_rings[0].na = hostna;
- hostna->rx_rings = &na->rx_rings[na->num_rx_rings];
- hostna->rx_rings[0].na = hostna;
+ /* get each ring slot number from the corresponding hwna ring */
+ for_rx_tx(t) {
+ enum txrx r = nm_txrx_swap(t); /* swap NR_TX <-> NR_RX */
+ for (i = 0; i < nma_get_nrings(hwna, r) + 1; i++) {
+ NMR(na, t)[i].nkr_num_slots = NMR(hwna, r)[i].nkr_num_slots;
+ }
}
return 0;
+
+err_del_vp_rings:
+ netmap_vp_krings_delete(na);
+
+ return error;
}
@@ -2149,7 +2538,7 @@ netmap_bwrap_notify(struct netmap_kring *kring, int flags)
u_int ring_n = kring->ring_id;
u_int lim = kring->nkr_num_slots - 1;
struct netmap_kring *hw_kring;
- int error = 0;
+ int error;
ND("%s: na %s hwna %s",
(kring ? kring->name : "NULL!"),
@@ -2157,11 +2546,10 @@ netmap_bwrap_notify(struct netmap_kring *kring, int flags)
(hwna ? hwna->name : "NULL!"));
hw_kring = &hwna->tx_rings[ring_n];
- if (nm_kr_tryget(hw_kring))
- return 0;
+ if (nm_kr_tryget(hw_kring, 0, NULL)) {
+ return ENXIO;
+ }
- if (!nm_netmap_on(hwna))
- return 0;
/* first step: simulate a user wakeup on the rx ring */
netmap_vp_rxsync(kring, flags);
ND("%s[%d] PRE rx(c%3d t%3d l%3d) ring(h%3d c%3d t%3d) tx(c%3d ht%3d t%3d)",
@@ -2175,7 +2563,7 @@ netmap_bwrap_notify(struct netmap_kring *kring, int flags)
hw_kring->rhead = hw_kring->rcur = kring->nr_hwtail;
error = hw_kring->nm_sync(hw_kring, flags);
if (error)
- goto out;
+ goto put_out;
/* third step: now we are back the rx ring */
/* claim ownership on all hw owned bufs */
@@ -2188,9 +2576,10 @@ netmap_bwrap_notify(struct netmap_kring *kring, int flags)
kring->nr_hwcur, kring->nr_hwtail, kring->nkr_hwlease,
ring->head, ring->cur, ring->tail,
hw_kring->nr_hwcur, hw_kring->nr_hwtail, hw_kring->rtail);
-out:
+put_out:
nm_kr_put(hw_kring);
- return error;
+
+ return error ? error : NM_IRQ_COMPLETED;
}
@@ -2217,44 +2606,23 @@ netmap_bwrap_bdg_ctl(struct netmap_adapter *na, struct nmreq *nmr, int attach)
/* nothing to do */
return 0;
}
- npriv = malloc(sizeof(*npriv), M_DEVBUF, M_NOWAIT|M_ZERO);
+ npriv = netmap_priv_new();
if (npriv == NULL)
return ENOMEM;
- error = netmap_do_regif(npriv, na, nmr->nr_ringid, nmr->nr_flags);
+ npriv->np_ifp = na->ifp; /* let the priv destructor release the ref */
+ error = netmap_do_regif(npriv, na, 0, NR_REG_NIC_SW);
if (error) {
- bzero(npriv, sizeof(*npriv));
- free(npriv, M_DEVBUF);
+ netmap_priv_delete(npriv);
return error;
}
bna->na_kpriv = npriv;
na->na_flags |= NAF_BUSY;
} else {
- int last_instance;
-
if (na->active_fds == 0) /* not registered */
return EINVAL;
- last_instance = netmap_dtor_locked(bna->na_kpriv);
- if (!last_instance) {
- D("--- error, trying to detach an entry with active mmaps");
- error = EINVAL;
- } else {
- struct nm_bridge *b = bna->up.na_bdg,
- *bh = bna->host.na_bdg;
- npriv = bna->na_kpriv;
- bna->na_kpriv = NULL;
- D("deleting priv");
-
- bzero(npriv, sizeof(*npriv));
- free(npriv, M_DEVBUF);
- if (b) {
- /* XXX the bwrap dtor should take care
- * of this (2014-06-16)
- */
- netmap_bdg_detach_common(b, bna->up.bdg_port,
- (bh ? bna->host.bdg_port : -1));
- }
- na->na_flags &= ~NAF_BUSY;
- }
+ netmap_priv_delete(bna->na_kpriv);
+ bna->na_kpriv = NULL;
+ na->na_flags &= ~NAF_BUSY;
}
return error;
@@ -2282,6 +2650,8 @@ netmap_bwrap_attach(const char *nr_name, struct netmap_adapter *hwna)
}
na = &bna->up.up;
+ /* make bwrap ifp point to the real ifp */
+ na->ifp = hwna->ifp;
na->na_private = bna;
strncpy(na->name, nr_name, sizeof(na->name));
/* fill the ring data for the bwrap adapter with rx/tx meanings
@@ -2294,7 +2664,7 @@ netmap_bwrap_attach(const char *nr_name, struct netmap_adapter *hwna)
nma_set_ndesc(na, t, nma_get_ndesc(hwna, r));
}
na->nm_dtor = netmap_bwrap_dtor;
- na->nm_register = netmap_bwrap_register;
+ na->nm_register = netmap_bwrap_reg;
// na->nm_txsync = netmap_bwrap_txsync;
// na->nm_rxsync = netmap_bwrap_rxsync;
na->nm_config = netmap_bwrap_config;
@@ -2303,13 +2673,8 @@ netmap_bwrap_attach(const char *nr_name, struct netmap_adapter *hwna)
na->nm_notify = netmap_bwrap_notify;
na->nm_bdg_ctl = netmap_bwrap_bdg_ctl;
na->pdev = hwna->pdev;
- na->nm_mem = netmap_mem_private_new(na->name,
- na->num_tx_rings, na->num_tx_desc,
- na->num_rx_rings, na->num_rx_desc,
- 0, 0, &error);
- na->na_flags |= NAF_MEM_OWNER;
- if (na->nm_mem == NULL)
- goto err_put;
+ na->nm_mem = hwna->nm_mem;
+ na->virt_hdr_len = hwna->virt_hdr_len;
bna->up.retry = 1; /* XXX maybe this should depend on the hwna */
bna->hwna = hwna;
@@ -2349,24 +2714,10 @@ netmap_bwrap_attach(const char *nr_name, struct netmap_adapter *hwna)
if (error) {
goto err_free;
}
- /* make bwrap ifp point to the real ifp
- * NOTE: netmap_attach_common() interprets a non-NULL na->ifp
- * as a request to make the ifp point to the na. Since we
- * do not want to change the na already pointed to by hwna->ifp,
- * the following assignment has to be delayed until now
- */
- na->ifp = hwna->ifp;
hwna->na_flags |= NAF_BUSY;
- /* make hwna point to the allocator we are actually using,
- * so that monitors will be able to find it
- */
- bna->save_nmd = hwna->nm_mem;
- hwna->nm_mem = na->nm_mem;
return 0;
err_free:
- netmap_mem_delete(na->nm_mem);
-err_put:
hwna->na_vp = hwna->na_hostvp = NULL;
netmap_adapter_put(hwna);
free(bna, M_DEVBUF);