diff options
| author | Luigi Rizzo <luigi@FreeBSD.org> | 2016-10-16 14:13:32 +0000 |
|---|---|---|
| committer | Luigi Rizzo <luigi@FreeBSD.org> | 2016-10-16 14:13:32 +0000 |
| commit | 37e3a6d349581b4dd0aebf24be7b1b159a698dcf (patch) | |
| tree | 0e61deea141c9733af511b0485cf1fd0f2dd17ed /sys/dev/netmap/netmap_vale.c | |
| parent | 63f6b1a75a8e6e33e4f9d65571c6a221444d3b05 (diff) | |
Notes
Diffstat (limited to 'sys/dev/netmap/netmap_vale.c')
| -rw-r--r-- | sys/dev/netmap/netmap_vale.c | 665 |
1 files changed, 508 insertions, 157 deletions
diff --git a/sys/dev/netmap/netmap_vale.c b/sys/dev/netmap/netmap_vale.c index ddd7334a8378..2d2c807681d2 100644 --- a/sys/dev/netmap/netmap_vale.c +++ b/sys/dev/netmap/netmap_vale.c @@ -1,5 +1,6 @@ /* - * Copyright (C) 2013-2014 Universita` di Pisa. All rights reserved. + * Copyright (C) 2013-2016 Universita` di Pisa + * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -101,6 +102,9 @@ __FBSDID("$FreeBSD$"); #warning OSX support is only partial #include "osx_glue.h" +#elif defined(_WIN32) +#include "win_glue.h" + #else #error Unsupported platform @@ -119,7 +123,7 @@ __FBSDID("$FreeBSD$"); /* * system parameters (most of them in netmap_kern.h) - * NM_NAME prefix for switch port names, default "vale" + * NM_BDG_NAME prefix for switch port names, default "vale" * NM_BDG_MAXPORTS number of ports * NM_BRIDGES max number of switches in the system. * XXX should become a sysctl or tunable @@ -144,7 +148,6 @@ __FBSDID("$FreeBSD$"); #define NM_BDG_BATCH_MAX (NM_BDG_BATCH + NM_MULTISEG) /* NM_FT_NULL terminates a list of slots in the ft */ #define NM_FT_NULL NM_BDG_BATCH_MAX -#define NM_BRIDGES 8 /* number of bridges */ /* @@ -152,14 +155,15 @@ __FBSDID("$FreeBSD$"); * used in the bridge. The actual value may be larger as the * last packet in the block may overflow the size. 
*/ -int bridge_batch = NM_BDG_BATCH; /* bridge batch size */ +static int bridge_batch = NM_BDG_BATCH; /* bridge batch size */ +SYSBEGIN(vars_vale); SYSCTL_DECL(_dev_netmap); SYSCTL_INT(_dev_netmap, OID_AUTO, bridge_batch, CTLFLAG_RW, &bridge_batch, 0 , ""); - +SYSEND; static int netmap_vp_create(struct nmreq *, struct ifnet *, struct netmap_vp_adapter **); static int netmap_vp_reg(struct netmap_adapter *na, int onoff); -static int netmap_bwrap_register(struct netmap_adapter *, int onoff); +static int netmap_bwrap_reg(struct netmap_adapter *, int onoff); /* * For each output interface, nm_bdg_q is used to construct a list. @@ -213,7 +217,7 @@ struct nm_bridge { * forward this packet. ring_nr is the source ring index, and the * function may overwrite this value to forward this packet to a * different ring index. - * This function must be set by netmap_bdgctl(). + * This function must be set by netmap_bdg_ctl(). */ struct netmap_bdg_ops bdg_ops; @@ -244,7 +248,7 @@ netmap_bdg_name(struct netmap_vp_adapter *vp) * Right now we have a static array and deletions are protected * by an exclusive lock. */ -struct nm_bridge *nm_bridges; +static struct nm_bridge *nm_bridges; #endif /* !CONFIG_NET_NS */ @@ -278,6 +282,45 @@ pkt_copy(void *_src, void *_dst, int l) } +static int +nm_is_id_char(const char c) /* nonzero iff c is in [A-Za-z0-9_] */ +{ + return (c >= 'a' && c <= 'z') || + (c >= 'A' && c <= 'Z') || + (c >= '0' && c <= '9') || + (c == '_'); +} + +/* Validate the name of a VALE bridge port and return the + * position of the ":" character, or -1 when the name is rejected (NULL, shorter than NM_BDG_NAME, a second ":", a character other than ":" that fails nm_is_id_char(), or IFNAMSIZ or more characters) or when it contains no ":" at all. */ +static int +nm_vale_name_validate(const char *name) +{ + int colon_pos = -1; + int i; + + if (!name || strlen(name) < strlen(NM_BDG_NAME)) { + return -1; + } + + for (i = 0; name[i]; i++) { + if (name[i] == ':') { + if (colon_pos != -1) { + return -1; + } + colon_pos = i; + } else if (!nm_is_id_char(name[i])) { + return -1; + } + } + + if (i >= IFNAMSIZ) { + return -1; + } + + return colon_pos; +} + /* * locate a bridge among the existing ones. 
* MUST BE CALLED WITH NMG_LOCK() @@ -288,7 +331,7 @@ pkt_copy(void *_src, void *_dst, int l) static struct nm_bridge * nm_find_bridge(const char *name, int create) { - int i, l, namelen; + int i, namelen; struct nm_bridge *b = NULL, *bridges; u_int num_bridges; @@ -296,21 +339,11 @@ nm_find_bridge(const char *name, int create) netmap_bns_getbridges(&bridges, &num_bridges); - namelen = strlen(NM_NAME); /* base length */ - l = name ? strlen(name) : 0; /* actual length */ - if (l < namelen) { + namelen = nm_vale_name_validate(name); + if (namelen < 0) { D("invalid bridge name %s", name ? name : NULL); return NULL; } - for (i = namelen + 1; i < l; i++) { - if (name[i] == ':') { - namelen = i; - break; - } - } - if (namelen >= IFNAMSIZ) - namelen = IFNAMSIZ; - ND("--- prefix is '%.*s' ---", namelen, name); /* lookup the name, remember empty slot if there is one */ for (i = 0; i < num_bridges; i++) { @@ -479,6 +512,7 @@ netmap_vp_bdg_ctl(struct netmap_adapter *na, struct nmreq *nmr, int attach) struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter *)na; struct nm_bridge *b = vpna->na_bdg; + (void)nmr; // XXX merge ? 
if (attach) return 0; /* nothing to do */ if (b) { @@ -518,7 +552,7 @@ nm_vi_destroy(const char *name) return ENXIO; NMG_LOCK(); /* make sure this is actually a VALE port */ - if (!NETMAP_CAPABLE(ifp) || NA(ifp)->nm_register != netmap_vp_reg) { + if (!NM_NA_VALID(ifp) || NA(ifp)->nm_register != netmap_vp_reg) { error = EINVAL; goto err; } @@ -535,7 +569,7 @@ nm_vi_destroy(const char *name) */ if_rele(ifp); netmap_detach(ifp); - nm_vi_detach(ifp); + nm_os_vi_detach(ifp); return 0; err: @@ -556,14 +590,14 @@ nm_vi_create(struct nmreq *nmr) int error; /* don't include VALE prefix */ - if (!strncmp(nmr->nr_name, NM_NAME, strlen(NM_NAME))) + if (!strncmp(nmr->nr_name, NM_BDG_NAME, strlen(NM_BDG_NAME))) return EINVAL; ifp = ifunit_ref(nmr->nr_name); if (ifp) { /* already exist, cannot create new one */ if_rele(ifp); return EEXIST; } - error = nm_vi_persist(nmr->nr_name, &ifp); + error = nm_os_vi_persist(nmr->nr_name, &ifp); if (error) return error; @@ -572,12 +606,13 @@ nm_vi_create(struct nmreq *nmr) error = netmap_vp_create(nmr, ifp, &vpna); if (error) { D("error %d", error); - nm_vi_detach(ifp); + nm_os_vi_detach(ifp); return error; } /* persist-specific routines */ vpna->up.nm_bdg_ctl = netmap_vp_bdg_ctl; netmap_adapter_get(&vpna->up); + NM_ATTACH_NA(ifp, &vpna->up); NMG_UNLOCK(); D("created %s", ifp->if_xname); return 0; @@ -608,7 +643,7 @@ netmap_get_bdg_na(struct nmreq *nmr, struct netmap_adapter **na, int create) /* first try to see if this is a bridge port. 
*/ NMG_LOCK_ASSERT(); - if (strncmp(nr_name, NM_NAME, sizeof(NM_NAME) - 1)) { + if (strncmp(nr_name, NM_BDG_NAME, sizeof(NM_BDG_NAME) - 1)) { return 0; /* no error, but no VALE prefix */ } @@ -693,7 +728,6 @@ netmap_get_bdg_na(struct nmreq *nmr, struct netmap_adapter **na, int create) goto out; vpna = hw->na_vp; hostna = hw->na_hostvp; - if_rele(ifp); if (nmr->nr_arg1 != NETMAP_BDG_HOST) hostna = NULL; } @@ -768,6 +802,11 @@ unlock_exit: return error; } +static inline int +nm_is_bwrap(struct netmap_adapter *na) +{ + return na->nm_register == netmap_bwrap_reg; +} /* process NETMAP_BDG_DETACH */ static int @@ -785,8 +824,13 @@ nm_bdg_ctl_detach(struct nmreq *nmr) if (na == NULL) { /* VALE prefix missing */ error = EINVAL; goto unlock_exit; + } else if (nm_is_bwrap(na) && + ((struct netmap_bwrap_adapter *)na)->na_polling_state) { + /* Don't detach a NIC with polling */ + error = EBUSY; + netmap_adapter_put(na); + goto unlock_exit; } - if (na->nm_bdg_ctl) { /* remove the port from bridge. The bwrap * also needs to put the hwna in normal mode @@ -801,6 +845,267 @@ unlock_exit: } +struct nm_bdg_polling_state; +struct +nm_bdg_kthread { + struct nm_kthread *nmk; + u_int qfirst; + u_int qlast; + struct nm_bdg_polling_state *bps; +}; + +struct nm_bdg_polling_state { + bool configured; + bool stopped; + struct netmap_bwrap_adapter *bna; + u_int reg; + u_int qfirst; + u_int qlast; + u_int cpu_from; + u_int ncpus; + struct nm_bdg_kthread *kthreads; +}; + +static void +netmap_bwrap_polling(void *data) +{ + struct nm_bdg_kthread *nbk = data; + struct netmap_bwrap_adapter *bna; + u_int qfirst, qlast, i; + struct netmap_kring *kring0, *kring; + + if (!nbk) + return; + qfirst = nbk->qfirst; + qlast = nbk->qlast; + bna = nbk->bps->bna; + kring0 = NMR(bna->hwna, NR_RX); + + for (i = qfirst; i < qlast; i++) { + kring = kring0 + i; + kring->nm_notify(kring, 0); + } +} + +static int +nm_bdg_create_kthreads(struct nm_bdg_polling_state *bps) +{ + struct nm_kthread_cfg kcfg; + int i, j; + + 
bps->kthreads = malloc(sizeof(struct nm_bdg_kthread) * bps->ncpus, + M_DEVBUF, M_NOWAIT | M_ZERO); + if (bps->kthreads == NULL) + return ENOMEM; + + bzero(&kcfg, sizeof(kcfg)); + kcfg.worker_fn = netmap_bwrap_polling; + for (i = 0; i < bps->ncpus; i++) { + struct nm_bdg_kthread *t = bps->kthreads + i; + int all = (bps->ncpus == 1 && bps->reg == NR_REG_ALL_NIC); + int affinity = bps->cpu_from + i; + + t->bps = bps; + t->qfirst = all ? bps->qfirst /* must be 0 */: affinity; + t->qlast = all ? bps->qlast : t->qfirst + 1; + D("kthread %d a:%u qf:%u ql:%u", i, affinity, t->qfirst, + t->qlast); + + kcfg.type = i; + kcfg.worker_private = t; + t->nmk = nm_os_kthread_create(&kcfg); + if (t->nmk == NULL) { + goto cleanup; + } + nm_os_kthread_set_affinity(t->nmk, affinity); + } + return 0; + +cleanup: + for (j = 0; j < i; j++) { + struct nm_bdg_kthread *t = bps->kthreads + i; + nm_os_kthread_delete(t->nmk); + } + free(bps->kthreads, M_DEVBUF); + return EFAULT; +} + +/* a version of ptnetmap_start_kthreads() */ +static int +nm_bdg_polling_start_kthreads(struct nm_bdg_polling_state *bps) +{ + int error, i, j; + + if (!bps) { + D("polling is not configured"); + return EFAULT; + } + bps->stopped = false; + + for (i = 0; i < bps->ncpus; i++) { + struct nm_bdg_kthread *t = bps->kthreads + i; + error = nm_os_kthread_start(t->nmk); + if (error) { + D("error in nm_kthread_start()"); + goto cleanup; + } + } + return 0; + +cleanup: + for (j = 0; j < i; j++) { + struct nm_bdg_kthread *t = bps->kthreads + i; + nm_os_kthread_stop(t->nmk); + } + bps->stopped = true; + return error; +} + +static void +nm_bdg_polling_stop_delete_kthreads(struct nm_bdg_polling_state *bps) +{ + int i; + + if (!bps) + return; + + for (i = 0; i < bps->ncpus; i++) { + struct nm_bdg_kthread *t = bps->kthreads + i; + nm_os_kthread_stop(t->nmk); + nm_os_kthread_delete(t->nmk); + } + bps->stopped = true; +} + +static int +get_polling_cfg(struct nmreq *nmr, struct netmap_adapter *na, + struct nm_bdg_polling_state *bps) 
+{ + int req_cpus, avail_cpus, core_from; + u_int reg, i, qfirst, qlast; + + avail_cpus = nm_os_ncpus(); + req_cpus = nmr->nr_arg1; + + if (req_cpus == 0) { + D("req_cpus must be > 0"); + return EINVAL; + } else if (req_cpus >= avail_cpus) { + D("for safety, we need at least one core left in the system"); + return EINVAL; + } + reg = nmr->nr_flags & NR_REG_MASK; + i = nmr->nr_ringid & NETMAP_RING_MASK; + /* + * ONE_NIC: dedicate one core to one ring. If multiple cores + * are specified, consecutive rings are also polled. + * For example, if ringid=2 and 2 cores are given, + * ring 2 and 3 are polled by core 2 and 3, respectively. + * ALL_NIC: poll all the rings using a core specified by ringid. + * the number of cores must be 1. + */ + if (reg == NR_REG_ONE_NIC) { + if (i + req_cpus > nma_get_nrings(na, NR_RX)) { + D("only %d rings exist (ring %u-%u is given)", + nma_get_nrings(na, NR_RX), i, i+req_cpus); + return EINVAL; + } + qfirst = i; + qlast = qfirst + req_cpus; + core_from = qfirst; + } else if (reg == NR_REG_ALL_NIC) { + if (req_cpus != 1) { + D("ncpus must be 1 not %d for REG_ALL_NIC", req_cpus); + return EINVAL; + } + qfirst = 0; + qlast = nma_get_nrings(na, NR_RX); + core_from = i; + } else { + D("reg must be ALL_NIC or ONE_NIC"); + return EINVAL; + } + + bps->reg = reg; + bps->qfirst = qfirst; + bps->qlast = qlast; + bps->cpu_from = core_from; + bps->ncpus = req_cpus; + D("%s qfirst %u qlast %u cpu_from %u ncpus %u", + reg == NR_REG_ALL_NIC ? 
"REG_ALL_NIC" : "REG_ONE_NIC", + qfirst, qlast, core_from, req_cpus); + return 0; +} + +static int +nm_bdg_ctl_polling_start(struct nmreq *nmr, struct netmap_adapter *na) +{ + struct nm_bdg_polling_state *bps; + struct netmap_bwrap_adapter *bna; + int error; + + bna = (struct netmap_bwrap_adapter *)na; + if (bna->na_polling_state) { + D("ERROR adapter already in polling mode"); + return EFAULT; + } + + bps = malloc(sizeof(*bps), M_DEVBUF, M_NOWAIT | M_ZERO); + if (!bps) + return ENOMEM; + bps->configured = false; + bps->stopped = true; + + if (get_polling_cfg(nmr, na, bps)) { + free(bps, M_DEVBUF); + return EINVAL; + } + + if (nm_bdg_create_kthreads(bps)) { + free(bps, M_DEVBUF); + return EFAULT; + } + + bps->configured = true; + bna->na_polling_state = bps; + bps->bna = bna; + + /* disable interrupt if possible */ + if (bna->hwna->nm_intr) + bna->hwna->nm_intr(bna->hwna, 0); + /* start kthread now */ + error = nm_bdg_polling_start_kthreads(bps); + if (error) { + D("ERROR nm_bdg_polling_start_kthread()"); + free(bps->kthreads, M_DEVBUF); + free(bps, M_DEVBUF); + bna->na_polling_state = NULL; + if (bna->hwna->nm_intr) + bna->hwna->nm_intr(bna->hwna, 1); + } + return error; +} + +static int +nm_bdg_ctl_polling_stop(struct nmreq *nmr, struct netmap_adapter *na) +{ + struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter *)na; + struct nm_bdg_polling_state *bps; + + if (!bna->na_polling_state) { + D("ERROR adapter is not in polling mode"); + return EFAULT; + } + bps = bna->na_polling_state; + nm_bdg_polling_stop_delete_kthreads(bna->na_polling_state); + bps->configured = false; + free(bps, M_DEVBUF); + bna->na_polling_state = NULL; + /* reenable interrupt */ + if (bna->hwna->nm_intr) + bna->hwna->nm_intr(bna->hwna, 1); + return 0; +} /* Called by either user's context (netmap_ioctl()) * or external kernel modules (e.g., Openvswitch). 
@@ -843,7 +1148,7 @@ netmap_bdg_ctl(struct nmreq *nmr, struct netmap_bdg_ops *bdg_ops) case NETMAP_BDG_LIST: /* this is used to enumerate bridges and ports */ if (namelen) { /* look up indexes of bridge and port */ - if (strncmp(name, NM_NAME, strlen(NM_NAME))) { + if (strncmp(name, NM_BDG_NAME, strlen(NM_BDG_NAME))) { error = EINVAL; break; } @@ -855,7 +1160,9 @@ netmap_bdg_ctl(struct nmreq *nmr, struct netmap_bdg_ops *bdg_ops) break; } - error = ENOENT; + error = 0; + nmr->nr_arg1 = b - bridges; /* bridge index */ + nmr->nr_arg2 = NM_BDG_NOPORT; for (j = 0; j < b->bdg_active_ports; j++) { i = b->bdg_port_index[j]; vpna = b->bdg_ports[i]; @@ -867,10 +1174,7 @@ netmap_bdg_ctl(struct nmreq *nmr, struct netmap_bdg_ops *bdg_ops) * virtual port and a NIC, respectively */ if (!strcmp(vpna->up.name, name)) { - /* bridge index */ - nmr->nr_arg1 = b - bridges; nmr->nr_arg2 = i; /* port index */ - error = 0; break; } } @@ -937,10 +1241,34 @@ netmap_bdg_ctl(struct nmreq *nmr, struct netmap_bdg_ops *bdg_ops) error = netmap_get_bdg_na(nmr, &na, 0); if (na && !error) { vpna = (struct netmap_vp_adapter *)na; - vpna->virt_hdr_len = nmr->nr_arg1; - if (vpna->virt_hdr_len) + na->virt_hdr_len = nmr->nr_arg1; + if (na->virt_hdr_len) { vpna->mfs = NETMAP_BUF_SIZE(na); - D("Using vnet_hdr_len %d for %p", vpna->virt_hdr_len, vpna); + } + D("Using vnet_hdr_len %d for %p", na->virt_hdr_len, na); + netmap_adapter_put(na); + } else if (!na) { + error = ENXIO; + } + NMG_UNLOCK(); + break; + + case NETMAP_BDG_POLLING_ON: + case NETMAP_BDG_POLLING_OFF: + NMG_LOCK(); + error = netmap_get_bdg_na(nmr, &na, 0); + if (na && !error) { + if (!nm_is_bwrap(na)) { + error = EOPNOTSUPP; + } else if (cmd == NETMAP_BDG_POLLING_ON) { + error = nm_bdg_ctl_polling_start(nmr, na); + if (!error) + netmap_adapter_get(na); + } else { + error = nm_bdg_ctl_polling_stop(nmr, na); + if (!error) + netmap_adapter_put(na); + } netmap_adapter_put(na); } NMG_UNLOCK(); @@ -1097,10 +1425,12 @@ nm_bdg_preflush(struct 
netmap_kring *kring, u_int end) ft_i = nm_bdg_flush(ft, ft_i, na, ring_nr); } if (frags > 1) { - D("truncate incomplete fragment at %d (%d frags)", ft_i, frags); - // ft_i > 0, ft[ft_i-1].flags has NS_MOREFRAG - ft[ft_i - 1].ft_frags &= ~NS_MOREFRAG; - ft[ft_i - frags].ft_frags = frags - 1; + /* Here ft_i > 0, ft[ft_i-1].flags has NS_MOREFRAG, and we + * have to fix frags count. */ + frags--; + ft[ft_i - 1].ft_flags &= ~NS_MOREFRAG; + ft[ft_i - frags].ft_frags = frags; + D("Truncate incomplete fragment at %d (%d frags)", ft_i, frags); } if (ft_i) ft_i = nm_bdg_flush(ft, ft_i, na, ring_nr); @@ -1157,6 +1487,8 @@ netmap_vp_reg(struct netmap_adapter *na, int onoff) { struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter*)na; + enum txrx t; + int i; /* persistent ports may be put in netmap mode * before being attached to a bridge @@ -1164,12 +1496,30 @@ netmap_vp_reg(struct netmap_adapter *na, int onoff) if (vpna->na_bdg) BDG_WLOCK(vpna->na_bdg); if (onoff) { - na->na_flags |= NAF_NETMAP_ON; + for_rx_tx(t) { + for (i = 0; i < nma_get_nrings(na, t) + 1; i++) { + struct netmap_kring *kring = &NMR(na, t)[i]; + + if (nm_kring_pending_on(kring)) + kring->nr_mode = NKR_NETMAP_ON; + } + } + if (na->active_fds == 0) + na->na_flags |= NAF_NETMAP_ON; /* XXX on FreeBSD, persistent VALE ports should also * toggle IFCAP_NETMAP in na->ifp (2014-03-16) */ } else { - na->na_flags &= ~NAF_NETMAP_ON; + if (na->active_fds == 0) + na->na_flags &= ~NAF_NETMAP_ON; + for_rx_tx(t) { + for (i = 0; i < nma_get_nrings(na, t) + 1; i++) { + struct netmap_kring *kring = &NMR(na, t)[i]; + + if (nm_kring_pending_off(kring)) + kring->nr_mode = NKR_NETMAP_OFF; + } + } } if (vpna->na_bdg) BDG_WUNLOCK(vpna->na_bdg); @@ -1193,13 +1543,14 @@ netmap_bdg_learning(struct nm_bdg_fwd *ft, uint8_t *dst_ring, uint32_t sh, dh; u_int dst, mysrc = na->bdg_port; uint64_t smac, dmac; + uint8_t indbuf[12]; /* safety check, unfortunately we have many cases */ - if (buf_len >= 14 + na->virt_hdr_len) { + if (buf_len 
>= 14 + na->up.virt_hdr_len) { /* virthdr + mac_hdr in the same slot */ - buf += na->virt_hdr_len; - buf_len -= na->virt_hdr_len; - } else if (buf_len == na->virt_hdr_len && ft->ft_flags & NS_MOREFRAG) { + buf += na->up.virt_hdr_len; + buf_len -= na->up.virt_hdr_len; + } else if (buf_len == na->up.virt_hdr_len && ft->ft_flags & NS_MOREFRAG) { /* only header in first fragment */ ft++; buf = ft->ft_buf; @@ -1208,6 +1559,14 @@ netmap_bdg_learning(struct nm_bdg_fwd *ft, uint8_t *dst_ring, RD(5, "invalid buf format, length %d", buf_len); return NM_BDG_NOPORT; } + + if (ft->ft_flags & NS_INDIRECT) { + if (copyin(buf, indbuf, sizeof(indbuf))) { + return NM_BDG_NOPORT; + } + buf = indbuf; + } + dmac = le64toh(*(uint64_t *)(buf)) & 0xffffffffffff; smac = le64toh(*(uint64_t *)(buf + 4)); smac >>= 16; @@ -1321,7 +1680,7 @@ nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n, struct netmap_vp_adapter *na, struct nm_bdg_q *dst_ents, *brddst; uint16_t num_dsts = 0, *dsts; struct nm_bridge *b = na->na_bdg; - u_int i, j, me = na->bdg_port; + u_int i, me = na->bdg_port; /* * The work area (pointed by ft) is followed by an array of @@ -1341,7 +1700,7 @@ nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n, struct netmap_vp_adapter *na, ND("slot %d frags %d", i, ft[i].ft_frags); /* Drop the packet if the virtio-net header is not into the first fragment nor at the very beginning of the second. 
*/ - if (unlikely(na->virt_hdr_len > ft[i].ft_len)) + if (unlikely(na->up.virt_hdr_len > ft[i].ft_len)) continue; dst_port = b->bdg_ops.lookup(&ft[i], &dst_ring, na); if (netmap_verbose > 255) @@ -1382,6 +1741,7 @@ nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n, struct netmap_vp_adapter *na, */ brddst = dst_ents + NM_BDG_BROADCAST * NM_BDG_MAXRINGS; if (brddst->bq_head != NM_FT_NULL) { + u_int j; for (j = 0; likely(j < b->bdg_active_ports); j++) { uint16_t d_i; i = b->bdg_port_index[j]; @@ -1441,8 +1801,9 @@ nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n, struct netmap_vp_adapter *na, */ needed = d->bq_len + brddst->bq_len; - if (unlikely(dst_na->virt_hdr_len != na->virt_hdr_len)) { - RD(3, "virt_hdr_mismatch, src %d dst %d", na->virt_hdr_len, dst_na->virt_hdr_len); + if (unlikely(dst_na->up.virt_hdr_len != na->up.virt_hdr_len)) { + RD(3, "virt_hdr_mismatch, src %d dst %d", na->up.virt_hdr_len, + dst_na->up.virt_hdr_len); /* There is a virtio-net header/offloadings mismatch between * source and destination. The slower mismatch datapath will * be used to cope with all the mismatches. @@ -1803,7 +2164,6 @@ netmap_vp_create(struct nmreq *nmr, struct ifnet *ifp, struct netmap_vp_adapter nm_bound_var(&nmr->nr_arg3, 0, 0, 128*NM_BDG_MAXSLOTS, NULL); na->num_rx_desc = nmr->nr_rx_slots; - vpna->virt_hdr_len = 0; vpna->mfs = 1514; vpna->last_smac = ~0llu; /*if (vpna->mfs > netmap_buf_size) TODO netmap_buf_size is zero?? @@ -1880,19 +2240,17 @@ netmap_bwrap_dtor(struct netmap_adapter *na) { struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter*)na; struct netmap_adapter *hwna = bna->hwna; + struct nm_bridge *b = bna->up.na_bdg, + *bh = bna->host.na_bdg; + + if (b) { + netmap_bdg_detach_common(b, bna->up.bdg_port, + (bh ? bna->host.bdg_port : -1)); + } ND("na %p", na); - /* drop reference to hwna->ifp. 
- * If we don't do this, netmap_detach_common(na) - * will think it has set NA(na->ifp) to NULL - */ na->ifp = NULL; - /* for safety, also drop the possible reference - * in the hostna - */ bna->host.up.ifp = NULL; - - hwna->nm_mem = bna->save_nmd; hwna->na_private = NULL; hwna->na_vp = hwna->na_hostvp = NULL; hwna->na_flags &= ~NAF_BUSY; @@ -1916,7 +2274,8 @@ netmap_bwrap_dtor(struct netmap_adapter *na) * (part as a receive ring, part as a transmit ring). * * callback that overwrites the hwna notify callback. - * Packets come from the outside or from the host stack and are put on an hwna rx ring. + * Packets come from the outside or from the host stack and are put on an + * hwna rx ring. * The bridge wrapper then sends the packets through the bridge. */ static int @@ -1927,19 +2286,18 @@ netmap_bwrap_intr_notify(struct netmap_kring *kring, int flags) struct netmap_kring *bkring; struct netmap_vp_adapter *vpna = &bna->up; u_int ring_nr = kring->ring_id; - int error = 0; + int ret = NM_IRQ_COMPLETED; + int error; if (netmap_verbose) D("%s %s 0x%x", na->name, kring->name, flags); - if (!nm_netmap_on(na)) - return 0; - bkring = &vpna->up.tx_rings[ring_nr]; /* make sure the ring is not disabled */ - if (nm_kr_tryget(kring)) - return 0; + if (nm_kr_tryget(kring, 0 /* can't sleep */, NULL)) { + return EIO; + } if (netmap_verbose) D("%s head %d cur %d tail %d", na->name, @@ -1951,9 +2309,10 @@ netmap_bwrap_intr_notify(struct netmap_kring *kring, int flags) error = kring->nm_sync(kring, 0); if (error) goto put_out; - if (kring->nr_hwcur == kring->nr_hwtail && netmap_verbose) { - D("how strange, interrupt with no packets on %s", - na->name); + if (kring->nr_hwcur == kring->nr_hwtail) { + if (netmap_verbose) + D("how strange, interrupt with no packets on %s", + na->name); goto put_out; } @@ -1970,28 +2329,32 @@ netmap_bwrap_intr_notify(struct netmap_kring *kring, int flags) /* another call to actually release the buffers */ error = kring->nm_sync(kring, 0); + /* The second 
rxsync may have further advanced hwtail. If this happens, + * return NM_IRQ_RESCHED, otherwise just return NM_IRQ_COMPLETED. */ + if (kring->rcur != kring->nr_hwtail) { + ret = NM_IRQ_RESCHED; + } put_out: nm_kr_put(kring); - return error; + + return error ? error : ret; } /* nm_register callback for bwrap */ static int -netmap_bwrap_register(struct netmap_adapter *na, int onoff) +netmap_bwrap_reg(struct netmap_adapter *na, int onoff) { struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter *)na; struct netmap_adapter *hwna = bna->hwna; struct netmap_vp_adapter *hostna = &bna->host; - int error; + int error, i; enum txrx t; ND("%s %s", na->name, onoff ? "on" : "off"); if (onoff) { - int i; - /* netmap_do_regif has been called on the bwrap na. * We need to pass the information about the * memory allocator down to the hwna before @@ -2010,16 +2373,32 @@ netmap_bwrap_register(struct netmap_adapter *na, int onoff) /* cross-link the netmap rings * The original number of rings comes from hwna, * rx rings on one side equals tx rings on the other. - * We need to do this now, after the initialization - * of the kring->ring pointers */ for_rx_tx(t) { - enum txrx r= nm_txrx_swap(t); /* swap NR_TX <-> NR_RX */ - for (i = 0; i < nma_get_nrings(na, r) + 1; i++) { - NMR(hwna, t)[i].nkr_num_slots = NMR(na, r)[i].nkr_num_slots; - NMR(hwna, t)[i].ring = NMR(na, r)[i].ring; + enum txrx r = nm_txrx_swap(t); /* swap NR_TX <-> NR_RX */ + for (i = 0; i < nma_get_nrings(hwna, r) + 1; i++) { + NMR(hwna, r)[i].ring = NMR(na, t)[i].ring; } } + + if (na->na_flags & NAF_HOST_RINGS) { + struct netmap_adapter *hna = &hostna->up; + /* the hostna rings are the host rings of the bwrap. 
+ * The corresponding krings must point back to the + * hostna + */ + hna->tx_rings = &na->tx_rings[na->num_tx_rings]; + hna->tx_rings[0].na = hna; + hna->rx_rings = &na->rx_rings[na->num_rx_rings]; + hna->rx_rings[0].na = hna; + } + } + + /* pass down the pending ring state information */ + for_rx_tx(t) { + for (i = 0; i < nma_get_nrings(na, t) + 1; i++) + NMR(hwna, t)[i].nr_pending_mode = + NMR(na, t)[i].nr_pending_mode; } /* forward the request to the hwna */ @@ -2027,6 +2406,13 @@ netmap_bwrap_register(struct netmap_adapter *na, int onoff) if (error) return error; + /* copy up the current ring state information */ + for_rx_tx(t) { + for (i = 0; i < nma_get_nrings(na, t) + 1; i++) + NMR(na, t)[i].nr_mode = + NMR(hwna, t)[i].nr_mode; + } + /* impersonate a netmap_vp_adapter */ netmap_vp_reg(na, onoff); if (hostna->na_bdg) @@ -2046,8 +2432,14 @@ netmap_bwrap_register(struct netmap_adapter *na, int onoff) /* also intercept the host ring notify */ hwna->rx_rings[i].nm_notify = netmap_bwrap_intr_notify; } + if (na->active_fds == 0) + na->na_flags |= NAF_NETMAP_ON; } else { u_int i; + + if (na->active_fds == 0) + na->na_flags &= ~NAF_NETMAP_ON; + /* reset all notify callbacks (including host ring) */ for (i = 0; i <= hwna->num_rx_rings; i++) { hwna->rx_rings[i].nm_notify = hwna->rx_rings[i].save_notify; @@ -2089,8 +2481,8 @@ netmap_bwrap_krings_create(struct netmap_adapter *na) struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter *)na; struct netmap_adapter *hwna = bna->hwna; - struct netmap_adapter *hostna = &bna->host.up; - int error; + int i, error = 0; + enum txrx t; ND("%s", na->name); @@ -2102,26 +2494,23 @@ netmap_bwrap_krings_create(struct netmap_adapter *na) /* also create the hwna krings */ error = hwna->nm_krings_create(hwna); if (error) { - netmap_vp_krings_delete(na); - return error; + goto err_del_vp_rings; } - /* the connection between the bwrap krings and the hwna krings - * will be perfomed later, in the nm_register callback, since - * now 
the kring->ring pointers have not been initialized yet - */ - if (na->na_flags & NAF_HOST_RINGS) { - /* the hostna rings are the host rings of the bwrap. - * The corresponding krings must point back to the - * hostna - */ - hostna->tx_rings = &na->tx_rings[na->num_tx_rings]; - hostna->tx_rings[0].na = hostna; - hostna->rx_rings = &na->rx_rings[na->num_rx_rings]; - hostna->rx_rings[0].na = hostna; + /* get each ring slot number from the corresponding hwna ring */ + for_rx_tx(t) { + enum txrx r = nm_txrx_swap(t); /* swap NR_TX <-> NR_RX */ + for (i = 0; i < nma_get_nrings(hwna, r) + 1; i++) { + NMR(na, t)[i].nkr_num_slots = NMR(hwna, r)[i].nkr_num_slots; + } } return 0; + +err_del_vp_rings: + netmap_vp_krings_delete(na); + + return error; } @@ -2149,7 +2538,7 @@ netmap_bwrap_notify(struct netmap_kring *kring, int flags) u_int ring_n = kring->ring_id; u_int lim = kring->nkr_num_slots - 1; struct netmap_kring *hw_kring; - int error = 0; + int error; ND("%s: na %s hwna %s", (kring ? kring->name : "NULL!"), @@ -2157,11 +2546,10 @@ netmap_bwrap_notify(struct netmap_kring *kring, int flags) (hwna ? 
hwna->name : "NULL!")); hw_kring = &hwna->tx_rings[ring_n]; - if (nm_kr_tryget(hw_kring)) - return 0; + if (nm_kr_tryget(hw_kring, 0, NULL)) { + return ENXIO; + } - if (!nm_netmap_on(hwna)) - return 0; /* first step: simulate a user wakeup on the rx ring */ netmap_vp_rxsync(kring, flags); ND("%s[%d] PRE rx(c%3d t%3d l%3d) ring(h%3d c%3d t%3d) tx(c%3d ht%3d t%3d)", @@ -2175,7 +2563,7 @@ netmap_bwrap_notify(struct netmap_kring *kring, int flags) hw_kring->rhead = hw_kring->rcur = kring->nr_hwtail; error = hw_kring->nm_sync(hw_kring, flags); if (error) - goto out; + goto put_out; /* third step: now we are back the rx ring */ /* claim ownership on all hw owned bufs */ @@ -2188,9 +2576,10 @@ netmap_bwrap_notify(struct netmap_kring *kring, int flags) kring->nr_hwcur, kring->nr_hwtail, kring->nkr_hwlease, ring->head, ring->cur, ring->tail, hw_kring->nr_hwcur, hw_kring->nr_hwtail, hw_kring->rtail); -out: +put_out: nm_kr_put(hw_kring); - return error; + + return error ? error : NM_IRQ_COMPLETED; } @@ -2217,44 +2606,23 @@ netmap_bwrap_bdg_ctl(struct netmap_adapter *na, struct nmreq *nmr, int attach) /* nothing to do */ return 0; } - npriv = malloc(sizeof(*npriv), M_DEVBUF, M_NOWAIT|M_ZERO); + npriv = netmap_priv_new(); if (npriv == NULL) return ENOMEM; - error = netmap_do_regif(npriv, na, nmr->nr_ringid, nmr->nr_flags); + npriv->np_ifp = na->ifp; /* let the priv destructor release the ref */ + error = netmap_do_regif(npriv, na, 0, NR_REG_NIC_SW); if (error) { - bzero(npriv, sizeof(*npriv)); - free(npriv, M_DEVBUF); + netmap_priv_delete(npriv); return error; } bna->na_kpriv = npriv; na->na_flags |= NAF_BUSY; } else { - int last_instance; - if (na->active_fds == 0) /* not registered */ return EINVAL; - last_instance = netmap_dtor_locked(bna->na_kpriv); - if (!last_instance) { - D("--- error, trying to detach an entry with active mmaps"); - error = EINVAL; - } else { - struct nm_bridge *b = bna->up.na_bdg, - *bh = bna->host.na_bdg; - npriv = bna->na_kpriv; - bna->na_kpriv = 
NULL; - D("deleting priv"); - - bzero(npriv, sizeof(*npriv)); - free(npriv, M_DEVBUF); - if (b) { - /* XXX the bwrap dtor should take care - * of this (2014-06-16) - */ - netmap_bdg_detach_common(b, bna->up.bdg_port, - (bh ? bna->host.bdg_port : -1)); - } - na->na_flags &= ~NAF_BUSY; - } + netmap_priv_delete(bna->na_kpriv); + bna->na_kpriv = NULL; + na->na_flags &= ~NAF_BUSY; } return error; @@ -2282,6 +2650,8 @@ netmap_bwrap_attach(const char *nr_name, struct netmap_adapter *hwna) } na = &bna->up.up; + /* make bwrap ifp point to the real ifp */ + na->ifp = hwna->ifp; na->na_private = bna; strncpy(na->name, nr_name, sizeof(na->name)); /* fill the ring data for the bwrap adapter with rx/tx meanings @@ -2294,7 +2664,7 @@ netmap_bwrap_attach(const char *nr_name, struct netmap_adapter *hwna) nma_set_ndesc(na, t, nma_get_ndesc(hwna, r)); } na->nm_dtor = netmap_bwrap_dtor; - na->nm_register = netmap_bwrap_register; + na->nm_register = netmap_bwrap_reg; // na->nm_txsync = netmap_bwrap_txsync; // na->nm_rxsync = netmap_bwrap_rxsync; na->nm_config = netmap_bwrap_config; @@ -2303,13 +2673,8 @@ netmap_bwrap_attach(const char *nr_name, struct netmap_adapter *hwna) na->nm_notify = netmap_bwrap_notify; na->nm_bdg_ctl = netmap_bwrap_bdg_ctl; na->pdev = hwna->pdev; - na->nm_mem = netmap_mem_private_new(na->name, - na->num_tx_rings, na->num_tx_desc, - na->num_rx_rings, na->num_rx_desc, - 0, 0, &error); - na->na_flags |= NAF_MEM_OWNER; - if (na->nm_mem == NULL) - goto err_put; + na->nm_mem = hwna->nm_mem; + na->virt_hdr_len = hwna->virt_hdr_len; bna->up.retry = 1; /* XXX maybe this should depend on the hwna */ bna->hwna = hwna; @@ -2349,24 +2714,10 @@ netmap_bwrap_attach(const char *nr_name, struct netmap_adapter *hwna) if (error) { goto err_free; } - /* make bwrap ifp point to the real ifp - * NOTE: netmap_attach_common() interprets a non-NULL na->ifp - * as a request to make the ifp point to the na. 
Since we - * do not want to change the na already pointed to by hwna->ifp, - * the following assignment has to be delayed until now - */ - na->ifp = hwna->ifp; hwna->na_flags |= NAF_BUSY; - /* make hwna point to the allocator we are actually using, - * so that monitors will be able to find it - */ - bna->save_nmd = hwna->nm_mem; - hwna->nm_mem = na->nm_mem; return 0; err_free: - netmap_mem_delete(na->nm_mem); -err_put: hwna->na_vp = hwna->na_hostvp = NULL; netmap_adapter_put(hwna); free(bna, M_DEVBUF); |
